Coverage Report

Created: 2024-02-25 06:14

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#ifdef LIBXML_CATALOG_ENABLED
66
#include <libxml/catalog.h>
67
#endif
68
69
#include "private/buf.h"
70
#include "private/dict.h"
71
#include "private/entities.h"
72
#include "private/error.h"
73
#include "private/html.h"
74
#include "private/io.h"
75
#include "private/parser.h"
76
77
6.96M
#define NS_INDEX_EMPTY  INT_MAX
78
3.58M
#define NS_INDEX_XML    (INT_MAX - 1)
79
2.02M
#define URI_HASH_EMPTY  0xD943A04E
80
124k
#define URI_HASH_XML    0xF0451F02
81
82
struct _xmlStartTag {
83
    const xmlChar *prefix;
84
    const xmlChar *URI;
85
    int line;
86
    int nsNr;
87
};
88
89
/*
 * Extra per-namespace bookkeeping.
 * NOTE(review): users of this struct are outside this view; the per-field
 * notes are inferred from the names only - confirm.
 */
typedef struct {
    void *saxData;              /* opaque pointer, presumably SAX user data */
    unsigned prefixHashValue;   /* presumably the hash of the prefix */
    unsigned uriHashValue;      /* presumably the hash of the URI */
    unsigned elementId;         /* presumably the id of the declaring element */
    int oldIndex;               /* presumably an index of a shadowed entry */
} xmlParserNsExtra;
96
97
/*
 * One slot of a namespace hash table: a hash value paired with an index.
 * NOTE(review): what the index refers to is not visible here - confirm.
 */
typedef struct {
    unsigned hashValue;
    int index;
} xmlParserNsBucket;
101
102
struct _xmlParserNsData {
103
    xmlParserNsExtra *extra;
104
105
    unsigned hashSize;
106
    unsigned hashElems;
107
    xmlParserNsBucket *hash;
108
109
    unsigned elementId;
110
    int defaultNsIndex;
111
    int minNsIndex;
112
};
113
114
/*
 * One slot of an attribute hash table; it stores only an index.
 * NOTE(review): what the index refers to is not visible here - confirm.
 */
struct _xmlAttrHashBucket {
    int index;
};
117
118
static int
119
xmlParseElementStart(xmlParserCtxtPtr ctxt);
120
121
static void
122
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
123
124
static xmlEntityPtr
125
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
126
127
static const xmlChar *
128
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
129
130
/************************************************************************
131
 *                  *
132
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
133
 *                  *
134
 ************************************************************************/
135
136
#define XML_PARSER_BIG_ENTITY 1000
137
#define XML_PARSER_LOT_ENTITY 5000
138
139
/*
140
 * Constants for protection against abusive entity expansion
141
 * ("billion laughs").
142
 */
143
144
/*
145
 * A certain amount of entity expansion which is always allowed.
146
 */
147
4.06M
#define XML_PARSER_ALLOWED_EXPANSION 1000000
148
149
/*
150
 * Fixed cost for each entity reference. This crudely models processing time
151
 * as well to protect, for example, against exponential expansion of empty
152
 * or very short entities.
153
 */
154
4.08M
#define XML_ENT_FIXED_COST 20
155
156
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser itself but a safety
 * boundary. It can be disabled with the XML_PARSE_HUGE parser option.
 */
const unsigned int xmlParserMaxDepth = 256;
165
166
167
168
134M
#define XML_PARSER_BIG_BUFFER_SIZE 300
169
5.66M
#define XML_PARSER_BUFFER_SIZE 100
170
301k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
171
172
/**
173
 * XML_PARSER_CHUNK_SIZE
174
 *
175
 * When calling GROW that's the minimal amount of data
176
 * the parser expects to have received. It is not a hard
177
 * limit but an optimization when reading strings like Names
178
 * It is not strictly needed as long as the input's available characters
179
 * are followed by 0, which should be provided by the I/O level
180
 */
181
#define XML_PARSER_CHUNK_SIZE 100
182
183
/**
184
 * xmlParserVersion:
185
 *
186
 * Constant string describing the internal version of the library
187
 */
188
const char *const
189
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
190
191
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
200
201
202
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
203
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
204
                                              const xmlChar **str);
205
206
static void
207
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
208
209
static int
210
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
211
212
/************************************************************************
213
 *                  *
214
 *    Some factorized error routines        *
215
 *                  *
216
 ************************************************************************/
217
218
static void
219
6.07k
xmlErrMemory(xmlParserCtxtPtr ctxt) {
220
6.07k
    xmlCtxtErrMemory(ctxt);
221
6.07k
}
222
223
/**
224
 * xmlErrAttributeDup:
225
 * @ctxt:  an XML parser context
226
 * @prefix:  the attribute prefix
227
 * @localname:  the attribute localname
228
 *
229
 * Handle a redefinition of attribute error
230
 */
231
static void
232
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
233
                   const xmlChar * localname)
234
66.4k
{
235
66.4k
    if (prefix == NULL)
236
41.2k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
237
41.2k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
238
41.2k
                   "Attribute %s redefined\n", localname);
239
25.2k
    else
240
25.2k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
25.2k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
242
25.2k
                   "Attribute %s:%s redefined\n", prefix, localname);
243
66.4k
}
244
245
/**
246
 * xmlFatalErrMsg:
247
 * @ctxt:  an XML parser context
248
 * @error:  the error number
249
 * @msg:  the error message
250
 *
251
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
252
 */
253
static void LIBXML_ATTR_FORMAT(3,0)
254
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
255
               const char *msg)
256
78.7M
{
257
78.7M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
258
78.7M
               NULL, NULL, NULL, 0, "%s", msg);
259
78.7M
}
260
261
/**
262
 * xmlWarningMsg:
263
 * @ctxt:  an XML parser context
264
 * @error:  the error number
265
 * @msg:  the error message
266
 * @str1:  extra data
267
 * @str2:  extra data
268
 *
269
 * Handle a warning.
270
 */
271
void LIBXML_ATTR_FORMAT(3,0)
272
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
273
              const char *msg, const xmlChar *str1, const xmlChar *str2)
274
311k
{
275
311k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
276
311k
               str1, str2, NULL, 0, msg, str1, str2);
277
311k
}
278
279
/**
280
 * xmlValidityError:
281
 * @ctxt:  an XML parser context
282
 * @error:  the error number
283
 * @msg:  the error message
284
 * @str1:  extra data
285
 *
286
 * Handle a validity error.
287
 */
288
static void LIBXML_ATTR_FORMAT(3,0)
289
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
290
              const char *msg, const xmlChar *str1, const xmlChar *str2)
291
176k
{
292
176k
    ctxt->valid = 0;
293
294
176k
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
295
176k
               str1, str2, NULL, 0, msg, str1, str2);
296
176k
}
297
298
/**
299
 * xmlFatalErrMsgInt:
300
 * @ctxt:  an XML parser context
301
 * @error:  the error number
302
 * @msg:  the error message
303
 * @val:  an integer value
304
 *
305
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
306
 */
307
static void LIBXML_ATTR_FORMAT(3,0)
308
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
309
                  const char *msg, int val)
310
7.21M
{
311
7.21M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
312
7.21M
               NULL, NULL, NULL, val, msg, val);
313
7.21M
}
314
315
/**
316
 * xmlFatalErrMsgStrIntStr:
317
 * @ctxt:  an XML parser context
318
 * @error:  the error number
319
 * @msg:  the error message
320
 * @str1:  an string info
321
 * @val:  an integer value
322
 * @str2:  an string info
323
 *
324
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
325
 */
326
static void LIBXML_ATTR_FORMAT(3,0)
327
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
328
                  const char *msg, const xmlChar *str1, int val,
329
      const xmlChar *str2)
330
1.09M
{
331
1.09M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
332
1.09M
               str1, str2, NULL, val, msg, str1, val, str2);
333
1.09M
}
334
335
/**
336
 * xmlFatalErrMsgStr:
337
 * @ctxt:  an XML parser context
338
 * @error:  the error number
339
 * @msg:  the error message
340
 * @val:  a string value
341
 *
342
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
343
 */
344
static void LIBXML_ATTR_FORMAT(3,0)
345
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
346
                  const char *msg, const xmlChar * val)
347
2.37M
{
348
2.37M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
349
2.37M
               val, NULL, NULL, 0, msg, val);
350
2.37M
}
351
352
/**
353
 * xmlErrMsgStr:
354
 * @ctxt:  an XML parser context
355
 * @error:  the error number
356
 * @msg:  the error message
357
 * @val:  a string value
358
 *
359
 * Handle a non fatal parser error
360
 */
361
static void LIBXML_ATTR_FORMAT(3,0)
362
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363
                  const char *msg, const xmlChar * val)
364
41.8k
{
365
41.8k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366
41.8k
               val, NULL, NULL, 0, msg, val);
367
41.8k
}
368
369
/**
370
 * xmlNsErr:
371
 * @ctxt:  an XML parser context
372
 * @error:  the error number
373
 * @msg:  the message
374
 * @info1:  extra information string
375
 * @info2:  extra information string
376
 *
377
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378
 */
379
static void LIBXML_ATTR_FORMAT(3,0)
380
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381
         const char *msg,
382
         const xmlChar * info1, const xmlChar * info2,
383
         const xmlChar * info3)
384
855k
{
385
855k
    ctxt->nsWellFormed = 0;
386
387
855k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388
855k
               info1, info2, info3, 0, msg, info1, info2, info3);
389
855k
}
390
391
/**
392
 * xmlNsWarn
393
 * @ctxt:  an XML parser context
394
 * @error:  the error number
395
 * @msg:  the message
396
 * @info1:  extra information string
397
 * @info2:  extra information string
398
 *
399
 * Handle a namespace warning error
400
 */
401
static void LIBXML_ATTR_FORMAT(3,0)
402
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403
         const char *msg,
404
         const xmlChar * info1, const xmlChar * info2,
405
         const xmlChar * info3)
406
106k
{
407
106k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408
106k
               info1, info2, info3, 0, msg, info1, info2, info3);
409
106k
}
410
411
/**
 * xmlSaturatedAdd:
 * @dst:  the accumulator, updated in place
 * @val:  the amount to add
 *
 * Add @val to *@dst without wrapping around: if the sum would exceed
 * ULONG_MAX, *@dst is set to ULONG_MAX instead.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* How much can still be added before the accumulator overflows. */
    unsigned long headroom = ULONG_MAX - *dst;

    *dst = (val > headroom) ? ULONG_MAX : (*dst + val);
}
418
419
/**
 * xmlSaturatedAddSizeT:
 * @dst:  the accumulator, updated in place
 * @val:  the amount to add, as a size
 *
 * Add a size-typed amount to *@dst without wrapping around: if the sum
 * would exceed ULONG_MAX, *@dst is set to ULONG_MAX instead.
 *
 * @val is taken as size_t so that, on platforms where size_t is wider
 * than unsigned long, an amount above ULONG_MAX saturates the accumulator
 * instead of being truncated at the call site before the check.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    /*
     * The comparison is done in the wider of the two types, so a @val
     * above ULONG_MAX always takes the saturating branch.
     */
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;    /* fits: val <= ULONG_MAX - *dst */
}
426
427
/**
428
 * xmlParserEntityCheck:
429
 * @ctxt:  parser context
430
 * @extra:  sum of unexpanded entity sizes
431
 *
432
 * Check for non-linear entity expansion behaviour.
433
 *
434
 * In some cases like xmlExpandEntityInAttValue, this function is called
435
 * for each, possibly nested entity and its unexpanded content length.
436
 *
437
 * In other cases like xmlParseReference, it's only called for each
438
 * top-level entity with its unexpanded content length plus the sum of
439
 * the unexpanded content lengths (plus fixed cost) of all nested
440
 * entities.
441
 *
442
 * Summing the unexpanded lengths also adds the length of the reference.
443
 * This is by design. Taking the length of the entity name into account
444
 * discourages attacks that try to waste CPU time with abusively long
445
 * entity names. See test/recurse/lol6.xml for example. Each call also
446
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
447
 * short entities.
448
 *
449
 * Returns 1 on error, 0 on success.
450
 */
451
static int
452
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
453
4.45M
{
454
4.45M
    unsigned long consumed;
455
4.45M
    unsigned long *expandedSize;
456
4.45M
    xmlParserInputPtr input = ctxt->input;
457
4.45M
    xmlEntityPtr entity = input->entity;
458
459
4.45M
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
460
388k
        return(0);
461
462
    /*
463
     * Compute total consumed bytes so far, including input streams of
464
     * external entities.
465
     */
466
4.06M
    consumed = input->consumed;
467
4.06M
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
468
4.06M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
469
470
4.06M
    if (entity)
471
76.6k
        expandedSize = &entity->expandedSize;
472
3.99M
    else
473
3.99M
        expandedSize = &ctxt->sizeentcopy;
474
475
    /*
476
     * Add extra cost and some fixed cost.
477
     */
478
4.06M
    xmlSaturatedAdd(expandedSize, extra);
479
4.06M
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
480
481
    /*
482
     * It's important to always use saturation arithmetic when tracking
483
     * entity sizes to make the size checks reliable. If "sizeentcopy"
484
     * overflows, we have to abort.
485
     */
486
4.06M
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
487
4.06M
        ((*expandedSize >= ULONG_MAX) ||
488
90.7k
         (*expandedSize / ctxt->maxAmpl > consumed))) {
489
3.32k
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
490
3.32k
                       "Maximum entity amplification factor exceeded, see "
491
3.32k
                       "xmlCtxtSetMaxAmplification.\n");
492
3.32k
        xmlHaltParser(ctxt);
493
3.32k
        return(1);
494
3.32k
    }
495
496
4.06M
    return(0);
497
4.06M
}
498
499
/************************************************************************
500
 *                  *
501
 *    Library wide options          *
502
 *                  *
503
 ************************************************************************/
504
505
/**
506
  * xmlHasFeature:
507
  * @feature: the feature to be examined
508
  *
509
  * Examines if the library has been compiled with a given feature.
510
  *
511
  * Returns a non-zero value if the feature exist, otherwise zero.
512
  * Returns zero (0) if the feature does not exist or an unknown
513
  * unknown feature is requested, non-zero otherwise.
514
  */
515
int
516
xmlHasFeature(xmlFeature feature)
517
0
{
518
0
    switch (feature) {
519
0
  case XML_WITH_THREAD:
520
0
#ifdef LIBXML_THREAD_ENABLED
521
0
      return(1);
522
#else
523
      return(0);
524
#endif
525
0
        case XML_WITH_TREE:
526
0
#ifdef LIBXML_TREE_ENABLED
527
0
            return(1);
528
#else
529
            return(0);
530
#endif
531
0
        case XML_WITH_OUTPUT:
532
0
#ifdef LIBXML_OUTPUT_ENABLED
533
0
            return(1);
534
#else
535
            return(0);
536
#endif
537
0
        case XML_WITH_PUSH:
538
0
#ifdef LIBXML_PUSH_ENABLED
539
0
            return(1);
540
#else
541
            return(0);
542
#endif
543
0
        case XML_WITH_READER:
544
0
#ifdef LIBXML_READER_ENABLED
545
0
            return(1);
546
#else
547
            return(0);
548
#endif
549
0
        case XML_WITH_PATTERN:
550
0
#ifdef LIBXML_PATTERN_ENABLED
551
0
            return(1);
552
#else
553
            return(0);
554
#endif
555
0
        case XML_WITH_WRITER:
556
0
#ifdef LIBXML_WRITER_ENABLED
557
0
            return(1);
558
#else
559
            return(0);
560
#endif
561
0
        case XML_WITH_SAX1:
562
0
#ifdef LIBXML_SAX1_ENABLED
563
0
            return(1);
564
#else
565
            return(0);
566
#endif
567
0
        case XML_WITH_FTP:
568
#ifdef LIBXML_FTP_ENABLED
569
            return(1);
570
#else
571
0
            return(0);
572
0
#endif
573
0
        case XML_WITH_HTTP:
574
#ifdef LIBXML_HTTP_ENABLED
575
            return(1);
576
#else
577
0
            return(0);
578
0
#endif
579
0
        case XML_WITH_VALID:
580
0
#ifdef LIBXML_VALID_ENABLED
581
0
            return(1);
582
#else
583
            return(0);
584
#endif
585
0
        case XML_WITH_HTML:
586
0
#ifdef LIBXML_HTML_ENABLED
587
0
            return(1);
588
#else
589
            return(0);
590
#endif
591
0
        case XML_WITH_LEGACY:
592
#ifdef LIBXML_LEGACY_ENABLED
593
            return(1);
594
#else
595
0
            return(0);
596
0
#endif
597
0
        case XML_WITH_C14N:
598
0
#ifdef LIBXML_C14N_ENABLED
599
0
            return(1);
600
#else
601
            return(0);
602
#endif
603
0
        case XML_WITH_CATALOG:
604
0
#ifdef LIBXML_CATALOG_ENABLED
605
0
            return(1);
606
#else
607
            return(0);
608
#endif
609
0
        case XML_WITH_XPATH:
610
0
#ifdef LIBXML_XPATH_ENABLED
611
0
            return(1);
612
#else
613
            return(0);
614
#endif
615
0
        case XML_WITH_XPTR:
616
0
#ifdef LIBXML_XPTR_ENABLED
617
0
            return(1);
618
#else
619
            return(0);
620
#endif
621
0
        case XML_WITH_XINCLUDE:
622
0
#ifdef LIBXML_XINCLUDE_ENABLED
623
0
            return(1);
624
#else
625
            return(0);
626
#endif
627
0
        case XML_WITH_ICONV:
628
0
#ifdef LIBXML_ICONV_ENABLED
629
0
            return(1);
630
#else
631
            return(0);
632
#endif
633
0
        case XML_WITH_ISO8859X:
634
0
#ifdef LIBXML_ISO8859X_ENABLED
635
0
            return(1);
636
#else
637
            return(0);
638
#endif
639
0
        case XML_WITH_UNICODE:
640
0
#ifdef LIBXML_UNICODE_ENABLED
641
0
            return(1);
642
#else
643
            return(0);
644
#endif
645
0
        case XML_WITH_REGEXP:
646
0
#ifdef LIBXML_REGEXP_ENABLED
647
0
            return(1);
648
#else
649
            return(0);
650
#endif
651
0
        case XML_WITH_AUTOMATA:
652
0
#ifdef LIBXML_AUTOMATA_ENABLED
653
0
            return(1);
654
#else
655
            return(0);
656
#endif
657
0
        case XML_WITH_EXPR:
658
#ifdef LIBXML_EXPR_ENABLED
659
            return(1);
660
#else
661
0
            return(0);
662
0
#endif
663
0
        case XML_WITH_SCHEMAS:
664
0
#ifdef LIBXML_SCHEMAS_ENABLED
665
0
            return(1);
666
#else
667
            return(0);
668
#endif
669
0
        case XML_WITH_SCHEMATRON:
670
0
#ifdef LIBXML_SCHEMATRON_ENABLED
671
0
            return(1);
672
#else
673
            return(0);
674
#endif
675
0
        case XML_WITH_MODULES:
676
0
#ifdef LIBXML_MODULES_ENABLED
677
0
            return(1);
678
#else
679
            return(0);
680
#endif
681
0
        case XML_WITH_DEBUG:
682
#ifdef LIBXML_DEBUG_ENABLED
683
            return(1);
684
#else
685
0
            return(0);
686
0
#endif
687
0
        case XML_WITH_DEBUG_MEM:
688
#ifdef DEBUG_MEMORY_LOCATION
689
            return(1);
690
#else
691
0
            return(0);
692
0
#endif
693
0
        case XML_WITH_ZLIB:
694
0
#ifdef LIBXML_ZLIB_ENABLED
695
0
            return(1);
696
#else
697
            return(0);
698
#endif
699
0
        case XML_WITH_LZMA:
700
0
#ifdef LIBXML_LZMA_ENABLED
701
0
            return(1);
702
#else
703
            return(0);
704
#endif
705
0
        case XML_WITH_ICU:
706
#ifdef LIBXML_ICU_ENABLED
707
            return(1);
708
#else
709
0
            return(0);
710
0
#endif
711
0
        default:
712
0
      break;
713
0
     }
714
0
     return(0);
715
0
}
716
717
/************************************************************************
718
 *                  *
719
 *      Simple string buffer        *
720
 *                  *
721
 ************************************************************************/
722
723
typedef struct {
724
    xmlChar *mem;
725
    unsigned size;
726
    unsigned cap; /* size < cap */
727
    unsigned max; /* size <= max */
728
    xmlParserErrors code;
729
} xmlSBuf;
730
731
static void
732
2.97M
xmlSBufInit(xmlSBuf *buf, unsigned max) {
733
2.97M
    buf->mem = NULL;
734
2.97M
    buf->size = 0;
735
2.97M
    buf->cap = 0;
736
2.97M
    buf->max = max;
737
2.97M
    buf->code = XML_ERR_OK;
738
2.97M
}
739
740
static int
741
1.15M
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
742
1.15M
    xmlChar *mem;
743
1.15M
    unsigned cap;
744
745
1.15M
    if (len >= UINT_MAX / 2 - buf->size) {
746
0
        buf->code = XML_ERR_RESOURCE_LIMIT;
747
0
        return(-1);
748
0
    }
749
750
1.15M
    cap = (buf->size + len) * 2;
751
1.15M
    if (cap < 240)
752
999k
        cap = 240;
753
754
1.15M
    mem = xmlRealloc(buf->mem, cap);
755
1.15M
    if (mem == NULL) {
756
875
        buf->code = XML_ERR_NO_MEMORY;
757
875
        return(-1);
758
875
    }
759
760
1.15M
    buf->mem = mem;
761
1.15M
    buf->cap = cap;
762
763
1.15M
    return(0);
764
1.15M
}
765
766
static void
767
242M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
768
242M
    if (buf->max - buf->size < len) {
769
0
        buf->code = XML_ERR_RESOURCE_LIMIT;
770
0
        return;
771
0
    }
772
773
242M
    if (buf->cap - buf->size <= len) {
774
1.10M
        if (xmlSBufGrow(buf, len) < 0)
775
826
            return;
776
1.10M
    }
777
778
242M
    if (len > 0)
779
242M
        memcpy(buf->mem + buf->size, str, len);
780
242M
    buf->size += len;
781
242M
}
782
783
static void
784
235M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
785
235M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
786
235M
}
787
788
static void
789
389k
xmlSBufAddChar(xmlSBuf *buf, int c) {
790
389k
    xmlChar *end;
791
792
389k
    if (buf->max - buf->size < 4) {
793
0
        buf->code = XML_ERR_RESOURCE_LIMIT;
794
0
        return;
795
0
    }
796
797
389k
    if (buf->cap - buf->size <= 4) {
798
57.7k
        if (xmlSBufGrow(buf, 4) < 0)
799
49
            return;
800
57.7k
    }
801
802
389k
    end = buf->mem + buf->size;
803
804
389k
    if (c < 0x80) {
805
339k
        *end = (xmlChar) c;
806
339k
        buf->size += 1;
807
339k
    } else {
808
49.5k
        buf->size += xmlCopyCharMultiByte(end, c);
809
49.5k
    }
810
389k
}
811
812
static void
813
198M
xmlSBufAddReplChar(xmlSBuf *buf) {
814
198M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
815
198M
}
816
817
static void
818
937
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
819
937
    if (buf->code == XML_ERR_NO_MEMORY)
820
937
        xmlCtxtErrMemory(ctxt);
821
0
    else
822
0
        xmlFatalErr(ctxt, buf->code, errMsg);
823
937
}
824
825
static xmlChar *
826
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
827
1.07M
              const char *errMsg) {
828
1.07M
    if (buf->mem == NULL) {
829
76.3k
        buf->mem = xmlMalloc(1);
830
76.3k
        if (buf->mem == NULL) {
831
62
            buf->code = XML_ERR_NO_MEMORY;
832
76.3k
        } else {
833
76.3k
            buf->mem[0] = 0;
834
76.3k
        }
835
995k
    } else {
836
995k
        buf->mem[buf->size] = 0;
837
995k
    }
838
839
1.07M
    if (buf->code == XML_ERR_OK) {
840
1.07M
        if (sizeOut != NULL)
841
360k
            *sizeOut = buf->size;
842
1.07M
        return(buf->mem);
843
1.07M
    }
844
845
685
    xmlSBufReportError(buf, ctxt, errMsg);
846
847
685
    xmlFree(buf->mem);
848
849
685
    if (sizeOut != NULL)
850
160
        *sizeOut = 0;
851
685
    return(NULL);
852
1.07M
}
853
854
static void
855
1.83M
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
856
1.83M
    if (buf->code != XML_ERR_OK)
857
252
        xmlSBufReportError(buf, ctxt, errMsg);
858
859
1.83M
    xmlFree(buf->mem);
860
1.83M
}
861
862
static int
863
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
864
320M
                    const char *errMsg) {
865
320M
    int c = str[0];
866
320M
    int c1 = str[1];
867
868
320M
    if ((c1 & 0xC0) != 0x80)
869
69.5M
        goto encoding_error;
870
871
251M
    if (c < 0xE0) {
872
        /* 2-byte sequence */
873
94.5M
        if (c < 0xC2)
874
53.0M
            goto encoding_error;
875
876
41.4M
        return(2);
877
156M
    } else {
878
156M
        int c2 = str[2];
879
880
156M
        if ((c2 & 0xC0) != 0x80)
881
29.1k
            goto encoding_error;
882
883
156M
        if (c < 0xF0) {
884
            /* 3-byte sequence */
885
156M
            if (c == 0xE0) {
886
                /* overlong */
887
26.6M
                if (c1 < 0xA0)
888
2.16k
                    goto encoding_error;
889
129M
            } else if (c == 0xED) {
890
                /* surrogate */
891
1.15M
                if (c1 >= 0xA0)
892
1.45k
                    goto encoding_error;
893
128M
            } else if (c == 0xEF) {
894
                /* U+FFFE and U+FFFF are invalid Chars */
895
19.8M
                if ((c1 == 0xBF) && (c2 >= 0xBE))
896
3.08k
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
897
19.8M
            }
898
899
156M
            return(3);
900
156M
        } else {
901
            /* 4-byte sequence */
902
283k
            if ((str[3] & 0xC0) != 0x80)
903
8.41k
                goto encoding_error;
904
275k
            if (c == 0xF0) {
905
                /* overlong */
906
27.4k
                if (c1 < 0x90)
907
16.5k
                    goto encoding_error;
908
247k
            } else if (c >= 0xF4) {
909
                /* greater than 0x10FFFF */
910
41.2k
                if ((c > 0xF4) || (c1 >= 0x90))
911
39.6k
                    goto encoding_error;
912
41.2k
            }
913
914
218k
            return(4);
915
275k
        }
916
156M
    }
917
918
122M
encoding_error:
919
    /* Only report the first error */
920
122M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
921
46.1k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
922
46.1k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
923
46.1k
    }
924
925
122M
    return(0);
926
251M
}
927
928
/************************************************************************
929
 *                  *
930
 *    SAX2 defaulted attributes handling      *
931
 *                  *
932
 ************************************************************************/
933
934
/**
935
 * xmlCtxtInitializeLate:
936
 * @ctxt:  an XML parser context
937
 *
938
 * Final initialization of the parser context before starting to parse.
939
 *
940
 * This accounts for users modifying struct members of parser context
941
 * directly.
942
 */
943
static void
944
225k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
945
225k
    xmlSAXHandlerPtr sax;
946
947
    /* Avoid unused variable warning if features are disabled. */
948
225k
    (void) sax;
949
950
    /*
951
     * Changing the SAX struct directly is still widespread practice
952
     * in internal and external code.
953
     */
954
225k
    if (ctxt == NULL) return;
955
225k
    sax = ctxt->sax;
956
225k
#ifdef LIBXML_SAX1_ENABLED
957
    /*
958
     * Only enable SAX2 if there SAX2 element handlers, except when there
959
     * are no element handlers at all.
960
     */
961
225k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
962
225k
        (sax) &&
963
225k
        (sax->initialized == XML_SAX2_MAGIC) &&
964
225k
        ((sax->startElementNs != NULL) ||
965
191k
         (sax->endElementNs != NULL) ||
966
191k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
967
191k
        ctxt->sax2 = 1;
968
#else
969
    ctxt->sax2 = 1;
970
#endif /* LIBXML_SAX1_ENABLED */
971
972
    /*
973
     * Some users replace the dictionary directly in the context struct.
974
     * We really need an API function to do that cleanly.
975
     */
976
225k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
977
225k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
978
225k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
979
225k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
980
225k
    (ctxt->str_xml_ns == NULL)) {
981
979
        xmlErrMemory(ctxt);
982
979
    }
983
225k
}
984
985
typedef struct {
986
    xmlHashedString prefix;
987
    xmlHashedString name;
988
    xmlHashedString value;
989
    const xmlChar *valueEnd;
990
    int external;
991
    int expandedSize;
992
} xmlDefAttr;
993
994
typedef struct _xmlDefAttrs xmlDefAttrs;
995
typedef xmlDefAttrs *xmlDefAttrsPtr;
996
struct _xmlDefAttrs {
997
    int nbAttrs;  /* number of defaulted attributes on that element */
998
    int maxAttrs;       /* the size of the array */
999
#if __STDC_VERSION__ >= 199901L
1000
    /* Using a C99 flexible array member avoids UBSan errors. */
1001
    xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
1002
#else
1003
    xmlDefAttr attrs[1];
1004
#endif
1005
};
1006
1007
/**
1008
 * xmlAttrNormalizeSpace:
1009
 * @src: the source string
1010
 * @dst: the target string
1011
 *
1012
 * Normalize the space in non CDATA attribute values:
1013
 * If the attribute type is not CDATA, then the XML processor MUST further
1014
 * process the normalized attribute value by discarding any leading and
1015
 * trailing space (#x20) characters, and by replacing sequences of space
1016
 * (#x20) characters by a single space (#x20) character.
1017
 * Note that the size of dst need to be at least src, and if one doesn't need
1018
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1019
 * passing src as dst is just fine.
1020
 *
1021
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1022
 *         is needed.
1023
 */
1024
static xmlChar *
1025
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1026
160k
{
1027
160k
    if ((src == NULL) || (dst == NULL))
1028
0
        return(NULL);
1029
1030
173k
    while (*src == 0x20) src++;
1031
124M
    while (*src != 0) {
1032
124M
  if (*src == 0x20) {
1033
14.9M
      while (*src == 0x20) src++;
1034
121k
      if (*src != 0)
1035
110k
    *dst++ = 0x20;
1036
124M
  } else {
1037
124M
      *dst++ = *src++;
1038
124M
  }
1039
124M
    }
1040
160k
    *dst = 0;
1041
160k
    if (dst == src)
1042
141k
       return(NULL);
1043
18.7k
    return(dst);
1044
160k
}
1045
1046
/**
1047
 * xmlAddDefAttrs:
1048
 * @ctxt:  an XML parser context
1049
 * @fullname:  the element fullname
1050
 * @fullattr:  the attribute fullname
1051
 * @value:  the attribute value
1052
 *
1053
 * Add a defaulted attribute for an element
1054
 */
1055
static void
1056
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1057
               const xmlChar *fullname,
1058
               const xmlChar *fullattr,
1059
320k
               const xmlChar *value) {
1060
320k
    xmlDefAttrsPtr defaults;
1061
320k
    xmlDefAttr *attr;
1062
320k
    int len, expandedSize;
1063
320k
    xmlHashedString name;
1064
320k
    xmlHashedString prefix;
1065
320k
    xmlHashedString hvalue;
1066
320k
    const xmlChar *localname;
1067
1068
    /*
1069
     * Allows to detect attribute redefinitions
1070
     */
1071
320k
    if (ctxt->attsSpecial != NULL) {
1072
300k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1073
216k
      return;
1074
300k
    }
1075
1076
103k
    if (ctxt->attsDefault == NULL) {
1077
20.3k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1078
20.3k
  if (ctxt->attsDefault == NULL)
1079
65
      goto mem_error;
1080
20.3k
    }
1081
1082
    /*
1083
     * split the element name into prefix:localname , the string found
1084
     * are within the DTD and then not associated to namespace names.
1085
     */
1086
103k
    localname = xmlSplitQName3(fullname, &len);
1087
103k
    if (localname == NULL) {
1088
67.5k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1089
67.5k
  prefix.name = NULL;
1090
67.5k
    } else {
1091
36.2k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1092
36.2k
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1093
36.2k
        if (prefix.name == NULL)
1094
7
            goto mem_error;
1095
36.2k
    }
1096
103k
    if (name.name == NULL)
1097
6
        goto mem_error;
1098
1099
    /*
1100
     * make sure there is some storage
1101
     */
1102
103k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1103
103k
    if ((defaults == NULL) ||
1104
103k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1105
32.1k
        xmlDefAttrsPtr temp;
1106
32.1k
        int newSize;
1107
1108
32.1k
        newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1109
32.1k
        temp = xmlRealloc(defaults,
1110
32.1k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1111
32.1k
  if (temp == NULL)
1112
47
      goto mem_error;
1113
32.1k
        if (defaults == NULL)
1114
23.9k
            temp->nbAttrs = 0;
1115
32.1k
  temp->maxAttrs = newSize;
1116
32.1k
        defaults = temp;
1117
32.1k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1118
32.1k
                          defaults, NULL) < 0) {
1119
0
      xmlFree(defaults);
1120
0
      goto mem_error;
1121
0
  }
1122
32.1k
    }
1123
1124
    /*
1125
     * Split the attribute name into prefix:localname , the string found
1126
     * are within the DTD and hen not associated to namespace names.
1127
     */
1128
103k
    localname = xmlSplitQName3(fullattr, &len);
1129
103k
    if (localname == NULL) {
1130
85.4k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1131
85.4k
  prefix.name = NULL;
1132
85.4k
    } else {
1133
18.3k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1134
18.3k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1135
18.3k
        if (prefix.name == NULL)
1136
6
            goto mem_error;
1137
18.3k
    }
1138
103k
    if (name.name == NULL)
1139
5
        goto mem_error;
1140
1141
    /* intern the string and precompute the end */
1142
103k
    len = strlen((const char *) value);
1143
103k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1144
103k
    if (hvalue.name == NULL)
1145
14
        goto mem_error;
1146
1147
103k
    expandedSize = strlen((const char *) name.name);
1148
103k
    if (prefix.name != NULL)
1149
18.3k
        expandedSize += strlen((const char *) prefix.name);
1150
103k
    expandedSize += len;
1151
1152
103k
    attr = &defaults->attrs[defaults->nbAttrs++];
1153
103k
    attr->name = name;
1154
103k
    attr->prefix = prefix;
1155
103k
    attr->value = hvalue;
1156
103k
    attr->valueEnd = hvalue.name + len;
1157
103k
    attr->external = PARSER_EXTERNAL(ctxt);
1158
103k
    attr->expandedSize = expandedSize;
1159
1160
103k
    return;
1161
1162
150
mem_error:
1163
150
    xmlErrMemory(ctxt);
1164
150
    return;
1165
103k
}
1166
1167
/**
1168
 * xmlAddSpecialAttr:
1169
 * @ctxt:  an XML parser context
1170
 * @fullname:  the element fullname
1171
 * @fullattr:  the attribute fullname
1172
 * @type:  the attribute type
1173
 *
1174
 * Register this attribute type
1175
 */
1176
static void
1177
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1178
      const xmlChar *fullname,
1179
      const xmlChar *fullattr,
1180
      int type)
1181
482k
{
1182
482k
    if (ctxt->attsSpecial == NULL) {
1183
26.9k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1184
26.9k
  if (ctxt->attsSpecial == NULL)
1185
84
      goto mem_error;
1186
26.9k
    }
1187
1188
482k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1189
482k
                    (void *) (ptrdiff_t) type) < 0)
1190
5
        goto mem_error;
1191
482k
    return;
1192
1193
482k
mem_error:
1194
89
    xmlErrMemory(ctxt);
1195
89
    return;
1196
482k
}
1197
1198
/**
1199
 * xmlCleanSpecialAttrCallback:
1200
 *
1201
 * Removes CDATA attributes from the special attribute table
1202
 */
1203
static void
1204
xmlCleanSpecialAttrCallback(void *payload, void *data,
1205
                            const xmlChar *fullname, const xmlChar *fullattr,
1206
201k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1207
201k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1208
1209
201k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1210
44.1k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1211
44.1k
    }
1212
201k
}
1213
1214
/**
1215
 * xmlCleanSpecialAttr:
1216
 * @ctxt:  an XML parser context
1217
 *
1218
 * Trim the list of attributes defined to remove all those of type
1219
 * CDATA as they are not special. This call should be done when finishing
1220
 * to parse the DTD and before starting to parse the document root.
1221
 */
1222
static void
1223
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1224
95.8k
{
1225
95.8k
    if (ctxt->attsSpecial == NULL)
1226
69.0k
        return;
1227
1228
26.8k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1229
1230
26.8k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1231
2.21k
        xmlHashFree(ctxt->attsSpecial, NULL);
1232
2.21k
        ctxt->attsSpecial = NULL;
1233
2.21k
    }
1234
26.8k
    return;
1235
95.8k
}
1236
1237
/**
1238
 * xmlCheckLanguageID:
1239
 * @lang:  pointer to the string value
1240
 *
1241
 * DEPRECATED: Internal function, do not use.
1242
 *
1243
 * Checks that the value conforms to the LanguageID production:
1244
 *
1245
 * NOTE: this is somewhat deprecated, those productions were removed from
1246
 *       the XML Second edition.
1247
 *
1248
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1249
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1250
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1251
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1252
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1253
 * [38] Subcode ::= ([a-z] | [A-Z])+
1254
 *
1255
 * The current REC reference the successors of RFC 1766, currently 5646
1256
 *
1257
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1258
 * langtag       = language
1259
 *                 ["-" script]
1260
 *                 ["-" region]
1261
 *                 *("-" variant)
1262
 *                 *("-" extension)
1263
 *                 ["-" privateuse]
1264
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1265
 *                 ["-" extlang]       ; sometimes followed by
1266
 *                                     ; extended language subtags
1267
 *               / 4ALPHA              ; or reserved for future use
1268
 *               / 5*8ALPHA            ; or registered language subtag
1269
 *
1270
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1271
 *                 *2("-" 3ALPHA)      ; permanently reserved
1272
 *
1273
 * script        = 4ALPHA              ; ISO 15924 code
1274
 *
1275
 * region        = 2ALPHA              ; ISO 3166-1 code
1276
 *               / 3DIGIT              ; UN M.49 code
1277
 *
1278
 * variant       = 5*8alphanum         ; registered variants
1279
 *               / (DIGIT 3alphanum)
1280
 *
1281
 * extension     = singleton 1*("-" (2*8alphanum))
1282
 *
1283
 *                                     ; Single alphanumerics
1284
 *                                     ; "x" reserved for private use
1285
 * singleton     = DIGIT               ; 0 - 9
1286
 *               / %x41-57             ; A - W
1287
 *               / %x59-5A             ; Y - Z
1288
 *               / %x61-77             ; a - w
1289
 *               / %x79-7A             ; y - z
1290
 *
1291
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1292
 * The parser below doesn't try to cope with extension or privateuse
1293
 * that could be added but that's not interoperable anyway
1294
 *
1295
 * Returns 1 if correct 0 otherwise
1296
 **/
1297
int
1298
xmlCheckLanguageID(const xmlChar * lang)
1299
84.9k
{
1300
84.9k
    const xmlChar *cur = lang, *nxt;
1301
1302
84.9k
    if (cur == NULL)
1303
3.20k
        return (0);
1304
81.7k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1305
81.7k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1306
81.7k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1307
81.7k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1308
        /*
1309
         * Still allow IANA code and user code which were coming
1310
         * from the previous version of the XML-1.0 specification
1311
         * it's deprecated but we should not fail
1312
         */
1313
3.29k
        cur += 2;
1314
9.88k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1315
9.88k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1316
6.58k
            cur++;
1317
3.29k
        return(cur[0] == 0);
1318
3.29k
    }
1319
78.4k
    nxt = cur;
1320
259k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1321
259k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1322
181k
           nxt++;
1323
78.4k
    if (nxt - cur >= 4) {
1324
        /*
1325
         * Reserved
1326
         */
1327
4.63k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1328
3.18k
            return(0);
1329
1.44k
        return(1);
1330
4.63k
    }
1331
73.8k
    if (nxt - cur < 2)
1332
8.45k
        return(0);
1333
    /* we got an ISO 639 code */
1334
65.3k
    if (nxt[0] == 0)
1335
2.16k
        return(1);
1336
63.2k
    if (nxt[0] != '-')
1337
3.29k
        return(0);
1338
1339
59.9k
    nxt++;
1340
59.9k
    cur = nxt;
1341
    /* now we can have extlang or script or region or variant */
1342
59.9k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1343
12.1k
        goto region_m49;
1344
1345
209k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1346
209k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1347
161k
           nxt++;
1348
47.7k
    if (nxt - cur == 4)
1349
18.7k
        goto script;
1350
28.9k
    if (nxt - cur == 2)
1351
7.50k
        goto region;
1352
21.4k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1353
2.22k
        goto variant;
1354
19.2k
    if (nxt - cur != 3)
1355
2.25k
        return(0);
1356
    /* we parsed an extlang */
1357
16.9k
    if (nxt[0] == 0)
1358
613
        return(1);
1359
16.3k
    if (nxt[0] != '-')
1360
1.77k
        return(0);
1361
1362
14.5k
    nxt++;
1363
14.5k
    cur = nxt;
1364
    /* now we can have script or region or variant */
1365
14.5k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1366
1.19k
        goto region_m49;
1367
1368
76.5k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1369
76.5k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1370
63.1k
           nxt++;
1371
13.3k
    if (nxt - cur == 2)
1372
2.22k
        goto region;
1373
11.1k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1374
1.29k
        goto variant;
1375
9.87k
    if (nxt - cur != 4)
1376
4.72k
        return(0);
1377
    /* we parsed a script */
1378
23.9k
script:
1379
23.9k
    if (nxt[0] == 0)
1380
405
        return(1);
1381
23.5k
    if (nxt[0] != '-')
1382
4.14k
        return(0);
1383
1384
19.3k
    nxt++;
1385
19.3k
    cur = nxt;
1386
    /* now we can have region or variant */
1387
19.3k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1388
7.28k
        goto region_m49;
1389
1390
67.6k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1391
67.6k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1392
55.5k
           nxt++;
1393
1394
12.0k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1395
4.91k
        goto variant;
1396
7.18k
    if (nxt - cur != 2)
1397
3.59k
        return(0);
1398
    /* we parsed a region */
1399
15.1k
region:
1400
15.1k
    if (nxt[0] == 0)
1401
1.70k
        return(1);
1402
13.4k
    if (nxt[0] != '-')
1403
6.62k
        return(0);
1404
1405
6.80k
    nxt++;
1406
6.80k
    cur = nxt;
1407
    /* now we can just have a variant */
1408
44.5k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1409
44.5k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1410
37.7k
           nxt++;
1411
1412
6.80k
    if ((nxt - cur < 5) || (nxt - cur > 8))
1413
5.03k
        return(0);
1414
1415
    /* we parsed a variant */
1416
10.2k
variant:
1417
10.2k
    if (nxt[0] == 0)
1418
1.06k
        return(1);
1419
9.14k
    if (nxt[0] != '-')
1420
4.34k
        return(0);
1421
    /* extensions and private use subtags not checked */
1422
4.79k
    return (1);
1423
1424
20.6k
region_m49:
1425
20.6k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1426
20.6k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1427
1.81k
        nxt += 3;
1428
1.81k
        goto region;
1429
1.81k
    }
1430
18.8k
    return(0);
1431
20.6k
}
1432
1433
/************************************************************************
1434
 *                  *
1435
 *    Parser stacks related functions and macros    *
1436
 *                  *
1437
 ************************************************************************/
1438
1439
static xmlChar *
1440
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1441
1442
/**
1443
 * xmlParserNsCreate:
1444
 *
1445
 * Create a new namespace database.
1446
 *
1447
 * Returns the new obejct.
1448
 */
1449
xmlParserNsData *
1450
220k
xmlParserNsCreate(void) {
1451
220k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1452
1453
220k
    if (nsdb == NULL)
1454
81
        return(NULL);
1455
220k
    memset(nsdb, 0, sizeof(*nsdb));
1456
220k
    nsdb->defaultNsIndex = INT_MAX;
1457
1458
220k
    return(nsdb);
1459
220k
}
1460
1461
/**
1462
 * xmlParserNsFree:
1463
 * @nsdb: namespace database
1464
 *
1465
 * Free a namespace database.
1466
 */
1467
void
1468
220k
xmlParserNsFree(xmlParserNsData *nsdb) {
1469
220k
    if (nsdb == NULL)
1470
0
        return;
1471
1472
220k
    xmlFree(nsdb->extra);
1473
220k
    xmlFree(nsdb->hash);
1474
220k
    xmlFree(nsdb);
1475
220k
}
1476
1477
/**
1478
 * xmlParserNsReset:
1479
 * @nsdb: namespace database
1480
 *
1481
 * Reset a namespace database.
1482
 */
1483
static void
1484
91.8k
xmlParserNsReset(xmlParserNsData *nsdb) {
1485
91.8k
    if (nsdb == NULL)
1486
0
        return;
1487
1488
91.8k
    nsdb->hashElems = 0;
1489
91.8k
    nsdb->elementId = 0;
1490
91.8k
    nsdb->defaultNsIndex = INT_MAX;
1491
1492
91.8k
    if (nsdb->hash)
1493
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1494
91.8k
}
1495
1496
/**
1497
 * xmlParserStartElement:
1498
 * @nsdb: namespace database
1499
 *
1500
 * Signal that a new element has started.
1501
 *
1502
 * Returns 0 on success, -1 if the element counter overflowed.
1503
 */
1504
static int
1505
4.31M
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1506
4.31M
    if (nsdb->elementId == UINT_MAX)
1507
0
        return(-1);
1508
4.31M
    nsdb->elementId++;
1509
1510
4.31M
    return(0);
1511
4.31M
}
1512
1513
/**
1514
 * xmlParserNsLookup:
1515
 * @ctxt: parser context
1516
 * @prefix: namespace prefix
1517
 * @bucketPtr: optional bucket (return value)
1518
 *
1519
 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1520
 * be set to the matching bucket, or the first empty bucket if no match
1521
 * was found.
1522
 *
1523
 * Returns the namespace index on success, INT_MAX if no namespace was
1524
 * found.
1525
 */
1526
static int
1527
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1528
6.88M
                  xmlParserNsBucket **bucketPtr) {
1529
6.88M
    xmlParserNsBucket *bucket;
1530
6.88M
    unsigned index, hashValue;
1531
1532
6.88M
    if (prefix->name == NULL)
1533
3.71M
        return(ctxt->nsdb->defaultNsIndex);
1534
1535
3.17M
    if (ctxt->nsdb->hashSize == 0)
1536
202k
        return(INT_MAX);
1537
1538
2.97M
    hashValue = prefix->hashValue;
1539
2.97M
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1540
2.97M
    bucket = &ctxt->nsdb->hash[index];
1541
1542
271M
    while (bucket->hashValue) {
1543
271M
        if ((bucket->hashValue == hashValue) &&
1544
271M
            (bucket->index != INT_MAX)) {
1545
2.49M
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1546
2.48M
                if (bucketPtr != NULL)
1547
708k
                    *bucketPtr = bucket;
1548
2.48M
                return(bucket->index);
1549
2.48M
            }
1550
2.49M
        }
1551
1552
268M
        index++;
1553
268M
        bucket++;
1554
268M
        if (index == ctxt->nsdb->hashSize) {
1555
282k
            index = 0;
1556
282k
            bucket = ctxt->nsdb->hash;
1557
282k
        }
1558
268M
    }
1559
1560
489k
    if (bucketPtr != NULL)
1561
182k
        *bucketPtr = bucket;
1562
489k
    return(INT_MAX);
1563
2.97M
}
1564
1565
/**
1566
 * xmlParserNsLookupUri:
1567
 * @ctxt: parser context
1568
 * @prefix: namespace prefix
1569
 *
1570
 * Lookup namespace URI with given prefix.
1571
 *
1572
 * Returns the namespace URI on success, NULL if no namespace was found.
1573
 */
1574
static const xmlChar *
1575
3.96M
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1576
3.96M
    const xmlChar *ret;
1577
3.96M
    int nsIndex;
1578
1579
3.96M
    if (prefix->name == ctxt->str_xml)
1580
2.57k
        return(ctxt->str_xml_ns);
1581
1582
    /*
1583
     * minNsIndex is used when building an entity tree. We must
1584
     * ignore namespaces declared outside the entity.
1585
     */
1586
3.96M
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1587
3.96M
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1588
2.77M
        return(NULL);
1589
1590
1.18M
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1591
1.18M
    if (ret[0] == 0)
1592
12.5k
        ret = NULL;
1593
1.18M
    return(ret);
1594
3.96M
}
1595
1596
/**
1597
 * xmlParserNsLookupSax:
1598
 * @ctxt: parser context
1599
 * @prefix: namespace prefix
1600
 *
1601
 * Lookup extra data for the given prefix. This returns data stored
1602
 * with xmlParserNsUdpateSax.
1603
 *
1604
 * Returns the data on success, NULL if no namespace was found.
1605
 */
1606
void *
1607
1.07M
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1608
1.07M
    xmlHashedString hprefix;
1609
1.07M
    int nsIndex;
1610
1611
1.07M
    if (prefix == ctxt->str_xml)
1612
187k
        return(NULL);
1613
1614
882k
    hprefix.name = prefix;
1615
882k
    if (prefix != NULL)
1616
636k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1617
246k
    else
1618
246k
        hprefix.hashValue = 0;
1619
882k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1620
882k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1621
0
        return(NULL);
1622
1623
882k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1624
882k
}
1625
1626
/**
1627
 * xmlParserNsUpdateSax:
1628
 * @ctxt: parser context
1629
 * @prefix: namespace prefix
1630
 * @saxData: extra data for SAX handler
1631
 *
1632
 * Sets or updates extra data for the given prefix. This value will be
1633
 * returned by xmlParserNsLookupSax as long as the namespace with the
1634
 * given prefix is in scope.
1635
 *
1636
 * Returns the data on success, NULL if no namespace was found.
1637
 */
1638
int
1639
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1640
646k
                     void *saxData) {
1641
646k
    xmlHashedString hprefix;
1642
646k
    int nsIndex;
1643
1644
646k
    if (prefix == ctxt->str_xml)
1645
0
        return(-1);
1646
1647
646k
    hprefix.name = prefix;
1648
646k
    if (prefix != NULL)
1649
396k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1650
250k
    else
1651
250k
        hprefix.hashValue = 0;
1652
646k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1653
646k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1654
0
        return(-1);
1655
1656
646k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1657
646k
    return(0);
1658
646k
}
1659
1660
/**
1661
 * xmlParserNsGrow:
1662
 * @ctxt: parser context
1663
 *
1664
 * Grows the namespace tables.
1665
 *
1666
 * Returns 0 on success, -1 if a memory allocation failed.
1667
 */
1668
static int
1669
47.6k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1670
47.6k
    const xmlChar **table;
1671
47.6k
    xmlParserNsExtra *extra;
1672
47.6k
    int newSize;
1673
1674
47.6k
    if (ctxt->nsMax > INT_MAX / 2)
1675
0
        goto error;
1676
47.6k
    newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1677
1678
47.6k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1679
47.6k
    if (table == NULL)
1680
72
        goto error;
1681
47.5k
    ctxt->nsTab = table;
1682
1683
47.5k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1684
47.5k
    if (extra == NULL)
1685
60
        goto error;
1686
47.4k
    ctxt->nsdb->extra = extra;
1687
1688
47.4k
    ctxt->nsMax = newSize;
1689
47.4k
    return(0);
1690
1691
132
error:
1692
132
    xmlErrMemory(ctxt);
1693
132
    return(-1);
1694
47.5k
}
1695
1696
/**
1697
 * xmlParserNsPush:
1698
 * @ctxt: parser context
1699
 * @prefix: prefix with hash value
1700
 * @uri: uri with hash value
1701
 * @saxData: extra data for SAX handler
1702
 * @defAttr: whether the namespace comes from a default attribute
1703
 *
1704
 * Push a new namespace on the table.
1705
 *
1706
 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1707
 * -1 if a memory allocation failed.
1708
 */
1709
static int
1710
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1711
820k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1712
820k
    xmlParserNsBucket *bucket = NULL;
1713
820k
    xmlParserNsExtra *extra;
1714
820k
    const xmlChar **ns;
1715
820k
    unsigned hashValue, nsIndex, oldIndex;
1716
1717
820k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1718
1.32k
        return(0);
1719
1720
819k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1721
132
        xmlErrMemory(ctxt);
1722
132
        return(-1);
1723
132
    }
1724
1725
    /*
1726
     * Default namespace and 'xml' namespace
1727
     */
1728
819k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1729
333k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1730
1731
333k
        if (oldIndex != INT_MAX) {
1732
294k
            extra = &ctxt->nsdb->extra[oldIndex];
1733
1734
294k
            if (extra->elementId == ctxt->nsdb->elementId) {
1735
33.7k
                if (defAttr == 0)
1736
12.8k
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1737
33.7k
                return(0);
1738
33.7k
            }
1739
1740
261k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1741
261k
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1742
13.2k
                return(0);
1743
261k
        }
1744
1745
286k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1746
286k
        goto populate_entry;
1747
333k
    }
1748
1749
    /*
1750
     * Hash table lookup
1751
     */
1752
485k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1753
485k
    if (oldIndex != INT_MAX) {
1754
269k
        extra = &ctxt->nsdb->extra[oldIndex];
1755
1756
        /*
1757
         * Check for duplicate definitions on the same element.
1758
         */
1759
269k
        if (extra->elementId == ctxt->nsdb->elementId) {
1760
9.38k
            if (defAttr == 0)
1761
8.93k
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1762
9.38k
            return(0);
1763
9.38k
        }
1764
1765
260k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1766
260k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1767
15.1k
            return(0);
1768
1769
244k
        bucket->index = ctxt->nsNr;
1770
244k
        goto populate_entry;
1771
260k
    }
1772
1773
    /*
1774
     * Insert new bucket
1775
     */
1776
1777
216k
    hashValue = prefix->hashValue;
1778
1779
    /*
1780
     * Grow hash table, 50% fill factor
1781
     */
1782
216k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1783
38.5k
        xmlParserNsBucket *newHash;
1784
38.5k
        unsigned newSize, i, index;
1785
1786
38.5k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1787
0
            xmlErrMemory(ctxt);
1788
0
            return(-1);
1789
0
        }
1790
38.5k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1791
38.5k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1792
38.5k
        if (newHash == NULL) {
1793
25
            xmlErrMemory(ctxt);
1794
25
            return(-1);
1795
25
        }
1796
38.5k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1797
1798
513k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1799
475k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1800
475k
            unsigned newIndex;
1801
1802
475k
            if (hv == 0)
1803
237k
                continue;
1804
237k
            newIndex = hv & (newSize - 1);
1805
1806
55.7M
            while (newHash[newIndex].hashValue != 0) {
1807
55.4M
                newIndex++;
1808
55.4M
                if (newIndex == newSize)
1809
38.6k
                    newIndex = 0;
1810
55.4M
            }
1811
1812
237k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1813
237k
        }
1814
1815
38.5k
        xmlFree(ctxt->nsdb->hash);
1816
38.5k
        ctxt->nsdb->hash = newHash;
1817
38.5k
        ctxt->nsdb->hashSize = newSize;
1818
1819
        /*
1820
         * Relookup
1821
         */
1822
38.5k
        index = hashValue & (newSize - 1);
1823
1824
201k
        while (newHash[index].hashValue != 0) {
1825
162k
            index++;
1826
162k
            if (index == newSize)
1827
1.06k
                index = 0;
1828
162k
        }
1829
1830
38.5k
        bucket = &newHash[index];
1831
38.5k
    }
1832
1833
216k
    bucket->hashValue = hashValue;
1834
216k
    bucket->index = ctxt->nsNr;
1835
216k
    ctxt->nsdb->hashElems++;
1836
216k
    oldIndex = INT_MAX;
1837
1838
747k
populate_entry:
1839
747k
    nsIndex = ctxt->nsNr;
1840
1841
747k
    ns = &ctxt->nsTab[nsIndex * 2];
1842
747k
    ns[0] = prefix ? prefix->name : NULL;
1843
747k
    ns[1] = uri->name;
1844
1845
747k
    extra = &ctxt->nsdb->extra[nsIndex];
1846
747k
    extra->saxData = saxData;
1847
747k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1848
747k
    extra->uriHashValue = uri->hashValue;
1849
747k
    extra->elementId = ctxt->nsdb->elementId;
1850
747k
    extra->oldIndex = oldIndex;
1851
1852
747k
    ctxt->nsNr++;
1853
1854
747k
    return(1);
1855
216k
}
1856
1857
/**
1858
 * xmlParserNsPop:
1859
 * @ctxt: an XML parser context
1860
 * @nr:  the number to pop
1861
 *
1862
 * Pops the top @nr namespaces and restores the hash table.
1863
 *
1864
 * Returns the number of namespaces popped.
1865
 */
1866
static int
1867
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1868
434k
{
1869
434k
    int i;
1870
1871
    /* assert(nr <= ctxt->nsNr); */
1872
1873
1.11M
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1874
685k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1875
685k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1876
1877
685k
        if (prefix == NULL) {
1878
246k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1879
438k
        } else {
1880
438k
            xmlHashedString hprefix;
1881
438k
            xmlParserNsBucket *bucket = NULL;
1882
1883
438k
            hprefix.name = prefix;
1884
438k
            hprefix.hashValue = extra->prefixHashValue;
1885
438k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1886
            /* assert(bucket && bucket->hashValue); */
1887
438k
            bucket->index = extra->oldIndex;
1888
438k
        }
1889
685k
    }
1890
1891
434k
    ctxt->nsNr -= nr;
1892
434k
    return(nr);
1893
434k
}
1894
1895
static int
1896
51.4k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1897
51.4k
    const xmlChar **atts;
1898
51.4k
    unsigned *attallocs;
1899
51.4k
    int maxatts;
1900
1901
51.4k
    if (nr + 5 > ctxt->maxatts) {
1902
51.4k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1903
51.4k
  atts = (const xmlChar **) xmlMalloc(
1904
51.4k
             maxatts * sizeof(const xmlChar *));
1905
51.4k
  if (atts == NULL) goto mem_error;
1906
51.3k
  attallocs = xmlRealloc(ctxt->attallocs,
1907
51.3k
                               (maxatts / 5) * sizeof(attallocs[0]));
1908
51.3k
  if (attallocs == NULL) {
1909
99
            xmlFree(atts);
1910
99
            goto mem_error;
1911
99
        }
1912
51.2k
        if (ctxt->maxatts > 0)
1913
2.25k
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1914
51.2k
        xmlFree(ctxt->atts);
1915
51.2k
  ctxt->atts = atts;
1916
51.2k
  ctxt->attallocs = attallocs;
1917
51.2k
  ctxt->maxatts = maxatts;
1918
51.2k
    }
1919
51.2k
    return(ctxt->maxatts);
1920
176
mem_error:
1921
176
    xmlErrMemory(ctxt);
1922
176
    return(-1);
1923
51.4k
}
1924
1925
/**
1926
 * inputPush:
1927
 * @ctxt:  an XML parser context
1928
 * @value:  the parser input
1929
 *
1930
 * Pushes a new parser input on top of the input stack
1931
 *
1932
 * Returns -1 in case of error, the index in the stack otherwise
1933
 */
1934
int
1935
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1936
883k
{
1937
883k
    if ((ctxt == NULL) || (value == NULL))
1938
0
        return(-1);
1939
883k
    if (ctxt->inputNr >= ctxt->inputMax) {
1940
503
        size_t newSize = ctxt->inputMax * 2;
1941
503
        xmlParserInputPtr *tmp;
1942
1943
503
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1944
503
                                               newSize * sizeof(*tmp));
1945
503
        if (tmp == NULL) {
1946
4
            xmlErrMemory(ctxt);
1947
4
            return (-1);
1948
4
        }
1949
499
        ctxt->inputTab = tmp;
1950
499
        ctxt->inputMax = newSize;
1951
499
    }
1952
883k
    ctxt->inputTab[ctxt->inputNr] = value;
1953
883k
    ctxt->input = value;
1954
883k
    return (ctxt->inputNr++);
1955
883k
}
1956
/**
1957
 * inputPop:
1958
 * @ctxt: an XML parser context
1959
 *
1960
 * Pops the top parser input from the input stack
1961
 *
1962
 * Returns the input just removed
1963
 */
1964
xmlParserInputPtr
1965
inputPop(xmlParserCtxtPtr ctxt)
1966
1.44M
{
1967
1.44M
    xmlParserInputPtr ret;
1968
1969
1.44M
    if (ctxt == NULL)
1970
0
        return(NULL);
1971
1.44M
    if (ctxt->inputNr <= 0)
1972
569k
        return (NULL);
1973
878k
    ctxt->inputNr--;
1974
878k
    if (ctxt->inputNr > 0)
1975
613k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1976
265k
    else
1977
265k
        ctxt->input = NULL;
1978
878k
    ret = ctxt->inputTab[ctxt->inputNr];
1979
878k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1980
878k
    return (ret);
1981
1.44M
}
1982
/**
1983
 * nodePush:
1984
 * @ctxt:  an XML parser context
1985
 * @value:  the element node
1986
 *
1987
 * DEPRECATED: Internal function, do not use.
1988
 *
1989
 * Pushes a new element node on top of the node stack
1990
 *
1991
 * Returns -1 in case of error, the index in the stack otherwise
1992
 */
1993
int
1994
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1995
4.53M
{
1996
4.53M
    int maxDepth;
1997
1998
4.53M
    if (ctxt == NULL)
1999
0
        return(0);
2000
2001
4.53M
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2002
4.53M
    if (ctxt->nodeNr > maxDepth) {
2003
69
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2004
69
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
2005
69
                ctxt->nodeNr);
2006
69
        xmlHaltParser(ctxt);
2007
69
        return(-1);
2008
69
    }
2009
4.53M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
2010
35.3k
        xmlNodePtr *tmp;
2011
2012
35.3k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
2013
35.3k
                                      ctxt->nodeMax * 2 *
2014
35.3k
                                      sizeof(ctxt->nodeTab[0]));
2015
35.3k
        if (tmp == NULL) {
2016
29
            xmlErrMemory(ctxt);
2017
29
            return (-1);
2018
29
        }
2019
35.2k
        ctxt->nodeTab = tmp;
2020
35.2k
  ctxt->nodeMax *= 2;
2021
35.2k
    }
2022
4.53M
    ctxt->nodeTab[ctxt->nodeNr] = value;
2023
4.53M
    ctxt->node = value;
2024
4.53M
    return (ctxt->nodeNr++);
2025
4.53M
}
2026
2027
/**
2028
 * nodePop:
2029
 * @ctxt: an XML parser context
2030
 *
2031
 * DEPRECATED: Internal function, do not use.
2032
 *
2033
 * Pops the top element node from the node stack
2034
 *
2035
 * Returns the node just removed
2036
 */
2037
xmlNodePtr
2038
nodePop(xmlParserCtxtPtr ctxt)
2039
4.15M
{
2040
4.15M
    xmlNodePtr ret;
2041
2042
4.15M
    if (ctxt == NULL) return(NULL);
2043
4.15M
    if (ctxt->nodeNr <= 0)
2044
121k
        return (NULL);
2045
4.03M
    ctxt->nodeNr--;
2046
4.03M
    if (ctxt->nodeNr > 0)
2047
3.97M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2048
60.8k
    else
2049
60.8k
        ctxt->node = NULL;
2050
4.03M
    ret = ctxt->nodeTab[ctxt->nodeNr];
2051
4.03M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2052
4.03M
    return (ret);
2053
4.15M
}
2054
2055
/**
2056
 * nameNsPush:
2057
 * @ctxt:  an XML parser context
2058
 * @value:  the element name
2059
 * @prefix:  the element prefix
2060
 * @URI:  the element namespace name
2061
 * @line:  the current line number for error messages
2062
 * @nsNr:  the number of namespaces pushed on the namespace table
2063
 *
2064
 * Pushes a new element name/prefix/URL on top of the name stack
2065
 *
2066
 * Returns -1 in case of error, the index in the stack otherwise
2067
 */
2068
static int
2069
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2070
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2071
3.96M
{
2072
3.96M
    xmlStartTag *tag;
2073
2074
3.96M
    if (ctxt->nameNr >= ctxt->nameMax) {
2075
35.0k
        const xmlChar * *tmp;
2076
35.0k
        xmlStartTag *tmp2;
2077
35.0k
        ctxt->nameMax *= 2;
2078
35.0k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2079
35.0k
                                    ctxt->nameMax *
2080
35.0k
                                    sizeof(ctxt->nameTab[0]));
2081
35.0k
        if (tmp == NULL) {
2082
27
      ctxt->nameMax /= 2;
2083
27
      goto mem_error;
2084
27
        }
2085
35.0k
  ctxt->nameTab = tmp;
2086
35.0k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2087
35.0k
                                    ctxt->nameMax *
2088
35.0k
                                    sizeof(ctxt->pushTab[0]));
2089
35.0k
        if (tmp2 == NULL) {
2090
30
      ctxt->nameMax /= 2;
2091
30
      goto mem_error;
2092
30
        }
2093
35.0k
  ctxt->pushTab = tmp2;
2094
3.92M
    } else if (ctxt->pushTab == NULL) {
2095
124k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2096
124k
                                            sizeof(ctxt->pushTab[0]));
2097
124k
        if (ctxt->pushTab == NULL)
2098
260
            goto mem_error;
2099
124k
    }
2100
3.96M
    ctxt->nameTab[ctxt->nameNr] = value;
2101
3.96M
    ctxt->name = value;
2102
3.96M
    tag = &ctxt->pushTab[ctxt->nameNr];
2103
3.96M
    tag->prefix = prefix;
2104
3.96M
    tag->URI = URI;
2105
3.96M
    tag->line = line;
2106
3.96M
    tag->nsNr = nsNr;
2107
3.96M
    return (ctxt->nameNr++);
2108
317
mem_error:
2109
317
    xmlErrMemory(ctxt);
2110
317
    return (-1);
2111
3.96M
}
2112
#ifdef LIBXML_PUSH_ENABLED
2113
/**
2114
 * nameNsPop:
2115
 * @ctxt: an XML parser context
2116
 *
2117
 * Pops the top element/prefix/URI name from the name stack
2118
 *
2119
 * Returns the name just removed
2120
 */
2121
static const xmlChar *
2122
nameNsPop(xmlParserCtxtPtr ctxt)
2123
360k
{
2124
360k
    const xmlChar *ret;
2125
2126
360k
    if (ctxt->nameNr <= 0)
2127
0
        return (NULL);
2128
360k
    ctxt->nameNr--;
2129
360k
    if (ctxt->nameNr > 0)
2130
359k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2131
1.27k
    else
2132
1.27k
        ctxt->name = NULL;
2133
360k
    ret = ctxt->nameTab[ctxt->nameNr];
2134
360k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2135
360k
    return (ret);
2136
360k
}
2137
#endif /* LIBXML_PUSH_ENABLED */
2138
2139
/**
2140
 * namePush:
2141
 * @ctxt:  an XML parser context
2142
 * @value:  the element name
2143
 *
2144
 * DEPRECATED: Internal function, do not use.
2145
 *
2146
 * Pushes a new element name on top of the name stack
2147
 *
2148
 * Returns -1 in case of error, the index in the stack otherwise
2149
 */
2150
int
2151
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2152
0
{
2153
0
    if (ctxt == NULL) return (-1);
2154
2155
0
    if (ctxt->nameNr >= ctxt->nameMax) {
2156
0
        const xmlChar * *tmp;
2157
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2158
0
                                    ctxt->nameMax * 2 *
2159
0
                                    sizeof(ctxt->nameTab[0]));
2160
0
        if (tmp == NULL) {
2161
0
      goto mem_error;
2162
0
        }
2163
0
  ctxt->nameTab = tmp;
2164
0
        ctxt->nameMax *= 2;
2165
0
    }
2166
0
    ctxt->nameTab[ctxt->nameNr] = value;
2167
0
    ctxt->name = value;
2168
0
    return (ctxt->nameNr++);
2169
0
mem_error:
2170
0
    xmlErrMemory(ctxt);
2171
0
    return (-1);
2172
0
}
2173
2174
/**
2175
 * namePop:
2176
 * @ctxt: an XML parser context
2177
 *
2178
 * DEPRECATED: Internal function, do not use.
2179
 *
2180
 * Pops the top element name from the name stack
2181
 *
2182
 * Returns the name just removed
2183
 */
2184
const xmlChar *
2185
namePop(xmlParserCtxtPtr ctxt)
2186
3.15M
{
2187
3.15M
    const xmlChar *ret;
2188
2189
3.15M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2190
86
        return (NULL);
2191
3.14M
    ctxt->nameNr--;
2192
3.14M
    if (ctxt->nameNr > 0)
2193
3.11M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2194
34.3k
    else
2195
34.3k
        ctxt->name = NULL;
2196
3.14M
    ret = ctxt->nameTab[ctxt->nameNr];
2197
3.14M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2198
3.14M
    return (ret);
2199
3.15M
}
2200
2201
4.85M
/*
 * spacePush:
 * Pushes @val on the space stack, doubling the table when it is full,
 * and points ctxt->space at the new top entry.
 *
 * Returns the index of the pushed entry, or -1 after reporting a
 * memory error.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int *top;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int grown = ctxt->spaceMax * 2;
        int *tab;

        tab = (int *) xmlRealloc(ctxt->spaceTab,
                                 grown * sizeof(ctxt->spaceTab[0]));
        if (tab == NULL) {
            /* ctxt->spaceMax still holds the old capacity. */
            xmlErrMemory(ctxt);
            return(-1);
        }
        ctxt->spaceTab = tab;
        ctxt->spaceMax = grown;
    }

    top = &ctxt->spaceTab[ctxt->spaceNr];
    *top = val;
    ctxt->space = top;
    return(ctxt->spaceNr++);
}
2219
2220
4.40M
/*
 * spacePop:
 * Pops the top entry of the space stack and overwrites the freed slot
 * with -1. ctxt->space is pointed at the new top entry, or at slot 0
 * when the stack becomes empty.
 *
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2232
2233
/*
2234
 * Macros for accessing the content. Those should be used only by the parser,
2235
 * and not exported.
2236
 *
2237
 * Dirty macros, i.e. one often needs to make assumptions about the context to
2238
 * use them
2239
 *
2240
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2241
 *           To be used with extreme caution since operations consuming
2242
 *           characters may move the input buffer to a different location !
2243
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2244
 *           This should be used internally by the parser
2245
 *           only to compare to ASCII values otherwise it would break when
2246
 *           running with UTF-8 encoding.
2247
 *   RAW     same as CUR but in the input buffer, bypass any token
2248
 *           extraction that may have been done
2249
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
2250
 *           to compare on ASCII based substring.
2251
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2252
 *           strings without newlines within the parser.
2253
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2254
 *           defined char within the parser.
2255
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2256
 *
2257
 *   NEXT    Skip to the next character, this does the proper decoding
2258
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2259
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2260
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2261
 *           to the number of xmlChars used for the encoding [0-5].
2262
 *   CUR_SCHAR  same but operate on a string instead of the context
2263
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2264
 *            the index
2265
 *   GROW, SHRINK  handling of input buffers
2266
 */
2267
2268
108M
#define RAW (*ctxt->input->cur)
2269
804M
#define CUR (*ctxt->input->cur)
2270
56.3M
#define NXT(val) ctxt->input->cur[(val)]
2271
1.45G
#define CUR_PTR ctxt->input->cur
2272
15.1M
#define BASE_PTR ctxt->input->base
2273
2274
#define CMP4( s, c1, c2, c3, c4 ) \
2275
37.8M
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
2276
19.1M
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
2277
#define CMP5( s, c1, c2, c3, c4, c5 ) \
2278
35.7M
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
2279
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
2280
32.5M
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
2281
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
2282
30.2M
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
2283
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
2284
28.4M
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
2285
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
2286
13.7M
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
2287
13.7M
    ((unsigned char *) s)[ 8 ] == c9 )
2288
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
2289
19.1k
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
2290
19.1k
    ((unsigned char *) s)[ 9 ] == c10 )
2291
2292
19.5M
#define SKIP(val) do {             \
2293
19.5M
    ctxt->input->cur += (val),ctxt->input->col+=(val);      \
2294
19.5M
    if (*ctxt->input->cur == 0)           \
2295
19.5M
        xmlParserGrow(ctxt);           \
2296
19.5M
  } while (0)
2297
2298
223k
#define SKIPL(val) do {             \
2299
223k
    int skipl;                \
2300
45.1M
    for(skipl=0; skipl<val; skipl++) {         \
2301
44.9M
  if (*(ctxt->input->cur) == '\n') {       \
2302
16.6k
  ctxt->input->line++; ctxt->input->col = 1;      \
2303
44.9M
  } else ctxt->input->col++;         \
2304
44.9M
  ctxt->input->cur++;           \
2305
44.9M
    }                  \
2306
223k
    if (*ctxt->input->cur == 0)           \
2307
223k
        xmlParserGrow(ctxt);           \
2308
223k
  } while (0)
2309
2310
#define SHRINK \
2311
33.5M
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
2312
33.5M
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2313
33.5M
  (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2314
33.5M
  xmlParserShrink(ctxt);
2315
2316
#define GROW \
2317
93.4M
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
2318
93.4M
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2319
15.1M
  xmlParserGrow(ctxt);
2320
2321
14.4M
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2322
2323
11.3M
#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)
2324
2325
130M
#define NEXT xmlNextChar(ctxt)
2326
2327
7.37M
#define NEXT1 {               \
2328
7.37M
  ctxt->input->col++;           \
2329
7.37M
  ctxt->input->cur++;           \
2330
7.37M
  if (*ctxt->input->cur == 0)         \
2331
7.37M
      xmlParserGrow(ctxt);           \
2332
7.37M
    }
2333
2334
939M
#define NEXTL(l) do {             \
2335
939M
    if (*(ctxt->input->cur) == '\n') {         \
2336
40.8M
  ctxt->input->line++; ctxt->input->col = 1;      \
2337
898M
    } else ctxt->input->col++;           \
2338
939M
    ctxt->input->cur += l;        \
2339
939M
  } while (0)
2340
2341
284M
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2342
12.8M
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2343
2344
#define COPY_BUF(b, i, v)           \
2345
229M
    if (v < 0x80) b[i++] = v;           \
2346
229M
    else i += xmlCopyCharMultiByte(&b[i],v)
2347
2348
/**
2349
 * xmlSkipBlankChars:
2350
 * @ctxt:  the XML parser context
2351
 *
2352
 * DEPRECATED: Internal function, do not use.
2353
 *
2354
 * Skip whitespace in the input stream.
2355
 *
2356
 * Returns the number of space chars skipped
2357
 */
2358
int
2359
17.3M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2360
17.3M
    const xmlChar *cur;
2361
17.3M
    int res = 0;
2362
2363
    /*
2364
     * It's Okay to use CUR/NEXT here since all the blanks are on
2365
     * the ASCII range.
2366
     */
2367
17.3M
    cur = ctxt->input->cur;
2368
17.3M
    while (IS_BLANK_CH(*cur)) {
2369
12.8M
        if (*cur == '\n') {
2370
7.83M
            ctxt->input->line++; ctxt->input->col = 1;
2371
7.83M
        } else {
2372
5.04M
            ctxt->input->col++;
2373
5.04M
        }
2374
12.8M
        cur++;
2375
12.8M
        if (res < INT_MAX)
2376
12.8M
            res++;
2377
12.8M
        if (*cur == 0) {
2378
42.2k
            ctxt->input->cur = cur;
2379
42.2k
            xmlParserGrow(ctxt);
2380
42.2k
            cur = ctxt->input->cur;
2381
42.2k
        }
2382
12.8M
    }
2383
17.3M
    ctxt->input->cur = cur;
2384
2385
17.3M
    return(res);
2386
17.3M
}
2387
2388
static void
2389
588k
xmlPopPE(xmlParserCtxtPtr ctxt) {
2390
588k
    unsigned long consumed;
2391
588k
    xmlEntityPtr ent;
2392
2393
588k
    ent = ctxt->input->entity;
2394
2395
588k
    ent->flags &= ~XML_ENT_EXPANDING;
2396
2397
588k
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2398
23.5k
        int result;
2399
2400
        /*
2401
         * Read the rest of the stream in case of errors. We want
2402
         * to account for the whole entity size.
2403
         */
2404
34.7k
        do {
2405
34.7k
            ctxt->input->cur = ctxt->input->end;
2406
34.7k
            xmlParserShrink(ctxt);
2407
34.7k
            result = xmlParserGrow(ctxt);
2408
34.7k
        } while (result > 0);
2409
2410
23.5k
        consumed = ctxt->input->consumed;
2411
23.5k
        xmlSaturatedAddSizeT(&consumed,
2412
23.5k
                             ctxt->input->end - ctxt->input->base);
2413
2414
23.5k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2415
2416
        /*
2417
         * Add to sizeentities when parsing an external entity
2418
         * for the first time.
2419
         */
2420
23.5k
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2421
10.6k
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2422
10.6k
        }
2423
2424
23.5k
        ent->flags |= XML_ENT_CHECKED;
2425
23.5k
    }
2426
2427
588k
    xmlPopInput(ctxt);
2428
2429
588k
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2430
588k
}
2431
2432
/**
2433
 * xmlSkipBlankCharsPE:
2434
 * @ctxt:  the XML parser context
2435
 *
2436
 * Skip whitespace in the input stream, also handling parameter
2437
 * entities.
2438
 *
2439
 * Returns the number of space chars skipped
2440
 */
2441
static int
2442
11.3M
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2443
11.3M
    int res = 0;
2444
11.3M
    int inParam;
2445
11.3M
    int expandParam;
2446
2447
11.3M
    inParam = PARSER_IN_PE(ctxt);
2448
11.3M
    expandParam = PARSER_EXTERNAL(ctxt);
2449
2450
11.3M
    if (!inParam && !expandParam)
2451
2.90M
        return(xmlSkipBlankChars(ctxt));
2452
2453
20.1M
    while (PARSER_STOPPED(ctxt) == 0) {
2454
20.1M
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2455
9.82M
            NEXT;
2456
10.2M
        } else if (CUR == '%') {
2457
1.54M
            if ((expandParam == 0) ||
2458
1.54M
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2459
269k
                break;
2460
2461
            /*
2462
             * Expand parameter entity. We continue to consume
2463
             * whitespace at the start of the entity and possible
2464
             * even consume the whole entity and pop it. We might
2465
             * even pop multiple PEs in this loop.
2466
             */
2467
1.27M
            xmlParsePEReference(ctxt);
2468
2469
1.27M
            inParam = PARSER_IN_PE(ctxt);
2470
1.27M
            expandParam = PARSER_EXTERNAL(ctxt);
2471
8.74M
        } else if (CUR == 0) {
2472
588k
            if (inParam == 0)
2473
3.91k
                break;
2474
2475
584k
            xmlPopPE(ctxt);
2476
2477
584k
            inParam = PARSER_IN_PE(ctxt);
2478
584k
            expandParam = PARSER_EXTERNAL(ctxt);
2479
8.15M
        } else {
2480
8.15M
            break;
2481
8.15M
        }
2482
2483
        /*
2484
         * Also increase the counter when entering or exiting a PERef.
2485
         * The spec says: "When a parameter-entity reference is recognized
2486
         * in the DTD and included, its replacement text MUST be enlarged
2487
         * by the attachment of one leading and one following space (#x20)
2488
         * character."
2489
         */
2490
11.6M
        if (res < INT_MAX)
2491
11.6M
            res++;
2492
11.6M
    }
2493
2494
8.43M
    return(res);
2495
11.3M
}
2496
2497
/************************************************************************
2498
 *                  *
2499
 *    Commodity functions to handle entities      *
2500
 *                  *
2501
 ************************************************************************/
2502
2503
/**
2504
 * xmlPopInput:
2505
 * @ctxt:  an XML parser context
2506
 *
2507
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2508
 *          pop it and return the next char.
2509
 *
2510
 * Returns the current xmlChar in the parser context
2511
 */
2512
xmlChar
2513
588k
xmlPopInput(xmlParserCtxtPtr ctxt) {
2514
588k
    xmlParserInputPtr input;
2515
2516
588k
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2517
588k
    input = inputPop(ctxt);
2518
588k
    xmlFreeInputStream(input);
2519
588k
    if (*ctxt->input->cur == 0)
2520
5.18k
        xmlParserGrow(ctxt);
2521
588k
    return(CUR);
2522
588k
}
2523
2524
/**
2525
 * xmlPushInput:
2526
 * @ctxt:  an XML parser context
2527
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2528
 *
2529
 * Push an input stream onto the stack.
2530
 *
2531
 * This makes the parser use an input returned from advanced functions
2532
 * like xmlNewInputURL or xmlNewInputMemory.
2533
 *
2534
 * Returns -1 in case of error or the index in the input stack
2535
 */
2536
int
2537
805k
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2538
805k
    int maxDepth;
2539
805k
    int ret;
2540
2541
805k
    if ((ctxt == NULL) || (input == NULL))
2542
186k
        return(-1);
2543
2544
618k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
2545
618k
    if (ctxt->inputNr > maxDepth) {
2546
22
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
2547
22
                       "Maximum entity nesting depth exceeded");
2548
22
        xmlHaltParser(ctxt);
2549
22
  return(-1);
2550
22
    }
2551
618k
    ret = inputPush(ctxt, input);
2552
618k
    GROW;
2553
618k
    return(ret);
2554
618k
}
2555
2556
/**
2557
 * xmlParseCharRef:
2558
 * @ctxt:  an XML parser context
2559
 *
2560
 * DEPRECATED: Internal function, don't use.
2561
 *
2562
 * Parse a numeric character reference. Always consumes '&'.
2563
 *
2564
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2565
 *                  '&#x' [0-9a-fA-F]+ ';'
2566
 *
2567
 * [ WFC: Legal Character ]
2568
 * Characters referred to using character references must match the
2569
 * production for Char.
2570
 *
2571
 * Returns the value parsed (as an int), 0 in case of error
2572
 */
2573
int
2574
474k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2575
474k
    int val = 0;
2576
474k
    int count = 0;
2577
2578
    /*
2579
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2580
     */
2581
474k
    if ((RAW == '&') && (NXT(1) == '#') &&
2582
474k
        (NXT(2) == 'x')) {
2583
261k
  SKIP(3);
2584
261k
  GROW;
2585
662k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2586
505k
      if (count++ > 20) {
2587
4.46k
    count = 0;
2588
4.46k
    GROW;
2589
4.46k
      }
2590
505k
      if ((RAW >= '0') && (RAW <= '9'))
2591
235k
          val = val * 16 + (CUR - '0');
2592
269k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2593
46.2k
          val = val * 16 + (CUR - 'a') + 10;
2594
223k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2595
118k
          val = val * 16 + (CUR - 'A') + 10;
2596
104k
      else {
2597
104k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2598
104k
    val = 0;
2599
104k
    break;
2600
104k
      }
2601
400k
      if (val > 0x110000)
2602
52.9k
          val = 0x110000;
2603
2604
400k
      NEXT;
2605
400k
      count++;
2606
400k
  }
2607
261k
  if (RAW == ';') {
2608
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2609
157k
      ctxt->input->col++;
2610
157k
      ctxt->input->cur++;
2611
157k
  }
2612
261k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2613
212k
  SKIP(2);
2614
212k
  GROW;
2615
704k
  while (RAW != ';') { /* loop blocked by count */
2616
545k
      if (count++ > 20) {
2617
10.2k
    count = 0;
2618
10.2k
    GROW;
2619
10.2k
      }
2620
545k
      if ((RAW >= '0') && (RAW <= '9'))
2621
492k
          val = val * 10 + (CUR - '0');
2622
53.4k
      else {
2623
53.4k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2624
53.4k
    val = 0;
2625
53.4k
    break;
2626
53.4k
      }
2627
492k
      if (val > 0x110000)
2628
109k
          val = 0x110000;
2629
2630
492k
      NEXT;
2631
492k
      count++;
2632
492k
  }
2633
212k
  if (RAW == ';') {
2634
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2635
158k
      ctxt->input->col++;
2636
158k
      ctxt->input->cur++;
2637
158k
  }
2638
212k
    } else {
2639
0
        if (RAW == '&')
2640
0
            SKIP(1);
2641
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2642
0
    }
2643
2644
    /*
2645
     * [ WFC: Legal Character ]
2646
     * Characters referred to using character references must match the
2647
     * production for Char.
2648
     */
2649
474k
    if (val >= 0x110000) {
2650
2.95k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2651
2.95k
                "xmlParseCharRef: character reference out of bounds\n",
2652
2.95k
          val);
2653
471k
    } else if (IS_CHAR(val)) {
2654
302k
        return(val);
2655
302k
    } else {
2656
169k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2657
169k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2658
169k
                    val);
2659
169k
    }
2660
172k
    return(0);
2661
474k
}
2662
2663
/**
2664
 * xmlParseStringCharRef:
2665
 * @ctxt:  an XML parser context
2666
 * @str:  a pointer to an index in the string
2667
 *
2668
 * parse Reference declarations, variant parsing from a string rather
2669
 * than an an input flow.
2670
 *
2671
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2672
 *                  '&#x' [0-9a-fA-F]+ ';'
2673
 *
2674
 * [ WFC: Legal Character ]
2675
 * Characters referred to using character references must match the
2676
 * production for Char.
2677
 *
2678
 * Returns the value parsed (as an int), 0 in case of error, str will be
2679
 *         updated to the current value of the index
2680
 */
2681
static int
2682
270k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2683
270k
    const xmlChar *ptr;
2684
270k
    xmlChar cur;
2685
270k
    int val = 0;
2686
2687
270k
    if ((str == NULL) || (*str == NULL)) return(0);
2688
270k
    ptr = *str;
2689
270k
    cur = *ptr;
2690
270k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2691
50.1k
  ptr += 3;
2692
50.1k
  cur = *ptr;
2693
201k
  while (cur != ';') { /* Non input consuming loop */
2694
157k
      if ((cur >= '0') && (cur <= '9'))
2695
64.8k
          val = val * 16 + (cur - '0');
2696
92.2k
      else if ((cur >= 'a') && (cur <= 'f'))
2697
31.3k
          val = val * 16 + (cur - 'a') + 10;
2698
60.9k
      else if ((cur >= 'A') && (cur <= 'F'))
2699
55.6k
          val = val * 16 + (cur - 'A') + 10;
2700
5.33k
      else {
2701
5.33k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2702
5.33k
    val = 0;
2703
5.33k
    break;
2704
5.33k
      }
2705
151k
      if (val > 0x110000)
2706
51.8k
          val = 0x110000;
2707
2708
151k
      ptr++;
2709
151k
      cur = *ptr;
2710
151k
  }
2711
50.1k
  if (cur == ';')
2712
44.7k
      ptr++;
2713
220k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2714
220k
  ptr += 2;
2715
220k
  cur = *ptr;
2716
702k
  while (cur != ';') { /* Non input consuming loops */
2717
489k
      if ((cur >= '0') && (cur <= '9'))
2718
482k
          val = val * 10 + (cur - '0');
2719
7.16k
      else {
2720
7.16k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2721
7.16k
    val = 0;
2722
7.16k
    break;
2723
7.16k
      }
2724
482k
      if (val > 0x110000)
2725
29.2k
          val = 0x110000;
2726
2727
482k
      ptr++;
2728
482k
      cur = *ptr;
2729
482k
  }
2730
220k
  if (cur == ';')
2731
213k
      ptr++;
2732
220k
    } else {
2733
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2734
0
  return(0);
2735
0
    }
2736
270k
    *str = ptr;
2737
2738
    /*
2739
     * [ WFC: Legal Character ]
2740
     * Characters referred to using character references must match the
2741
     * production for Char.
2742
     */
2743
270k
    if (val >= 0x110000) {
2744
2.62k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2745
2.62k
                "xmlParseStringCharRef: character reference out of bounds\n",
2746
2.62k
                val);
2747
267k
    } else if (IS_CHAR(val)) {
2748
251k
        return(val);
2749
251k
    } else {
2750
16.2k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2751
16.2k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2752
16.2k
        val);
2753
16.2k
    }
2754
18.9k
    return(0);
2755
270k
}
2756
2757
/**
2758
 * xmlParserHandlePEReference:
2759
 * @ctxt:  the parser context
2760
 *
2761
 * DEPRECATED: Internal function, do not use.
2762
 *
2763
 * [69] PEReference ::= '%' Name ';'
2764
 *
2765
 * [ WFC: No Recursion ]
2766
 * A parsed entity must not contain a recursive
2767
 * reference to itself, either directly or indirectly.
2768
 *
2769
 * [ WFC: Entity Declared ]
2770
 * In a document without any DTD, a document with only an internal DTD
2771
 * subset which contains no parameter entity references, or a document
2772
 * with "standalone='yes'", ...  ... The declaration of a parameter
2773
 * entity must precede any reference to it...
2774
 *
2775
 * [ VC: Entity Declared ]
2776
 * In a document with an external subset or external parameter entities
2777
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2778
 * must precede any reference to it...
2779
 *
2780
 * [ WFC: In DTD ]
2781
 * Parameter-entity references may only appear in the DTD.
2782
 * NOTE: misleading but this is handled.
2783
 *
2784
 * A PEReference may have been detected in the current input stream
2785
 * the handling is done accordingly to
2786
 *      http://www.w3.org/TR/REC-xml#entproc
2787
 * i.e.
2788
 *   - Included in literal in entity values
2789
 *   - Included as Parameter Entity reference within DTDs
2790
 */
2791
void
2792
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2793
0
    xmlParsePEReference(ctxt);
2794
0
}
2795
2796
/**
2797
 * xmlStringLenDecodeEntities:
2798
 * @ctxt:  the parser context
2799
 * @str:  the input string
2800
 * @len: the string length
2801
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2802
 * @end:  an end marker xmlChar, 0 if none
2803
 * @end2:  an end marker xmlChar, 0 if none
2804
 * @end3:  an end marker xmlChar, 0 if none
2805
 *
2806
 * DEPRECATED: Internal function, don't use.
2807
 *
2808
 * Returns A newly allocated string with the substitution done. The caller
2809
 *      must deallocate it !
2810
 */
2811
xmlChar *
2812
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2813
                           int what ATTRIBUTE_UNUSED,
2814
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2815
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2816
0
        return(NULL);
2817
2818
0
    if ((str[len] != 0) ||
2819
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2820
0
        return(NULL);
2821
2822
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2823
0
}
2824
2825
/**
2826
 * xmlStringDecodeEntities:
2827
 * @ctxt:  the parser context
2828
 * @str:  the input string
2829
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2830
 * @end:  an end marker xmlChar, 0 if none
2831
 * @end2:  an end marker xmlChar, 0 if none
2832
 * @end3:  an end marker xmlChar, 0 if none
2833
 *
2834
 * DEPRECATED: Internal function, don't use.
2835
 *
2836
 * Returns A newly allocated string with the substitution done. The caller
2837
 *      must deallocate it !
2838
 */
2839
xmlChar *
2840
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2841
                        int what ATTRIBUTE_UNUSED,
2842
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2843
0
    if ((ctxt == NULL) || (str == NULL))
2844
0
        return(NULL);
2845
2846
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2847
0
        return(NULL);
2848
2849
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2850
0
}
2851
2852
/************************************************************************
2853
 *                  *
2854
 *    Commodity functions, cleanup needed ?     *
2855
 *                  *
2856
 ************************************************************************/
2857
2858
/**
2859
 * areBlanks:
2860
 * @ctxt:  an XML parser context
2861
 * @str:  a xmlChar *
2862
 * @len:  the size of @str
2863
 * @blank_chars: we know the chars are blanks
2864
 *
2865
 * Is this a sequence of blank chars that one can ignore ?
2866
 *
2867
 * Returns 1 if ignorable 0 otherwise.
2868
 */
2869
2870
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2871
1.87M
                     int blank_chars) {
2872
1.87M
    int i;
2873
1.87M
    xmlNodePtr lastChild;
2874
2875
    /*
2876
     * Don't spend time trying to differentiate them, the same callback is
2877
     * used !
2878
     */
2879
1.87M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2880
1.00M
  return(0);
2881
2882
    /*
2883
     * Check for xml:space value.
2884
     */
2885
874k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2886
874k
        (*(ctxt->space) == -2))
2887
712k
  return(0);
2888
2889
    /*
2890
     * Check that the string is made of blanks
2891
     */
2892
161k
    if (blank_chars == 0) {
2893
717k
  for (i = 0;i < len;i++)
2894
684k
      if (!(IS_BLANK_CH(str[i]))) return(0);
2895
129k
    }
2896
2897
    /*
2898
     * Look if the element is mixed content in the DTD if available
2899
     */
2900
65.4k
    if (ctxt->node == NULL) return(0);
2901
65.4k
    if (ctxt->myDoc != NULL) {
2902
65.4k
        xmlElementPtr elemDecl = NULL;
2903
65.4k
        xmlDocPtr doc = ctxt->myDoc;
2904
65.4k
        const xmlChar *prefix = NULL;
2905
2906
65.4k
        if (ctxt->node->ns)
2907
9.46k
            prefix = ctxt->node->ns->prefix;
2908
65.4k
        if (doc->intSubset != NULL)
2909
25.5k
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2910
25.5k
                                      prefix);
2911
65.4k
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2912
3.76k
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2913
3.76k
                                      prefix);
2914
65.4k
        if (elemDecl != NULL) {
2915
8.49k
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2916
5.46k
                return(1);
2917
3.03k
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2918
3.03k
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2919
1.20k
                return(0);
2920
3.03k
        }
2921
65.4k
    }
2922
2923
    /*
2924
     * Otherwise, heuristic :-\
2925
     */
2926
58.7k
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2927
52.6k
    if ((ctxt->node->children == NULL) &&
2928
52.6k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2929
2930
48.5k
    lastChild = xmlGetLastChild(ctxt->node);
2931
48.5k
    if (lastChild == NULL) {
2932
17.6k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2933
17.6k
            (ctxt->node->content != NULL)) return(0);
2934
30.8k
    } else if (xmlNodeIsText(lastChild))
2935
2.41k
        return(0);
2936
28.4k
    else if ((ctxt->node->children != NULL) &&
2937
28.4k
             (xmlNodeIsText(ctxt->node->children)))
2938
2.19k
        return(0);
2939
43.9k
    return(1);
2940
48.5k
}
2941
2942
/************************************************************************
2943
 *                  *
2944
 *    Extra stuff for namespace support     *
2945
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2946
 *                  *
2947
 ************************************************************************/
2948
2949
/**
2950
 * xmlSplitQName:
2951
 * @ctxt:  an XML parser context
2952
 * @name:  an XML parser context
2953
 * @prefixOut:  a xmlChar **
2954
 *
2955
 * parse an UTF8 encoded XML qualified name string
2956
 *
2957
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2958
 *
2959
 * [NS 6] Prefix ::= NCName
2960
 *
2961
 * [NS 7] LocalPart ::= NCName
2962
 *
2963
 * Returns the local part, and prefix is updated
2964
 *   to get the Prefix if any.
2965
 */
2966
2967
xmlChar *
2968
1.13M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
2969
1.13M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2970
1.13M
    xmlChar *buffer = NULL;
2971
1.13M
    int len = 0;
2972
1.13M
    int max = XML_MAX_NAMELEN;
2973
1.13M
    xmlChar *ret = NULL;
2974
1.13M
    xmlChar *prefix;
2975
1.13M
    const xmlChar *cur = name;
2976
1.13M
    int c;
2977
2978
1.13M
    if (prefixOut == NULL) return(NULL);
2979
1.13M
    *prefixOut = NULL;
2980
2981
1.13M
    if (cur == NULL) return(NULL);
2982
2983
#ifndef XML_XML_NAMESPACE
2984
    /* xml: prefix is not really a namespace */
2985
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2986
        (cur[2] == 'l') && (cur[3] == ':'))
2987
  return(xmlStrdup(name));
2988
#endif
2989
2990
    /* nasty but well=formed */
2991
1.13M
    if (cur[0] == ':')
2992
8.93k
  return(xmlStrdup(name));
2993
2994
1.12M
    c = *cur++;
2995
5.76M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2996
4.64M
  buf[len++] = c;
2997
4.64M
  c = *cur++;
2998
4.64M
    }
2999
1.12M
    if (len >= max) {
3000
  /*
3001
   * Okay someone managed to make a huge name, so he's ready to pay
3002
   * for the processing speed.
3003
   */
3004
7.60k
  max = len * 2;
3005
3006
7.60k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3007
7.60k
  if (buffer == NULL) {
3008
12
      xmlErrMemory(ctxt);
3009
12
      return(NULL);
3010
12
  }
3011
7.59k
  memcpy(buffer, buf, len);
3012
2.09M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3013
2.08M
      if (len + 10 > max) {
3014
7.22k
          xmlChar *tmp;
3015
3016
7.22k
    max *= 2;
3017
7.22k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3018
7.22k
    if (tmp == NULL) {
3019
16
        xmlFree(buffer);
3020
16
        xmlErrMemory(ctxt);
3021
16
        return(NULL);
3022
16
    }
3023
7.20k
    buffer = tmp;
3024
7.20k
      }
3025
2.08M
      buffer[len++] = c;
3026
2.08M
      c = *cur++;
3027
2.08M
  }
3028
7.58k
  buffer[len] = 0;
3029
7.58k
    }
3030
3031
1.12M
    if ((c == ':') && (*cur == 0)) {
3032
16.3k
        if (buffer != NULL)
3033
876
      xmlFree(buffer);
3034
16.3k
  return(xmlStrdup(name));
3035
16.3k
    }
3036
3037
1.10M
    if (buffer == NULL) {
3038
1.09M
  ret = xmlStrndup(buf, len);
3039
1.09M
        if (ret == NULL) {
3040
519
      xmlErrMemory(ctxt);
3041
519
      return(NULL);
3042
519
        }
3043
1.09M
    } else {
3044
6.70k
  ret = buffer;
3045
6.70k
  buffer = NULL;
3046
6.70k
  max = XML_MAX_NAMELEN;
3047
6.70k
    }
3048
3049
3050
1.10M
    if (c == ':') {
3051
212k
  c = *cur;
3052
212k
        prefix = ret;
3053
212k
  if (c == 0) {
3054
0
      ret = xmlStrndup(BAD_CAST "", 0);
3055
0
            if (ret == NULL) {
3056
0
                xmlFree(prefix);
3057
0
                return(NULL);
3058
0
            }
3059
0
            *prefixOut = prefix;
3060
0
            return(ret);
3061
0
  }
3062
212k
  len = 0;
3063
3064
  /*
3065
   * Check that the first character is proper to start
3066
   * a new name
3067
   */
3068
212k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3069
212k
        ((c >= 0x41) && (c <= 0x5A)) ||
3070
212k
        (c == '_') || (c == ':'))) {
3071
28.3k
      int l;
3072
28.3k
      int first = CUR_SCHAR(cur, l);
3073
3074
28.3k
      if (!IS_LETTER(first) && (first != '_')) {
3075
17.8k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3076
17.8k
          "Name %s is not XML Namespace compliant\n",
3077
17.8k
          name);
3078
17.8k
      }
3079
28.3k
  }
3080
212k
  cur++;
3081
3082
2.05M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3083
1.84M
      buf[len++] = c;
3084
1.84M
      c = *cur++;
3085
1.84M
  }
3086
212k
  if (len >= max) {
3087
      /*
3088
       * Okay someone managed to make a huge name, so he's ready to pay
3089
       * for the processing speed.
3090
       */
3091
8.06k
      max = len * 2;
3092
3093
8.06k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3094
8.06k
      if (buffer == NULL) {
3095
13
          xmlErrMemory(ctxt);
3096
13
                xmlFree(prefix);
3097
13
    return(NULL);
3098
13
      }
3099
8.05k
      memcpy(buffer, buf, len);
3100
3.24M
      while (c != 0) { /* tested bigname2.xml */
3101
3.24M
    if (len + 10 > max) {
3102
5.27k
        xmlChar *tmp;
3103
3104
5.27k
        max *= 2;
3105
5.27k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3106
5.27k
        if (tmp == NULL) {
3107
17
      xmlErrMemory(ctxt);
3108
17
                        xmlFree(prefix);
3109
17
      xmlFree(buffer);
3110
17
      return(NULL);
3111
17
        }
3112
5.26k
        buffer = tmp;
3113
5.26k
    }
3114
3.24M
    buffer[len++] = c;
3115
3.24M
    c = *cur++;
3116
3.24M
      }
3117
8.03k
      buffer[len] = 0;
3118
8.03k
  }
3119
3120
212k
  if (buffer == NULL) {
3121
203k
      ret = xmlStrndup(buf, len);
3122
203k
            if (ret == NULL) {
3123
116
                xmlFree(prefix);
3124
116
                return(NULL);
3125
116
            }
3126
203k
  } else {
3127
8.03k
      ret = buffer;
3128
8.03k
  }
3129
3130
211k
        *prefixOut = prefix;
3131
211k
    }
3132
3133
1.10M
    return(ret);
3134
1.10M
}
3135
3136
/************************************************************************
3137
 *                  *
3138
 *      The parser itself       *
3139
 *  Relates to http://www.w3.org/TR/REC-xml       *
3140
 *                  *
3141
 ************************************************************************/
3142
3143
/************************************************************************
3144
 *                  *
3145
 *  Routines to parse Name, NCName and NmToken      *
3146
 *                  *
3147
 ************************************************************************/
3148
3149
/*
3150
 * The two following functions are related to the change of accepted
3151
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3152
 * They correspond to the modified production [4] and the new production [4a]
3153
 * changes in that revision. Also note that the macros used for the
3154
 * productions Letter, Digit, CombiningChar and Extender are not needed
3155
 * anymore.
3156
 * We still keep compatibility to pre-revision5 parsing semantic if the
3157
 * new XML_PARSE_OLD10 option is given to the parser.
3158
 */
3159
static int
3160
3.43M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3161
3.43M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3162
        /*
3163
   * Use the new checks of production [4] [4a] amd [5] of the
3164
   * Update 5 of XML-1.0
3165
   */
3166
2.80M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3167
2.80M
      (((c >= 'a') && (c <= 'z')) ||
3168
2.79M
       ((c >= 'A') && (c <= 'Z')) ||
3169
2.79M
       (c == '_') || (c == ':') ||
3170
2.79M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3171
2.79M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3172
2.79M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3173
2.79M
       ((c >= 0x370) && (c <= 0x37D)) ||
3174
2.79M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3175
2.79M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3176
2.79M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3177
2.79M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3178
2.79M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3179
2.79M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3180
2.79M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3181
2.79M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3182
2.06M
      return(1);
3183
2.80M
    } else {
3184
634k
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3185
485k
      return(1);
3186
634k
    }
3187
887k
    return(0);
3188
3.43M
}
3189
3190
static int
3191
42.2M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3192
42.2M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3193
        /*
3194
   * Use the new checks of production [4] [4a] amd [5] of the
3195
   * Update 5 of XML-1.0
3196
   */
3197
37.8M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3198
37.8M
      (((c >= 'a') && (c <= 'z')) ||
3199
37.8M
       ((c >= 'A') && (c <= 'Z')) ||
3200
37.8M
       ((c >= '0') && (c <= '9')) || /* !start */
3201
37.8M
       (c == '_') || (c == ':') ||
3202
37.8M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3203
37.8M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3204
37.8M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3205
37.8M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3206
37.8M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3207
37.8M
       ((c >= 0x370) && (c <= 0x37D)) ||
3208
37.8M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3209
37.8M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3210
37.8M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3211
37.8M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3212
37.8M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3213
37.8M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3214
37.8M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3215
37.8M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3216
37.8M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3217
35.8M
       return(1);
3218
37.8M
    } else {
3219
4.36M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3220
4.36M
            (c == '.') || (c == '-') ||
3221
4.36M
      (c == '_') || (c == ':') ||
3222
4.36M
      (IS_COMBINING(c)) ||
3223
4.36M
      (IS_EXTENDER(c)))
3224
3.86M
      return(1);
3225
4.36M
    }
3226
2.57M
    return(0);
3227
42.2M
}
3228
3229
static const xmlChar *
3230
1.36M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3231
1.36M
    const xmlChar *ret;
3232
1.36M
    int len = 0, l;
3233
1.36M
    int c;
3234
1.36M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3235
386k
                    XML_MAX_TEXT_LENGTH :
3236
1.36M
                    XML_MAX_NAME_LENGTH;
3237
3238
    /*
3239
     * Handler for more complex cases
3240
     */
3241
1.36M
    c = CUR_CHAR(l);
3242
1.36M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3243
        /*
3244
   * Use the new checks of production [4] [4a] amd [5] of the
3245
   * Update 5 of XML-1.0
3246
   */
3247
1.24M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3248
1.24M
      (!(((c >= 'a') && (c <= 'z')) ||
3249
1.20M
         ((c >= 'A') && (c <= 'Z')) ||
3250
1.20M
         (c == '_') || (c == ':') ||
3251
1.20M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3252
1.20M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3253
1.20M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3254
1.20M
         ((c >= 0x370) && (c <= 0x37D)) ||
3255
1.20M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3256
1.20M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3257
1.20M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3258
1.20M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3259
1.20M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3260
1.20M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3261
1.20M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3262
1.20M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3263
741k
      return(NULL);
3264
741k
  }
3265
501k
  len += l;
3266
501k
  NEXTL(l);
3267
501k
  c = CUR_CHAR(l);
3268
29.0M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3269
29.0M
         (((c >= 'a') && (c <= 'z')) ||
3270
28.9M
          ((c >= 'A') && (c <= 'Z')) ||
3271
28.9M
          ((c >= '0') && (c <= '9')) || /* !start */
3272
28.9M
          (c == '_') || (c == ':') ||
3273
28.9M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3274
28.9M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3275
28.9M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3276
28.9M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3277
28.9M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3278
28.9M
          ((c >= 0x370) && (c <= 0x37D)) ||
3279
28.9M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3280
28.9M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3281
28.9M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3282
28.9M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3283
28.9M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3284
28.9M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3285
28.9M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3286
28.9M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3287
28.9M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3288
28.9M
    )) {
3289
28.5M
            if (len <= INT_MAX - l)
3290
28.5M
          len += l;
3291
28.5M
      NEXTL(l);
3292
28.5M
      c = CUR_CHAR(l);
3293
28.5M
  }
3294
501k
    } else {
3295
125k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3296
125k
      (!IS_LETTER(c) && (c != '_') &&
3297
116k
       (c != ':'))) {
3298
87.6k
      return(NULL);
3299
87.6k
  }
3300
37.4k
  len += l;
3301
37.4k
  NEXTL(l);
3302
37.4k
  c = CUR_CHAR(l);
3303
3304
2.31M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3305
2.31M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3306
2.31M
    (c == '.') || (c == '-') ||
3307
2.31M
    (c == '_') || (c == ':') ||
3308
2.31M
    (IS_COMBINING(c)) ||
3309
2.31M
    (IS_EXTENDER(c)))) {
3310
2.28M
            if (len <= INT_MAX - l)
3311
2.28M
          len += l;
3312
2.28M
      NEXTL(l);
3313
2.28M
      c = CUR_CHAR(l);
3314
2.28M
  }
3315
37.4k
    }
3316
539k
    if (len > maxLength) {
3317
326
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3318
326
        return(NULL);
3319
326
    }
3320
538k
    if (ctxt->input->cur - ctxt->input->base < len) {
3321
        /*
3322
         * There were a couple of bugs where PERefs lead to to a change
3323
         * of the buffer. Check the buffer size to avoid passing an invalid
3324
         * pointer to xmlDictLookup.
3325
         */
3326
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3327
0
                    "unexpected change of input buffer");
3328
0
        return (NULL);
3329
0
    }
3330
538k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3331
1.29k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3332
537k
    else
3333
537k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3334
538k
    if (ret == NULL)
3335
20
        xmlErrMemory(ctxt);
3336
538k
    return(ret);
3337
538k
}
3338
3339
/**
3340
 * xmlParseName:
3341
 * @ctxt:  an XML parser context
3342
 *
3343
 * DEPRECATED: Internal function, don't use.
3344
 *
3345
 * parse an XML name.
3346
 *
3347
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3348
 *                  CombiningChar | Extender
3349
 *
3350
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3351
 *
3352
 * [6] Names ::= Name (#x20 Name)*
3353
 *
3354
 * Returns the Name parsed or NULL
3355
 */
3356
3357
const xmlChar *
3358
6.64M
xmlParseName(xmlParserCtxtPtr ctxt) {
3359
6.64M
    const xmlChar *in;
3360
6.64M
    const xmlChar *ret;
3361
6.64M
    size_t count = 0;
3362
6.64M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3363
1.58M
                       XML_MAX_TEXT_LENGTH :
3364
6.64M
                       XML_MAX_NAME_LENGTH;
3365
3366
6.64M
    GROW;
3367
3368
    /*
3369
     * Accelerator for simple ASCII names
3370
     */
3371
6.64M
    in = ctxt->input->cur;
3372
6.64M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3373
6.64M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3374
6.64M
  (*in == '_') || (*in == ':')) {
3375
5.55M
  in++;
3376
27.9M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3377
27.9M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3378
27.9M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3379
27.9M
         (*in == '_') || (*in == '-') ||
3380
27.9M
         (*in == ':') || (*in == '.'))
3381
22.4M
      in++;
3382
5.55M
  if ((*in > 0) && (*in < 0x80)) {
3383
5.27M
      count = in - ctxt->input->cur;
3384
5.27M
            if (count > maxLength) {
3385
57
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3386
57
                return(NULL);
3387
57
            }
3388
5.27M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3389
5.27M
      ctxt->input->cur = in;
3390
5.27M
      ctxt->input->col += count;
3391
5.27M
      if (ret == NULL)
3392
32
          xmlErrMemory(ctxt);
3393
5.27M
      return(ret);
3394
5.27M
  }
3395
5.55M
    }
3396
    /* accelerator for special cases */
3397
1.36M
    return(xmlParseNameComplex(ctxt));
3398
6.64M
}
3399
3400
static xmlHashedString
3401
1.47M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3402
1.47M
    xmlHashedString ret;
3403
1.47M
    int len = 0, l;
3404
1.47M
    int c;
3405
1.47M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3406
506k
                    XML_MAX_TEXT_LENGTH :
3407
1.47M
                    XML_MAX_NAME_LENGTH;
3408
1.47M
    size_t startPosition = 0;
3409
3410
1.47M
    ret.name = NULL;
3411
1.47M
    ret.hashValue = 0;
3412
3413
    /*
3414
     * Handler for more complex cases
3415
     */
3416
1.47M
    startPosition = CUR_PTR - BASE_PTR;
3417
1.47M
    c = CUR_CHAR(l);
3418
1.47M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3419
1.47M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3420
1.06M
  return(ret);
3421
1.06M
    }
3422
3423
21.8M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3424
21.8M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3425
21.4M
        if (len <= INT_MAX - l)
3426
21.4M
      len += l;
3427
21.4M
  NEXTL(l);
3428
21.4M
  c = CUR_CHAR(l);
3429
21.4M
    }
3430
408k
    if (len > maxLength) {
3431
281
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3432
281
        return(ret);
3433
281
    }
3434
408k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3435
408k
    if (ret.name == NULL)
3436
20
        xmlErrMemory(ctxt);
3437
408k
    return(ret);
3438
408k
}
3439
3440
/**
3441
 * xmlParseNCName:
3442
 * @ctxt:  an XML parser context
3443
 * @len:  length of the string parsed
3444
 *
3445
 * parse an XML name.
3446
 *
3447
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3448
 *                      CombiningChar | Extender
3449
 *
3450
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3451
 *
3452
 * Returns the Name parsed or NULL
3453
 */
3454
3455
static xmlHashedString
3456
8.83M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3457
8.83M
    const xmlChar *in, *e;
3458
8.83M
    xmlHashedString ret;
3459
8.83M
    size_t count = 0;
3460
8.83M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3461
3.36M
                       XML_MAX_TEXT_LENGTH :
3462
8.83M
                       XML_MAX_NAME_LENGTH;
3463
3464
8.83M
    ret.name = NULL;
3465
3466
    /*
3467
     * Accelerator for simple ASCII names
3468
     */
3469
8.83M
    in = ctxt->input->cur;
3470
8.83M
    e = ctxt->input->end;
3471
8.83M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3472
8.83M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3473
8.83M
   (*in == '_')) && (in < e)) {
3474
7.56M
  in++;
3475
28.9M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3476
28.9M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3477
28.9M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3478
28.9M
          (*in == '_') || (*in == '-') ||
3479
28.9M
          (*in == '.')) && (in < e))
3480
21.3M
      in++;
3481
7.56M
  if (in >= e)
3482
7.38k
      goto complex;
3483
7.55M
  if ((*in > 0) && (*in < 0x80)) {
3484
7.36M
      count = in - ctxt->input->cur;
3485
7.36M
            if (count > maxLength) {
3486
34
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3487
34
                return(ret);
3488
34
            }
3489
7.36M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3490
7.36M
      ctxt->input->cur = in;
3491
7.36M
      ctxt->input->col += count;
3492
7.36M
      if (ret.name == NULL) {
3493
36
          xmlErrMemory(ctxt);
3494
36
      }
3495
7.36M
      return(ret);
3496
7.36M
  }
3497
7.55M
    }
3498
1.47M
complex:
3499
1.47M
    return(xmlParseNCNameComplex(ctxt));
3500
8.83M
}
3501
3502
/**
3503
 * xmlParseNameAndCompare:
3504
 * @ctxt:  an XML parser context
3505
 *
3506
 * parse an XML name and compares for match
3507
 * (specialized for endtag parsing)
3508
 *
3509
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3510
 * and the name for mismatch
3511
 */
3512
3513
static const xmlChar *
3514
942k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3515
942k
    register const xmlChar *cmp = other;
3516
942k
    register const xmlChar *in;
3517
942k
    const xmlChar *ret;
3518
3519
942k
    GROW;
3520
3521
942k
    in = ctxt->input->cur;
3522
2.67M
    while (*in != 0 && *in == *cmp) {
3523
1.72M
  ++in;
3524
1.72M
  ++cmp;
3525
1.72M
    }
3526
942k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3527
  /* success */
3528
876k
  ctxt->input->col += in - ctxt->input->cur;
3529
876k
  ctxt->input->cur = in;
3530
876k
  return (const xmlChar*) 1;
3531
876k
    }
3532
    /* failure (or end of input buffer), check with full function */
3533
66.1k
    ret = xmlParseName (ctxt);
3534
    /* strings coming from the dictionary direct compare possible */
3535
66.1k
    if (ret == other) {
3536
6.20k
  return (const xmlChar*) 1;
3537
6.20k
    }
3538
59.9k
    return ret;
3539
66.1k
}
3540
3541
/**
3542
 * xmlParseStringName:
3543
 * @ctxt:  an XML parser context
3544
 * @str:  a pointer to the string pointer (IN/OUT)
3545
 *
3546
 * parse an XML name.
3547
 *
3548
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3549
 *                  CombiningChar | Extender
3550
 *
3551
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3552
 *
3553
 * [6] Names ::= Name (#x20 Name)*
3554
 *
3555
 * Returns the Name parsed or NULL. The @str pointer
3556
 * is updated to the current location in the string.
3557
 */
3558
3559
static xmlChar *
3560
2.05M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3561
2.05M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3562
2.05M
    xmlChar *ret;
3563
2.05M
    const xmlChar *cur = *str;
3564
2.05M
    int len = 0, l;
3565
2.05M
    int c;
3566
2.05M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3567
583k
                    XML_MAX_TEXT_LENGTH :
3568
2.05M
                    XML_MAX_NAME_LENGTH;
3569
3570
2.05M
    c = CUR_SCHAR(cur, l);
3571
2.05M
    if (!xmlIsNameStartChar(ctxt, c)) {
3572
10.3k
  return(NULL);
3573
10.3k
    }
3574
3575
2.04M
    COPY_BUF(buf, len, c);
3576
2.04M
    cur += l;
3577
2.04M
    c = CUR_SCHAR(cur, l);
3578
8.82M
    while (xmlIsNameChar(ctxt, c)) {
3579
6.79M
  COPY_BUF(buf, len, c);
3580
6.79M
  cur += l;
3581
6.79M
  c = CUR_SCHAR(cur, l);
3582
6.79M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3583
      /*
3584
       * Okay someone managed to make a huge name, so he's ready to pay
3585
       * for the processing speed.
3586
       */
3587
8.88k
      xmlChar *buffer;
3588
8.88k
      int max = len * 2;
3589
3590
8.88k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3591
8.88k
      if (buffer == NULL) {
3592
13
          xmlErrMemory(ctxt);
3593
13
    return(NULL);
3594
13
      }
3595
8.87k
      memcpy(buffer, buf, len);
3596
1.98M
      while (xmlIsNameChar(ctxt, c)) {
3597
1.97M
    if (len + 10 > max) {
3598
4.82k
        xmlChar *tmp;
3599
3600
4.82k
        max *= 2;
3601
4.82k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3602
4.82k
        if (tmp == NULL) {
3603
8
      xmlErrMemory(ctxt);
3604
8
      xmlFree(buffer);
3605
8
      return(NULL);
3606
8
        }
3607
4.81k
        buffer = tmp;
3608
4.81k
    }
3609
1.97M
    COPY_BUF(buffer, len, c);
3610
1.97M
    cur += l;
3611
1.97M
    c = CUR_SCHAR(cur, l);
3612
1.97M
                if (len > maxLength) {
3613
44
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3614
44
                    xmlFree(buffer);
3615
44
                    return(NULL);
3616
44
                }
3617
1.97M
      }
3618
8.81k
      buffer[len] = 0;
3619
8.81k
      *str = cur;
3620
8.81k
      return(buffer);
3621
8.87k
  }
3622
6.79M
    }
3623
2.03M
    if (len > maxLength) {
3624
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3625
0
        return(NULL);
3626
0
    }
3627
2.03M
    *str = cur;
3628
2.03M
    ret = xmlStrndup(buf, len);
3629
2.03M
    if (ret == NULL)
3630
217
        xmlErrMemory(ctxt);
3631
2.03M
    return(ret);
3632
2.03M
}
3633
3634
/**
3635
 * xmlParseNmtoken:
3636
 * @ctxt:  an XML parser context
3637
 *
3638
 * DEPRECATED: Internal function, don't use.
3639
 *
3640
 * parse an XML Nmtoken.
3641
 *
3642
 * [7] Nmtoken ::= (NameChar)+
3643
 *
3644
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3645
 *
3646
 * Returns the Nmtoken parsed or NULL
3647
 */
3648
3649
xmlChar *
3650
280k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3651
280k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3652
280k
    xmlChar *ret;
3653
280k
    int len = 0, l;
3654
280k
    int c;
3655
280k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3656
66.2k
                    XML_MAX_TEXT_LENGTH :
3657
280k
                    XML_MAX_NAME_LENGTH;
3658
3659
280k
    c = CUR_CHAR(l);
3660
3661
1.05M
    while (xmlIsNameChar(ctxt, c)) {
3662
777k
  COPY_BUF(buf, len, c);
3663
777k
  NEXTL(l);
3664
777k
  c = CUR_CHAR(l);
3665
777k
  if (len >= XML_MAX_NAMELEN) {
3666
      /*
3667
       * Okay someone managed to make a huge token, so he's ready to pay
3668
       * for the processing speed.
3669
       */
3670
4.70k
      xmlChar *buffer;
3671
4.70k
      int max = len * 2;
3672
3673
4.70k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3674
4.70k
      if (buffer == NULL) {
3675
16
          xmlErrMemory(ctxt);
3676
16
    return(NULL);
3677
16
      }
3678
4.68k
      memcpy(buffer, buf, len);
3679
8.69M
      while (xmlIsNameChar(ctxt, c)) {
3680
8.68M
    if (len + 10 > max) {
3681
9.57k
        xmlChar *tmp;
3682
3683
9.57k
        max *= 2;
3684
9.57k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3685
9.57k
        if (tmp == NULL) {
3686
12
      xmlErrMemory(ctxt);
3687
12
      xmlFree(buffer);
3688
12
      return(NULL);
3689
12
        }
3690
9.55k
        buffer = tmp;
3691
9.55k
    }
3692
8.68M
    COPY_BUF(buffer, len, c);
3693
8.68M
                if (len > maxLength) {
3694
340
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3695
340
                    xmlFree(buffer);
3696
340
                    return(NULL);
3697
340
                }
3698
8.68M
    NEXTL(l);
3699
8.68M
    c = CUR_CHAR(l);
3700
8.68M
      }
3701
4.33k
      buffer[len] = 0;
3702
4.33k
      return(buffer);
3703
4.68k
  }
3704
777k
    }
3705
276k
    if (len == 0)
3706
41.2k
        return(NULL);
3707
234k
    if (len > maxLength) {
3708
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3709
0
        return(NULL);
3710
0
    }
3711
234k
    ret = xmlStrndup(buf, len);
3712
234k
    if (ret == NULL)
3713
59
        xmlErrMemory(ctxt);
3714
234k
    return(ret);
3715
234k
}
3716
3717
/**
3718
 * xmlExpandPEsInEntityValue:
3719
 * @ctxt:  parser context
3720
 * @buf:  string buffer
3721
 * @str:  entity value
3722
 * @length:  size of entity value
3723
 * @depth:  nesting depth
3724
 *
3725
 * Validate an entity value and expand parameter entities.
3726
 */
3727
static void
3728
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3729
233k
                          const xmlChar *str, int length, int depth) {
3730
233k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3731
233k
    const xmlChar *end, *chunk;
3732
233k
    int c, l;
3733
3734
233k
    if (str == NULL)
3735
25.6k
        return;
3736
3737
208k
    depth += 1;
3738
208k
    if (depth > maxDepth) {
3739
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3740
0
                       "Maximum entity nesting depth exceeded");
3741
0
  return;
3742
0
    }
3743
3744
208k
    end = str + length;
3745
208k
    chunk = str;
3746
3747
330M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3748
330M
        c = *str;
3749
3750
330M
        if (c >= 0x80) {
3751
192M
            l = xmlUTF8MultibyteLen(ctxt, str,
3752
192M
                    "invalid character in entity value\n");
3753
192M
            if (l == 0) {
3754
64.2M
                if (chunk < str)
3755
319k
                    xmlSBufAddString(buf, chunk, str - chunk);
3756
64.2M
                xmlSBufAddReplChar(buf);
3757
64.2M
                str += 1;
3758
64.2M
                chunk = str;
3759
128M
            } else {
3760
128M
                str += l;
3761
128M
            }
3762
192M
        } else if (c == '&') {
3763
259k
            if (str[1] == '#') {
3764
71.2k
                if (chunk < str)
3765
37.6k
                    xmlSBufAddString(buf, chunk, str - chunk);
3766
3767
71.2k
                c = xmlParseStringCharRef(ctxt, &str);
3768
71.2k
                if (c == 0)
3769
18.8k
                    return;
3770
3771
52.3k
                xmlSBufAddChar(buf, c);
3772
3773
52.3k
                chunk = str;
3774
187k
            } else {
3775
187k
                xmlChar *name;
3776
3777
                /*
3778
                 * General entity references are checked for
3779
                 * syntactic validity.
3780
                 */
3781
187k
                str++;
3782
187k
                name = xmlParseStringName(ctxt, &str);
3783
3784
187k
                if ((name == NULL) || (*str++ != ';')) {
3785
8.48k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3786
8.48k
                            "EntityValue: '&' forbidden except for entities "
3787
8.48k
                            "references\n");
3788
8.48k
                    xmlFree(name);
3789
8.48k
                    return;
3790
8.48k
                }
3791
3792
179k
                xmlFree(name);
3793
179k
            }
3794
137M
        } else if (c == '%') {
3795
95.7k
            xmlEntityPtr ent;
3796
3797
95.7k
            if (chunk < str)
3798
54.4k
                xmlSBufAddString(buf, chunk, str - chunk);
3799
3800
95.7k
            ent = xmlParseStringPEReference(ctxt, &str);
3801
95.7k
            if (ent == NULL)
3802
20.4k
                return;
3803
3804
75.2k
            if (!PARSER_EXTERNAL(ctxt)) {
3805
770
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3806
770
                return;
3807
770
            }
3808
3809
74.4k
            if (ent->content == NULL) {
3810
                /*
3811
                 * Note: external parsed entities will not be loaded,
3812
                 * it is not required for a non-validating parser to
3813
                 * complete external PEReferences coming from the
3814
                 * internal subset
3815
                 */
3816
26.2k
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3817
26.2k
                    ((ctxt->replaceEntities) ||
3818
26.2k
                     (ctxt->validate))) {
3819
24.5k
                    xmlLoadEntityContent(ctxt, ent);
3820
24.5k
                } else {
3821
1.68k
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3822
1.68k
                                  "not validating will not read content for "
3823
1.68k
                                  "PE entity %s\n", ent->name, NULL);
3824
1.68k
                }
3825
26.2k
            }
3826
3827
            /*
3828
             * TODO: Skip if ent->content is still NULL.
3829
             */
3830
3831
74.4k
            if (xmlParserEntityCheck(ctxt, ent->length))
3832
87
                return;
3833
3834
74.3k
            if (ent->flags & XML_ENT_EXPANDING) {
3835
117
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3836
117
                xmlHaltParser(ctxt);
3837
117
                return;
3838
117
            }
3839
3840
74.2k
            ent->flags |= XML_ENT_EXPANDING;
3841
74.2k
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3842
74.2k
                                      depth);
3843
74.2k
            ent->flags &= ~XML_ENT_EXPANDING;
3844
3845
74.2k
            chunk = str;
3846
137M
        } else {
3847
            /* Normal ASCII char */
3848
137M
            if (!IS_BYTE_CHAR(c)) {
3849
445k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3850
445k
                        "invalid character in entity value\n");
3851
445k
                if (chunk < str)
3852
52.2k
                    xmlSBufAddString(buf, chunk, str - chunk);
3853
445k
                xmlSBufAddReplChar(buf);
3854
445k
                str += 1;
3855
445k
                chunk = str;
3856
137M
            } else {
3857
137M
                str += 1;
3858
137M
            }
3859
137M
        }
3860
330M
    }
3861
3862
159k
    if (chunk < str)
3863
125k
        xmlSBufAddString(buf, chunk, str - chunk);
3864
3865
159k
    return;
3866
208k
}
3867
3868
/**
3869
 * xmlParseEntityValue:
3870
 * @ctxt:  an XML parser context
3871
 * @orig:  if non-NULL store a copy of the original entity value
3872
 *
3873
 * DEPRECATED: Internal function, don't use.
3874
 *
3875
 * parse a value for ENTITY declarations
3876
 *
3877
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3878
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3879
 *
3880
 * Returns the EntityValue parsed with reference substituted or NULL
3881
 */
3882
xmlChar *
3883
160k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3884
160k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3885
42.3k
                         XML_MAX_HUGE_LENGTH :
3886
160k
                         XML_MAX_TEXT_LENGTH;
3887
160k
    xmlSBuf buf;
3888
160k
    const xmlChar *start;
3889
160k
    int quote, length;
3890
3891
160k
    xmlSBufInit(&buf, maxLength);
3892
3893
160k
    GROW;
3894
3895
160k
    quote = CUR;
3896
160k
    if ((quote != '"') && (quote != '\'')) {
3897
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3898
0
  return(NULL);
3899
0
    }
3900
160k
    CUR_PTR++;
3901
3902
160k
    length = 0;
3903
3904
    /*
3905
     * Copy raw content of the entity into a buffer
3906
     */
3907
357M
    while (1) {
3908
357M
        int c;
3909
3910
357M
        if (PARSER_STOPPED(ctxt))
3911
31
            goto error;
3912
3913
357M
        if (CUR_PTR >= ctxt->input->end) {
3914
484
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3915
484
            goto error;
3916
484
        }
3917
3918
357M
        c = CUR;
3919
3920
357M
        if (c == 0) {
3921
206
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3922
206
                    "invalid character in entity value\n");
3923
206
            goto error;
3924
206
        }
3925
357M
        if (c == quote)
3926
159k
            break;
3927
357M
        NEXTL(1);
3928
357M
        length += 1;
3929
3930
        /*
3931
         * TODO: Check growth threshold
3932
         */
3933
357M
        if (ctxt->input->end - CUR_PTR < 10)
3934
96.5k
            GROW;
3935
357M
    }
3936
3937
159k
    start = CUR_PTR - length;
3938
3939
159k
    if (orig != NULL) {
3940
159k
        *orig = xmlStrndup(start, length);
3941
159k
        if (*orig == NULL)
3942
79
            xmlErrMemory(ctxt);
3943
159k
    }
3944
3945
159k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3946
3947
159k
    NEXTL(1);
3948
3949
159k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3950
3951
721
error:
3952
721
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3953
721
    return(NULL);
3954
160k
}
3955
3956
/**
3957
 * xmlCheckEntityInAttValue:
3958
 * @ctxt:  parser context
3959
 * @pent:  entity
3960
 * @depth:  nesting depth
3961
 *
3962
 * Check an entity reference in an attribute value for validity
3963
 * without expanding it.
3964
 */
3965
static void
3966
11.0k
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3967
11.0k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3968
11.0k
    const xmlChar *str;
3969
11.0k
    unsigned long expandedSize = pent->length;
3970
11.0k
    int c, flags;
3971
3972
11.0k
    depth += 1;
3973
11.0k
    if (depth > maxDepth) {
3974
63
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3975
63
                       "Maximum entity nesting depth exceeded");
3976
63
  return;
3977
63
    }
3978
3979
11.0k
    if (pent->flags & XML_ENT_EXPANDING) {
3980
74
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3981
74
        xmlHaltParser(ctxt);
3982
74
        return;
3983
74
    }
3984
3985
    /*
3986
     * If we're parsing a default attribute value in DTD content,
3987
     * the entity might reference other entities which weren't
3988
     * defined yet, so the check isn't reliable.
3989
     */
3990
10.9k
    if (ctxt->inSubset == 0)
3991
10.8k
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3992
97
    else
3993
97
        flags = XML_ENT_VALIDATED;
3994
3995
10.9k
    str = pent->content;
3996
10.9k
    if (str == NULL)
3997
0
        goto done;
3998
3999
    /*
4000
     * Note that entity values are already validated. We only check
4001
     * for illegal less-than signs and compute the expanded size
4002
     * of the entity. No special handling for multi-byte characters
4003
     * is needed.
4004
     */
4005
32.2M
    while (!PARSER_STOPPED(ctxt)) {
4006
32.2M
        c = *str;
4007
4008
32.2M
  if (c != '&') {
4009
32.1M
            if (c == 0)
4010
10.3k
                break;
4011
4012
32.1M
            if (c == '<')
4013
10.7k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4014
10.7k
                        "'<' in entity '%s' is not allowed in attributes "
4015
10.7k
                        "values\n", pent->name);
4016
4017
32.1M
            str += 1;
4018
32.1M
        } else if (str[1] == '#') {
4019
1.52k
            int val;
4020
4021
1.52k
      val = xmlParseStringCharRef(ctxt, &str);
4022
1.52k
      if (val == 0) {
4023
30
                pent->content[0] = 0;
4024
30
                break;
4025
30
            }
4026
24.0k
  } else {
4027
24.0k
            xmlChar *name;
4028
24.0k
            xmlEntityPtr ent;
4029
4030
24.0k
      name = xmlParseStringEntityRef(ctxt, &str);
4031
24.0k
      if (name == NULL) {
4032
153
                pent->content[0] = 0;
4033
153
                break;
4034
153
            }
4035
4036
23.8k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4037
23.8k
            xmlFree(name);
4038
4039
23.8k
            if ((ent != NULL) &&
4040
23.8k
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4041
15.5k
                if ((ent->flags & flags) != flags) {
4042
7.74k
                    pent->flags |= XML_ENT_EXPANDING;
4043
7.74k
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
4044
7.74k
                    pent->flags &= ~XML_ENT_EXPANDING;
4045
7.74k
                }
4046
4047
15.5k
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4048
15.5k
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4049
15.5k
            }
4050
23.8k
        }
4051
32.2M
    }
4052
4053
10.9k
done:
4054
10.9k
    if (ctxt->inSubset == 0)
4055
10.8k
        pent->expandedSize = expandedSize;
4056
4057
10.9k
    pent->flags |= flags;
4058
10.9k
}
4059
4060
/**
4061
 * xmlExpandEntityInAttValue:
4062
 * @ctxt:  parser context
4063
 * @buf:  string buffer
4064
 * @str:  entity or attribute value
4065
 * @pent:  entity for entity value, NULL for attribute values
4066
 * @normalize:  whether to collapse whitespace
4067
 * @inSpace:  whitespace state
4068
 * @depth:  nesting depth
4069
 * @check:  whether to check for amplification
4070
 *
4071
 * Expand general entity references in an entity or attribute value.
4072
 * Perform attribute value normalization.
4073
 */
4074
static void
4075
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4076
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
4077
1.54M
                          int *inSpace, int depth, int check) {
4078
1.54M
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4079
1.54M
    int c, chunkSize;
4080
4081
1.54M
    if (str == NULL)
4082
0
        return;
4083
4084
1.54M
    depth += 1;
4085
1.54M
    if (depth > maxDepth) {
4086
121k
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4087
121k
                       "Maximum entity nesting depth exceeded");
4088
121k
  return;
4089
121k
    }
4090
4091
1.42M
    if (pent != NULL) {
4092
1.37M
        if (pent->flags & XML_ENT_EXPANDING) {
4093
32
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4094
32
            xmlHaltParser(ctxt);
4095
32
            return;
4096
32
        }
4097
4098
1.37M
        if (check) {
4099
1.35M
            if (xmlParserEntityCheck(ctxt, pent->length))
4100
131
                return;
4101
1.35M
        }
4102
1.37M
    }
4103
4104
1.42M
    chunkSize = 0;
4105
4106
    /*
4107
     * Note that entity values are already validated. No special
4108
     * handling for multi-byte characters is needed.
4109
     */
4110
634M
    while (!PARSER_STOPPED(ctxt)) {
4111
634M
        c = *str;
4112
4113
634M
  if (c != '&') {
4114
632M
            if (c == 0)
4115
1.28M
                break;
4116
4117
            /*
4118
             * If this function is called without an entity, it is used to
4119
             * expand entities in an attribute content where less-than was
4120
             * already unscaped and is allowed.
4121
             */
4122
631M
            if ((pent != NULL) && (c == '<')) {
4123
137k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4124
137k
                        "'<' in entity '%s' is not allowed in attributes "
4125
137k
                        "values\n", pent->name);
4126
137k
                break;
4127
137k
            }
4128
4129
631M
            if (c <= 0x20) {
4130
13.5M
                if ((normalize) && (*inSpace)) {
4131
                    /* Skip char */
4132
547k
                    if (chunkSize > 0) {
4133
104k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4134
104k
                        chunkSize = 0;
4135
104k
                    }
4136
13.0M
                } else if (c < 0x20) {
4137
6.92M
                    if (chunkSize > 0) {
4138
249k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4139
249k
                        chunkSize = 0;
4140
249k
                    }
4141
4142
6.92M
                    xmlSBufAddCString(buf, " ", 1);
4143
6.92M
                } else {
4144
6.11M
                    chunkSize += 1;
4145
6.11M
                }
4146
4147
13.5M
                *inSpace = 1;
4148
617M
            } else {
4149
617M
                chunkSize += 1;
4150
617M
                *inSpace = 0;
4151
617M
            }
4152
4153
631M
            str += 1;
4154
631M
        } else if (str[1] == '#') {
4155
197k
            int val;
4156
4157
197k
            if (chunkSize > 0) {
4158
180k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4159
180k
                chunkSize = 0;
4160
180k
            }
4161
4162
197k
      val = xmlParseStringCharRef(ctxt, &str);
4163
197k
      if (val == 0) {
4164
39
                if (pent != NULL)
4165
39
                    pent->content[0] = 0;
4166
39
                break;
4167
39
            }
4168
4169
197k
            if (val == ' ') {
4170
7.89k
                if ((!normalize) || (!*inSpace))
4171
7.79k
                    xmlSBufAddCString(buf, " ", 1);
4172
7.89k
                *inSpace = 1;
4173
189k
            } else {
4174
189k
                xmlSBufAddChar(buf, val);
4175
189k
                *inSpace = 0;
4176
189k
            }
4177
1.74M
  } else {
4178
1.74M
            xmlChar *name;
4179
1.74M
            xmlEntityPtr ent;
4180
4181
1.74M
            if (chunkSize > 0) {
4182
416k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4183
416k
                chunkSize = 0;
4184
416k
            }
4185
4186
1.74M
      name = xmlParseStringEntityRef(ctxt, &str);
4187
1.74M
            if (name == NULL) {
4188
178
                if (pent != NULL)
4189
167
                    pent->content[0] = 0;
4190
178
                break;
4191
178
            }
4192
4193
1.74M
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4194
1.74M
            xmlFree(name);
4195
4196
1.74M
      if ((ent != NULL) &&
4197
1.74M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4198
220k
    if (ent->content == NULL) {
4199
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4200
0
          "predefined entity has no content\n");
4201
0
                    break;
4202
0
                }
4203
4204
220k
                xmlSBufAddString(buf, ent->content, ent->length);
4205
4206
220k
                *inSpace = 0;
4207
1.52M
      } else if ((ent != NULL) && (ent->content != NULL)) {
4208
1.30M
                if (pent != NULL)
4209
1.30M
                    pent->flags |= XML_ENT_EXPANDING;
4210
1.30M
    xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4211
1.30M
                                          normalize, inSpace, depth, check);
4212
1.30M
                if (pent != NULL)
4213
1.30M
                    pent->flags &= ~XML_ENT_EXPANDING;
4214
1.30M
      }
4215
1.74M
        }
4216
634M
    }
4217
4218
1.42M
    if (chunkSize > 0)
4219
694k
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4220
4221
1.42M
    return;
4222
1.42M
}
4223
4224
/**
4225
 * xmlExpandEntitiesInAttValue:
4226
 * @ctxt:  parser context
4227
 * @str:  entity or attribute value
4228
 * @normalize:  whether to collapse whitespace
4229
 *
4230
 * Expand general entity references in an entity or attribute value.
4231
 * Perform attribute value normalization.
4232
 *
4233
 * Returns the expanded attribtue value.
4234
 */
4235
xmlChar *
4236
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4237
53.7k
                            int normalize) {
4238
53.7k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4239
24.6k
                         XML_MAX_HUGE_LENGTH :
4240
53.7k
                         XML_MAX_TEXT_LENGTH;
4241
53.7k
    xmlSBuf buf;
4242
53.7k
    int inSpace = 1;
4243
4244
53.7k
    xmlSBufInit(&buf, maxLength);
4245
4246
53.7k
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4247
53.7k
                              ctxt->inputNr, /* check */ 0);
4248
4249
53.7k
    if ((normalize) && (inSpace) && (buf.size > 0))
4250
0
        buf.size--;
4251
4252
53.7k
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4253
53.7k
}
4254
4255
/**
4256
 * xmlParseAttValueInternal:
4257
 * @ctxt:  an XML parser context
4258
 * @len:  attribute len result
4259
 * @alloc:  whether the attribute was reallocated as a new string
4260
 * @normalize:  if 1 then further non-CDATA normalization must be done
4261
 *
4262
 * parse a value for an attribute.
4263
 * NOTE: if no normalization is needed, the routine will return pointers
4264
 *       directly from the data buffer.
4265
 *
4266
 * 3.3.3 Attribute-Value Normalization:
4267
 * Before the value of an attribute is passed to the application or
4268
 * checked for validity, the XML processor must normalize it as follows:
4269
 * - a character reference is processed by appending the referenced
4270
 *   character to the attribute value
4271
 * - an entity reference is processed by recursively processing the
4272
 *   replacement text of the entity
4273
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4274
 *   appending #x20 to the normalized value, except that only a single
4275
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4276
 *   parsed entity or the literal entity value of an internal parsed entity
4277
 * - other characters are processed by appending them to the normalized value
4278
 * If the declared value is not CDATA, then the XML processor must further
4279
 * process the normalized attribute value by discarding any leading and
4280
 * trailing space (#x20) characters, and by replacing sequences of space
4281
 * (#x20) characters by a single space (#x20) character.
4282
 * All attributes for which no declaration has been read should be treated
4283
 * by a non-validating parser as if declared CDATA.
4284
 *
4285
 * Returns the AttValue parsed or NULL. The value has to be freed by the
4286
 *     caller if it was copied, this can be detected by val[*len] == 0.
4287
 */
4288
static xmlChar *
4289
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4290
2.76M
                         int normalize) {
4291
2.76M
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4292
929k
                         XML_MAX_HUGE_LENGTH :
4293
2.76M
                         XML_MAX_TEXT_LENGTH;
4294
2.76M
    xmlSBuf buf;
4295
2.76M
    xmlChar *ret;
4296
2.76M
    int c, l, quote, flags, chunkSize;
4297
2.76M
    int inSpace = 1;
4298
4299
2.76M
    xmlSBufInit(&buf, maxLength);
4300
4301
2.76M
    GROW;
4302
4303
2.76M
    quote = CUR;
4304
2.76M
    if ((quote != '"') && (quote != '\'')) {
4305
66.5k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4306
66.5k
  return(NULL);
4307
66.5k
    }
4308
2.69M
    NEXTL(1);
4309
4310
2.69M
    if (ctxt->inSubset == 0)
4311
2.34M
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4312
354k
    else
4313
354k
        flags = XML_ENT_VALIDATED;
4314
4315
2.69M
    inSpace = 1;
4316
2.69M
    chunkSize = 0;
4317
4318
300M
    while (1) {
4319
300M
        if (PARSER_STOPPED(ctxt))
4320
1.62k
            goto error;
4321
4322
300M
        if (CUR_PTR >= ctxt->input->end) {
4323
31.5k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4324
31.5k
                           "AttValue: ' expected\n");
4325
31.5k
            goto error;
4326
31.5k
        }
4327
4328
        /*
4329
         * TODO: Check growth threshold
4330
         */
4331
300M
        if (ctxt->input->end - CUR_PTR < 10)
4332
413k
            GROW;
4333
4334
300M
        c = CUR;
4335
4336
300M
        if (c >= 0x80) {
4337
128M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4338
128M
                    "invalid character in attribute value\n");
4339
128M
            if (l == 0) {
4340
58.4M
                if (chunkSize > 0) {
4341
578k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4342
578k
                    chunkSize = 0;
4343
578k
                }
4344
58.4M
                xmlSBufAddReplChar(&buf);
4345
58.4M
                NEXTL(1);
4346
70.0M
            } else {
4347
70.0M
                chunkSize += l;
4348
70.0M
                NEXTL(l);
4349
70.0M
            }
4350
4351
128M
            inSpace = 0;
4352
172M
        } else if (c != '&') {
4353
170M
            if (c > 0x20) {
4354
61.5M
                if (c == quote)
4355
2.64M
                    break;
4356
4357
58.8M
                if (c == '<')
4358
813k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4359
4360
58.8M
                chunkSize += 1;
4361
58.8M
                inSpace = 0;
4362
108M
            } else if (!IS_BYTE_CHAR(c)) {
4363
75.5M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4364
75.5M
                        "invalid character in attribute value\n");
4365
75.5M
                if (chunkSize > 0) {
4366
197k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4367
197k
                    chunkSize = 0;
4368
197k
                }
4369
75.5M
                xmlSBufAddReplChar(&buf);
4370
75.5M
                inSpace = 0;
4371
75.5M
            } else {
4372
                /* Whitespace */
4373
33.4M
                if ((normalize) && (inSpace)) {
4374
                    /* Skip char */
4375
2.20M
                    if (chunkSize > 0) {
4376
41.6k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4377
41.6k
                        chunkSize = 0;
4378
41.6k
                    }
4379
31.2M
                } else if (c < 0x20) {
4380
                    /* Convert to space */
4381
29.4M
                    if (chunkSize > 0) {
4382
694k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4383
694k
                        chunkSize = 0;
4384
694k
                    }
4385
4386
29.4M
                    xmlSBufAddCString(&buf, " ", 1);
4387
29.4M
                } else {
4388
1.82M
                    chunkSize += 1;
4389
1.82M
                }
4390
4391
33.4M
                inSpace = 1;
4392
4393
33.4M
                if ((c == 0xD) && (NXT(1) == 0xA))
4394
56.5k
                    CUR_PTR++;
4395
33.4M
            }
4396
4397
167M
            NEXTL(1);
4398
167M
        } else if (NXT(1) == '#') {
4399
189k
            int val;
4400
4401
189k
            if (chunkSize > 0) {
4402
113k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4403
113k
                chunkSize = 0;
4404
113k
            }
4405
4406
189k
            val = xmlParseCharRef(ctxt);
4407
189k
            if (val == 0)
4408
17.1k
                goto error;
4409
4410
172k
            if ((val == '&') && (!ctxt->replaceEntities)) {
4411
                /*
4412
                 * The reparsing will be done in xmlStringGetNodeList()
4413
                 * called by the attribute() function in SAX.c
4414
                 */
4415
8.23k
                xmlSBufAddCString(&buf, "&#38;", 5);
4416
8.23k
                inSpace = 0;
4417
164k
            } else if (val == ' ') {
4418
17.2k
                if ((!normalize) || (!inSpace))
4419
15.5k
                    xmlSBufAddCString(&buf, " ", 1);
4420
17.2k
                inSpace = 1;
4421
147k
            } else {
4422
147k
                xmlSBufAddChar(&buf, val);
4423
147k
                inSpace = 0;
4424
147k
            }
4425
1.35M
        } else {
4426
1.35M
            const xmlChar *name;
4427
1.35M
            xmlEntityPtr ent;
4428
4429
1.35M
            if (chunkSize > 0) {
4430
738k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4431
738k
                chunkSize = 0;
4432
738k
            }
4433
4434
1.35M
            name = xmlParseEntityRefInternal(ctxt);
4435
1.35M
            if (name == NULL) {
4436
                /*
4437
                 * Probably a literal '&' which wasn't escaped.
4438
                 * TODO: Handle gracefully in recovery mode.
4439
                 */
4440
145k
                continue;
4441
145k
            }
4442
4443
1.20M
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4444
1.20M
            if (ent == NULL)
4445
73.9k
                continue;
4446
4447
1.13M
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4448
561k
                if ((ent->content[0] == '&') && (!ctxt->replaceEntities))
4449
13.3k
                    xmlSBufAddCString(&buf, "&#38;", 5);
4450
548k
                else
4451
548k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4452
561k
                inSpace = 0;
4453
569k
            } else if (ctxt->replaceEntities) {
4454
186k
                xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4455
186k
                                          normalize, &inSpace, ctxt->inputNr,
4456
186k
                                          /* check */ 1);
4457
382k
            } else {
4458
382k
                if ((ent->flags & flags) != flags)
4459
3.34k
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4460
4461
382k
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4462
210
                    ent->content[0] = 0;
4463
210
                    goto error;
4464
210
                }
4465
4466
                /*
4467
                 * Just output the reference
4468
                 */
4469
382k
                xmlSBufAddCString(&buf, "&", 1);
4470
382k
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4471
382k
                xmlSBufAddCString(&buf, ";", 1);
4472
4473
382k
                inSpace = 0;
4474
382k
            }
4475
1.13M
  }
4476
300M
    }
4477
4478
2.64M
    if ((buf.mem == NULL) && (alloc != NULL)) {
4479
1.78M
        ret = (xmlChar *) CUR_PTR - chunkSize;
4480
4481
1.78M
        if (attlen != NULL)
4482
1.78M
            *attlen = chunkSize;
4483
1.78M
        if ((normalize) && (inSpace) && (chunkSize > 0))
4484
973
            *attlen -= 1;
4485
1.78M
        *alloc = 0;
4486
4487
        /* Report potential error */
4488
1.78M
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4489
1.78M
    } else {
4490
858k
        if (chunkSize > 0)
4491
644k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4492
4493
858k
        if ((normalize) && (inSpace) && (buf.size > 0))
4494
5.66k
            buf.size--;
4495
4496
858k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4497
4498
858k
        if (ret != NULL) {
4499
858k
            if (attlen != NULL)
4500
360k
                *attlen = buf.size;
4501
858k
            if (alloc != NULL)
4502
360k
                *alloc = 1;
4503
858k
        }
4504
858k
    }
4505
4506
2.64M
    NEXTL(1);
4507
4508
2.64M
    return(ret);
4509
4510
50.5k
error:
4511
50.5k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4512
50.5k
    return(NULL);
4513
2.69M
}
4514
4515
/**
4516
 * xmlParseAttValue:
4517
 * @ctxt:  an XML parser context
4518
 *
4519
 * DEPRECATED: Internal function, don't use.
4520
 *
4521
 * parse a value for an attribute
4522
 * Note: the parser won't do substitution of entities here, this
4523
 * will be handled later in xmlStringGetNodeList
4524
 *
4525
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4526
 *                   "'" ([^<&'] | Reference)* "'"
4527
 *
4528
 * 3.3.3 Attribute-Value Normalization:
4529
 * Before the value of an attribute is passed to the application or
4530
 * checked for validity, the XML processor must normalize it as follows:
4531
 * - a character reference is processed by appending the referenced
4532
 *   character to the attribute value
4533
 * - an entity reference is processed by recursively processing the
4534
 *   replacement text of the entity
4535
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4536
 *   appending #x20 to the normalized value, except that only a single
4537
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4538
 *   parsed entity or the literal entity value of an internal parsed entity
4539
 * - other characters are processed by appending them to the normalized value
4540
 * If the declared value is not CDATA, then the XML processor must further
4541
 * process the normalized attribute value by discarding any leading and
4542
 * trailing space (#x20) characters, and by replacing sequences of space
4543
 * (#x20) characters by a single space (#x20) character.
4544
 * All attributes for which no declaration has been read should be treated
4545
 * by a non-validating parser as if declared CDATA.
4546
 *
4547
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4548
 */
4549
4550
4551
xmlChar *
4552
549k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
    /*
     * Public wrapper: validates the context, then hands the work to
     * xmlParseAttValueInternal().
     */
    /* Without a context or a current input there is nothing to parse. */
4553
549k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
    /*
     * The three trailing arguments are passed as NULL, NULL and 0; their
     * meaning is defined by xmlParseAttValueInternal(), whose signature
     * is not visible here.
     */
4554
549k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4555
549k
}
4556
4557
/**
4558
 * xmlParseSystemLiteral:
4559
 * @ctxt:  an XML parser context
4560
 *
4561
 * DEPRECATED: Internal function, don't use.
4562
 *
4563
 * parse an XML Literal
4564
 *
4565
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4566
 *
4567
 * Returns the SystemLiteral parsed or NULL
4568
 */
4569
4570
xmlChar *
4571
99.7k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
    /*
     * Copies the quoted literal at the current input position into a
     * growable heap buffer and returns that buffer. The buffer is handed
     * to the caller (presumably to be released with xmlFree - confirm
     * against callers).
     */
4572
99.7k
    xmlChar *buf = NULL;
4573
99.7k
    int len = 0;
4574
99.7k
    int size = XML_PARSER_BUFFER_SIZE;
4575
99.7k
    int cur, l;
    /* The length cap is raised when the XML_PARSE_HUGE option is set. */
4576
99.7k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4577
16.7k
                    XML_MAX_TEXT_LENGTH :
4578
99.7k
                    XML_MAX_NAME_LENGTH;
4579
99.7k
    xmlChar stop;
4580
    /* Consume the opening quote and remember it as the terminator. */
4581
99.7k
    if (RAW == '"') {
4582
44.6k
        NEXT;
4583
44.6k
  stop = '"';
4584
55.1k
    } else if (RAW == '\'') {
4585
49.3k
        NEXT;
4586
49.3k
  stop = '\'';
4587
49.3k
    } else {
4588
5.73k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4589
5.73k
  return(NULL);
4590
5.73k
    }
4591
4592
94.0k
    buf = (xmlChar *) xmlMallocAtomic(size);
4593
94.0k
    if (buf == NULL) {
4594
71
        xmlErrMemory(ctxt);
4595
71
  return(NULL);
4596
71
    }
4597
93.9k
    cur = CUR_CHAR(l);
    /* Copy characters until the closing quote or a non-Char value. */
4598
16.1M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
        /*
         * Grow before appending so that a few bytes of headroom remain;
         * presumably enough for one encoded character from COPY_BUF plus
         * the terminating 0 - confirm against the COPY_BUF macro.
         */
4599
16.0M
  if (len + 5 >= size) {
4600
45.7k
      xmlChar *tmp;
4601
4602
45.7k
      size *= 2;
4603
45.7k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4604
45.7k
      if (tmp == NULL) {
4605
19
          xmlFree(buf);
4606
19
    xmlErrMemory(ctxt);
4607
19
    return(NULL);
4608
19
      }
4609
45.7k
      buf = tmp;
4610
45.7k
  }
4611
16.0M
  COPY_BUF(buf, len, cur);
        /* Over-long literals are rejected and the buffer is released. */
4612
16.0M
        if (len > maxLength) {
4613
13
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4614
13
            xmlFree(buf);
4615
13
            return(NULL);
4616
13
        }
4617
16.0M
  NEXTL(l);
4618
16.0M
  cur = CUR_CHAR(l);
4619
16.0M
    }
4620
93.9k
    buf[len] = 0;
    /*
     * The loop stops either on the closing quote (a valid Char) or on a
     * value that is not a Char. In the latter case an error is raised,
     * but the text collected so far is still returned.
     */
4621
93.9k
    if (!IS_CHAR(cur)) {
4622
28.8k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4623
65.0k
    } else {
4624
65.0k
  NEXT;
4625
65.0k
    }
4626
93.9k
    return(buf);
4627
93.9k
}
4628
4629
/**
4630
 * xmlParsePubidLiteral:
4631
 * @ctxt:  an XML parser context
4632
 *
4633
 * DEPRECATED: Internal function, don't use.
4634
 *
4635
 * parse an XML public literal
4636
 *
4637
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4638
 *
4639
 * Returns the PubidLiteral parsed or NULL.
4640
 */
4641
4642
xmlChar *
4643
34.1k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
    /*
     * Copies the quoted public identifier at the current input position
     * into a growable heap buffer and returns that buffer. Input is read
     * one xmlChar at a time through CUR.
     */
4644
34.1k
    xmlChar *buf = NULL;
4645
34.1k
    int len = 0;
4646
34.1k
    int size = XML_PARSER_BUFFER_SIZE;
    /* The length cap is raised when the XML_PARSE_HUGE option is set. */
4647
34.1k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4648
6.48k
                    XML_MAX_TEXT_LENGTH :
4649
34.1k
                    XML_MAX_NAME_LENGTH;
4650
34.1k
    xmlChar cur;
4651
34.1k
    xmlChar stop;
4652
    /* Consume the opening quote and remember it as the terminator. */
4653
34.1k
    if (RAW == '"') {
4654
9.44k
        NEXT;
4655
9.44k
  stop = '"';
4656
24.7k
    } else if (RAW == '\'') {
4657
23.2k
        NEXT;
4658
23.2k
  stop = '\'';
4659
23.2k
    } else {
4660
1.47k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4661
1.47k
  return(NULL);
4662
1.47k
    }
4663
32.6k
    buf = (xmlChar *) xmlMallocAtomic(size);
4664
32.6k
    if (buf == NULL) {
4665
38
  xmlErrMemory(ctxt);
4666
38
  return(NULL);
4667
38
    }
4668
32.6k
    cur = CUR;
    /*
     * Copy while the byte is a PubidChar and not the closing quote; the
     * loop also ends as soon as the parser has been stopped.
     */
4669
590k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4670
590k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
        /* Keep one spare byte so the terminating 0 below always fits. */
4671
557k
  if (len + 1 >= size) {
4672
566
      xmlChar *tmp;
4673
4674
566
      size *= 2;
4675
566
      tmp = (xmlChar *) xmlRealloc(buf, size);
4676
566
      if (tmp == NULL) {
4677
6
    xmlErrMemory(ctxt);
4678
6
    xmlFree(buf);
4679
6
    return(NULL);
4680
6
      }
4681
560
      buf = tmp;
4682
560
  }
4683
557k
  buf[len++] = cur;
        /* Over-long identifiers are rejected and the buffer is released. */
4684
557k
        if (len > maxLength) {
4685
6
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4686
6
            xmlFree(buf);
4687
6
            return(NULL);
4688
6
        }
4689
557k
  NEXT;
4690
557k
  cur = CUR;
4691
557k
    }
4692
32.6k
    buf[len] = 0;
    /*
     * A missing closing quote is reported, but the text collected so far
     * is still returned to the caller.
     */
4693
32.6k
    if (cur != stop) {
4694
6.05k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4695
26.5k
    } else {
4696
26.5k
  NEXTL(1);
4697
26.5k
    }
4698
32.6k
    return(buf);
4699
32.6k
}
4700
4701
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4702
4703
/*
4704
 * used for the test in the inner loop of the char data testing
4705
 */
4706
/*
 * Byte-indexed lookup table for the fast character-data scan: a non-zero
 * entry means the byte can be skipped by the inner loop, a zero entry
 * makes the loop stop so the byte gets special handling. Zero entries are
 * the control bytes other than 0x09 (including 0x0A and 0x0D), '&', '<',
 * ']' and every byte from 0x80 upwards.
 */
static const unsigned char test_char_data[256] = {
4707
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4708
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4709
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4710
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4711
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4712
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4713
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4714
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4715
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4716
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4717
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4718
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4719
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4720
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4721
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4722
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4723
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4724
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4725
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4726
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4727
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4728
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4729
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4730
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4731
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4732
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4733
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4734
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4735
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4736
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4737
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4738
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4739
};
4740
4741
/**
4742
 * xmlParseCharDataInternal:
4743
 * @ctxt:  an XML parser context
4744
 * @partial:  buffer may contain partial UTF-8 sequences
4745
 *
4746
 * Parse character data. Always makes progress if the first char isn't
4747
 * '<' or '&'.
4748
 *
4749
 * The right angle bracket (>) may be represented using the string "&gt;",
4750
 * and must, for compatibility, be escaped using "&gt;" or a character
4751
 * reference when it appears in the string "]]>" in content, when that
4752
 * string is not marking the end of a CDATA section.
4753
 *
4754
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4755
 */
4756
static void
4757
9.90M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
    /*
     * Fast path for character data: scans the input buffer in place and
     * passes slices of it straight to the SAX callbacks. Anything the
     * fast scan cannot handle is left to xmlParseCharDataComplex().
     */
4758
9.90M
    const xmlChar *in;
4759
9.90M
    int nbchar = 0;
    /*
     * line/col snapshot the position that matches ctxt->input->cur. They
     * are refreshed after each SAX delivery and written back before the
     * fallback call at the end of the function.
     */
4760
9.90M
    int line = ctxt->input->line;
4761
9.90M
    int col = ctxt->input->col;
4762
9.90M
    int ccol;
4763
4764
9.90M
    GROW;
4765
    /*
4766
     * Accelerated common case where input doesn't need to be
4767
     * modified before passing it to the handler.
4768
     */
4769
9.90M
    in = ctxt->input->cur;
4770
10.1M
    do {
        /* Step 1: skip a leading run of spaces and line feeds. */
4771
10.7M
get_more_space:
4772
12.4M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4773
10.7M
        if (*in == 0xA) {
4774
11.5M
            do {
4775
11.5M
                ctxt->input->line++; ctxt->input->col = 1;
4776
11.5M
                in++;
4777
11.5M
            } while (*in == 0xA);
4778
616k
            goto get_more_space;
4779
616k
        }
        /*
         * Only spaces/line feeds were seen before the next tag: deliver
         * that run (if any) and return.
         */
4780
10.1M
        if (*in == '<') {
4781
583k
            nbchar = in - ctxt->input->cur;
4782
583k
            if (nbchar > 0) {
4783
583k
                const xmlChar *tmp = ctxt->input->cur;
4784
583k
                ctxt->input->cur = in;
4785
                /*
                 * When the two callbacks differ, areBlanks() decides
                 * which one receives the run; otherwise characters()
                 * gets it directly.
                 */
4786
583k
                if ((ctxt->sax != NULL) &&
4787
583k
                    (ctxt->disableSAX == 0) &&
4788
583k
                    (ctxt->sax->ignorableWhitespace !=
4789
569k
                     ctxt->sax->characters)) {
4790
45.8k
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4791
25.7k
                        if (ctxt->sax->ignorableWhitespace != NULL)
4792
25.7k
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4793
25.7k
                                                   tmp, nbchar);
4794
25.7k
                    } else {
4795
20.1k
                        if (ctxt->sax->characters != NULL)
4796
20.1k
                            ctxt->sax->characters(ctxt->userData,
4797
20.1k
                                                  tmp, nbchar);
4798
20.1k
                        if (*ctxt->space == -1)
4799
7.10k
                            *ctxt->space = -2;
4800
20.1k
                    }
4801
537k
                } else if ((ctxt->sax != NULL) &&
4802
537k
                           (ctxt->disableSAX == 0) &&
4803
537k
                           (ctxt->sax->characters != NULL)) {
4804
523k
                    ctxt->sax->characters(ctxt->userData,
4805
523k
                                          tmp, nbchar);
4806
523k
                }
4807
583k
            }
4808
583k
            return;
4809
583k
        }
4810
        /*
         * Step 2: skip every byte that test_char_data marks as plain,
         * counting columns in a local and storing the total once.
         */
4811
10.1M
get_more:
4812
10.1M
        ccol = ctxt->input->col;
4813
39.1M
        while (test_char_data[*in]) {
4814
29.0M
            in++;
4815
29.0M
            ccol++;
4816
29.0M
        }
4817
10.1M
        ctxt->input->col = ccol;
4818
10.1M
        if (*in == 0xA) {
4819
6.21M
            do {
4820
6.21M
                ctxt->input->line++; ctxt->input->col = 1;
4821
6.21M
                in++;
4822
6.21M
            } while (*in == 0xA);
4823
451k
            goto get_more;
4824
451k
        }
        /*
         * A ']' is ordinary data unless it starts "]]>", which is an
         * error here; in that case the current position is moved just
         * past the first ']' and the function returns.
         */
4825
9.66M
        if (*in == ']') {
4826
149k
            if ((in[1] == ']') && (in[2] == '>')) {
4827
22.3k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4828
22.3k
                ctxt->input->cur = in + 1;
4829
22.3k
                return;
4830
22.3k
            }
4831
127k
            in++;
4832
127k
            ctxt->input->col++;
4833
127k
            goto get_more;
4834
149k
        }
        /* Step 3: deliver the bytes scanned since ctxt->input->cur. */
4835
9.51M
        nbchar = in - ctxt->input->cur;
4836
9.51M
        if (nbchar > 0) {
4837
2.36M
            if ((ctxt->sax != NULL) &&
4838
2.36M
                (ctxt->disableSAX == 0) &&
4839
2.36M
                (ctxt->sax->ignorableWhitespace !=
4840
2.14M
                 ctxt->sax->characters) &&
4841
2.36M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4842
78.2k
                const xmlChar *tmp = ctxt->input->cur;
4843
78.2k
                ctxt->input->cur = in;
4844
4845
78.2k
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4846
21.6k
                    if (ctxt->sax->ignorableWhitespace != NULL)
4847
21.6k
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4848
21.6k
                                                       tmp, nbchar);
4849
56.5k
                } else {
4850
56.5k
                    if (ctxt->sax->characters != NULL)
4851
56.5k
                        ctxt->sax->characters(ctxt->userData,
4852
56.5k
                                              tmp, nbchar);
4853
56.5k
                    if (*ctxt->space == -1)
4854
25.9k
                        *ctxt->space = -2;
4855
56.5k
                }
4856
78.2k
                line = ctxt->input->line;
4857
78.2k
                col = ctxt->input->col;
4858
2.28M
            } else if ((ctxt->sax != NULL) &&
4859
2.28M
                       (ctxt->disableSAX == 0)) {
4860
2.06M
                if (ctxt->sax->characters != NULL)
4861
2.06M
                    ctxt->sax->characters(ctxt->userData,
4862
2.06M
                                          ctxt->input->cur, nbchar);
4863
2.06M
                line = ctxt->input->line;
4864
2.06M
                col = ctxt->input->col;
4865
2.06M
            }
4866
2.36M
        }
4867
9.51M
        ctxt->input->cur = in;
        /*
         * For a CR directly followed by LF, the current position is set
         * to the LF (so the CR is not delivered) and scanning resumes
         * after it. A lone CR is left in place.
         */
4868
9.51M
        if (*in == 0xD) {
4869
267k
            in++;
4870
267k
            if (*in == 0xA) {
4871
219k
                ctxt->input->cur = in;
4872
219k
                in++;
4873
219k
                ctxt->input->line++; ctxt->input->col = 1;
4874
219k
                continue; /* while */
4875
219k
            }
4876
48.1k
            in--;
4877
48.1k
        }
        /* Markup or a reference ends the character data. */
4878
9.29M
        if (*in == '<') {
4879
1.39M
            return;
4880
1.39M
        }
4881
7.90M
        if (*in == '&') {
4882
286k
            return;
4883
286k
        }
        /* Re-read the cursor after SHRINK/GROW, which run between chunks. */
4884
7.61M
        SHRINK;
4885
7.61M
        GROW;
4886
7.61M
        in = ctxt->input->cur;
4887
7.83M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4888
7.83M
             (*in == 0x09) || (*in == 0x0a));
    /*
     * The next byte is outside the range the fast path handles: restore
     * the saved line/column and continue with the slower routine.
     */
4889
7.61M
    ctxt->input->line = line;
4890
7.61M
    ctxt->input->col = col;
4891
7.61M
    xmlParseCharDataComplex(ctxt, partial);
4892
7.61M
}
4893
4894
/**
4895
 * xmlParseCharDataComplex:
4896
 * @ctxt:  an XML parser context
4897
 * @partial:  buffer may contain partial UTF-8 sequences
4898
 *
4899
 * Always makes progress if the first char isn't '<' or '&'.
4900
 *
4901
 * Parse a CharData section. This is the fallback function
4902
 * of xmlParseCharDataInternal() when the parsing requires handling
4903
 * of non-ASCII characters.
4904
 */
4905
static void
4906
7.61M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
    /*
     * Slow path for character data: decodes one character at a time into
     * a local buffer and flushes the buffer to the SAX callbacks in
     * chunks.
     */
    /*
     * The buffer is flushed once nbchar reaches XML_PARSER_BIG_BUFFER_SIZE;
     * the 5 extra bytes presumably cover the last appended character plus
     * the terminating 0 - confirm against the COPY_BUF macro.
     */
4907
7.61M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4908
7.61M
    int nbchar = 0;
4909
7.61M
    int cur, l;
4910
4911
7.61M
    cur = CUR_CHAR(l);
4912
140M
    while ((cur != '<') && /* checked */
4913
140M
           (cur != '&') &&
4914
140M
     (IS_CHAR(cur))) {
        /* "]]>" is reported as an error but the characters are still copied. */
4915
132M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4916
15.6k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4917
15.6k
  }
4918
132M
  COPY_BUF(buf, nbchar, cur);
4919
  /* move current position before possible calling of ctxt->sax->characters */
4920
132M
  NEXTL(l);
4921
132M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4922
1.04M
      buf[nbchar] = 0;
4923
4924
      /*
4925
       * OK the segment is to be consumed as chars.
4926
       */
4927
1.04M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4928
1.02M
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4929
372
        if (ctxt->sax->ignorableWhitespace != NULL)
4930
372
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4931
372
                                     buf, nbchar);
4932
1.02M
    } else {
4933
1.02M
        if (ctxt->sax->characters != NULL)
4934
1.02M
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4935
1.02M
        if ((ctxt->sax->characters !=
4936
1.02M
             ctxt->sax->ignorableWhitespace) &&
4937
1.02M
      (*ctxt->space == -1))
4938
2.18k
      *ctxt->space = -2;
4939
1.02M
    }
4940
1.02M
      }
      /* The chunk has been handed over; start filling the buffer again. */
4941
1.04M
      nbchar = 0;
4942
1.04M
            SHRINK;
4943
1.04M
  }
4944
132M
  cur = CUR_CHAR(l);
4945
132M
    }
    /* Flush whatever is left in the buffer after the loop. */
4946
7.61M
    if (nbchar != 0) {
4947
806k
        buf[nbchar] = 0;
4948
  /*
4949
   * OK the segment is to be consumed as chars.
4950
   */
4951
806k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4952
723k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4953
1.63k
    if (ctxt->sax->ignorableWhitespace != NULL)
4954
1.63k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4955
721k
      } else {
4956
721k
    if (ctxt->sax->characters != NULL)
4957
721k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4958
721k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4959
721k
        (*ctxt->space == -1))
4960
77.2k
        *ctxt->space = -2;
4961
721k
      }
4962
723k
  }
4963
806k
    }
4964
    /*
4965
     * cur == 0 can mean
4966
     *
4967
     * - End of buffer.
4968
     * - An actual 0 character.
4969
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4970
     */
4971
7.61M
    if (ctxt->input->cur < ctxt->input->end) {
        /*
         * Decoded value 0 while the raw byte is non-zero: treated as an
         * incomplete sequence. It is only an error, and only skipped,
         * when partial is 0.
         */
4972
7.58M
        if ((cur == 0) && (CUR != 0)) {
4973
9.67k
            if (partial == 0) {
4974
6.27k
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4975
6.27k
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4976
6.27k
                NEXTL(1);
4977
6.27k
            }
4978
7.57M
        } else if ((cur != '<') && (cur != '&')) {
4979
            /* Generate the error and skip the offending character */
4980
6.98M
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4981
6.98M
                              "PCDATA invalid Char value %d\n", cur);
4982
6.98M
            NEXTL(l);
4983
6.98M
        }
4984
7.58M
    }
4985
7.61M
}
4986
4987
/**
4988
 * xmlParseCharData:
4989
 * @ctxt:  an XML parser context
4990
 * @cdata:  unused
4991
 *
4992
 * DEPRECATED: Internal function, don't use.
4993
 */
4994
void
4995
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
    /*
     * Thin wrapper: the cdata argument is ignored and the internal parser
     * is always called with partial set to 0.
     */
4996
0
    xmlParseCharDataInternal(ctxt, 0);
4997
0
}
4998
4999
/**
5000
 * xmlParseExternalID:
5001
 * @ctxt:  an XML parser context
5002
 * @publicID:  a xmlChar** receiving PubidLiteral
5003
 * @strict: indicate whether we should restrict parsing to only
5004
 *          production [75], see NOTE below
5005
 *
5006
 * DEPRECATED: Internal function, don't use.
5007
 *
5008
 * Parse an External ID or a Public ID
5009
 *
5010
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
5011
 *       'PUBLIC' S PubidLiteral S SystemLiteral
5012
 *
5013
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5014
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
5015
 *
5016
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5017
 *
5018
 * Returns the SystemLiteral; in the second
5019
 *                case publicID receives the PubidLiteral. If strict is off
5020
 *                it is possible to return NULL and have publicID set.
5021
 */
5022
5023
xmlChar *
5024
202k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
    /*
     * Recognises the 'SYSTEM' and 'PUBLIC' keywords at the current
     * position. If neither is present, nothing is consumed and NULL is
     * returned with *publicID set to NULL.
     */
5025
202k
    xmlChar *URI = NULL;
5026
    /* publicID is written unconditionally, so it must not be NULL. */
5027
202k
    *publicID = NULL;
5028
202k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5029
73.8k
        SKIP(6);
        /* Missing whitespace is reported but parsing carries on. */
5030
73.8k
  if (SKIP_BLANKS == 0) {
5031
2.19k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5032
2.19k
                     "Space required after 'SYSTEM'\n");
5033
2.19k
  }
5034
73.8k
  URI = xmlParseSystemLiteral(ctxt);
5035
73.8k
  if (URI == NULL) {
5036
2.09k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5037
2.09k
        }
5038
128k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5039
34.1k
        SKIP(6);
5040
34.1k
  if (SKIP_BLANKS == 0) {
5041
13.6k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5042
13.6k
        "Space required after 'PUBLIC'\n");
5043
13.6k
  }
5044
34.1k
  *publicID = xmlParsePubidLiteral(ctxt);
5045
34.1k
  if (*publicID == NULL) {
5046
1.52k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5047
1.52k
  }
5048
34.1k
  if (strict) {
5049
      /*
5050
       * We don't handle [83] so "S SystemLiteral" is required.
5051
       */
5052
25.5k
      if (SKIP_BLANKS == 0) {
5053
13.6k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5054
13.6k
      "Space required after the Public Identifier\n");
5055
13.6k
      }
5056
25.5k
  } else {
5057
      /*
5058
       * We handle [83] so we return immediately, if
5059
       * "S SystemLiteral" is not detected. We skip blanks if no
5060
             * system literal was found, but this is harmless since we must
5061
             * be at the end of a NotationDecl.
5062
       */
5063
8.65k
      if (SKIP_BLANKS == 0) return(NULL);
5064
1.88k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
5065
1.88k
  }
5066
25.9k
  URI = xmlParseSystemLiteral(ctxt);
5067
25.9k
  if (URI == NULL) {
5068
3.74k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5069
3.74k
        }
5070
25.9k
    }
5071
193k
    return(URI);
5072
202k
}
5073
5074
/**
5075
 * xmlParseCommentComplex:
5076
 * @ctxt:  an XML parser context
5077
 * @buf:  the already parsed part of the buffer
5078
 * @len:  number of bytes in the buffer
5079
 * @size:  allocated size of the buffer
5080
 *
5081
 * Skip an XML (SGML) comment <!-- .... -->
5082
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5083
 *  must not occur within comments. "
5084
 * This is the slow routine in case the accelerator for ascii didn't work
5085
 *
5086
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5087
 */
5088
static void
5089
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5090
382k
                       size_t len, size_t size) {
    /*
     * Slow path for comments. Takes over buf (which may be NULL) and
     * frees it on every exit path. q, r and cur hold three consecutive
     * decoded characters (oldest first); only q is appended to the
     * buffer on each step, so the "--" in front of the closing '>' is
     * never copied.
     */
5091
382k
    int q, ql;
5092
382k
    int r, rl;
5093
382k
    int cur, l;
    /* The length cap is raised when the XML_PARSE_HUGE option is set. */
5094
382k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5095
49.7k
                       XML_MAX_HUGE_LENGTH :
5096
382k
                       XML_MAX_TEXT_LENGTH;
5097
    /* No buffer handed in: start with a fresh one. */
5098
382k
    if (buf == NULL) {
5099
237k
        len = 0;
5100
237k
  size = XML_PARSER_BUFFER_SIZE;
5101
237k
  buf = (xmlChar *) xmlMallocAtomic(size);
5102
237k
  if (buf == NULL) {
5103
33
      xmlErrMemory(ctxt);
5104
33
      return;
5105
33
  }
5106
237k
    }
    /* Prime the window: a 0 value at any of the three reads ends parsing. */
5107
382k
    q = CUR_CHAR(ql);
5108
382k
    if (q == 0)
5109
16.1k
        goto not_terminated;
5110
366k
    if (!IS_CHAR(q)) {
5111
4.82k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5112
4.82k
                          "xmlParseComment: invalid xmlChar value %d\n",
5113
4.82k
                    q);
5114
4.82k
  xmlFree (buf);
5115
4.82k
  return;
5116
4.82k
    }
5117
361k
    NEXTL(ql);
5118
361k
    r = CUR_CHAR(rl);
5119
361k
    if (r == 0)
5120
28.5k
        goto not_terminated;
5121
332k
    if (!IS_CHAR(r)) {
5122
2.90k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5123
2.90k
                          "xmlParseComment: invalid xmlChar value %d\n",
5124
2.90k
                    r);
5125
2.90k
  xmlFree (buf);
5126
2.90k
  return;
5127
2.90k
    }
5128
330k
    NEXTL(rl);
5129
330k
    cur = CUR_CHAR(l);
5130
330k
    if (cur == 0)
5131
189k
        goto not_terminated;
    /* Run until the window reads "-->" or cur is not a valid Char. */
5132
16.9M
    while (IS_CHAR(cur) && /* checked */
5133
16.9M
           ((cur != '>') ||
5134
16.8M
      (r != '-') || (q != '-'))) {
        /* "--" not followed by '>' is reported, then parsing continues. */
5135
16.7M
  if ((r == '-') && (q == '-')) {
5136
233k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5137
233k
  }
5138
16.7M
  if (len + 5 >= size) {
5139
31.3k
      xmlChar *new_buf;
5140
31.3k
            size_t new_size;
5141
5142
31.3k
      new_size = size * 2;
5143
31.3k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
5144
31.3k
      if (new_buf == NULL) {
5145
24
    xmlFree (buf);
5146
24
    xmlErrMemory(ctxt);
5147
24
    return;
5148
24
      }
5149
31.3k
      buf = new_buf;
5150
31.3k
            size = new_size;
5151
31.3k
  }
5152
16.7M
  COPY_BUF(buf, len, q);
5153
16.7M
        if (len > maxLength) {
5154
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5155
0
                         "Comment too big found", NULL);
5156
0
            xmlFree (buf);
5157
0
            return;
5158
0
        }
5159
        /* Slide the three-character window forward by one. */
5160
16.7M
  q = r;
5161
16.7M
  ql = rl;
5162
16.7M
  r = cur;
5163
16.7M
  rl = l;
5164
5165
16.7M
  NEXTL(l);
5166
16.7M
  cur = CUR_CHAR(l);
5167
5168
16.7M
    }
5169
140k
    buf[len] = 0;
    /*
     * Three ways out of the loop: a 0 value (not terminated), an invalid
     * character, or the closing '>' - only the last one reports the
     * comment to SAX.
     */
5170
140k
    if (cur == 0) {
5171
24.8k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5172
24.8k
                       "Comment not terminated \n<!--%.50s\n", buf);
5173
115k
    } else if (!IS_CHAR(cur)) {
5174
6.89k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5175
6.89k
                          "xmlParseComment: invalid xmlChar value %d\n",
5176
6.89k
                    cur);
5177
108k
    } else {
5178
108k
        NEXT;
5179
108k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5180
108k
      (!ctxt->disableSAX))
5181
54.5k
      ctxt->sax->comment(ctxt->userData, buf);
5182
108k
    }
5183
140k
    xmlFree(buf);
5184
140k
    return;
    /* Reached when a 0 value is read while priming the window. */
5185
234k
not_terminated:
5186
234k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5187
234k
       "Comment not terminated\n", NULL);
5188
234k
    xmlFree(buf);
5189
234k
    return;
5190
140k
}
5191
5192
/**
5193
 * xmlParseComment:
5194
 * @ctxt:  an XML parser context
5195
 *
5196
 * DEPRECATED: Internal function, don't use.
5197
 *
5198
 * Parse an XML (SGML) comment. Always consumes '<!'.
5199
 *
5200
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5201
 *  must not occur within comments. "
5202
 *
5203
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5204
 */
5205
void
5206
4.67M
xmlParseComment(xmlParserCtxtPtr ctxt) {
    /*
     * Fast path for comments: scans plain bytes in place, appends them to
     * a heap buffer and reports the comment to SAX when "-->" is found.
     * Input the fast scan cannot handle is passed, together with the
     * buffer built so far, to xmlParseCommentComplex().
     */
5207
4.67M
    xmlChar *buf = NULL;
5208
4.67M
    size_t size = XML_PARSER_BUFFER_SIZE;
5209
4.67M
    size_t len = 0;
    /* The length cap is raised when the XML_PARSE_HUGE option is set. */
5210
4.67M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5211
1.00M
                       XML_MAX_HUGE_LENGTH :
5212
4.67M
                       XML_MAX_TEXT_LENGTH;
5213
4.67M
    const xmlChar *in;
5214
4.67M
    size_t nbchar = 0;
5215
4.67M
    int ccol;
5216
5217
    /*
5218
     * Check that there is a comment right here.
5219
     */
5220
4.67M
    if ((RAW != '<') || (NXT(1) != '!'))
5221
0
        return;
    /* "<!" is consumed even if the "--" that should follow is missing. */
5222
4.67M
    SKIP(2);
5223
4.67M
    if ((RAW != '-') || (NXT(1) != '-'))
5224
88
        return;
5225
4.67M
    SKIP(2);
5226
4.67M
    GROW;
5227
5228
    /*
5229
     * Accelerated common case where input doesn't need to be
5230
     * modified before passing it to the handler.
5231
     */
5232
4.67M
    in = ctxt->input->cur;
5233
4.67M
    do {
5234
4.67M
  if (*in == 0xA) {
5235
79.0k
      do {
5236
79.0k
    ctxt->input->line++; ctxt->input->col = 1;
5237
79.0k
    in++;
5238
79.0k
      } while (*in == 0xA);
5239
26.9k
  }
        /*
         * Skip bytes in 0x20..0x7F other than '-', plus tab; columns are
         * counted in a local and stored once after the run.
         */
5240
6.35M
get_more:
5241
6.35M
        ccol = ctxt->input->col;
5242
40.6M
  while (((*in > '-') && (*in <= 0x7F)) ||
5243
40.6M
         ((*in >= 0x20) && (*in < '-')) ||
5244
40.6M
         (*in == 0x09)) {
5245
34.2M
        in++;
5246
34.2M
        ccol++;
5247
34.2M
  }
5248
6.35M
  ctxt->input->col = ccol;
5249
6.35M
  if (*in == 0xA) {
5250
305k
      do {
5251
305k
    ctxt->input->line++; ctxt->input->col = 1;
5252
305k
    in++;
5253
305k
      } while (*in == 0xA);
5254
105k
      goto get_more;
5255
105k
  }
5256
6.24M
  nbchar = in - ctxt->input->cur;
5257
  /*
5258
   * save current set of data
5259
   */
5260
6.24M
  if (nbchar > 0) {
5261
1.91M
            if (buf == NULL) {
                /*
                 * First chunk: if "--" follows immediately, allocate just
                 * this chunk plus the terminator; otherwise add
                 * XML_PARSER_BUFFER_SIZE of headroom.
                 */
5262
453k
                if ((*in == '-') && (in[1] == '-'))
5263
279k
                    size = nbchar + 1;
5264
174k
                else
5265
174k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5266
453k
                buf = (xmlChar *) xmlMallocAtomic(size);
5267
453k
                if (buf == NULL) {
5268
63
                    xmlErrMemory(ctxt);
5269
63
                    return;
5270
63
                }
5271
453k
                len = 0;
5272
1.46M
            } else if (len + nbchar + 1 >= size) {
5273
65.3k
                xmlChar *new_buf;
5274
65.3k
                size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5275
65.3k
                new_buf = (xmlChar *) xmlRealloc(buf, size);
5276
65.3k
                if (new_buf == NULL) {
5277
21
                    xmlFree (buf);
5278
21
                    xmlErrMemory(ctxt);
5279
21
                    return;
5280
21
                }
5281
65.3k
                buf = new_buf;
5282
65.3k
            }
5283
1.91M
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5284
1.91M
            len += nbchar;
5285
1.91M
            buf[len] = 0;
5286
1.91M
  }
5287
6.24M
        if (len > maxLength) {
5288
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5289
0
                         "Comment too big found", NULL);
5290
0
            xmlFree (buf);
5291
0
            return;
5292
0
        }
5293
6.24M
  ctxt->input->cur = in;
        /*
         * NOTE(review): a 0xA at this point was already consumed by the
         * loop after get_more and `in` has not moved since, so this
         * branch looks unreachable - confirm before removing.
         */
5294
6.24M
  if (*in == 0xA) {
5295
0
      in++;
5296
0
      ctxt->input->line++; ctxt->input->col = 1;
5297
0
  }
        /*
         * For a CR directly followed by LF, the current position is set
         * to the LF (so the CR is not copied) and scanning resumes after
         * it. A lone CR is left in place.
         */
5298
6.24M
  if (*in == 0xD) {
5299
220k
      in++;
5300
220k
      if (*in == 0xA) {
5301
218k
    ctxt->input->cur = in;
5302
218k
    in++;
5303
218k
    ctxt->input->line++; ctxt->input->col = 1;
5304
218k
    goto get_more;
5305
218k
      }
5306
2.60k
      in--;
5307
2.60k
  }
        /* Re-read the cursor after SHRINK/GROW, which run between chunks. */
5308
6.02M
  SHRINK;
5309
6.02M
  GROW;
5310
6.02M
  in = ctxt->input->cur;
5311
6.02M
  if (*in == '-') {
5312
5.64M
      if (in[1] == '-') {
                /* "-->" ends the comment: report it and release the buffer. */
5313
5.26M
          if (in[2] == '>') {
5314
4.29M
        SKIP(3);
5315
4.29M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5316
4.29M
            (!ctxt->disableSAX)) {
5317
2.90M
      if (buf != NULL)
5318
259k
          ctxt->sax->comment(ctxt->userData, buf);
5319
2.64M
      else
5320
2.64M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5321
2.90M
        }
5322
4.29M
        if (buf != NULL)
5323
308k
            xmlFree(buf);
5324
4.29M
        return;
5325
4.29M
    }
                /* "--" without '>' is an error; both hyphens are stepped over. */
5326
974k
    if (buf != NULL) {
5327
898k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5328
898k
                          "Double hyphen within comment: "
5329
898k
                                      "<!--%.50s\n",
5330
898k
              buf);
5331
898k
    } else
5332
75.3k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5333
75.3k
                          "Double hyphen within comment\n", NULL);
5334
974k
    in++;
5335
974k
    ctxt->input->col++;
5336
974k
      }
5337
1.34M
      in++;
5338
1.34M
      ctxt->input->col++;
5339
1.34M
      goto get_more;
5340
5.64M
  }
5341
6.02M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
    /* Hand the buffer (possibly NULL) over to the slow path, which frees it. */
5342
382k
    xmlParseCommentComplex(ctxt, buf, len, size);
5343
382k
    return;
5344
4.67M
}
5345
5346
5347
/**
5348
 * xmlParsePITarget:
5349
 * @ctxt:  an XML parser context
5350
 *
5351
 * DEPRECATED: Internal function, don't use.
5352
 *
5353
 * parse the name of a PI
5354
 *
5355
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5356
 *
5357
 * Returns the PITarget name or NULL
5358
 */
5359
5360
const xmlChar *
5361
184k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
    /*
     * Parses a Name and checks it against the reserved "xml" prefix. The
     * parsed name is returned in every case, including when an error or
     * warning has been raised.
     */
5362
184k
    const xmlChar *name;
5363
5364
184k
    name = xmlParseName(ctxt);
    /* Case-insensitive test for a name that starts with "xml". */
5365
184k
    if ((name != NULL) &&
5366
184k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5367
184k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5368
184k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5369
24.5k
  int i;
        /* Exactly "xml" (lower case) gets its own error message. */
5370
24.5k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5371
24.5k
      (name[2] == 'l') && (name[3] == 0)) {
5372
10.8k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5373
10.8k
     "XML declaration allowed only at the start of the document\n");
5374
10.8k
      return(name);
        /* Any other three-letter spelling such as "XML" or "Xml". */
5375
13.6k
  } else if (name[3] == 0) {
5376
4.40k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5377
4.40k
      return(name);
5378
4.40k
  }
        /* Names listed in the NULL-terminated xmlW3CPIs array are accepted silently. */
5379
25.9k
  for (i = 0;;i++) {
5380
25.9k
      if (xmlW3CPIs[i] == NULL) break;
5381
17.8k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5382
1.11k
          return(name);
5383
17.8k
  }
        /* Other names with the prefix only trigger a warning. */
5384
8.13k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5385
8.13k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5386
8.13k
          NULL, NULL);
5387
8.13k
    }
    /* A colon in the name is reported as a namespace error. */
5388
168k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5389
5.50k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5390
5.50k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5391
5.50k
    }
5392
168k
    return(name);
5393
184k
}
5394
5395
#ifdef LIBXML_CATALOG_ENABLED
5396
/**
5397
 * xmlParseCatalogPI:
5398
 * @ctxt:  an XML parser context
5399
 * @catalog:  the PI value string
5400
 *
5401
 * parse an XML Catalog Processing Instruction.
5402
 *
5403
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5404
 *
5405
 * Occurs only if allowed by the user and if happening in the Misc
5406
 * part of the document before any doctype information
5407
 * This will add the given catalog to the parsing context in order
5408
 * to be used if there is a resolution need further down in the document
5409
 */
5410
5411
static void
5412
0
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5413
0
    xmlChar *URL = NULL;
5414
0
    const xmlChar *tmp, *base;
5415
0
    xmlChar marker;
5416
5417
0
    tmp = catalog;
5418
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5419
0
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5420
0
  goto error;
5421
0
    tmp += 7;
5422
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5423
0
    if (*tmp != '=') {
5424
0
  return;
5425
0
    }
5426
0
    tmp++;
5427
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5428
0
    marker = *tmp;
5429
0
    if ((marker != '\'') && (marker != '"'))
5430
0
  goto error;
5431
0
    tmp++;
5432
0
    base = tmp;
5433
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5434
0
    if (*tmp == 0)
5435
0
  goto error;
5436
0
    URL = xmlStrndup(base, tmp - base);
5437
0
    tmp++;
5438
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5439
0
    if (*tmp != 0)
5440
0
  goto error;
5441
5442
0
    if (URL != NULL) {
5443
        /*
5444
         * Unfortunately, the catalog API doesn't report OOM errors.
5445
         * xmlGetLastError isn't very helpful since we don't know
5446
         * where the last error came from. We'd have to reset it
5447
         * before this call and restore it afterwards.
5448
         */
5449
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5450
0
  xmlFree(URL);
5451
0
    }
5452
0
    return;
5453
5454
0
error:
5455
0
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5456
0
            "Catalog PI syntax error: %s\n",
5457
0
      catalog, NULL);
5458
0
    if (URL != NULL)
5459
0
  xmlFree(URL);
5460
0
}
5461
#endif
5462
5463
/**
5464
 * xmlParsePI:
5465
 * @ctxt:  an XML parser context
5466
 *
5467
 * DEPRECATED: Internal function, don't use.
5468
 *
5469
 * parse an XML Processing Instruction.
5470
 *
5471
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5472
 *
5473
 * The processing is transferred to SAX once parsed.
5474
 */
5475
5476
void
5477
184k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5478
184k
    xmlChar *buf = NULL;
5479
184k
    size_t len = 0;
5480
184k
    size_t size = XML_PARSER_BUFFER_SIZE;
5481
184k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5482
84.5k
                       XML_MAX_HUGE_LENGTH :
5483
184k
                       XML_MAX_TEXT_LENGTH;
5484
184k
    int cur, l;
5485
184k
    const xmlChar *target;
5486
5487
184k
    if ((RAW == '<') && (NXT(1) == '?')) {
5488
  /*
5489
   * this is a Processing Instruction.
5490
   */
5491
184k
  SKIP(2);
5492
5493
  /*
5494
   * Parse the target name and check for special support like
5495
   * namespace.
5496
   */
5497
184k
        target = xmlParsePITarget(ctxt);
5498
184k
  if (target != NULL) {
5499
165k
      if ((RAW == '?') && (NXT(1) == '>')) {
5500
59.2k
    SKIP(2);
5501
5502
    /*
5503
     * SAX: PI detected.
5504
     */
5505
59.2k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5506
59.2k
        (ctxt->sax->processingInstruction != NULL))
5507
56.1k
        ctxt->sax->processingInstruction(ctxt->userData,
5508
56.1k
                                         target, NULL);
5509
59.2k
    return;
5510
59.2k
      }
5511
105k
      buf = (xmlChar *) xmlMallocAtomic(size);
5512
105k
      if (buf == NULL) {
5513
108
    xmlErrMemory(ctxt);
5514
108
    return;
5515
108
      }
5516
105k
      if (SKIP_BLANKS == 0) {
5517
66.1k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5518
66.1k
        "ParsePI: PI %s space expected\n", target);
5519
66.1k
      }
5520
105k
      cur = CUR_CHAR(l);
5521
11.7M
      while (IS_CHAR(cur) && /* checked */
5522
11.7M
       ((cur != '?') || (NXT(1) != '>'))) {
5523
11.6M
    if (len + 5 >= size) {
5524
13.3k
        xmlChar *tmp;
5525
13.3k
                    size_t new_size = size * 2;
5526
13.3k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5527
13.3k
        if (tmp == NULL) {
5528
13
      xmlErrMemory(ctxt);
5529
13
      xmlFree(buf);
5530
13
      return;
5531
13
        }
5532
13.3k
        buf = tmp;
5533
13.3k
                    size = new_size;
5534
13.3k
    }
5535
11.6M
    COPY_BUF(buf, len, cur);
5536
11.6M
                if (len > maxLength) {
5537
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5538
0
                                      "PI %s too big found", target);
5539
0
                    xmlFree(buf);
5540
0
                    return;
5541
0
                }
5542
11.6M
    NEXTL(l);
5543
11.6M
    cur = CUR_CHAR(l);
5544
11.6M
      }
5545
105k
      buf[len] = 0;
5546
105k
      if (cur != '?') {
5547
33.7k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5548
33.7k
          "ParsePI: PI %s never end ...\n", target);
5549
72.0k
      } else {
5550
72.0k
    SKIP(2);
5551
5552
72.0k
#ifdef LIBXML_CATALOG_ENABLED
5553
72.0k
    if ((ctxt->inSubset == 0) &&
5554
72.0k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5555
4.88k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5556
4.88k
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5557
4.88k
      (allow == XML_CATA_ALLOW_ALL))
5558
0
      xmlParseCatalogPI(ctxt, buf);
5559
4.88k
    }
5560
72.0k
#endif
5561
5562
5563
    /*
5564
     * SAX: PI detected.
5565
     */
5566
72.0k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5567
72.0k
        (ctxt->sax->processingInstruction != NULL))
5568
66.3k
        ctxt->sax->processingInstruction(ctxt->userData,
5569
66.3k
                                         target, buf);
5570
72.0k
      }
5571
105k
      xmlFree(buf);
5572
105k
  } else {
5573
19.5k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5574
19.5k
  }
5575
184k
    }
5576
184k
}
5577
5578
/**
5579
 * xmlParseNotationDecl:
5580
 * @ctxt:  an XML parser context
5581
 *
5582
 * DEPRECATED: Internal function, don't use.
5583
 *
5584
 * Parse a notation declaration. Always consumes '<!'.
5585
 *
5586
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5587
 *
5588
 * Hence there is actually 3 choices:
5589
 *     'PUBLIC' S PubidLiteral
5590
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5591
 * and 'SYSTEM' S SystemLiteral
5592
 *
5593
 * See the NOTE on xmlParseExternalID().
5594
 */
5595
5596
void
5597
52.3k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5598
52.3k
    const xmlChar *name;
5599
52.3k
    xmlChar *Pubid;
5600
52.3k
    xmlChar *Systemid;
5601
5602
52.3k
    if ((CUR != '<') || (NXT(1) != '!'))
5603
0
        return;
5604
52.3k
    SKIP(2);
5605
5606
52.3k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5607
52.1k
  int inputid = ctxt->input->id;
5608
52.1k
  SKIP(8);
5609
52.1k
  if (SKIP_BLANKS_PE == 0) {
5610
882
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5611
882
         "Space required after '<!NOTATION'\n");
5612
882
      return;
5613
882
  }
5614
5615
51.2k
        name = xmlParseName(ctxt);
5616
51.2k
  if (name == NULL) {
5617
5.37k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5618
5.37k
      return;
5619
5.37k
  }
5620
45.9k
  if (xmlStrchr(name, ':') != NULL) {
5621
3.07k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5622
3.07k
         "colons are forbidden from notation names '%s'\n",
5623
3.07k
         name, NULL, NULL);
5624
3.07k
  }
5625
45.9k
  if (SKIP_BLANKS_PE == 0) {
5626
1.55k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5627
1.55k
         "Space required after the NOTATION name'\n");
5628
1.55k
      return;
5629
1.55k
  }
5630
5631
  /*
5632
   * Parse the IDs.
5633
   */
5634
44.3k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5635
44.3k
  SKIP_BLANKS_PE;
5636
5637
44.3k
  if (RAW == '>') {
5638
9.41k
      if (inputid != ctxt->input->id) {
5639
667
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5640
667
                         "Notation declaration doesn't start and stop"
5641
667
                               " in the same entity\n");
5642
667
      }
5643
9.41k
      NEXT;
5644
9.41k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5645
9.41k
    (ctxt->sax->notationDecl != NULL))
5646
6.53k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5647
34.9k
  } else {
5648
34.9k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5649
34.9k
  }
5650
44.3k
  if (Systemid != NULL) xmlFree(Systemid);
5651
44.3k
  if (Pubid != NULL) xmlFree(Pubid);
5652
44.3k
    }
5653
52.3k
}
5654
5655
/**
5656
 * xmlParseEntityDecl:
5657
 * @ctxt:  an XML parser context
5658
 *
5659
 * DEPRECATED: Internal function, don't use.
5660
 *
5661
 * Parse an entity declaration. Always consumes '<!'.
5662
 *
5663
 * [70] EntityDecl ::= GEDecl | PEDecl
5664
 *
5665
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5666
 *
5667
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5668
 *
5669
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5670
 *
5671
 * [74] PEDef ::= EntityValue | ExternalID
5672
 *
5673
 * [76] NDataDecl ::= S 'NDATA' S Name
5674
 *
5675
 * [ VC: Notation Declared ]
5676
 * The Name must match the declared name of a notation.
5677
 */
5678
5679
void
5680
232k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5681
232k
    const xmlChar *name = NULL;
5682
232k
    xmlChar *value = NULL;
5683
232k
    xmlChar *URI = NULL, *literal = NULL;
5684
232k
    const xmlChar *ndata = NULL;
5685
232k
    int isParameter = 0;
5686
232k
    xmlChar *orig = NULL;
5687
5688
232k
    if ((CUR != '<') || (NXT(1) != '!'))
5689
0
        return;
5690
232k
    SKIP(2);
5691
5692
    /* GROW; done in the caller */
5693
232k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5694
231k
  int inputid = ctxt->input->id;
5695
231k
  SKIP(6);
5696
231k
  if (SKIP_BLANKS_PE == 0) {
5697
43.0k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5698
43.0k
         "Space required after '<!ENTITY'\n");
5699
43.0k
  }
5700
5701
231k
  if (RAW == '%') {
5702
82.5k
      NEXT;
5703
82.5k
      if (SKIP_BLANKS_PE == 0) {
5704
4.35k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5705
4.35k
             "Space required after '%%'\n");
5706
4.35k
      }
5707
82.5k
      isParameter = 1;
5708
82.5k
  }
5709
5710
231k
        name = xmlParseName(ctxt);
5711
231k
  if (name == NULL) {
5712
13.2k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5713
13.2k
                     "xmlParseEntityDecl: no name\n");
5714
13.2k
            return;
5715
13.2k
  }
5716
218k
  if (xmlStrchr(name, ':') != NULL) {
5717
1.62k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5718
1.62k
         "colons are forbidden from entities names '%s'\n",
5719
1.62k
         name, NULL, NULL);
5720
1.62k
  }
5721
218k
  if (SKIP_BLANKS_PE == 0) {
5722
16.7k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5723
16.7k
         "Space required after the entity name\n");
5724
16.7k
  }
5725
5726
  /*
5727
   * handle the various case of definitions...
5728
   */
5729
218k
  if (isParameter) {
5730
74.0k
      if ((RAW == '"') || (RAW == '\'')) {
5731
50.2k
          value = xmlParseEntityValue(ctxt, &orig);
5732
50.2k
    if (value) {
5733
49.9k
        if ((ctxt->sax != NULL) &&
5734
49.9k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5735
44.0k
      ctxt->sax->entityDecl(ctxt->userData, name,
5736
44.0k
                        XML_INTERNAL_PARAMETER_ENTITY,
5737
44.0k
            NULL, NULL, value);
5738
49.9k
    }
5739
50.2k
      } else {
5740
23.7k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5741
23.7k
    if ((URI == NULL) && (literal == NULL)) {
5742
1.07k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5743
1.07k
    }
5744
23.7k
    if (URI) {
5745
21.9k
        xmlURIPtr uri;
5746
5747
21.9k
                    if (xmlParseURISafe((const char *) URI, &uri) < 0) {
5748
22
                        xmlErrMemory(ctxt);
5749
21.9k
                    } else if (uri == NULL) {
5750
                        /*
5751
                         * This really ought to be a well formedness error
5752
                         * but the XML Core WG decided otherwise c.f. issue
5753
                         * E26 of the XML erratas.
5754
                         */
5755
1.92k
                        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5756
1.92k
                                     "Invalid URI: %s\n", URI);
5757
19.9k
                    } else if (uri->fragment != NULL) {
5758
                        /*
5759
                         * Okay this is foolish to block those but not
5760
                         * invalid URIs.
5761
                         */
5762
603
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5763
19.3k
                    } else {
5764
19.3k
                        if ((ctxt->sax != NULL) &&
5765
19.3k
                            (!ctxt->disableSAX) &&
5766
19.3k
                            (ctxt->sax->entityDecl != NULL))
5767
18.5k
                            ctxt->sax->entityDecl(ctxt->userData, name,
5768
18.5k
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5769
18.5k
                                        literal, URI, NULL);
5770
19.3k
                    }
5771
21.9k
        xmlFreeURI(uri);
5772
21.9k
    }
5773
23.7k
      }
5774
144k
  } else {
5775
144k
      if ((RAW == '"') || (RAW == '\'')) {
5776
109k
          value = xmlParseEntityValue(ctxt, &orig);
5777
109k
    if ((ctxt->sax != NULL) &&
5778
109k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5779
87.3k
        ctxt->sax->entityDecl(ctxt->userData, name,
5780
87.3k
        XML_INTERNAL_GENERAL_ENTITY,
5781
87.3k
        NULL, NULL, value);
5782
    /*
5783
     * For expat compatibility in SAX mode.
5784
     */
5785
109k
    if ((ctxt->myDoc == NULL) ||
5786
109k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5787
4.67k
        if (ctxt->myDoc == NULL) {
5788
607
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5789
607
      if (ctxt->myDoc == NULL) {
5790
2
          xmlErrMemory(ctxt);
5791
2
          goto done;
5792
2
      }
5793
605
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5794
605
        }
5795
4.67k
        if (ctxt->myDoc->intSubset == NULL) {
5796
605
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5797
605
              BAD_CAST "fake", NULL, NULL);
5798
605
                        if (ctxt->myDoc->intSubset == NULL) {
5799
2
                            xmlErrMemory(ctxt);
5800
2
                            goto done;
5801
2
                        }
5802
605
                    }
5803
5804
4.67k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5805
4.67k
                    NULL, NULL, value);
5806
4.67k
    }
5807
109k
      } else {
5808
34.4k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5809
34.4k
    if ((URI == NULL) && (literal == NULL)) {
5810
4.77k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5811
4.77k
    }
5812
34.4k
    if (URI) {
5813
27.9k
        xmlURIPtr uri;
5814
5815
27.9k
                    if (xmlParseURISafe((const char *) URI, &uri) < 0) {
5816
44
                        xmlErrMemory(ctxt);
5817
27.9k
                    } else if (uri == NULL) {
5818
                        /*
5819
                         * This really ought to be a well formedness error
5820
                         * but the XML Core WG decided otherwise c.f. issue
5821
                         * E26 of the XML erratas.
5822
                         */
5823
9.71k
                        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5824
9.71k
                                     "Invalid URI: %s\n", URI);
5825
18.2k
                    } else if (uri->fragment != NULL) {
5826
                        /*
5827
                         * Okay this is foolish to block those but not
5828
                         * invalid URIs.
5829
                         */
5830
1.01k
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5831
1.01k
                    }
5832
27.9k
                    xmlFreeURI(uri);
5833
27.9k
    }
5834
34.4k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5835
4.53k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5836
4.53k
           "Space required before 'NDATA'\n");
5837
4.53k
    }
5838
34.4k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5839
5.69k
        SKIP(5);
5840
5.69k
        if (SKIP_BLANKS_PE == 0) {
5841
568
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5842
568
               "Space required after 'NDATA'\n");
5843
568
        }
5844
5.69k
        ndata = xmlParseName(ctxt);
5845
5.69k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5846
5.69k
            (ctxt->sax->unparsedEntityDecl != NULL))
5847
4.83k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5848
4.83k
            literal, URI, ndata);
5849
28.7k
    } else {
5850
28.7k
        if ((ctxt->sax != NULL) &&
5851
28.7k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5852
22.0k
      ctxt->sax->entityDecl(ctxt->userData, name,
5853
22.0k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5854
22.0k
            literal, URI, NULL);
5855
        /*
5856
         * For expat compatibility in SAX mode.
5857
         * assuming the entity replacement was asked for
5858
         */
5859
28.7k
        if ((ctxt->replaceEntities != 0) &&
5860
28.7k
      ((ctxt->myDoc == NULL) ||
5861
16.0k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5862
2.53k
      if (ctxt->myDoc == NULL) {
5863
683
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5864
683
          if (ctxt->myDoc == NULL) {
5865
2
              xmlErrMemory(ctxt);
5866
2
        goto done;
5867
2
          }
5868
681
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5869
681
      }
5870
5871
2.53k
      if (ctxt->myDoc->intSubset == NULL) {
5872
681
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5873
681
            BAD_CAST "fake", NULL, NULL);
5874
681
                            if (ctxt->myDoc->intSubset == NULL) {
5875
2
                                xmlErrMemory(ctxt);
5876
2
                                goto done;
5877
2
                            }
5878
681
                        }
5879
2.53k
      xmlSAX2EntityDecl(ctxt, name,
5880
2.53k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5881
2.53k
                  literal, URI, NULL);
5882
2.53k
        }
5883
28.7k
    }
5884
34.4k
      }
5885
144k
  }
5886
218k
  SKIP_BLANKS_PE;
5887
218k
  if (RAW != '>') {
5888
7.15k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5889
7.15k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5890
7.15k
      xmlHaltParser(ctxt);
5891
211k
  } else {
5892
211k
      if (inputid != ctxt->input->id) {
5893
322
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5894
322
                         "Entity declaration doesn't start and stop in"
5895
322
                               " the same entity\n");
5896
322
      }
5897
211k
      NEXT;
5898
211k
  }
5899
218k
  if (orig != NULL) {
5900
      /*
5901
       * Ugly mechanism to save the raw entity value.
5902
       */
5903
159k
      xmlEntityPtr cur = NULL;
5904
5905
159k
      if (isParameter) {
5906
49.9k
          if ((ctxt->sax != NULL) &&
5907
49.9k
        (ctxt->sax->getParameterEntity != NULL))
5908
49.9k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5909
109k
      } else {
5910
109k
          if ((ctxt->sax != NULL) &&
5911
109k
        (ctxt->sax->getEntity != NULL))
5912
109k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5913
109k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5914
15.1k
        cur = xmlSAX2GetEntity(ctxt, name);
5915
15.1k
    }
5916
109k
      }
5917
159k
            if ((cur != NULL) && (cur->orig == NULL)) {
5918
75.8k
    cur->orig = orig;
5919
75.8k
                orig = NULL;
5920
75.8k
      }
5921
159k
  }
5922
5923
218k
done:
5924
218k
  if (value != NULL) xmlFree(value);
5925
218k
  if (URI != NULL) xmlFree(URI);
5926
218k
  if (literal != NULL) xmlFree(literal);
5927
218k
        if (orig != NULL) xmlFree(orig);
5928
218k
    }
5929
232k
}
5930
5931
/**
5932
 * xmlParseDefaultDecl:
5933
 * @ctxt:  an XML parser context
5934
 * @value:  Receive a possible fixed default value for the attribute
5935
 *
5936
 * DEPRECATED: Internal function, don't use.
5937
 *
5938
 * Parse an attribute default declaration
5939
 *
5940
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5941
 *
5942
 * [ VC: Required Attribute ]
5943
 * if the default declaration is the keyword #REQUIRED, then the
5944
 * attribute must be specified for all elements of the type in the
5945
 * attribute-list declaration.
5946
 *
5947
 * [ VC: Attribute Default Legal ]
5948
 * The declared default value must meet the lexical constraints of
5949
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5950
 *
5951
 * [ VC: Fixed Attribute Default ]
5952
 * if an attribute has a default value declared with the #FIXED
5953
 * keyword, instances of that attribute must match the default value.
5954
 *
5955
 * [ WFC: No < in Attribute Values ]
5956
 * handled in xmlParseAttValue()
5957
 *
5958
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5959
 *          or XML_ATTRIBUTE_FIXED.
5960
 */
5961
5962
int
5963
513k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5964
513k
    int val;
5965
513k
    xmlChar *ret;
5966
5967
513k
    *value = NULL;
5968
513k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5969
11.4k
  SKIP(9);
5970
11.4k
  return(XML_ATTRIBUTE_REQUIRED);
5971
11.4k
    }
5972
501k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5973
140k
  SKIP(8);
5974
140k
  return(XML_ATTRIBUTE_IMPLIED);
5975
140k
    }
5976
360k
    val = XML_ATTRIBUTE_NONE;
5977
360k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5978
15.3k
  SKIP(6);
5979
15.3k
  val = XML_ATTRIBUTE_FIXED;
5980
15.3k
  if (SKIP_BLANKS_PE == 0) {
5981
1.04k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5982
1.04k
         "Space required after '#FIXED'\n");
5983
1.04k
  }
5984
15.3k
    }
5985
360k
    ret = xmlParseAttValue(ctxt);
5986
360k
    if (ret == NULL) {
5987
31.6k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5988
31.6k
           "Attribute default value declaration error\n");
5989
31.6k
    } else
5990
329k
        *value = ret;
5991
360k
    return(val);
5992
501k
}
5993
5994
/**
5995
 * xmlParseNotationType:
5996
 * @ctxt:  an XML parser context
5997
 *
5998
 * DEPRECATED: Internal function, don't use.
5999
 *
6000
 * parse an Notation attribute type.
6001
 *
6002
 * Note: the leading 'NOTATION' S part has already being parsed...
6003
 *
6004
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6005
 *
6006
 * [ VC: Notation Attributes ]
6007
 * Values of this type must match one of the notation names included
6008
 * in the declaration; all notation names in the declaration must be declared.
6009
 *
6010
 * Returns: the notation attribute tree built while parsing
6011
 */
6012
6013
xmlEnumerationPtr
6014
5.21k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
6015
5.21k
    const xmlChar *name;
6016
5.21k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6017
6018
5.21k
    if (RAW != '(') {
6019
808
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
6020
808
  return(NULL);
6021
808
    }
6022
21.9k
    do {
6023
21.9k
        NEXT;
6024
21.9k
  SKIP_BLANKS_PE;
6025
21.9k
        name = xmlParseName(ctxt);
6026
21.9k
  if (name == NULL) {
6027
1.05k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6028
1.05k
         "Name expected in NOTATION declaration\n");
6029
1.05k
            xmlFreeEnumeration(ret);
6030
1.05k
      return(NULL);
6031
1.05k
  }
6032
20.8k
  tmp = ret;
6033
91.0k
  while (tmp != NULL) {
6034
79.1k
      if (xmlStrEqual(name, tmp->name)) {
6035
8.96k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6036
8.96k
    "standalone: attribute notation value token %s duplicated\n",
6037
8.96k
         name, NULL);
6038
8.96k
    if (!xmlDictOwns(ctxt->dict, name))
6039
0
        xmlFree((xmlChar *) name);
6040
8.96k
    break;
6041
8.96k
      }
6042
70.1k
      tmp = tmp->next;
6043
70.1k
  }
6044
20.8k
  if (tmp == NULL) {
6045
11.9k
      cur = xmlCreateEnumeration(name);
6046
11.9k
      if (cur == NULL) {
6047
36
                xmlErrMemory(ctxt);
6048
36
                xmlFreeEnumeration(ret);
6049
36
                return(NULL);
6050
36
            }
6051
11.8k
      if (last == NULL) ret = last = cur;
6052
8.12k
      else {
6053
8.12k
    last->next = cur;
6054
8.12k
    last = cur;
6055
8.12k
      }
6056
11.8k
  }
6057
20.8k
  SKIP_BLANKS_PE;
6058
20.8k
    } while (RAW == '|');
6059
3.31k
    if (RAW != ')') {
6060
947
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
6061
947
        xmlFreeEnumeration(ret);
6062
947
  return(NULL);
6063
947
    }
6064
2.36k
    NEXT;
6065
2.36k
    return(ret);
6066
3.31k
}
6067
6068
/**
6069
 * xmlParseEnumerationType:
6070
 * @ctxt:  an XML parser context
6071
 *
6072
 * DEPRECATED: Internal function, don't use.
6073
 *
6074
 * parse an Enumeration attribute type.
6075
 *
6076
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
6077
 *
6078
 * [ VC: Enumeration ]
6079
 * Values of this type must match one of the Nmtoken tokens in
6080
 * the declaration
6081
 *
6082
 * Returns: the enumeration attribute tree built while parsing
6083
 */
6084
6085
xmlEnumerationPtr
6086
81.9k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
6087
81.9k
    xmlChar *name;
6088
81.9k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6089
6090
81.9k
    if (RAW != '(') {
6091
6.46k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
6092
6.46k
  return(NULL);
6093
6.46k
    }
6094
123k
    do {
6095
123k
        NEXT;
6096
123k
  SKIP_BLANKS_PE;
6097
123k
        name = xmlParseNmtoken(ctxt);
6098
123k
  if (name == NULL) {
6099
1.19k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
6100
1.19k
      return(ret);
6101
1.19k
  }
6102
121k
  tmp = ret;
6103
226k
  while (tmp != NULL) {
6104
115k
      if (xmlStrEqual(name, tmp->name)) {
6105
11.1k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6106
11.1k
    "standalone: attribute enumeration value token %s duplicated\n",
6107
11.1k
         name, NULL);
6108
11.1k
    if (!xmlDictOwns(ctxt->dict, name))
6109
11.1k
        xmlFree(name);
6110
11.1k
    break;
6111
11.1k
      }
6112
104k
      tmp = tmp->next;
6113
104k
  }
6114
121k
  if (tmp == NULL) {
6115
110k
      cur = xmlCreateEnumeration(name);
6116
110k
      if (!xmlDictOwns(ctxt->dict, name))
6117
110k
    xmlFree(name);
6118
110k
      if (cur == NULL) {
6119
59
                xmlErrMemory(ctxt);
6120
59
                xmlFreeEnumeration(ret);
6121
59
                return(NULL);
6122
59
            }
6123
110k
      if (last == NULL) ret = last = cur;
6124
35.9k
      else {
6125
35.9k
    last->next = cur;
6126
35.9k
    last = cur;
6127
35.9k
      }
6128
110k
  }
6129
121k
  SKIP_BLANKS_PE;
6130
121k
    } while (RAW == '|');
6131
74.2k
    if (RAW != ')') {
6132
2.73k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
6133
2.73k
  return(ret);
6134
2.73k
    }
6135
71.4k
    NEXT;
6136
71.4k
    return(ret);
6137
74.2k
}
6138
6139
/**
6140
 * xmlParseEnumeratedType:
6141
 * @ctxt:  an XML parser context
6142
 * @tree:  the enumeration tree built while parsing
6143
 *
6144
 * DEPRECATED: Internal function, don't use.
6145
 *
6146
 * parse an Enumerated attribute type.
6147
 *
6148
 * [57] EnumeratedType ::= NotationType | Enumeration
6149
 *
6150
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6151
 *
6152
 *
6153
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6154
 */
6155
6156
int
6157
87.7k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6158
87.7k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6159
5.82k
  SKIP(8);
6160
5.82k
  if (SKIP_BLANKS_PE == 0) {
6161
615
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6162
615
         "Space required after 'NOTATION'\n");
6163
615
      return(0);
6164
615
  }
6165
5.21k
  *tree = xmlParseNotationType(ctxt);
6166
5.21k
  if (*tree == NULL) return(0);
6167
2.36k
  return(XML_ATTRIBUTE_NOTATION);
6168
5.21k
    }
6169
81.9k
    *tree = xmlParseEnumerationType(ctxt);
6170
81.9k
    if (*tree == NULL) return(0);
6171
74.7k
    return(XML_ATTRIBUTE_ENUMERATION);
6172
81.9k
}
6173
6174
/**
6175
 * xmlParseAttributeType:
6176
 * @ctxt:  an XML parser context
6177
 * @tree:  the enumeration tree built while parsing
6178
 *
6179
 * DEPRECATED: Internal function, don't use.
6180
 *
6181
 * parse the Attribute list def for an element
6182
 *
6183
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6184
 *
6185
 * [55] StringType ::= 'CDATA'
6186
 *
6187
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6188
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6189
 *
6190
 * Validity constraints for attribute values syntax are checked in
6191
 * xmlValidateAttributeValue()
6192
 *
6193
 * [ VC: ID ]
6194
 * Values of type ID must match the Name production. A name must not
6195
 * appear more than once in an XML document as a value of this type;
6196
 * i.e., ID values must uniquely identify the elements which bear them.
6197
 *
6198
 * [ VC: One ID per Element Type ]
6199
 * No element type may have more than one ID attribute specified.
6200
 *
6201
 * [ VC: ID Attribute Default ]
6202
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6203
 *
6204
 * [ VC: IDREF ]
6205
 * Values of type IDREF must match the Name production, and values
6206
 * of type IDREFS must match Names; each IDREF Name must match the value
6207
 * of an ID attribute on some element in the XML document; i.e. IDREF
6208
 * values must match the value of some ID attribute.
6209
 *
6210
 * [ VC: Entity Name ]
6211
 * Values of type ENTITY must match the Name production, values
6212
 * of type ENTITIES must match Names; each Entity Name must match the
6213
 * name of an unparsed entity declared in the DTD.
6214
 *
6215
 * [ VC: Name Token ]
6216
 * Values of type NMTOKEN must match the Nmtoken production; values
6217
 * of type NMTOKENS must match Nmtokens.
6218
 *
6219
 * Returns the attribute type
6220
 */
6221
int
6222
528k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6223
528k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6224
227k
  SKIP(5);
6225
227k
  return(XML_ATTRIBUTE_CDATA);
6226
300k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6227
8.16k
  SKIP(6);
6228
8.16k
  return(XML_ATTRIBUTE_IDREFS);
6229
292k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6230
7.82k
  SKIP(5);
6231
7.82k
  return(XML_ATTRIBUTE_IDREF);
6232
284k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6233
122k
        SKIP(2);
6234
122k
  return(XML_ATTRIBUTE_ID);
6235
162k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6236
18.6k
  SKIP(6);
6237
18.6k
  return(XML_ATTRIBUTE_ENTITY);
6238
143k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6239
18.6k
  SKIP(8);
6240
18.6k
  return(XML_ATTRIBUTE_ENTITIES);
6241
125k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6242
8.49k
  SKIP(8);
6243
8.49k
  return(XML_ATTRIBUTE_NMTOKENS);
6244
116k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6245
28.9k
  SKIP(7);
6246
28.9k
  return(XML_ATTRIBUTE_NMTOKEN);
6247
28.9k
     }
6248
87.7k
     return(xmlParseEnumeratedType(ctxt, tree));
6249
528k
}
6250
6251
/**
6252
 * xmlParseAttributeListDecl:
6253
 * @ctxt:  an XML parser context
6254
 *
6255
 * DEPRECATED: Internal function, don't use.
6256
 *
6257
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6258
 *
6259
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6260
 *
6261
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6262
 *
6263
 */
6264
void
6265
321k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6266
321k
    const xmlChar *elemName;
6267
321k
    const xmlChar *attrName;
6268
321k
    xmlEnumerationPtr tree;
6269
6270
321k
    if ((CUR != '<') || (NXT(1) != '!'))
6271
0
        return;
6272
321k
    SKIP(2);
6273
6274
321k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6275
321k
  int inputid = ctxt->input->id;
6276
6277
321k
  SKIP(7);
6278
321k
  if (SKIP_BLANKS_PE == 0) {
6279
22.0k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6280
22.0k
                     "Space required after '<!ATTLIST'\n");
6281
22.0k
  }
6282
321k
        elemName = xmlParseName(ctxt);
6283
321k
  if (elemName == NULL) {
6284
2.71k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6285
2.71k
         "ATTLIST: no name for Element\n");
6286
2.71k
      return;
6287
2.71k
  }
6288
318k
  SKIP_BLANKS_PE;
6289
318k
  GROW;
6290
822k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6291
581k
      int type;
6292
581k
      int def;
6293
581k
      xmlChar *defaultValue = NULL;
6294
6295
581k
      GROW;
6296
581k
            tree = NULL;
6297
581k
      attrName = xmlParseName(ctxt);
6298
581k
      if (attrName == NULL) {
6299
44.4k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6300
44.4k
             "ATTLIST: no name for Attribute\n");
6301
44.4k
    break;
6302
44.4k
      }
6303
536k
      GROW;
6304
536k
      if (SKIP_BLANKS_PE == 0) {
6305
8.54k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6306
8.54k
            "Space required after the attribute name\n");
6307
8.54k
    break;
6308
8.54k
      }
6309
6310
528k
      type = xmlParseAttributeType(ctxt, &tree);
6311
528k
      if (type <= 0) {
6312
10.6k
          break;
6313
10.6k
      }
6314
6315
517k
      GROW;
6316
517k
      if (SKIP_BLANKS_PE == 0) {
6317
4.35k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6318
4.35k
             "Space required after the attribute type\n");
6319
4.35k
          if (tree != NULL)
6320
3.26k
        xmlFreeEnumeration(tree);
6321
4.35k
    break;
6322
4.35k
      }
6323
6324
513k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6325
513k
      if (def <= 0) {
6326
0
                if (defaultValue != NULL)
6327
0
        xmlFree(defaultValue);
6328
0
          if (tree != NULL)
6329
0
        xmlFreeEnumeration(tree);
6330
0
          break;
6331
0
      }
6332
513k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6333
160k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6334
6335
513k
      GROW;
6336
513k
            if (RAW != '>') {
6337
317k
    if (SKIP_BLANKS_PE == 0) {
6338
8.85k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6339
8.85k
      "Space required after the attribute default value\n");
6340
8.85k
        if (defaultValue != NULL)
6341
1.81k
      xmlFree(defaultValue);
6342
8.85k
        if (tree != NULL)
6343
858
      xmlFreeEnumeration(tree);
6344
8.85k
        break;
6345
8.85k
    }
6346
317k
      }
6347
504k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6348
504k
    (ctxt->sax->attributeDecl != NULL))
6349
464k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6350
464k
                          type, def, defaultValue, tree);
6351
39.7k
      else if (tree != NULL)
6352
8.15k
    xmlFreeEnumeration(tree);
6353
6354
504k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6355
504k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6356
504k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6357
320k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6358
320k
      }
6359
504k
      if (ctxt->sax2) {
6360
482k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6361
482k
      }
6362
504k
      if (defaultValue != NULL)
6363
327k
          xmlFree(defaultValue);
6364
504k
      GROW;
6365
504k
  }
6366
318k
  if (RAW == '>') {
6367
245k
      if (inputid != ctxt->input->id) {
6368
1.53k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6369
1.53k
                               "Attribute list declaration doesn't start and"
6370
1.53k
                               " stop in the same entity\n");
6371
1.53k
      }
6372
245k
      NEXT;
6373
245k
  }
6374
318k
    }
6375
321k
}
6376
6377
/**
6378
 * xmlParseElementMixedContentDecl:
6379
 * @ctxt:  an XML parser context
6380
 * @inputchk:  the input used for the current entity, needed for boundary checks
6381
 *
6382
 * DEPRECATED: Internal function, don't use.
6383
 *
6384
 * parse the declaration for a Mixed Element content
6385
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6386
 *
6387
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6388
 *                '(' S? '#PCDATA' S? ')'
6389
 *
6390
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6391
 *
6392
 * [ VC: No Duplicate Types ]
6393
 * The same name must not appear more than once in a single
6394
 * mixed-content declaration.
6395
 *
6396
 * returns: the list of the xmlElementContentPtr describing the element choices
6397
 */
6398
xmlElementContentPtr
6399
39.9k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6400
39.9k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6401
39.9k
    const xmlChar *elem = NULL;
6402
6403
39.9k
    GROW;
6404
39.9k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6405
39.9k
  SKIP(7);
6406
39.9k
  SKIP_BLANKS_PE;
6407
39.9k
  if (RAW == ')') {
6408
20.2k
      if (ctxt->input->id != inputchk) {
6409
285
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6410
285
                               "Element content declaration doesn't start and"
6411
285
                               " stop in the same entity\n");
6412
285
      }
6413
20.2k
      NEXT;
6414
20.2k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6415
20.2k
      if (ret == NULL)
6416
20
                goto mem_error;
6417
20.2k
      if (RAW == '*') {
6418
991
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6419
991
    NEXT;
6420
991
      }
6421
20.2k
      return(ret);
6422
20.2k
  }
6423
19.6k
  if ((RAW == '(') || (RAW == '|')) {
6424
16.0k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6425
16.0k
      if (ret == NULL)
6426
11
                goto mem_error;
6427
16.0k
  }
6428
139k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6429
120k
      NEXT;
6430
120k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6431
120k
            if (n == NULL)
6432
32
                goto mem_error;
6433
120k
      if (elem == NULL) {
6434
15.9k
    n->c1 = cur;
6435
15.9k
    if (cur != NULL)
6436
15.9k
        cur->parent = n;
6437
15.9k
    ret = cur = n;
6438
104k
      } else {
6439
104k
          cur->c2 = n;
6440
104k
    n->parent = cur;
6441
104k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6442
104k
                if (n->c1 == NULL)
6443
28
                    goto mem_error;
6444
104k
    n->c1->parent = n;
6445
104k
    cur = n;
6446
104k
      }
6447
120k
      SKIP_BLANKS_PE;
6448
120k
      elem = xmlParseName(ctxt);
6449
120k
      if (elem == NULL) {
6450
443
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6451
443
      "xmlParseElementMixedContentDecl : Name expected\n");
6452
443
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6453
443
    return(NULL);
6454
443
      }
6455
119k
      SKIP_BLANKS_PE;
6456
119k
      GROW;
6457
119k
  }
6458
19.1k
  if ((RAW == ')') && (NXT(1) == '*')) {
6459
10.9k
      if (elem != NULL) {
6460
10.9k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6461
10.9k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6462
10.9k
    if (cur->c2 == NULL)
6463
15
                    goto mem_error;
6464
10.9k
    cur->c2->parent = cur;
6465
10.9k
            }
6466
10.9k
            if (ret != NULL)
6467
10.9k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6468
10.9k
      if (ctxt->input->id != inputchk) {
6469
20
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6470
20
                               "Element content declaration doesn't start and"
6471
20
                               " stop in the same entity\n");
6472
20
      }
6473
10.9k
      SKIP(2);
6474
10.9k
  } else {
6475
8.13k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6476
8.13k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6477
8.13k
      return(NULL);
6478
8.13k
  }
6479
6480
19.1k
    } else {
6481
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6482
0
    }
6483
10.9k
    return(ret);
6484
6485
106
mem_error:
6486
106
    xmlErrMemory(ctxt);
6487
106
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6488
106
    return(NULL);
6489
39.9k
}
6490
6491
/**
6492
 * xmlParseElementChildrenContentDeclPriv:
6493
 * @ctxt:  an XML parser context
6494
 * @inputchk:  the input used for the current entity, needed for boundary checks
6495
 * @depth: the level of recursion
6496
 *
6497
 * parse the declaration for a Mixed Element content
6498
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6499
 *
6500
 *
6501
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6502
 *
6503
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6504
 *
6505
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6506
 *
6507
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6508
 *
6509
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6510
 * TODO Parameter-entity replacement text must be properly nested
6511
 *  with parenthesized groups. That is to say, if either of the
6512
 *  opening or closing parentheses in a choice, seq, or Mixed
6513
 *  construct is contained in the replacement text for a parameter
6514
 *  entity, both must be contained in the same replacement text. For
6515
 *  interoperability, if a parameter-entity reference appears in a
6516
 *  choice, seq, or Mixed construct, its replacement text should not
6517
 *  be empty, and neither the first nor last non-blank character of
6518
 *  the replacement text should be a connector (| or ,).
6519
 *
6520
 * Returns the tree of xmlElementContentPtr describing the element
6521
 *          hierarchy.
6522
 */
6523
static xmlElementContentPtr
6524
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6525
339k
                                       int depth) {
6526
339k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6527
339k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6528
339k
    const xmlChar *elem;
6529
339k
    xmlChar type = 0;
6530
6531
339k
    if (depth > maxDepth) {
6532
23
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6533
23
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6534
23
                "use XML_PARSE_HUGE\n", depth);
6535
23
  return(NULL);
6536
23
    }
6537
339k
    SKIP_BLANKS_PE;
6538
339k
    GROW;
6539
339k
    if (RAW == '(') {
6540
226k
  int inputid = ctxt->input->id;
6541
6542
        /* Recurse on first child */
6543
226k
  NEXT;
6544
226k
  SKIP_BLANKS_PE;
6545
226k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6546
226k
                                                           depth + 1);
6547
226k
        if (cur == NULL)
6548
188k
            return(NULL);
6549
38.0k
  SKIP_BLANKS_PE;
6550
38.0k
  GROW;
6551
112k
    } else {
6552
112k
  elem = xmlParseName(ctxt);
6553
112k
  if (elem == NULL) {
6554
3.52k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6555
3.52k
      return(NULL);
6556
3.52k
  }
6557
109k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6558
109k
  if (cur == NULL) {
6559
67
      xmlErrMemory(ctxt);
6560
67
      return(NULL);
6561
67
  }
6562
109k
  GROW;
6563
109k
  if (RAW == '?') {
6564
11.2k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6565
11.2k
      NEXT;
6566
98.0k
  } else if (RAW == '*') {
6567
9.97k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6568
9.97k
      NEXT;
6569
88.0k
  } else if (RAW == '+') {
6570
7.89k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6571
7.89k
      NEXT;
6572
80.1k
  } else {
6573
80.1k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6574
80.1k
  }
6575
109k
  GROW;
6576
109k
    }
6577
147k
    SKIP_BLANKS_PE;
6578
597k
    while ((RAW != ')') && (PARSER_STOPPED(ctxt) == 0)) {
6579
        /*
6580
   * Each loop we parse one separator and one element.
6581
   */
6582
479k
        if (RAW == ',') {
6583
111k
      if (type == 0) type = CUR;
6584
6585
      /*
6586
       * Detect "Name | Name , Name" error
6587
       */
6588
78.7k
      else if (type != CUR) {
6589
25
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6590
25
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6591
25
                      type);
6592
25
    if ((last != NULL) && (last != ret))
6593
25
        xmlFreeDocElementContent(ctxt->myDoc, last);
6594
25
    if (ret != NULL)
6595
25
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6596
25
    return(NULL);
6597
25
      }
6598
111k
      NEXT;
6599
6600
111k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6601
111k
      if (op == NULL) {
6602
25
                xmlErrMemory(ctxt);
6603
25
    if ((last != NULL) && (last != ret))
6604
7
        xmlFreeDocElementContent(ctxt->myDoc, last);
6605
25
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6606
25
    return(NULL);
6607
25
      }
6608
111k
      if (last == NULL) {
6609
32.9k
    op->c1 = ret;
6610
32.9k
    if (ret != NULL)
6611
32.9k
        ret->parent = op;
6612
32.9k
    ret = cur = op;
6613
78.7k
      } else {
6614
78.7k
          cur->c2 = op;
6615
78.7k
    if (op != NULL)
6616
78.7k
        op->parent = cur;
6617
78.7k
    op->c1 = last;
6618
78.7k
    if (last != NULL)
6619
78.7k
        last->parent = op;
6620
78.7k
    cur =op;
6621
78.7k
    last = NULL;
6622
78.7k
      }
6623
367k
  } else if (RAW == '|') {
6624
357k
      if (type == 0) type = CUR;
6625
6626
      /*
6627
       * Detect "Name , Name | Name" error
6628
       */
6629
301k
      else if (type != CUR) {
6630
13
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6631
13
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6632
13
          type);
6633
13
    if ((last != NULL) && (last != ret))
6634
13
        xmlFreeDocElementContent(ctxt->myDoc, last);
6635
13
    if (ret != NULL)
6636
13
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6637
13
    return(NULL);
6638
13
      }
6639
357k
      NEXT;
6640
6641
357k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6642
357k
      if (op == NULL) {
6643
51
                xmlErrMemory(ctxt);
6644
51
    if ((last != NULL) && (last != ret))
6645
27
        xmlFreeDocElementContent(ctxt->myDoc, last);
6646
51
    if (ret != NULL)
6647
51
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6648
51
    return(NULL);
6649
51
      }
6650
357k
      if (last == NULL) {
6651
55.2k
    op->c1 = ret;
6652
55.2k
    if (ret != NULL)
6653
55.2k
        ret->parent = op;
6654
55.2k
    ret = cur = op;
6655
301k
      } else {
6656
301k
          cur->c2 = op;
6657
301k
    if (op != NULL)
6658
301k
        op->parent = cur;
6659
301k
    op->c1 = last;
6660
301k
    if (last != NULL)
6661
301k
        last->parent = op;
6662
301k
    cur =op;
6663
301k
    last = NULL;
6664
301k
      }
6665
357k
  } else {
6666
10.4k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6667
10.4k
      if ((last != NULL) && (last != ret))
6668
6.00k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6669
10.4k
      if (ret != NULL)
6670
10.4k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6671
10.4k
      return(NULL);
6672
10.4k
  }
6673
468k
  GROW;
6674
468k
  SKIP_BLANKS_PE;
6675
468k
  GROW;
6676
468k
  if (RAW == '(') {
6677
64.7k
      int inputid = ctxt->input->id;
6678
      /* Recurse on second child */
6679
64.7k
      NEXT;
6680
64.7k
      SKIP_BLANKS_PE;
6681
64.7k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6682
64.7k
                                                          depth + 1);
6683
64.7k
            if (last == NULL) {
6684
15.9k
    if (ret != NULL)
6685
15.9k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6686
15.9k
    return(NULL);
6687
15.9k
            }
6688
48.8k
      SKIP_BLANKS_PE;
6689
404k
  } else {
6690
404k
      elem = xmlParseName(ctxt);
6691
404k
      if (elem == NULL) {
6692
2.90k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6693
2.90k
    if (ret != NULL)
6694
2.90k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6695
2.90k
    return(NULL);
6696
2.90k
      }
6697
401k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6698
401k
      if (last == NULL) {
6699
78
                xmlErrMemory(ctxt);
6700
78
    if (ret != NULL)
6701
78
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6702
78
    return(NULL);
6703
78
      }
6704
401k
      if (RAW == '?') {
6705
24.0k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6706
24.0k
    NEXT;
6707
376k
      } else if (RAW == '*') {
6708
15.8k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6709
15.8k
    NEXT;
6710
361k
      } else if (RAW == '+') {
6711
10.3k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6712
10.3k
    NEXT;
6713
350k
      } else {
6714
350k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6715
350k
      }
6716
401k
  }
6717
449k
  SKIP_BLANKS_PE;
6718
449k
  GROW;
6719
449k
    }
6720
117k
    if ((cur != NULL) && (last != NULL)) {
6721
63.2k
        cur->c2 = last;
6722
63.2k
  if (last != NULL)
6723
63.2k
      last->parent = cur;
6724
63.2k
    }
6725
117k
    if (ctxt->input->id != inputchk) {
6726
239
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6727
239
                       "Element content declaration doesn't start and stop in"
6728
239
                       " the same entity\n");
6729
239
    }
6730
117k
    NEXT;
6731
117k
    if (RAW == '?') {
6732
24.4k
  if (ret != NULL) {
6733
24.4k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6734
24.4k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6735
4.21k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6736
20.2k
      else
6737
20.2k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6738
24.4k
  }
6739
24.4k
  NEXT;
6740
93.4k
    } else if (RAW == '*') {
6741
20.9k
  if (ret != NULL) {
6742
20.9k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6743
20.9k
      cur = ret;
6744
      /*
6745
       * Some normalization:
6746
       * (a | b* | c?)* == (a | b | c)*
6747
       */
6748
76.3k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6749
55.3k
    if ((cur->c1 != NULL) &&
6750
55.3k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6751
55.3k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6752
7.96k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6753
55.3k
    if ((cur->c2 != NULL) &&
6754
55.3k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6755
55.3k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6756
4.74k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6757
55.3k
    cur = cur->c2;
6758
55.3k
      }
6759
20.9k
  }
6760
20.9k
  NEXT;
6761
72.4k
    } else if (RAW == '+') {
6762
25.2k
  if (ret != NULL) {
6763
25.2k
      int found = 0;
6764
6765
25.2k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6766
25.2k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6767
8.63k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6768
16.5k
      else
6769
16.5k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6770
      /*
6771
       * Some normalization:
6772
       * (a | b*)+ == (a | b)*
6773
       * (a | b?)+ == (a | b)*
6774
       */
6775
48.7k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6776
23.4k
    if ((cur->c1 != NULL) &&
6777
23.4k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6778
23.4k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6779
2.25k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6780
2.25k
        found = 1;
6781
2.25k
    }
6782
23.4k
    if ((cur->c2 != NULL) &&
6783
23.4k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6784
23.4k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6785
3.89k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6786
3.89k
        found = 1;
6787
3.89k
    }
6788
23.4k
    cur = cur->c2;
6789
23.4k
      }
6790
25.2k
      if (found)
6791
5.15k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6792
25.2k
  }
6793
25.2k
  NEXT;
6794
25.2k
    }
6795
117k
    return(ret);
6796
147k
}
6797
6798
/**
6799
 * xmlParseElementChildrenContentDecl:
6800
 * @ctxt:  an XML parser context
6801
 * @inputchk:  the input used for the current entity, needed for boundary checks
6802
 *
6803
 * DEPRECATED: Internal function, don't use.
6804
 *
6805
 * parse the declaration for a Mixed Element content
6806
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6807
 *
6808
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6809
 *
6810
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6811
 *
6812
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6813
 *
6814
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6815
 *
6816
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6817
 * TODO Parameter-entity replacement text must be properly nested
6818
 *  with parenthesized groups. That is to say, if either of the
6819
 *  opening or closing parentheses in a choice, seq, or Mixed
6820
 *  construct is contained in the replacement text for a parameter
6821
 *  entity, both must be contained in the same replacement text. For
6822
 *  interoperability, if a parameter-entity reference appears in a
6823
 *  choice, seq, or Mixed construct, its replacement text should not
6824
 *  be empty, and neither the first nor last non-blank character of
6825
 *  the replacement text should be a connector (| or ,).
6826
 *
6827
 * Returns the tree of xmlElementContentPtr describing the element
6828
 *          hierarchy.
6829
 */
6830
xmlElementContentPtr
6831
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6832
    /* stub left for API/ABI compat */
6833
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6834
0
}
6835
6836
/**
6837
 * xmlParseElementContentDecl:
6838
 * @ctxt:  an XML parser context
6839
 * @name:  the name of the element being defined.
6840
 * @result:  the Element Content pointer will be stored here if any
6841
 *
6842
 * DEPRECATED: Internal function, don't use.
6843
 *
6844
 * parse the declaration for an Element content either Mixed or Children,
6845
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6846
 *
6847
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6848
 *
6849
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6850
 */
6851
6852
int
6853
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6854
88.0k
                           xmlElementContentPtr *result) {
6855
6856
88.0k
    xmlElementContentPtr tree = NULL;
6857
88.0k
    int inputid = ctxt->input->id;
6858
88.0k
    int res;
6859
6860
88.0k
    *result = NULL;
6861
6862
88.0k
    if (RAW != '(') {
6863
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6864
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6865
0
  return(-1);
6866
0
    }
6867
88.0k
    NEXT;
6868
88.0k
    GROW;
6869
88.0k
    SKIP_BLANKS_PE;
6870
88.0k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6871
39.9k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6872
39.9k
  res = XML_ELEMENT_TYPE_MIXED;
6873
48.1k
    } else {
6874
48.1k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6875
48.1k
  res = XML_ELEMENT_TYPE_ELEMENT;
6876
48.1k
    }
6877
88.0k
    SKIP_BLANKS_PE;
6878
88.0k
    *result = tree;
6879
88.0k
    return(res);
6880
88.0k
}
6881
6882
/**
6883
 * xmlParseElementDecl:
6884
 * @ctxt:  an XML parser context
6885
 *
6886
 * DEPRECATED: Internal function, don't use.
6887
 *
6888
 * Parse an element declaration. Always consumes '<!'.
6889
 *
6890
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6891
 *
6892
 * [ VC: Unique Element Type Declaration ]
6893
 * No element type may be declared more than once
6894
 *
6895
 * Returns the type of the element, or -1 in case of error
6896
 */
6897
int
6898
118k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6899
118k
    const xmlChar *name;
6900
118k
    int ret = -1;
6901
118k
    xmlElementContentPtr content  = NULL;
6902
6903
118k
    if ((CUR != '<') || (NXT(1) != '!'))
6904
0
        return(ret);
6905
118k
    SKIP(2);
6906
6907
    /* GROW; done in the caller */
6908
118k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6909
117k
  int inputid = ctxt->input->id;
6910
6911
117k
  SKIP(7);
6912
117k
  if (SKIP_BLANKS_PE == 0) {
6913
2.07k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6914
2.07k
               "Space required after 'ELEMENT'\n");
6915
2.07k
      return(-1);
6916
2.07k
  }
6917
115k
        name = xmlParseName(ctxt);
6918
115k
  if (name == NULL) {
6919
2.28k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6920
2.28k
         "xmlParseElementDecl: no name for Element\n");
6921
2.28k
      return(-1);
6922
2.28k
  }
6923
113k
  if (SKIP_BLANKS_PE == 0) {
6924
16.5k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6925
16.5k
         "Space required after the element name\n");
6926
16.5k
  }
6927
113k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6928
18.5k
      SKIP(5);
6929
      /*
6930
       * Element must always be empty.
6931
       */
6932
18.5k
      ret = XML_ELEMENT_TYPE_EMPTY;
6933
94.7k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6934
94.7k
             (NXT(2) == 'Y')) {
6935
3.45k
      SKIP(3);
6936
      /*
6937
       * Element is a generic container.
6938
       */
6939
3.45k
      ret = XML_ELEMENT_TYPE_ANY;
6940
91.2k
  } else if (RAW == '(') {
6941
88.0k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6942
88.0k
  } else {
6943
      /*
6944
       * [ WFC: PEs in Internal Subset ] error handling.
6945
       */
6946
3.20k
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6947
3.20k
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6948
3.20k
      return(-1);
6949
3.20k
  }
6950
6951
110k
  SKIP_BLANKS_PE;
6952
6953
110k
  if (RAW != '>') {
6954
27.8k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6955
27.8k
      if (content != NULL) {
6956
3.04k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6957
3.04k
      }
6958
82.2k
  } else {
6959
82.2k
      if (inputid != ctxt->input->id) {
6960
324
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6961
324
                               "Element declaration doesn't start and stop in"
6962
324
                               " the same entity\n");
6963
324
      }
6964
6965
82.2k
      NEXT;
6966
82.2k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6967
82.2k
    (ctxt->sax->elementDecl != NULL)) {
6968
75.6k
    if (content != NULL)
6969
54.4k
        content->parent = NULL;
6970
75.6k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6971
75.6k
                           content);
6972
75.6k
    if ((content != NULL) && (content->parent == NULL)) {
6973
        /*
6974
         * this is a trick: if xmlAddElementDecl is called,
6975
         * instead of copying the full tree it is plugged directly
6976
         * if called from the parser. Avoid duplicating the
6977
         * interfaces or change the API/ABI
6978
         */
6979
11.7k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6980
11.7k
    }
6981
75.6k
      } else if (content != NULL) {
6982
4.69k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6983
4.69k
      }
6984
82.2k
  }
6985
110k
    }
6986
110k
    return(ret);
6987
118k
}
6988
6989
/**
6990
 * xmlParseConditionalSections
6991
 * @ctxt:  an XML parser context
6992
 *
6993
 * Parse a conditional section. Always consumes '<!['.
6994
 *
6995
 * [61] conditionalSect ::= includeSect | ignoreSect
6996
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6997
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6998
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6999
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
7000
 */
7001
7002
static void
7003
10.9k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
7004
10.9k
    int *inputIds = NULL;
7005
10.9k
    size_t inputIdsSize = 0;
7006
10.9k
    size_t depth = 0;
7007
7008
61.0k
    while (PARSER_STOPPED(ctxt) == 0) {
7009
60.9k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7010
12.6k
            int id = ctxt->input->id;
7011
7012
12.6k
            SKIP(3);
7013
12.6k
            SKIP_BLANKS_PE;
7014
7015
12.6k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
7016
7.08k
                SKIP(7);
7017
7.08k
                SKIP_BLANKS_PE;
7018
7.08k
                if (RAW != '[') {
7019
61
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7020
61
                    xmlHaltParser(ctxt);
7021
61
                    goto error;
7022
61
                }
7023
7.02k
                if (ctxt->input->id != id) {
7024
1
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7025
1
                                   "All markup of the conditional section is"
7026
1
                                   " not in the same entity\n");
7027
1
                }
7028
7.02k
                NEXT;
7029
7030
7.02k
                if (inputIdsSize <= depth) {
7031
5.83k
                    int *tmp;
7032
7033
5.83k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
7034
5.83k
                    tmp = (int *) xmlRealloc(inputIds,
7035
5.83k
                            inputIdsSize * sizeof(int));
7036
5.83k
                    if (tmp == NULL) {
7037
24
                        xmlErrMemory(ctxt);
7038
24
                        goto error;
7039
24
                    }
7040
5.81k
                    inputIds = tmp;
7041
5.81k
                }
7042
6.99k
                inputIds[depth] = id;
7043
6.99k
                depth++;
7044
6.99k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
7045
5.38k
                size_t ignoreDepth = 0;
7046
7047
5.38k
                SKIP(6);
7048
5.38k
                SKIP_BLANKS_PE;
7049
5.38k
                if (RAW != '[') {
7050
29
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7051
29
                    xmlHaltParser(ctxt);
7052
29
                    goto error;
7053
29
                }
7054
5.35k
                if (ctxt->input->id != id) {
7055
296
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7056
296
                                   "All markup of the conditional section is"
7057
296
                                   " not in the same entity\n");
7058
296
                }
7059
5.35k
                NEXT;
7060
7061
9.67M
                while (PARSER_STOPPED(ctxt) == 0) {
7062
9.67M
                    if (RAW == 0) {
7063
2.78k
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
7064
2.78k
                        goto error;
7065
2.78k
                    }
7066
9.67M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7067
9.47k
                        SKIP(3);
7068
9.47k
                        ignoreDepth++;
7069
                        /* Check for integer overflow */
7070
9.47k
                        if (ignoreDepth == 0) {
7071
0
                            xmlErrMemory(ctxt);
7072
0
                            goto error;
7073
0
                        }
7074
9.66M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
7075
9.66M
                               (NXT(2) == '>')) {
7076
8.24k
                        SKIP(3);
7077
8.24k
                        if (ignoreDepth == 0)
7078
2.54k
                            break;
7079
5.70k
                        ignoreDepth--;
7080
9.65M
                    } else {
7081
9.65M
                        NEXT;
7082
9.65M
                    }
7083
9.67M
                }
7084
7085
2.56k
                if (ctxt->input->id != id) {
7086
280
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7087
280
                                   "All markup of the conditional section is"
7088
280
                                   " not in the same entity\n");
7089
280
                }
7090
2.56k
            } else {
7091
234
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
7092
234
                xmlHaltParser(ctxt);
7093
234
                goto error;
7094
234
            }
7095
48.2k
        } else if ((depth > 0) &&
7096
48.2k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
7097
5.62k
            depth--;
7098
5.62k
            if (ctxt->input->id != inputIds[depth]) {
7099
38
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7100
38
                               "All markup of the conditional section is not"
7101
38
                               " in the same entity\n");
7102
38
            }
7103
5.62k
            SKIP(3);
7104
42.6k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7105
42.3k
            xmlParseMarkupDecl(ctxt);
7106
42.3k
        } else {
7107
280
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7108
280
            xmlHaltParser(ctxt);
7109
280
            goto error;
7110
280
        }
7111
7112
57.5k
        if (depth == 0)
7113
7.47k
            break;
7114
7115
50.0k
        SKIP_BLANKS_PE;
7116
50.0k
        SHRINK;
7117
50.0k
        GROW;
7118
50.0k
    }
7119
7120
10.9k
error:
7121
10.9k
    xmlFree(inputIds);
7122
10.9k
}
7123
7124
/**
7125
 * xmlParseMarkupDecl:
7126
 * @ctxt:  an XML parser context
7127
 *
7128
 * DEPRECATED: Internal function, don't use.
7129
 *
7130
 * Parse markup declarations. Always consumes '<!' or '<?'.
7131
 *
7132
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7133
 *                     NotationDecl | PI | Comment
7134
 *
7135
 * [ VC: Proper Declaration/PE Nesting ]
7136
 * Parameter-entity replacement text must be properly nested with
7137
 * markup declarations. That is to say, if either the first character
7138
 * or the last character of a markup declaration (markupdecl above) is
7139
 * contained in the replacement text for a parameter-entity reference,
7140
 * both must be contained in the same replacement text.
7141
 *
7142
 * [ WFC: PEs in Internal Subset ]
7143
 * In the internal DTD subset, parameter-entity references can occur
7144
 * only where markup declarations can occur, not within markup declarations.
7145
 * (This does not apply to references that occur in external parameter
7146
 * entities or to the external subset.)
7147
 */
7148
void
7149
5.10M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7150
5.10M
    GROW;
7151
5.10M
    if (CUR == '<') {
7152
5.10M
        if (NXT(1) == '!') {
7153
5.06M
      switch (NXT(2)) {
7154
350k
          case 'E':
7155
350k
        if (NXT(3) == 'L')
7156
118k
      xmlParseElementDecl(ctxt);
7157
232k
        else if (NXT(3) == 'N')
7158
232k
      xmlParseEntityDecl(ctxt);
7159
175
                    else
7160
175
                        SKIP(2);
7161
350k
        break;
7162
321k
          case 'A':
7163
321k
        xmlParseAttributeListDecl(ctxt);
7164
321k
        break;
7165
52.3k
          case 'N':
7166
52.3k
        xmlParseNotationDecl(ctxt);
7167
52.3k
        break;
7168
4.31M
          case '-':
7169
4.31M
        xmlParseComment(ctxt);
7170
4.31M
        break;
7171
24.1k
    default:
7172
        /* there is an error but it will be detected later */
7173
24.1k
                    SKIP(2);
7174
24.1k
        break;
7175
5.06M
      }
7176
5.06M
  } else if (NXT(1) == '?') {
7177
39.5k
      xmlParsePI(ctxt);
7178
39.5k
  }
7179
5.10M
    }
7180
5.10M
}
7181
7182
/**
7183
 * xmlParseTextDecl:
7184
 * @ctxt:  an XML parser context
7185
 *
7186
 * DEPRECATED: Internal function, don't use.
7187
 *
7188
 * parse an XML declaration header for external entities
7189
 *
7190
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7191
 */
7192
7193
void
7194
45.9k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7195
45.9k
    xmlChar *version;
7196
7197
    /*
7198
     * We know that '<?xml' is here.
7199
     */
7200
45.9k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7201
45.8k
  SKIP(5);
7202
45.8k
    } else {
7203
28
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7204
28
  return;
7205
28
    }
7206
7207
45.8k
    if (SKIP_BLANKS == 0) {
7208
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7209
0
           "Space needed after '<?xml'\n");
7210
0
    }
7211
7212
    /*
7213
     * We may have the VersionInfo here.
7214
     */
7215
45.8k
    version = xmlParseVersionInfo(ctxt);
7216
45.8k
    if (version == NULL) {
7217
26.1k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7218
26.1k
        if (version == NULL) {
7219
30
            xmlErrMemory(ctxt);
7220
30
            return;
7221
30
        }
7222
26.1k
    } else {
7223
19.7k
  if (SKIP_BLANKS == 0) {
7224
2.18k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7225
2.18k
               "Space needed here\n");
7226
2.18k
  }
7227
19.7k
    }
7228
45.8k
    ctxt->input->version = version;
7229
7230
    /*
7231
     * We must have the encoding declaration
7232
     */
7233
45.8k
    xmlParseEncodingDecl(ctxt);
7234
7235
45.8k
    SKIP_BLANKS;
7236
45.8k
    if ((RAW == '?') && (NXT(1) == '>')) {
7237
3.04k
        SKIP(2);
7238
42.8k
    } else if (RAW == '>') {
7239
        /* Deprecated old WD ... */
7240
649
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7241
649
  NEXT;
7242
42.1k
    } else {
7243
42.1k
        int c;
7244
7245
42.1k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7246
92.4M
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7247
92.4M
            NEXT;
7248
92.4M
            if (c == '>')
7249
13.8k
                break;
7250
92.4M
        }
7251
42.1k
    }
7252
45.8k
}
7253
7254
/**
7255
 * xmlParseExternalSubset:
7256
 * @ctxt:  an XML parser context
7257
 * @ExternalID: the external identifier
7258
 * @SystemID: the system identifier (or URL)
7259
 *
7260
 * parse Markup declarations from an external subset
7261
 *
7262
 * [30] extSubset ::= textDecl? extSubsetDecl
7263
 *
7264
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7265
 */
7266
void
7267
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7268
4.92k
                       const xmlChar *SystemID) {
7269
4.92k
    int oldInputNr;
7270
7271
4.92k
    xmlCtxtInitializeLate(ctxt);
7272
7273
4.92k
    xmlDetectEncoding(ctxt);
7274
7275
4.92k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7276
588
  xmlParseTextDecl(ctxt);
7277
588
    }
7278
4.92k
    if (ctxt->myDoc == NULL) {
7279
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7280
0
  if (ctxt->myDoc == NULL) {
7281
0
      xmlErrMemory(ctxt);
7282
0
      return;
7283
0
  }
7284
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7285
0
    }
7286
4.92k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) &&
7287
4.92k
        (xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID) == NULL)) {
7288
1
        xmlErrMemory(ctxt);
7289
1
    }
7290
7291
4.92k
    ctxt->inSubset = 2;
7292
4.92k
    oldInputNr = ctxt->inputNr;
7293
7294
4.92k
    SKIP_BLANKS_PE;
7295
3.15M
    while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) &&
7296
3.15M
           (!PARSER_STOPPED(ctxt))) {
7297
3.14M
  GROW;
7298
3.14M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7299
5.52k
            xmlParseConditionalSections(ctxt);
7300
3.14M
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7301
3.14M
            xmlParseMarkupDecl(ctxt);
7302
3.14M
        } else {
7303
1.22k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7304
1.22k
            xmlHaltParser(ctxt);
7305
1.22k
            return;
7306
1.22k
        }
7307
3.14M
        SKIP_BLANKS_PE;
7308
3.14M
        SHRINK;
7309
3.14M
    }
7310
7311
4.07k
    while (ctxt->inputNr > oldInputNr)
7312
371
        xmlPopPE(ctxt);
7313
7314
3.70k
    if (RAW != 0) {
7315
1.18k
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7316
1.18k
    }
7317
3.70k
}
7318
7319
/**
7320
 * xmlParseReference:
7321
 * @ctxt:  an XML parser context
7322
 *
7323
 * DEPRECATED: Internal function, don't use.
7324
 *
7325
 * parse and handle entity references in content, depending on the SAX
7326
 * interface, this may end-up in a call to character() if this is a
7327
 * CharRef, a predefined entity, if there is no reference() callback.
7328
 * or if the parser was asked to switch to that mode.
7329
 *
7330
 * Always consumes '&'.
7331
 *
7332
 * [67] Reference ::= EntityRef | CharRef
7333
 */
7334
void
7335
703k
xmlParseReference(xmlParserCtxtPtr ctxt) {
7336
703k
    xmlEntityPtr ent = NULL;
7337
703k
    const xmlChar *name;
7338
703k
    xmlChar *val;
7339
7340
703k
    if (RAW != '&')
7341
0
        return;
7342
7343
    /*
7344
     * Simple case of a CharRef
7345
     */
7346
703k
    if (NXT(1) == '#') {
7347
284k
  int i = 0;
7348
284k
  xmlChar out[16];
7349
284k
  int value = xmlParseCharRef(ctxt);
7350
7351
284k
  if (value == 0)
7352
155k
      return;
7353
7354
        /*
7355
         * Just encode the value in UTF-8
7356
         */
7357
129k
        COPY_BUF(out, i, value);
7358
129k
        out[i] = 0;
7359
129k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7360
129k
            (!ctxt->disableSAX))
7361
124k
            ctxt->sax->characters(ctxt->userData, out, i);
7362
129k
  return;
7363
284k
    }
7364
7365
    /*
7366
     * We are seeing an entity reference
7367
     */
7368
419k
    name = xmlParseEntityRefInternal(ctxt);
7369
419k
    if (name != NULL)
7370
250k
        ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7371
419k
    if (ent == NULL) return;
7372
202k
    if (!ctxt->wellFormed)
7373
103k
  return;
7374
7375
    /* special case of predefined entities */
7376
99.4k
    if ((ent->name == NULL) ||
7377
99.4k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7378
4.90k
  val = ent->content;
7379
4.90k
  if (val == NULL) return;
7380
  /*
7381
   * inline the entity.
7382
   */
7383
4.90k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7384
4.90k
      (!ctxt->disableSAX))
7385
4.90k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7386
4.90k
  return;
7387
4.90k
    }
7388
7389
    /*
7390
     * The first reference to the entity trigger a parsing phase
7391
     * where the ent->children is filled with the result from
7392
     * the parsing.
7393
     * Note: external parsed entities will not be loaded, it is not
7394
     * required for a non-validating parser, unless the parsing option
7395
     * of validating, or substituting entities were given. Doing so is
7396
     * far more secure as the parser will only process data coming from
7397
     * the document entity by default.
7398
     *
7399
     * FIXME: This doesn't work correctly since entities can be
7400
     * expanded with different namespace declarations in scope.
7401
     * For example:
7402
     *
7403
     * <!DOCTYPE doc [
7404
     *   <!ENTITY ent "<ns:elem/>">
7405
     * ]>
7406
     * <doc>
7407
     *   <decl1 xmlns:ns="urn:ns1">
7408
     *     &ent;
7409
     *   </decl1>
7410
     *   <decl2 xmlns:ns="urn:ns2">
7411
     *     &ent;
7412
     *   </decl2>
7413
     * </doc>
7414
     *
7415
     * Proposed fix:
7416
     *
7417
     * - Ignore current namespace declarations when parsing the
7418
     *   entity. If a prefix can't be resolved, don't report an error
7419
     *   but mark it as unresolved.
7420
     * - Try to resolve these prefixes when expanding the entity.
7421
     *   This will require a specialized version of xmlStaticCopyNode
7422
     *   which can also make use of the namespace hash table to avoid
7423
     *   quadratic behavior.
7424
     *
7425
     * Alternatively, we could simply reparse the entity on each
7426
     * expansion like we already do with custom SAX callbacks.
7427
     * External entity content should be cached in this case.
7428
     */
7429
94.5k
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7430
94.5k
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7431
20.8k
         ((ctxt->replaceEntities) ||
7432
92.4k
          (ctxt->validate)))) {
7433
92.4k
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7434
14.4k
            xmlCtxtParseEntity(ctxt, ent);
7435
78.0k
        } else if (ent->children == NULL) {
7436
            /*
7437
             * Probably running in SAX mode and the callbacks don't
7438
             * build the entity content. Parse the entity again.
7439
             *
7440
             * This will also be triggered in normal tree builder mode
7441
             * if an entity happens to be empty, causing unnecessary
7442
             * reloads. It's hard to come up with a reliable check in
7443
             * which mode we're running.
7444
             */
7445
13.1k
            xmlCtxtParseEntity(ctxt, ent);
7446
13.1k
        }
7447
92.4k
    }
7448
7449
    /*
7450
     * We also check for amplification if entities aren't substituted.
7451
     * They might be expanded later.
7452
     */
7453
94.5k
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7454
266
        return;
7455
7456
94.2k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7457
4.15k
        return;
7458
7459
90.0k
    if (ctxt->replaceEntities == 0) {
7460
  /*
7461
   * Create a reference
7462
   */
7463
16.5k
        if (ctxt->sax->reference != NULL)
7464
16.5k
      ctxt->sax->reference(ctxt->userData, ent->name);
7465
73.5k
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7466
60.5k
        xmlNodePtr copy, cur;
7467
7468
        /*
7469
         * Seems we are generating the DOM content, copy the tree
7470
   */
7471
60.5k
        cur = ent->children;
7472
7473
        /*
7474
         * Handle first text node with SAX to coalesce text efficiently
7475
         */
7476
60.5k
        if ((cur->type == XML_TEXT_NODE) ||
7477
60.5k
            (cur->type == XML_CDATA_SECTION_NODE)) {
7478
55.8k
            int len = xmlStrlen(cur->content);
7479
7480
55.8k
            if ((cur->type == XML_TEXT_NODE) ||
7481
55.8k
                (ctxt->sax->cdataBlock == NULL)) {
7482
55.3k
                if (ctxt->sax->characters != NULL)
7483
55.3k
                    ctxt->sax->characters(ctxt, cur->content, len);
7484
55.3k
            } else {
7485
457
                if (ctxt->sax->cdataBlock != NULL)
7486
457
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7487
457
            }
7488
7489
55.8k
            cur = cur->next;
7490
55.8k
        }
7491
7492
425k
        while (cur != NULL) {
7493
378k
            xmlNodePtr last;
7494
7495
            /*
7496
             * Handle last text node with SAX to coalesce text efficiently
7497
             */
7498
378k
            if ((cur->next == NULL) &&
7499
378k
                ((cur->type == XML_TEXT_NODE) ||
7500
20.3k
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7501
13.3k
                int len = xmlStrlen(cur->content);
7502
7503
13.3k
                if ((cur->type == XML_TEXT_NODE) ||
7504
13.3k
                    (ctxt->sax->cdataBlock == NULL)) {
7505
12.8k
                    if (ctxt->sax->characters != NULL)
7506
12.8k
                        ctxt->sax->characters(ctxt, cur->content, len);
7507
12.8k
                } else {
7508
523
                    if (ctxt->sax->cdataBlock != NULL)
7509
523
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7510
523
                }
7511
7512
13.3k
                break;
7513
13.3k
            }
7514
7515
            /*
7516
             * Reset coalesce buffer stats only for non-text nodes.
7517
             */
7518
365k
            ctxt->nodemem = 0;
7519
365k
            ctxt->nodelen = 0;
7520
7521
365k
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7522
7523
365k
            if (copy == NULL) {
7524
585
                xmlErrMemory(ctxt);
7525
585
                break;
7526
585
            }
7527
7528
364k
            if (ctxt->parseMode == XML_PARSE_READER) {
7529
                /* Needed for reader */
7530
18.6k
                copy->extra = cur->extra;
7531
                /* Maybe needed for reader */
7532
18.6k
                copy->_private = cur->_private;
7533
18.6k
            }
7534
7535
364k
            copy->parent = ctxt->node;
7536
364k
            last = ctxt->node->last;
7537
364k
            if (last == NULL) {
7538
1.41k
                ctxt->node->children = copy;
7539
363k
            } else {
7540
363k
                last->next = copy;
7541
363k
                copy->prev = last;
7542
363k
            }
7543
364k
            ctxt->node->last = copy;
7544
7545
364k
            cur = cur->next;
7546
364k
        }
7547
60.5k
    }
7548
90.0k
}
7549
7550
static xmlEntityPtr
7551
3.22M
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7552
3.22M
    xmlEntityPtr ent;
7553
7554
    /*
7555
     * Predefined entities override any extra definition
7556
     */
7557
3.22M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7558
2.52M
        ent = xmlGetPredefinedEntity(name);
7559
2.52M
        if (ent != NULL)
7560
634k
            return(ent);
7561
2.52M
    }
7562
7563
    /*
7564
     * Ask first SAX for entity resolution, otherwise try the
7565
     * entities which may have stored in the parser context.
7566
     */
7567
2.58M
    if (ctxt->sax != NULL) {
7568
2.58M
  if (ctxt->sax->getEntity != NULL)
7569
2.58M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7570
2.58M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7571
2.58M
      (ctxt->options & XML_PARSE_OLDSAX))
7572
1.91k
      ent = xmlGetPredefinedEntity(name);
7573
2.58M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7574
2.58M
      (ctxt->userData==ctxt)) {
7575
13.8k
      ent = xmlSAX2GetEntity(ctxt, name);
7576
13.8k
  }
7577
2.58M
    }
7578
    /*
7579
     * [ WFC: Entity Declared ]
7580
     * In a document without any DTD, a document with only an
7581
     * internal DTD subset which contains no parameter entity
7582
     * references, or a document with "standalone='yes'", the
7583
     * Name given in the entity reference must match that in an
7584
     * entity declaration, except that well-formed documents
7585
     * need not declare any of the following entities: amp, lt,
7586
     * gt, apos, quot.
7587
     * The declaration of a parameter entity must precede any
7588
     * reference to it.
7589
     * Similarly, the declaration of a general entity must
7590
     * precede any reference to it which appears in a default
7591
     * value in an attribute-list declaration. Note that if
7592
     * entities are declared in the external subset or in
7593
     * external parameter entities, a non-validating processor
7594
     * is not obligated to read and process their declarations;
7595
     * for such documents, the rule that an entity must be
7596
     * declared is a well-formedness constraint only if
7597
     * standalone='yes'.
7598
     */
7599
2.58M
    if (ent == NULL) {
7600
339k
  if ((ctxt->standalone == 1) ||
7601
339k
      ((ctxt->hasExternalSubset == 0) &&
7602
337k
       (ctxt->hasPErefs == 0))) {
7603
308k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7604
308k
         "Entity '%s' not defined\n", name);
7605
308k
  } else {
7606
30.2k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7607
30.2k
         "Entity '%s' not defined\n", name);
7608
30.2k
      if ((ctxt->inSubset == 0) &&
7609
30.2k
    (ctxt->sax != NULL) &&
7610
30.2k
                (ctxt->disableSAX == 0) &&
7611
30.2k
    (ctxt->sax->reference != NULL)) {
7612
19.9k
    ctxt->sax->reference(ctxt->userData, name);
7613
19.9k
      }
7614
30.2k
  }
7615
339k
  ctxt->valid = 0;
7616
339k
    }
7617
7618
    /*
7619
     * [ WFC: Parsed Entity ]
7620
     * An entity reference must not contain the name of an
7621
     * unparsed entity
7622
     */
7623
2.25M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7624
2.07k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7625
2.07k
     "Entity reference to unparsed entity %s\n", name);
7626
2.07k
        ent = NULL;
7627
2.07k
    }
7628
7629
    /*
7630
     * [ WFC: No External Entity References ]
7631
     * Attribute values cannot contain direct or indirect
7632
     * entity references to external entities.
7633
     */
7634
2.24M
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7635
44.2k
        if (inAttr) {
7636
6.96k
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7637
6.96k
                 "Attribute references external entity '%s'\n", name);
7638
6.96k
            ent = NULL;
7639
6.96k
        }
7640
44.2k
    }
7641
7642
2.58M
    return(ent);
7643
3.22M
}
7644
7645
/**
7646
 * xmlParseEntityRefInternal:
7647
 * @ctxt:  an XML parser context
7648
 * @inAttr:  whether we are in an attribute value
7649
 *
7650
 * Parse an entity reference. Always consumes '&'.
7651
 *
7652
 * [68] EntityRef ::= '&' Name ';'
7653
 *
7654
 * Returns the name, or NULL in case of error.
7655
 */
7656
static const xmlChar *
7657
1.76M
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7658
1.76M
    const xmlChar *name;
7659
7660
1.76M
    GROW;
7661
7662
1.76M
    if (RAW != '&')
7663
0
        return(NULL);
7664
1.76M
    NEXT;
7665
1.76M
    name = xmlParseName(ctxt);
7666
1.76M
    if (name == NULL) {
7667
159k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7668
159k
           "xmlParseEntityRef: no name\n");
7669
159k
        return(NULL);
7670
159k
    }
7671
1.61M
    if (RAW != ';') {
7672
155k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7673
155k
  return(NULL);
7674
155k
    }
7675
1.45M
    NEXT;
7676
7677
1.45M
    return(name);
7678
1.61M
}
7679
7680
/**
7681
 * xmlParseEntityRef:
7682
 * @ctxt:  an XML parser context
7683
 *
7684
 * DEPRECATED: Internal function, don't use.
7685
 *
7686
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7687
 */
7688
xmlEntityPtr
7689
0
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7690
0
    const xmlChar *name;
7691
7692
0
    if (ctxt == NULL)
7693
0
        return(NULL);
7694
7695
0
    name = xmlParseEntityRefInternal(ctxt);
7696
0
    if (name == NULL)
7697
0
        return(NULL);
7698
7699
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7700
0
}
7701
7702
/**
7703
 * xmlParseStringEntityRef:
7704
 * @ctxt:  an XML parser context
7705
 * @str:  a pointer to an index in the string
7706
 *
7707
 * parse ENTITY references declarations, but this version parses it from
7708
 * a string value.
7709
 *
7710
 * [68] EntityRef ::= '&' Name ';'
7711
 *
7712
 * [ WFC: Entity Declared ]
7713
 * In a document without any DTD, a document with only an internal DTD
7714
 * subset which contains no parameter entity references, or a document
7715
 * with "standalone='yes'", the Name given in the entity reference
7716
 * must match that in an entity declaration, except that well-formed
7717
 * documents need not declare any of the following entities: amp, lt,
7718
 * gt, apos, quot.  The declaration of a parameter entity must precede
7719
 * any reference to it.  Similarly, the declaration of a general entity
7720
 * must precede any reference to it which appears in a default value in an
7721
 * attribute-list declaration. Note that if entities are declared in the
7722
 * external subset or in external parameter entities, a non-validating
7723
 * processor is not obligated to read and process their declarations;
7724
 * for such documents, the rule that an entity must be declared is a
7725
 * well-formedness constraint only if standalone='yes'.
7726
 *
7727
 * [ WFC: Parsed Entity ]
7728
 * An entity reference must not contain the name of an unparsed entity
7729
 *
7730
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7731
 * is updated to the current location in the string.
7732
 */
7733
static xmlChar *
7734
1.76M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7735
1.76M
    xmlChar *name;
7736
1.76M
    const xmlChar *ptr;
7737
1.76M
    xmlChar cur;
7738
7739
1.76M
    if ((str == NULL) || (*str == NULL))
7740
0
        return(NULL);
7741
1.76M
    ptr = *str;
7742
1.76M
    cur = *ptr;
7743
1.76M
    if (cur != '&')
7744
0
  return(NULL);
7745
7746
1.76M
    ptr++;
7747
1.76M
    name = xmlParseStringName(ctxt, &ptr);
7748
1.76M
    if (name == NULL) {
7749
217
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7750
217
           "xmlParseStringEntityRef: no name\n");
7751
217
  *str = ptr;
7752
217
  return(NULL);
7753
217
    }
7754
1.76M
    if (*ptr != ';') {
7755
114
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7756
114
        xmlFree(name);
7757
114
  *str = ptr;
7758
114
  return(NULL);
7759
114
    }
7760
1.76M
    ptr++;
7761
7762
1.76M
    *str = ptr;
7763
1.76M
    return(name);
7764
1.76M
}
7765
7766
/**
7767
 * xmlParsePEReference:
7768
 * @ctxt:  an XML parser context
7769
 *
7770
 * DEPRECATED: Internal function, don't use.
7771
 *
7772
 * Parse a parameter entity reference. Always consumes '%'.
7773
 *
7774
 * The entity content is handled directly by pushing it's content as
7775
 * a new input stream.
7776
 *
7777
 * [69] PEReference ::= '%' Name ';'
7778
 *
7779
 * [ WFC: No Recursion ]
7780
 * A parsed entity must not contain a recursive
7781
 * reference to itself, either directly or indirectly.
7782
 *
7783
 * [ WFC: Entity Declared ]
7784
 * In a document without any DTD, a document with only an internal DTD
7785
 * subset which contains no parameter entity references, or a document
7786
 * with "standalone='yes'", ...  ... The declaration of a parameter
7787
 * entity must precede any reference to it...
7788
 *
7789
 * [ VC: Entity Declared ]
7790
 * In a document with an external subset or external parameter entities
7791
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7792
 * must precede any reference to it...
7793
 *
7794
 * [ WFC: In DTD ]
7795
 * Parameter-entity references may only appear in the DTD.
7796
 * NOTE: misleading but this is handled.
7797
 */
7798
void
7799
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7800
1.64M
{
7801
1.64M
    const xmlChar *name;
7802
1.64M
    xmlEntityPtr entity = NULL;
7803
1.64M
    xmlParserInputPtr input;
7804
7805
1.64M
    if (RAW != '%')
7806
0
        return;
7807
1.64M
    NEXT;
7808
1.64M
    name = xmlParseName(ctxt);
7809
1.64M
    if (name == NULL) {
7810
418k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7811
418k
  return;
7812
418k
    }
7813
1.22M
    if (RAW != ';') {
7814
75.6k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7815
75.6k
        return;
7816
75.6k
    }
7817
7818
1.15M
    NEXT;
7819
7820
    /*
7821
     * Request the entity from SAX
7822
     */
7823
1.15M
    if ((ctxt->sax != NULL) &&
7824
1.15M
  (ctxt->sax->getParameterEntity != NULL))
7825
1.15M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7826
1.15M
    if (entity == NULL) {
7827
  /*
7828
   * [ WFC: Entity Declared ]
7829
   * In a document without any DTD, a document with only an
7830
   * internal DTD subset which contains no parameter entity
7831
   * references, or a document with "standalone='yes'", ...
7832
   * ... The declaration of a parameter entity must precede
7833
   * any reference to it...
7834
   */
7835
374k
  if ((ctxt->standalone == 1) ||
7836
374k
      ((ctxt->hasExternalSubset == 0) &&
7837
374k
       (ctxt->hasPErefs == 0))) {
7838
3.35k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7839
3.35k
            "PEReference: %%%s; not found\n",
7840
3.35k
            name);
7841
371k
  } else {
7842
      /*
7843
       * [ VC: Entity Declared ]
7844
       * In a document with an external subset or external
7845
       * parameter entities with "standalone='no'", ...
7846
       * ... The declaration of a parameter entity must
7847
       * precede any reference to it...
7848
       */
7849
371k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7850
156k
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7851
156k
                                 "PEReference: %%%s; not found\n",
7852
156k
                                 name, NULL);
7853
156k
            } else
7854
215k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7855
215k
                              "PEReference: %%%s; not found\n",
7856
215k
                              name, NULL);
7857
371k
            ctxt->valid = 0;
7858
371k
  }
7859
777k
    } else {
7860
  /*
7861
   * Internal checking in case the entity quest barfed
7862
   */
7863
777k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7864
777k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7865
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7866
0
      "Internal: %%%s; is not a parameter entity\n",
7867
0
        name, NULL);
7868
777k
  } else {
7869
777k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7870
777k
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7871
645k
     ((ctxt->loadsubset == 0) &&
7872
645k
      (ctxt->replaceEntities == 0) &&
7873
645k
      (ctxt->validate == 0))))
7874
1.86k
    return;
7875
7876
775k
            if (entity->flags & XML_ENT_EXPANDING) {
7877
65
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7878
65
                xmlHaltParser(ctxt);
7879
65
                return;
7880
65
            }
7881
7882
775k
      input = xmlNewEntityInputStream(ctxt, entity);
7883
775k
      if (xmlPushInput(ctxt, input) < 0) {
7884
186k
                xmlFreeInputStream(input);
7885
186k
    return;
7886
186k
            }
7887
7888
588k
            entity->flags |= XML_ENT_EXPANDING;
7889
7890
588k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7891
457k
                xmlDetectEncoding(ctxt);
7892
7893
457k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7894
457k
                    (IS_BLANK_CH(NXT(5)))) {
7895
32.3k
                    xmlParseTextDecl(ctxt);
7896
32.3k
                }
7897
457k
            }
7898
588k
  }
7899
777k
    }
7900
963k
    ctxt->hasPErefs = 1;
7901
963k
}
7902
7903
/**
7904
 * xmlLoadEntityContent:
7905
 * @ctxt:  an XML parser context
7906
 * @entity: an unloaded system entity
7907
 *
7908
 * Load the original content of the given system entity from the
7909
 * ExternalID/SystemID given. This is to be used for Included in Literal
7910
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7911
 *
7912
 * Returns 0 in case of success and -1 in case of failure
7913
 */
7914
static int
7915
24.5k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7916
24.5k
    xmlParserInputPtr oldinput, input = NULL;
7917
24.5k
    xmlParserInputPtr *oldinputTab;
7918
24.5k
    const xmlChar *oldencoding;
7919
24.5k
    xmlChar *content = NULL;
7920
24.5k
    size_t length, i;
7921
24.5k
    int oldinputNr, oldinputMax;
7922
24.5k
    int ret = -1;
7923
24.5k
    int res;
7924
7925
24.5k
    if ((ctxt == NULL) || (entity == NULL) ||
7926
24.5k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7927
24.5k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7928
24.5k
  (entity->content != NULL)) {
7929
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7930
0
              "xmlLoadEntityContent parameter error");
7931
0
        return(-1);
7932
0
    }
7933
7934
24.5k
    input = xmlLoadExternalEntity((char *) entity->URI,
7935
24.5k
           (char *) entity->ExternalID, ctxt);
7936
24.5k
    if (input == NULL)
7937
1.41k
        return(-1);
7938
7939
23.1k
    oldinput = ctxt->input;
7940
23.1k
    oldinputNr = ctxt->inputNr;
7941
23.1k
    oldinputMax = ctxt->inputMax;
7942
23.1k
    oldinputTab = ctxt->inputTab;
7943
23.1k
    oldencoding = ctxt->encoding;
7944
7945
23.1k
    ctxt->input = NULL;
7946
23.1k
    ctxt->inputNr = 0;
7947
23.1k
    ctxt->inputMax = 1;
7948
23.1k
    ctxt->encoding = NULL;
7949
23.1k
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7950
23.1k
    if (ctxt->inputTab == NULL) {
7951
14
        xmlErrMemory(ctxt);
7952
14
        xmlFreeInputStream(input);
7953
14
        goto error;
7954
14
    }
7955
7956
23.1k
    xmlBufResetInput(input->buf->buffer, input);
7957
7958
23.1k
    inputPush(ctxt, input);
7959
7960
23.1k
    xmlDetectEncoding(ctxt);
7961
7962
    /*
7963
     * Parse a possible text declaration first
7964
     */
7965
23.1k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7966
12.2k
  xmlParseTextDecl(ctxt);
7967
        /*
7968
         * An XML-1.0 document can't reference an entity not XML-1.0
7969
         */
7970
12.2k
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7971
12.2k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7972
4.88k
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7973
4.88k
                           "Version mismatch between document and entity\n");
7974
4.88k
        }
7975
12.2k
    }
7976
7977
23.1k
    length = input->cur - input->base;
7978
23.1k
    xmlBufShrink(input->buf->buffer, length);
7979
23.1k
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7980
7981
34.0k
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7982
10.8k
        ;
7983
7984
23.1k
    xmlBufResetInput(input->buf->buffer, input);
7985
7986
23.1k
    if (res < 0) {
7987
2.28k
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7988
2.28k
        goto error;
7989
2.28k
    }
7990
7991
20.8k
    length = xmlBufUse(input->buf->buffer);
7992
20.8k
    content = xmlBufDetach(input->buf->buffer);
7993
7994
20.8k
    if (length > INT_MAX) {
7995
0
        xmlErrMemory(ctxt);
7996
0
        goto error;
7997
0
    }
7998
7999
26.6M
    for (i = 0; i < length; ) {
8000
26.6M
        int clen = length - i;
8001
26.6M
        int c = xmlGetUTF8Char(content + i, &clen);
8002
8003
26.6M
        if ((c < 0) || (!IS_CHAR(c))) {
8004
20.2k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8005
20.2k
                              "xmlLoadEntityContent: invalid char value %d\n",
8006
20.2k
                              content[i]);
8007
20.2k
            goto error;
8008
20.2k
        }
8009
26.6M
        i += clen;
8010
26.6M
    }
8011
8012
541
    xmlSaturatedAdd(&ctxt->sizeentities, length);
8013
541
    entity->content = content;
8014
541
    entity->length = length;
8015
541
    content = NULL;
8016
541
    ret = 0;
8017
8018
23.1k
error:
8019
46.2k
    while (ctxt->inputNr > 0)
8020
23.1k
        xmlFreeInputStream(inputPop(ctxt));
8021
23.1k
    xmlFree(ctxt->inputTab);
8022
23.1k
    xmlFree((xmlChar *) ctxt->encoding);
8023
8024
23.1k
    ctxt->input = oldinput;
8025
23.1k
    ctxt->inputNr = oldinputNr;
8026
23.1k
    ctxt->inputMax = oldinputMax;
8027
23.1k
    ctxt->inputTab = oldinputTab;
8028
23.1k
    ctxt->encoding = oldencoding;
8029
8030
23.1k
    xmlFree(content);
8031
8032
23.1k
    return(ret);
8033
541
}
8034
8035
/**
8036
 * xmlParseStringPEReference:
8037
 * @ctxt:  an XML parser context
8038
 * @str:  a pointer to an index in the string
8039
 *
8040
 * parse PEReference declarations
8041
 *
8042
 * [69] PEReference ::= '%' Name ';'
8043
 *
8044
 * [ WFC: No Recursion ]
8045
 * A parsed entity must not contain a recursive
8046
 * reference to itself, either directly or indirectly.
8047
 *
8048
 * [ WFC: Entity Declared ]
8049
 * In a document without any DTD, a document with only an internal DTD
8050
 * subset which contains no parameter entity references, or a document
8051
 * with "standalone='yes'", ...  ... The declaration of a parameter
8052
 * entity must precede any reference to it...
8053
 *
8054
 * [ VC: Entity Declared ]
8055
 * In a document with an external subset or external parameter entities
8056
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8057
 * must precede any reference to it...
8058
 *
8059
 * [ WFC: In DTD ]
8060
 * Parameter-entity references may only appear in the DTD.
8061
 * NOTE: misleading but this is handled.
8062
 *
8063
 * Returns the string of the entity content.
8064
 *         str is updated to the current value of the index
8065
 */
8066
static xmlEntityPtr
8067
95.7k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8068
95.7k
    const xmlChar *ptr;
8069
95.7k
    xmlChar cur;
8070
95.7k
    xmlChar *name;
8071
95.7k
    xmlEntityPtr entity = NULL;
8072
8073
95.7k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8074
95.7k
    ptr = *str;
8075
95.7k
    cur = *ptr;
8076
95.7k
    if (cur != '%')
8077
0
        return(NULL);
8078
95.7k
    ptr++;
8079
95.7k
    name = xmlParseStringName(ctxt, &ptr);
8080
95.7k
    if (name == NULL) {
8081
5.17k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8082
5.17k
           "xmlParseStringPEReference: no name\n");
8083
5.17k
  *str = ptr;
8084
5.17k
  return(NULL);
8085
5.17k
    }
8086
90.5k
    cur = *ptr;
8087
90.5k
    if (cur != ';') {
8088
8.85k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8089
8.85k
  xmlFree(name);
8090
8.85k
  *str = ptr;
8091
8.85k
  return(NULL);
8092
8.85k
    }
8093
81.6k
    ptr++;
8094
8095
    /*
8096
     * Request the entity from SAX
8097
     */
8098
81.6k
    if ((ctxt->sax != NULL) &&
8099
81.6k
  (ctxt->sax->getParameterEntity != NULL))
8100
81.6k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8101
81.6k
    if (entity == NULL) {
8102
  /*
8103
   * [ WFC: Entity Declared ]
8104
   * In a document without any DTD, a document with only an
8105
   * internal DTD subset which contains no parameter entity
8106
   * references, or a document with "standalone='yes'", ...
8107
   * ... The declaration of a parameter entity must precede
8108
   * any reference to it...
8109
   */
8110
6.44k
  if ((ctxt->standalone == 1) ||
8111
6.44k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8112
485
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8113
485
     "PEReference: %%%s; not found\n", name);
8114
5.96k
  } else {
8115
      /*
8116
       * [ VC: Entity Declared ]
8117
       * In a document with an external subset or external
8118
       * parameter entities with "standalone='no'", ...
8119
       * ... The declaration of a parameter entity must
8120
       * precede any reference to it...
8121
       */
8122
5.96k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8123
5.96k
        "PEReference: %%%s; not found\n",
8124
5.96k
        name, NULL);
8125
5.96k
      ctxt->valid = 0;
8126
5.96k
  }
8127
75.2k
    } else {
8128
  /*
8129
   * Internal checking in case the entity quest barfed
8130
   */
8131
75.2k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8132
75.2k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8133
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8134
0
        "%%%s; is not a parameter entity\n",
8135
0
        name, NULL);
8136
0
  }
8137
75.2k
    }
8138
81.6k
    ctxt->hasPErefs = 1;
8139
81.6k
    xmlFree(name);
8140
81.6k
    *str = ptr;
8141
81.6k
    return(entity);
8142
90.5k
}
8143
8144
/**
8145
 * xmlParseDocTypeDecl:
8146
 * @ctxt:  an XML parser context
8147
 *
8148
 * DEPRECATED: Internal function, don't use.
8149
 *
8150
 * parse a DOCTYPE declaration
8151
 *
8152
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8153
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8154
 *
8155
 * [ VC: Root Element Type ]
8156
 * The Name in the document type declaration must match the element
8157
 * type of the root element.
8158
 */
8159
8160
void
8161
99.4k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8162
99.4k
    const xmlChar *name = NULL;
8163
99.4k
    xmlChar *ExternalID = NULL;
8164
99.4k
    xmlChar *URI = NULL;
8165
8166
    /*
8167
     * We know that '<!DOCTYPE' has been detected.
8168
     */
8169
99.4k
    SKIP(9);
8170
8171
99.4k
    SKIP_BLANKS;
8172
8173
    /*
8174
     * Parse the DOCTYPE name.
8175
     */
8176
99.4k
    name = xmlParseName(ctxt);
8177
99.4k
    if (name == NULL) {
8178
11.9k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8179
11.9k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8180
11.9k
    }
8181
99.4k
    ctxt->intSubName = name;
8182
8183
99.4k
    SKIP_BLANKS;
8184
8185
    /*
8186
     * Check for SystemID and ExternalID
8187
     */
8188
99.4k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8189
8190
99.4k
    if ((URI != NULL) || (ExternalID != NULL)) {
8191
15.7k
        ctxt->hasExternalSubset = 1;
8192
15.7k
    }
8193
99.4k
    ctxt->extSubURI = URI;
8194
99.4k
    ctxt->extSubSystem = ExternalID;
8195
8196
99.4k
    SKIP_BLANKS;
8197
8198
    /*
8199
     * Create and update the internal subset.
8200
     */
8201
99.4k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8202
99.4k
  (!ctxt->disableSAX))
8203
94.7k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8204
8205
    /*
8206
     * Is there any internal subset declarations ?
8207
     * they are handled separately in xmlParseInternalSubset()
8208
     */
8209
99.4k
    if (RAW == '[')
8210
77.5k
  return;
8211
8212
    /*
8213
     * We should be at the end of the DOCTYPE declaration.
8214
     */
8215
21.8k
    if (RAW != '>') {
8216
9.12k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8217
9.12k
    }
8218
21.8k
    NEXT;
8219
21.8k
}
8220
8221
/**
8222
 * xmlParseInternalSubset:
8223
 * @ctxt:  an XML parser context
8224
 *
8225
 * parse the internal subset declaration
8226
 *
8227
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8228
 */
8229
8230
static void
8231
79.8k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8232
    /*
8233
     * Is there any DTD definition ?
8234
     */
8235
79.8k
    if (RAW == '[') {
8236
79.8k
        int oldInputNr = ctxt->inputNr;
8237
8238
79.8k
        NEXT;
8239
  /*
8240
   * Parse the succession of Markup declarations and
8241
   * PEReferences.
8242
   * Subsequence (markupdecl | PEReference | S)*
8243
   */
8244
79.8k
  SKIP_BLANKS;
8245
2.37M
  while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) &&
8246
2.37M
               (PARSER_STOPPED(ctxt) == 0)) {
8247
8248
            /*
8249
             * Conditional sections are allowed from external entities included
8250
             * by PE References in the internal subset.
8251
             */
8252
2.32M
            if ((PARSER_EXTERNAL(ctxt)) &&
8253
2.32M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8254
5.41k
                xmlParseConditionalSections(ctxt);
8255
2.32M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8256
1.92M
          xmlParseMarkupDecl(ctxt);
8257
1.92M
            } else if (RAW == '%') {
8258
369k
          xmlParsePEReference(ctxt);
8259
369k
            } else {
8260
27.6k
    xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8261
27.6k
                break;
8262
27.6k
            }
8263
2.29M
      SKIP_BLANKS_PE;
8264
2.29M
            SHRINK;
8265
2.29M
            GROW;
8266
2.29M
  }
8267
8268
83.7k
        while (ctxt->inputNr > oldInputNr)
8269
3.92k
            xmlPopPE(ctxt);
8270
8271
79.8k
  if (RAW == ']') {
8272
40.0k
      NEXT;
8273
40.0k
      SKIP_BLANKS;
8274
40.0k
  }
8275
79.8k
    }
8276
8277
    /*
8278
     * We should be at the end of the DOCTYPE declaration.
8279
     */
8280
79.8k
    if ((ctxt->wellFormed) && (RAW != '>')) {
8281
3.55k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8282
3.55k
  return;
8283
3.55k
    }
8284
76.3k
    NEXT;
8285
76.3k
}
8286
8287
#ifdef LIBXML_SAX1_ENABLED
8288
/**
8289
 * xmlParseAttribute:
8290
 * @ctxt:  an XML parser context
8291
 * @value:  a xmlChar ** used to store the value of the attribute
8292
 *
8293
 * DEPRECATED: Internal function, don't use.
8294
 *
8295
 * parse an attribute
8296
 *
8297
 * [41] Attribute ::= Name Eq AttValue
8298
 *
8299
 * [ WFC: No External Entity References ]
8300
 * Attribute values cannot contain direct or indirect entity references
8301
 * to external entities.
8302
 *
8303
 * [ WFC: No < in Attribute Values ]
8304
 * The replacement text of any entity referred to directly or indirectly in
8305
 * an attribute value (other than "&lt;") must not contain a <.
8306
 *
8307
 * [ VC: Attribute Value Type ]
8308
 * The attribute must have been declared; the value must be of the type
8309
 * declared for it.
8310
 *
8311
 * [25] Eq ::= S? '=' S?
8312
 *
8313
 * With namespace:
8314
 *
8315
 * [NS 11] Attribute ::= QName Eq AttValue
8316
 *
8317
 * Also the case QName == xmlns:??? is handled independently as a namespace
8318
 * definition.
8319
 *
8320
 * Returns the attribute name, and the value in *value.
8321
 */
8322
8323
const xmlChar *
8324
395k
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8325
395k
    const xmlChar *name;
8326
395k
    xmlChar *val;
8327
8328
395k
    *value = NULL;
8329
395k
    GROW;
8330
395k
    name = xmlParseName(ctxt);
8331
395k
    if (name == NULL) {
8332
97.8k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8333
97.8k
                 "error parsing attribute name\n");
8334
97.8k
        return(NULL);
8335
97.8k
    }
8336
8337
    /*
8338
     * read the value
8339
     */
8340
297k
    SKIP_BLANKS;
8341
297k
    if (RAW == '=') {
8342
188k
        NEXT;
8343
188k
  SKIP_BLANKS;
8344
188k
  val = xmlParseAttValue(ctxt);
8345
188k
    } else {
8346
108k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8347
108k
         "Specification mandates value for attribute %s\n", name);
8348
108k
  return(name);
8349
108k
    }
8350
8351
    /*
8352
     * Check that xml:lang conforms to the specification
8353
     * No more registered as an error, just generate a warning now
8354
     * since this was deprecated in XML second edition
8355
     */
8356
188k
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8357
41.4k
  if (!xmlCheckLanguageID(val)) {
8358
37.6k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8359
37.6k
              "Malformed value for xml:lang : %s\n",
8360
37.6k
        val, NULL);
8361
37.6k
  }
8362
41.4k
    }
8363
8364
    /*
8365
     * Check that xml:space conforms to the specification
8366
     */
8367
188k
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8368
1.56k
  if (xmlStrEqual(val, BAD_CAST "default"))
8369
453
      *(ctxt->space) = 0;
8370
1.10k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8371
408
      *(ctxt->space) = 1;
8372
701
  else {
8373
701
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8374
701
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8375
701
                                 val, NULL);
8376
701
  }
8377
1.56k
    }
8378
8379
188k
    *value = val;
8380
188k
    return(name);
8381
297k
}
8382
8383
/**
8384
 * xmlParseStartTag:
8385
 * @ctxt:  an XML parser context
8386
 *
8387
 * DEPRECATED: Internal function, don't use.
8388
 *
8389
 * Parse a start tag. Always consumes '<'.
8390
 *
8391
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8392
 *
8393
 * [ WFC: Unique Att Spec ]
8394
 * No attribute name may appear more than once in the same start-tag or
8395
 * empty-element tag.
8396
 *
8397
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8398
 *
8399
 * [ WFC: Unique Att Spec ]
8400
 * No attribute name may appear more than once in the same start-tag or
8401
 * empty-element tag.
8402
 *
8403
 * With namespace:
8404
 *
8405
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8406
 *
8407
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8408
 *
8409
 * Returns the element name parsed
8410
 */
8411
8412
const xmlChar *
8413
516k
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8414
516k
    const xmlChar *name;
8415
516k
    const xmlChar *attname;
8416
516k
    xmlChar *attvalue;
8417
516k
    const xmlChar **atts = ctxt->atts;
8418
516k
    int nbatts = 0;
8419
516k
    int maxatts = ctxt->maxatts;
8420
516k
    int i;
8421
8422
516k
    if (RAW != '<') return(NULL);
8423
516k
    NEXT1;
8424
8425
516k
    name = xmlParseName(ctxt);
8426
516k
    if (name == NULL) {
8427
32.8k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8428
32.8k
       "xmlParseStartTag: invalid element name\n");
8429
32.8k
        return(NULL);
8430
32.8k
    }
8431
8432
    /*
8433
     * Now parse the attributes, it ends up with the ending
8434
     *
8435
     * (S Attribute)* S?
8436
     */
8437
483k
    SKIP_BLANKS;
8438
483k
    GROW;
8439
8440
735k
    while (((RAW != '>') &&
8441
735k
     ((RAW != '/') || (NXT(1) != '>')) &&
8442
735k
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8443
395k
  attname = xmlParseAttribute(ctxt, &attvalue);
8444
395k
        if (attname == NULL)
8445
97.8k
      break;
8446
297k
        if (attvalue != NULL) {
8447
      /*
8448
       * [ WFC: Unique Att Spec ]
8449
       * No attribute name may appear more than once in the same
8450
       * start-tag or empty-element tag.
8451
       */
8452
631k
      for (i = 0; i < nbatts;i += 2) {
8453
466k
          if (xmlStrEqual(atts[i], attname)) {
8454
3.93k
        xmlErrAttributeDup(ctxt, NULL, attname);
8455
3.93k
        xmlFree(attvalue);
8456
3.93k
        goto failed;
8457
3.93k
    }
8458
466k
      }
8459
      /*
8460
       * Add the pair to atts
8461
       */
8462
164k
      if (atts == NULL) {
8463
7.91k
          maxatts = 22; /* allow for 10 attrs by default */
8464
7.91k
          atts = (const xmlChar **)
8465
7.91k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8466
7.91k
    if (atts == NULL) {
8467
23
        xmlErrMemory(ctxt);
8468
23
        if (attvalue != NULL)
8469
23
      xmlFree(attvalue);
8470
23
        goto failed;
8471
23
    }
8472
7.88k
    ctxt->atts = atts;
8473
7.88k
    ctxt->maxatts = maxatts;
8474
156k
      } else if (nbatts + 4 > maxatts) {
8475
183
          const xmlChar **n;
8476
8477
183
          maxatts *= 2;
8478
183
          n = (const xmlChar **) xmlRealloc((void *) atts,
8479
183
               maxatts * sizeof(const xmlChar *));
8480
183
    if (n == NULL) {
8481
5
        xmlErrMemory(ctxt);
8482
5
        if (attvalue != NULL)
8483
5
      xmlFree(attvalue);
8484
5
        goto failed;
8485
5
    }
8486
178
    atts = n;
8487
178
    ctxt->atts = atts;
8488
178
    ctxt->maxatts = maxatts;
8489
178
      }
8490
164k
      atts[nbatts++] = attname;
8491
164k
      atts[nbatts++] = attvalue;
8492
164k
      atts[nbatts] = NULL;
8493
164k
      atts[nbatts + 1] = NULL;
8494
164k
  } else {
8495
128k
      if (attvalue != NULL)
8496
0
    xmlFree(attvalue);
8497
128k
  }
8498
8499
297k
failed:
8500
8501
297k
  GROW
8502
297k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8503
45.7k
      break;
8504
251k
  if (SKIP_BLANKS == 0) {
8505
193k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8506
193k
         "attributes construct error\n");
8507
193k
  }
8508
251k
  SHRINK;
8509
251k
        GROW;
8510
251k
    }
8511
8512
    /*
8513
     * SAX: Start of Element !
8514
     */
8515
483k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8516
483k
  (!ctxt->disableSAX)) {
8517
442k
  if (nbatts > 0)
8518
120k
      ctxt->sax->startElement(ctxt->userData, name, atts);
8519
322k
  else
8520
322k
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8521
442k
    }
8522
8523
483k
    if (atts != NULL) {
8524
        /* Free only the content strings */
8525
416k
        for (i = 1;i < nbatts;i+=2)
8526
164k
      if (atts[i] != NULL)
8527
164k
         xmlFree((xmlChar *) atts[i]);
8528
252k
    }
8529
483k
    return(name);
8530
483k
}
8531
8532
/**
8533
 * xmlParseEndTag1:
8534
 * @ctxt:  an XML parser context
8535
 * @line:  line of the start tag
8536
 * @nsNr:  number of namespaces on the start tag
8537
 *
8538
 * Parse an end tag. Always consumes '</'.
8539
 *
8540
 * [42] ETag ::= '</' Name S? '>'
8541
 *
8542
 * With namespace
8543
 *
8544
 * [NS 9] ETag ::= '</' QName S? '>'
8545
 */
8546
8547
static void
8548
98.6k
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8549
98.6k
    const xmlChar *name;
8550
8551
98.6k
    GROW;
8552
98.6k
    if ((RAW != '<') || (NXT(1) != '/')) {
8553
1.77k
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8554
1.77k
           "xmlParseEndTag: '</' not found\n");
8555
1.77k
  return;
8556
1.77k
    }
8557
96.8k
    SKIP(2);
8558
8559
96.8k
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8560
8561
    /*
8562
     * We should definitely be at the ending "S? '>'" part
8563
     */
8564
96.8k
    GROW;
8565
96.8k
    SKIP_BLANKS;
8566
96.8k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8567
5.83k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8568
5.83k
    } else
8569
91.0k
  NEXT1;
8570
8571
    /*
8572
     * [ WFC: Element Type Match ]
8573
     * The Name in an element's end-tag must match the element type in the
8574
     * start-tag.
8575
     *
8576
     */
8577
96.8k
    if (name != (xmlChar*)1) {
8578
9.42k
        if (name == NULL) name = BAD_CAST "unparsable";
8579
9.42k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8580
9.42k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8581
9.42k
                    ctxt->name, line, name);
8582
9.42k
    }
8583
8584
    /*
8585
     * SAX: End of Tag
8586
     */
8587
96.8k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8588
96.8k
  (!ctxt->disableSAX))
8589
68.7k
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8590
8591
96.8k
    namePop(ctxt);
8592
96.8k
    spacePop(ctxt);
8593
96.8k
    return;
8594
98.6k
}
8595
8596
/**
8597
 * xmlParseEndTag:
8598
 * @ctxt:  an XML parser context
8599
 *
8600
 * DEPRECATED: Internal function, don't use.
8601
 *
8602
 * parse an end of tag
8603
 *
8604
 * [42] ETag ::= '</' Name S? '>'
8605
 *
8606
 * With namespace
8607
 *
8608
 * [NS 9] ETag ::= '</' QName S? '>'
8609
 */
8610
8611
void
8612
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8613
0
    xmlParseEndTag1(ctxt, 0);
8614
0
}
8615
#endif /* LIBXML_SAX1_ENABLED */
8616
8617
/************************************************************************
8618
 *                  *
8619
 *          SAX 2 specific operations       *
8620
 *                  *
8621
 ************************************************************************/
8622
8623
/**
8624
 * xmlParseQNameHashed:
8625
 * @ctxt:  an XML parser context
8626
 * @prefix:  pointer to store the prefix part
8627
 *
8628
 * parse an XML Namespace QName
8629
 *
8630
 * [6]  QName  ::= (Prefix ':')? LocalPart
8631
 * [7]  Prefix  ::= NCName
8632
 * [8]  LocalPart  ::= NCName
8633
 *
8634
 * Returns the Name parsed or NULL
8635
 */
8636
8637
static xmlHashedString
8638
7.32M
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8639
7.32M
    xmlHashedString l, p;
8640
7.32M
    int start, isNCName = 0;
8641
8642
7.32M
    l.name = NULL;
8643
7.32M
    p.name = NULL;
8644
8645
7.32M
    GROW;
8646
7.32M
    start = CUR_PTR - BASE_PTR;
8647
8648
7.32M
    l = xmlParseNCName(ctxt);
8649
7.32M
    if (l.name != NULL) {
8650
6.31M
        isNCName = 1;
8651
6.31M
        if (CUR == ':') {
8652
1.50M
            NEXT;
8653
1.50M
            p = l;
8654
1.50M
            l = xmlParseNCName(ctxt);
8655
1.50M
        }
8656
6.31M
    }
8657
7.32M
    if ((l.name == NULL) || (CUR == ':')) {
8658
1.07M
        xmlChar *tmp;
8659
8660
1.07M
        l.name = NULL;
8661
1.07M
        p.name = NULL;
8662
1.07M
        if ((isNCName == 0) && (CUR != ':'))
8663
920k
            return(l);
8664
157k
        tmp = xmlParseNmtoken(ctxt);
8665
157k
        if (tmp != NULL)
8666
117k
            xmlFree(tmp);
8667
157k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8668
157k
                                CUR_PTR - (BASE_PTR + start));
8669
157k
        if (l.name == NULL) {
8670
17
            xmlErrMemory(ctxt);
8671
17
            return(l);
8672
17
        }
8673
157k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8674
157k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8675
157k
    }
8676
8677
6.40M
    *prefix = p;
8678
6.40M
    return(l);
8679
7.32M
}
8680
8681
/**
8682
 * xmlParseQName:
8683
 * @ctxt:  an XML parser context
8684
 * @prefix:  pointer to store the prefix part
8685
 *
8686
 * parse an XML Namespace QName
8687
 *
8688
 * [6]  QName  ::= (Prefix ':')? LocalPart
8689
 * [7]  Prefix  ::= NCName
8690
 * [8]  LocalPart  ::= NCName
8691
 *
8692
 * Returns the Name parsed or NULL
8693
 */
8694
8695
static const xmlChar *
8696
51.8k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8697
51.8k
    xmlHashedString n, p;
8698
8699
51.8k
    n = xmlParseQNameHashed(ctxt, &p);
8700
51.8k
    if (n.name == NULL)
8701
5.78k
        return(NULL);
8702
46.0k
    *prefix = p.name;
8703
46.0k
    return(n.name);
8704
51.8k
}
8705
8706
/**
8707
 * xmlParseQNameAndCompare:
8708
 * @ctxt:  an XML parser context
8709
 * @name:  the localname
8710
 * @prefix:  the prefix, if any.
8711
 *
8712
 * parse an XML name and compares for match
8713
 * (specialized for endtag parsing)
8714
 *
8715
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8716
 * and the name for mismatch
8717
 */
8718
8719
static const xmlChar *
8720
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8721
142k
                        xmlChar const *prefix) {
8722
142k
    const xmlChar *cmp;
8723
142k
    const xmlChar *in;
8724
142k
    const xmlChar *ret;
8725
142k
    const xmlChar *prefix2;
8726
8727
142k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8728
8729
142k
    GROW;
8730
142k
    in = ctxt->input->cur;
8731
8732
142k
    cmp = prefix;
8733
399k
    while (*in != 0 && *in == *cmp) {
8734
257k
  ++in;
8735
257k
  ++cmp;
8736
257k
    }
8737
142k
    if ((*cmp == 0) && (*in == ':')) {
8738
119k
        in++;
8739
119k
  cmp = name;
8740
868k
  while (*in != 0 && *in == *cmp) {
8741
749k
      ++in;
8742
749k
      ++cmp;
8743
749k
  }
8744
119k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8745
      /* success */
8746
90.2k
            ctxt->input->col += in - ctxt->input->cur;
8747
90.2k
      ctxt->input->cur = in;
8748
90.2k
      return((const xmlChar*) 1);
8749
90.2k
  }
8750
119k
    }
8751
    /*
8752
     * all strings coms from the dictionary, equality can be done directly
8753
     */
8754
51.8k
    ret = xmlParseQName (ctxt, &prefix2);
8755
51.8k
    if (ret == NULL)
8756
5.78k
        return(NULL);
8757
46.0k
    if ((ret == name) && (prefix == prefix2))
8758
3.56k
  return((const xmlChar*) 1);
8759
42.4k
    return ret;
8760
46.0k
}
8761
8762
/**
8763
 * xmlParseAttribute2:
8764
 * @ctxt:  an XML parser context
8765
 * @pref:  the element prefix
8766
 * @elem:  the element name
8767
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
8768
 * @value:  a xmlChar ** used to store the value of the attribute
8769
 * @len:  an int * to save the length of the attribute
8770
 * @alloc:  an int * to indicate if the attribute was allocated
8771
 *
8772
 * parse an attribute in the new SAX2 framework.
8773
 *
8774
 * Returns the attribute name, and the value in *value, .
8775
 */
8776
8777
static xmlHashedString
8778
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8779
                   const xmlChar * pref, const xmlChar * elem,
8780
                   xmlHashedString * hprefix, xmlChar ** value,
8781
                   int *len, int *alloc)
8782
2.95M
{
8783
2.95M
    xmlHashedString hname;
8784
2.95M
    const xmlChar *prefix, *name;
8785
2.95M
    xmlChar *val = NULL, *internal_val = NULL;
8786
2.95M
    int normalize = 0;
8787
8788
2.95M
    *value = NULL;
8789
2.95M
    GROW;
8790
2.95M
    hname = xmlParseQNameHashed(ctxt, hprefix);
8791
2.95M
    if (hname.name == NULL) {
8792
563k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8793
563k
                       "error parsing attribute name\n");
8794
563k
        return(hname);
8795
563k
    }
8796
2.38M
    name = hname.name;
8797
2.38M
    if (hprefix->name != NULL)
8798
660k
        prefix = hprefix->name;
8799
1.72M
    else
8800
1.72M
        prefix = NULL;
8801
8802
    /*
8803
     * get the type if needed
8804
     */
8805
2.38M
    if (ctxt->attsSpecial != NULL) {
8806
674k
        int type;
8807
8808
674k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
8809
674k
                                                 pref, elem,
8810
674k
                                                 prefix, name);
8811
674k
        if (type != 0)
8812
146k
            normalize = 1;
8813
674k
    }
8814
8815
    /*
8816
     * read the value
8817
     */
8818
2.38M
    SKIP_BLANKS;
8819
2.38M
    if (RAW == '=') {
8820
2.21M
        NEXT;
8821
2.21M
        SKIP_BLANKS;
8822
2.21M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8823
2.21M
        if (val == NULL)
8824
65.8k
            goto error;
8825
2.21M
    } else {
8826
175k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8827
175k
                          "Specification mandates value for attribute %s\n",
8828
175k
                          name);
8829
175k
        goto error;
8830
175k
    }
8831
8832
2.14M
    if (prefix == ctxt->str_xml) {
8833
        /*
8834
         * Check that xml:lang conforms to the specification
8835
         * No more registered as an error, just generate a warning now
8836
         * since this was deprecated in XML second edition
8837
         */
8838
123k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8839
43.5k
            internal_val = xmlStrndup(val, *len);
8840
43.5k
            if (internal_val == NULL)
8841
45
                goto mem_error;
8842
43.4k
            if (!xmlCheckLanguageID(internal_val)) {
8843
34.9k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8844
34.9k
                              "Malformed value for xml:lang : %s\n",
8845
34.9k
                              internal_val, NULL);
8846
34.9k
            }
8847
43.4k
        }
8848
8849
        /*
8850
         * Check that xml:space conforms to the specification
8851
         */
8852
123k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8853
3.56k
            internal_val = xmlStrndup(val, *len);
8854
3.56k
            if (internal_val == NULL)
8855
9
                goto mem_error;
8856
3.55k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8857
791
                *(ctxt->space) = 0;
8858
2.76k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8859
988
                *(ctxt->space) = 1;
8860
1.77k
            else {
8861
1.77k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8862
1.77k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8863
1.77k
                              internal_val, NULL);
8864
1.77k
            }
8865
3.55k
        }
8866
123k
        if (internal_val) {
8867
47.0k
            xmlFree(internal_val);
8868
47.0k
        }
8869
123k
    }
8870
8871
2.14M
    *value = val;
8872
2.14M
    return (hname);
8873
8874
54
mem_error:
8875
54
    xmlErrMemory(ctxt);
8876
241k
error:
8877
241k
    if ((val != NULL) && (*alloc != 0))
8878
17
        xmlFree(val);
8879
241k
    return(hname);
8880
54
}
8881
8882
/**
8883
 * xmlAttrHashInsert:
8884
 * @ctxt: parser context
8885
 * @size: size of the hash table
8886
 * @name: attribute name
8887
 * @uri: namespace uri
8888
 * @hashValue: combined hash value of name and uri
8889
 * @aindex: attribute index (this is a multiple of 5)
8890
 *
8891
 * Inserts a new attribute into the hash table.
8892
 *
8893
 * Returns INT_MAX if no existing attribute was found, the attribute
8894
 * index if an attribute was found, -1 if a memory allocation failed.
8895
 */
8896
static int
8897
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8898
2.04M
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8899
2.04M
    xmlAttrHashBucket *table = ctxt->attrHash;
8900
2.04M
    xmlAttrHashBucket *bucket;
8901
2.04M
    unsigned hindex;
8902
8903
2.04M
    hindex = hashValue & (size - 1);
8904
2.04M
    bucket = &table[hindex];
8905
8906
5.57M
    while (bucket->index >= 0) {
8907
3.61M
        const xmlChar **atts = &ctxt->atts[bucket->index];
8908
8909
3.61M
        if (name == atts[0]) {
8910
3.29M
            int nsIndex = (int) (ptrdiff_t) atts[2];
8911
8912
3.29M
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8913
3.29M
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml) :
8914
3.23M
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8915
77.8k
                return(bucket->index);
8916
3.29M
        }
8917
8918
3.53M
        hindex++;
8919
3.53M
        bucket++;
8920
3.53M
        if (hindex >= size) {
8921
18.6k
            hindex = 0;
8922
18.6k
            bucket = table;
8923
18.6k
        }
8924
3.53M
    }
8925
8926
1.96M
    bucket->index = aindex;
8927
8928
1.96M
    return(INT_MAX);
8929
2.04M
}
8930
8931
/**
8932
 * xmlParseStartTag2:
8933
 * @ctxt:  an XML parser context
8934
 *
8935
 * Parse a start tag. Always consumes '<'.
8936
 *
8937
 * This routine is called when running SAX2 parsing
8938
 *
8939
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8940
 *
8941
 * [ WFC: Unique Att Spec ]
8942
 * No attribute name may appear more than once in the same start-tag or
8943
 * empty-element tag.
8944
 *
8945
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8946
 *
8947
 * [ WFC: Unique Att Spec ]
8948
 * No attribute name may appear more than once in the same start-tag or
8949
 * empty-element tag.
8950
 *
8951
 * With namespace:
8952
 *
8953
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8954
 *
8955
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8956
 *
8957
 * Returns the element name parsed
8958
 */
8959
8960
static const xmlChar *
8961
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8962
4.31M
                  const xmlChar **URI, int *nbNsPtr) {
8963
4.31M
    xmlHashedString hlocalname;
8964
4.31M
    xmlHashedString hprefix;
8965
4.31M
    xmlHashedString hattname;
8966
4.31M
    xmlHashedString haprefix;
8967
4.31M
    const xmlChar *localname;
8968
4.31M
    const xmlChar *prefix;
8969
4.31M
    const xmlChar *attname;
8970
4.31M
    const xmlChar *aprefix;
8971
4.31M
    const xmlChar *uri;
8972
4.31M
    xmlChar *attvalue = NULL;
8973
4.31M
    const xmlChar **atts = ctxt->atts;
8974
4.31M
    unsigned attrHashSize = 0;
8975
4.31M
    int maxatts = ctxt->maxatts;
8976
4.31M
    int nratts, nbatts, nbdef;
8977
4.31M
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8978
4.31M
    int alloc = 0;
8979
8980
4.31M
    if (RAW != '<') return(NULL);
8981
4.31M
    NEXT1;
8982
8983
4.31M
    nbatts = 0;
8984
4.31M
    nratts = 0;
8985
4.31M
    nbdef = 0;
8986
4.31M
    nbNs = 0;
8987
4.31M
    nbTotalDef = 0;
8988
4.31M
    attval = 0;
8989
8990
4.31M
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8991
0
        xmlErrMemory(ctxt);
8992
0
        return(NULL);
8993
0
    }
8994
8995
4.31M
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8996
4.31M
    if (hlocalname.name == NULL) {
8997
352k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8998
352k
           "StartTag: invalid element name\n");
8999
352k
        return(NULL);
9000
352k
    }
9001
3.96M
    localname = hlocalname.name;
9002
3.96M
    prefix = hprefix.name;
9003
9004
    /*
9005
     * Now parse the attributes, it ends up with the ending
9006
     *
9007
     * (S Attribute)* S?
9008
     */
9009
3.96M
    SKIP_BLANKS;
9010
3.96M
    GROW;
9011
9012
    /*
9013
     * The ctxt->atts array will be ultimately passed to the SAX callback
9014
     * containing five xmlChar pointers for each attribute:
9015
     *
9016
     * [0] attribute name
9017
     * [1] attribute prefix
9018
     * [2] namespace URI
9019
     * [3] attribute value
9020
     * [4] end of attribute value
9021
     *
9022
     * To save memory, we reuse this array temporarily and store integers
9023
     * in these pointer variables.
9024
     *
9025
     * [0] attribute name
9026
     * [1] attribute prefix
9027
     * [2] hash value of attribute prefix, and later namespace index
9028
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
9029
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
9030
     *
9031
     * The ctxt->attallocs array contains an additional unsigned int for
9032
     * each attribute, containing the hash value of the attribute name
9033
     * and the alloc flag in bit 31.
9034
     */
9035
9036
4.97M
    while (((RAW != '>') &&
9037
4.97M
     ((RAW != '/') || (NXT(1) != '>')) &&
9038
4.97M
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
9039
2.95M
  int len = -1;
9040
9041
2.95M
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
9042
2.95M
                                          &haprefix, &attvalue, &len,
9043
2.95M
                                          &alloc);
9044
2.95M
        if (hattname.name == NULL)
9045
563k
      break;
9046
2.38M
        if (attvalue == NULL)
9047
241k
            goto next_attr;
9048
2.14M
        attname = hattname.name;
9049
2.14M
        aprefix = haprefix.name;
9050
2.14M
  if (len < 0) len = xmlStrlen(attvalue);
9051
9052
2.14M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9053
211k
            xmlHashedString huri;
9054
211k
            xmlURIPtr parsedUri;
9055
9056
211k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9057
211k
            uri = huri.name;
9058
211k
            if (uri == NULL) {
9059
15
                xmlErrMemory(ctxt);
9060
15
                goto next_attr;
9061
15
            }
9062
211k
            if (*uri != 0) {
9063
208k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9064
424
                    xmlErrMemory(ctxt);
9065
424
                    goto next_attr;
9066
424
                }
9067
207k
                if (parsedUri == NULL) {
9068
125k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9069
125k
                             "xmlns: '%s' is not a valid URI\n",
9070
125k
                                       uri, NULL, NULL);
9071
125k
                } else {
9072
82.0k
                    if (parsedUri->scheme == NULL) {
9073
61.4k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9074
61.4k
                                  "xmlns: URI %s is not absolute\n",
9075
61.4k
                                  uri, NULL, NULL);
9076
61.4k
                    }
9077
82.0k
                    xmlFreeURI(parsedUri);
9078
82.0k
                }
9079
207k
                if (uri == ctxt->str_xml_ns) {
9080
713
                    if (attname != ctxt->str_xml) {
9081
713
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9082
713
                     "xml namespace URI cannot be the default namespace\n",
9083
713
                                 NULL, NULL, NULL);
9084
713
                    }
9085
713
                    goto next_attr;
9086
713
                }
9087
207k
                if ((len == 29) &&
9088
207k
                    (xmlStrEqual(uri,
9089
8.75k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9090
1.22k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9091
1.22k
                         "reuse of the xmlns namespace name is forbidden\n",
9092
1.22k
                             NULL, NULL, NULL);
9093
1.22k
                    goto next_attr;
9094
1.22k
                }
9095
207k
            }
9096
9097
208k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9098
184k
                nbNs++;
9099
1.93M
        } else if (aprefix == ctxt->str_xmlns) {
9100
268k
            xmlHashedString huri;
9101
268k
            xmlURIPtr parsedUri;
9102
9103
268k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9104
268k
            uri = huri.name;
9105
268k
            if (uri == NULL) {
9106
9
                xmlErrMemory(ctxt);
9107
9
                goto next_attr;
9108
9
            }
9109
9110
268k
            if (attname == ctxt->str_xml) {
9111
1.22k
                if (uri != ctxt->str_xml_ns) {
9112
1.21k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9113
1.21k
                             "xml namespace prefix mapped to wrong URI\n",
9114
1.21k
                             NULL, NULL, NULL);
9115
1.21k
                }
9116
                /*
9117
                 * Do not keep a namespace definition node
9118
                 */
9119
1.22k
                goto next_attr;
9120
1.22k
            }
9121
267k
            if (uri == ctxt->str_xml_ns) {
9122
288
                if (attname != ctxt->str_xml) {
9123
288
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9124
288
                             "xml namespace URI mapped to wrong prefix\n",
9125
288
                             NULL, NULL, NULL);
9126
288
                }
9127
288
                goto next_attr;
9128
288
            }
9129
266k
            if (attname == ctxt->str_xmlns) {
9130
1.25k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9131
1.25k
                         "redefinition of the xmlns prefix is forbidden\n",
9132
1.25k
                         NULL, NULL, NULL);
9133
1.25k
                goto next_attr;
9134
1.25k
            }
9135
265k
            if ((len == 29) &&
9136
265k
                (xmlStrEqual(uri,
9137
2.49k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9138
949
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9139
949
                         "reuse of the xmlns namespace name is forbidden\n",
9140
949
                         NULL, NULL, NULL);
9141
949
                goto next_attr;
9142
949
            }
9143
264k
            if ((uri == NULL) || (uri[0] == 0)) {
9144
2.25k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9145
2.25k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9146
2.25k
                              attname, NULL, NULL);
9147
2.25k
                goto next_attr;
9148
262k
            } else {
9149
262k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9150
148
                    xmlErrMemory(ctxt);
9151
148
                    goto next_attr;
9152
148
                }
9153
262k
                if (parsedUri == NULL) {
9154
47.7k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9155
47.7k
                         "xmlns:%s: '%s' is not a valid URI\n",
9156
47.7k
                                       attname, uri, NULL);
9157
214k
                } else {
9158
214k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9159
44.7k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9160
44.7k
                                  "xmlns:%s: URI %s is not absolute\n",
9161
44.7k
                                  attname, uri, NULL);
9162
44.7k
                    }
9163
214k
                    xmlFreeURI(parsedUri);
9164
214k
                }
9165
262k
            }
9166
9167
262k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9168
244k
                nbNs++;
9169
1.66M
        } else {
9170
            /*
9171
             * Populate attributes array, see above for repurposing
9172
             * of xmlChar pointers.
9173
             */
9174
1.66M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9175
44.2k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9176
124
                    goto next_attr;
9177
124
                }
9178
44.1k
                maxatts = ctxt->maxatts;
9179
44.1k
                atts = ctxt->atts;
9180
44.1k
            }
9181
1.66M
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9182
1.66M
                                        ((unsigned) alloc << 31);
9183
1.66M
            atts[nbatts++] = attname;
9184
1.66M
            atts[nbatts++] = aprefix;
9185
1.66M
            atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9186
1.66M
            if (alloc) {
9187
265k
                atts[nbatts++] = attvalue;
9188
265k
                attvalue += len;
9189
265k
                atts[nbatts++] = attvalue;
9190
1.40M
            } else {
9191
                /*
9192
                 * attvalue points into the input buffer which can be
9193
                 * reallocated. Store differences to input->base instead.
9194
                 * The pointers will be reconstructed later.
9195
                 */
9196
1.40M
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9197
1.40M
                attvalue += len;
9198
1.40M
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9199
1.40M
            }
9200
            /*
9201
             * tag if some deallocation is needed
9202
             */
9203
1.66M
            if (alloc != 0) attval = 1;
9204
1.66M
            attvalue = NULL; /* moved into atts */
9205
1.66M
        }
9206
9207
2.38M
next_attr:
9208
2.38M
        if ((attvalue != NULL) && (alloc != 0)) {
9209
95.0k
            xmlFree(attvalue);
9210
95.0k
            attvalue = NULL;
9211
95.0k
        }
9212
9213
2.38M
  GROW
9214
2.38M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9215
944k
      break;
9216
1.44M
  if (SKIP_BLANKS == 0) {
9217
432k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9218
432k
         "attributes construct error\n");
9219
432k
      break;
9220
432k
  }
9221
1.01M
        GROW;
9222
1.01M
    }
9223
9224
    /*
9225
     * Namespaces from default attributes
9226
     */
9227
3.96M
    if (ctxt->attsDefault != NULL) {
9228
988k
        xmlDefAttrsPtr defaults;
9229
9230
988k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9231
988k
  if (defaults != NULL) {
9232
2.48M
      for (i = 0; i < defaults->nbAttrs; i++) {
9233
1.97M
                xmlDefAttr *attr = &defaults->attrs[i];
9234
9235
1.97M
          attname = attr->name.name;
9236
1.97M
    aprefix = attr->prefix.name;
9237
9238
1.97M
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9239
124k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9240
9241
124k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9242
101k
                        nbNs++;
9243
1.84M
    } else if (aprefix == ctxt->str_xmlns) {
9244
224k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9245
9246
224k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9247
224k
                                      NULL, 1) > 0)
9248
217k
                        nbNs++;
9249
1.62M
    } else {
9250
1.62M
                    nbTotalDef += 1;
9251
1.62M
                }
9252
1.97M
      }
9253
515k
  }
9254
988k
    }
9255
9256
    /*
9257
     * Resolve attribute namespaces
9258
     */
9259
5.63M
    for (i = 0; i < nbatts; i += 5) {
9260
1.66M
        attname = atts[i];
9261
1.66M
        aprefix = atts[i+1];
9262
9263
        /*
9264
  * The default namespace does not apply to attribute names.
9265
  */
9266
1.66M
  if (aprefix == NULL) {
9267
1.31M
            nsIndex = NS_INDEX_EMPTY;
9268
1.31M
        } else if (aprefix == ctxt->str_xml) {
9269
123k
            nsIndex = NS_INDEX_XML;
9270
225k
        } else {
9271
225k
            haprefix.name = aprefix;
9272
225k
            haprefix.hashValue = (size_t) atts[i+2];
9273
225k
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9274
9275
225k
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9276
113k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9277
113k
        "Namespace prefix %s for %s on %s is not defined\n",
9278
113k
        aprefix, attname, localname);
9279
113k
                nsIndex = NS_INDEX_EMPTY;
9280
113k
            }
9281
225k
        }
9282
9283
1.66M
        atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex;
9284
1.66M
    }
9285
9286
    /*
9287
     * Maximum number of attributes including default attributes.
9288
     */
9289
3.96M
    maxAtts = nratts + nbTotalDef;
9290
9291
    /*
9292
     * Verify that attribute names are unique.
9293
     */
9294
3.96M
    if (maxAtts > 1) {
9295
372k
        attrHashSize = 4;
9296
634k
        while (attrHashSize / 2 < (unsigned) maxAtts)
9297
261k
            attrHashSize *= 2;
9298
9299
372k
        if (attrHashSize > ctxt->attrHashMax) {
9300
18.0k
            xmlAttrHashBucket *tmp;
9301
9302
18.0k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9303
18.0k
            if (tmp == NULL) {
9304
34
                xmlErrMemory(ctxt);
9305
34
                goto done;
9306
34
            }
9307
9308
18.0k
            ctxt->attrHash = tmp;
9309
18.0k
            ctxt->attrHashMax = attrHashSize;
9310
18.0k
        }
9311
9312
372k
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9313
9314
1.08M
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9315
707k
            const xmlChar *nsuri;
9316
707k
            unsigned hashValue, nameHashValue, uriHashValue;
9317
707k
            int res;
9318
9319
707k
            attname = atts[i];
9320
707k
            aprefix = atts[i+1];
9321
707k
            nsIndex = (ptrdiff_t) atts[i+2];
9322
            /* Hash values always have bit 31 set, see dict.c */
9323
707k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9324
9325
707k
            if (nsIndex == NS_INDEX_EMPTY) {
9326
                /*
9327
                 * Prefix with empty namespace means an undeclared
9328
                 * prefix which was already reported above.
9329
                 */
9330
560k
                if (aprefix != NULL)
9331
68.1k
                    continue;
9332
492k
                nsuri = NULL;
9333
492k
                uriHashValue = URI_HASH_EMPTY;
9334
492k
            } else if (nsIndex == NS_INDEX_XML) {
9335
48.7k
                nsuri = ctxt->str_xml_ns;
9336
48.7k
                uriHashValue = URI_HASH_XML;
9337
98.6k
            } else {
9338
98.6k
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9339
98.6k
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9340
98.6k
            }
9341
9342
639k
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9343
639k
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9344
639k
                                    hashValue, i);
9345
639k
            if (res < 0)
9346
0
                continue;
9347
9348
            /*
9349
             * [ WFC: Unique Att Spec ]
9350
             * No attribute name may appear more than once in the same
9351
             * start-tag or empty-element tag.
9352
             * As extended by the Namespace in XML REC.
9353
             */
9354
639k
            if (res < INT_MAX) {
9355
44.0k
                if (aprefix == atts[res+1]) {
9356
40.6k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9357
40.6k
                } else {
9358
3.33k
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9359
3.33k
                             "Namespaced Attribute %s in '%s' redefined\n",
9360
3.33k
                             attname, nsuri, NULL);
9361
3.33k
                }
9362
44.0k
            }
9363
639k
        }
9364
372k
    }
9365
9366
    /*
9367
     * Default attributes
9368
     */
9369
3.96M
    if (ctxt->attsDefault != NULL) {
9370
988k
        xmlDefAttrsPtr defaults;
9371
9372
988k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9373
988k
  if (defaults != NULL) {
9374
2.48M
      for (i = 0; i < defaults->nbAttrs; i++) {
9375
1.97M
                xmlDefAttr *attr = &defaults->attrs[i];
9376
1.97M
                const xmlChar *nsuri;
9377
1.97M
                unsigned hashValue, uriHashValue;
9378
1.97M
                int res;
9379
9380
1.97M
          attname = attr->name.name;
9381
1.97M
    aprefix = attr->prefix.name;
9382
9383
1.97M
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9384
124k
                    continue;
9385
1.84M
    if (aprefix == ctxt->str_xmlns)
9386
224k
                    continue;
9387
9388
1.62M
                if (aprefix == NULL) {
9389
1.30M
                    nsIndex = NS_INDEX_EMPTY;
9390
1.30M
                    nsuri = NULL;
9391
1.30M
                    uriHashValue = URI_HASH_EMPTY;
9392
1.62M
                } if (aprefix == ctxt->str_xml) {
9393
76.2k
                    nsIndex = NS_INDEX_XML;
9394
76.2k
                    nsuri = ctxt->str_xml_ns;
9395
76.2k
                    uriHashValue = URI_HASH_XML;
9396
1.54M
                } else if (aprefix != NULL) {
9397
244k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9398
244k
                    if ((nsIndex == INT_MAX) ||
9399
244k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9400
228k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9401
228k
                                 "Namespace prefix %s for %s on %s is not "
9402
228k
                                 "defined\n",
9403
228k
                                 aprefix, attname, localname);
9404
228k
                        nsIndex = NS_INDEX_EMPTY;
9405
228k
                        nsuri = NULL;
9406
228k
                        uriHashValue = URI_HASH_EMPTY;
9407
228k
                    } else {
9408
16.4k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9409
16.4k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9410
16.4k
                    }
9411
244k
                }
9412
9413
                /*
9414
                 * Check whether the attribute exists
9415
                 */
9416
1.62M
                if (maxAtts > 1) {
9417
1.40M
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9418
1.40M
                                                   uriHashValue);
9419
1.40M
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9420
1.40M
                                            hashValue, nbatts);
9421
1.40M
                    if (res < 0)
9422
0
                        continue;
9423
1.40M
                    if (res < INT_MAX) {
9424
33.7k
                        if (aprefix == atts[res+1])
9425
6.70k
                            continue;
9426
27.0k
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9427
27.0k
                                 "Namespaced Attribute %s in '%s' redefined\n",
9428
27.0k
                                 attname, nsuri, NULL);
9429
27.0k
                    }
9430
1.40M
                }
9431
9432
1.61M
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9433
9434
1.61M
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9435
7.11k
                    if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9436
52
                        localname = NULL;
9437
52
                        goto done;
9438
52
                    }
9439
7.06k
                    maxatts = ctxt->maxatts;
9440
7.06k
                    atts = ctxt->atts;
9441
7.06k
                }
9442
9443
1.61M
                atts[nbatts++] = attname;
9444
1.61M
                atts[nbatts++] = aprefix;
9445
1.61M
                atts[nbatts++] = (const xmlChar *) (ptrdiff_t) nsIndex;
9446
1.61M
                atts[nbatts++] = attr->value.name;
9447
1.61M
                atts[nbatts++] = attr->valueEnd;
9448
1.61M
                if ((ctxt->standalone == 1) && (attr->external != 0)) {
9449
332
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9450
332
                            "standalone: attribute %s on %s defaulted "
9451
332
                            "from external subset\n",
9452
332
                            attname, localname);
9453
332
                }
9454
1.61M
                nbdef++;
9455
1.61M
      }
9456
515k
  }
9457
988k
    }
9458
9459
    /*
9460
     * Reconstruct attribute pointers
9461
     */
9462
7.25M
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9463
        /* namespace URI */
9464
3.28M
        nsIndex = (ptrdiff_t) atts[i+2];
9465
3.28M
        if (nsIndex == INT_MAX)
9466
2.95M
            atts[i+2] = NULL;
9467
327k
        else if (nsIndex == INT_MAX - 1)
9468
199k
            atts[i+2] = ctxt->str_xml_ns;
9469
127k
        else
9470
127k
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9471
9472
3.28M
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9473
1.40M
            atts[i+3] = BASE_PTR + (ptrdiff_t) atts[i+3];  /* value */
9474
1.40M
            atts[i+4] = BASE_PTR + (ptrdiff_t) atts[i+4];  /* valuend */
9475
1.40M
        }
9476
3.28M
    }
9477
9478
3.96M
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9479
3.96M
    if ((prefix != NULL) && (uri == NULL)) {
9480
134k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9481
134k
           "Namespace prefix %s on %s is not defined\n",
9482
134k
     prefix, localname, NULL);
9483
134k
    }
9484
3.96M
    *pref = prefix;
9485
3.96M
    *URI = uri;
9486
9487
    /*
9488
     * SAX callback
9489
     */
9490
3.96M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9491
3.96M
  (!ctxt->disableSAX)) {
9492
3.65M
  if (nbNs > 0)
9493
426k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9494
426k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9495
426k
        nbatts / 5, nbdef, atts);
9496
3.23M
  else
9497
3.23M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9498
3.23M
                          0, NULL, nbatts / 5, nbdef, atts);
9499
3.65M
    }
9500
9501
3.96M
done:
9502
    /*
9503
     * Free allocated attribute values
9504
     */
9505
3.96M
    if (attval != 0) {
9506
598k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9507
354k
      if (ctxt->attallocs[j] & 0x80000000)
9508
265k
          xmlFree((xmlChar *) atts[i+3]);
9509
244k
    }
9510
9511
3.96M
    *nbNsPtr = nbNs;
9512
3.96M
    return(localname);
9513
3.96M
}
9514
9515
/**
9516
 * xmlParseEndTag2:
9517
 * @ctxt:  an XML parser context
9518
 * @line:  line of the start tag
9519
 * @nsNr:  number of namespaces on the start tag
9520
 *
9521
 * Parse an end tag. Always consumes '</'.
9522
 *
9523
 * [42] ETag ::= '</' Name S? '>'
9524
 *
9525
 * With namespace
9526
 *
9527
 * [NS 9] ETag ::= '</' QName S? '>'
9528
 */
9529
9530
static void
9531
991k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9532
991k
    const xmlChar *name;
9533
9534
991k
    GROW;
9535
991k
    if ((RAW != '<') || (NXT(1) != '/')) {
9536
3.47k
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9537
3.47k
  return;
9538
3.47k
    }
9539
987k
    SKIP(2);
9540
9541
987k
    if (tag->prefix == NULL)
9542
845k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9543
142k
    else
9544
142k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9545
9546
    /*
9547
     * We should definitely be at the ending "S? '>'" part
9548
     */
9549
987k
    GROW;
9550
987k
    SKIP_BLANKS;
9551
987k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9552
98.4k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9553
98.4k
    } else
9554
889k
  NEXT1;
9555
9556
    /*
9557
     * [ WFC: Element Type Match ]
9558
     * The Name in an element's end-tag must match the element type in the
9559
     * start-tag.
9560
     *
9561
     */
9562
987k
    if (name != (xmlChar*)1) {
9563
98.7k
        if (name == NULL) name = BAD_CAST "unparsable";
9564
98.7k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9565
98.7k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9566
98.7k
                    ctxt->name, tag->line, name);
9567
98.7k
    }
9568
9569
    /*
9570
     * SAX: End of Tag
9571
     */
9572
987k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9573
987k
  (!ctxt->disableSAX))
9574
933k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9575
933k
                                tag->URI);
9576
9577
987k
    spacePop(ctxt);
9578
987k
    if (tag->nsNr != 0)
9579
56.9k
  xmlParserNsPop(ctxt, tag->nsNr);
9580
987k
}
9581
9582
/**
9583
 * xmlParseCDSect:
9584
 * @ctxt:  an XML parser context
9585
 *
9586
 * DEPRECATED: Internal function, don't use.
9587
 *
9588
 * Parse escaped pure raw content. Always consumes '<!['.
9589
 *
9590
 * [18] CDSect ::= CDStart CData CDEnd
9591
 *
9592
 * [19] CDStart ::= '<![CDATA['
9593
 *
9594
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9595
 *
9596
 * [21] CDEnd ::= ']]>'
9597
 */
9598
void
9599
194k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9600
194k
    xmlChar *buf = NULL;
9601
194k
    int len = 0;
9602
194k
    int size = XML_PARSER_BUFFER_SIZE;
9603
194k
    int r, rl;
9604
194k
    int s, sl;
9605
194k
    int cur, l;
9606
194k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9607
79.7k
                    XML_MAX_HUGE_LENGTH :
9608
194k
                    XML_MAX_TEXT_LENGTH;
9609
9610
194k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9611
0
        return;
9612
194k
    SKIP(3);
9613
9614
194k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9615
0
        return;
9616
194k
    SKIP(6);
9617
9618
194k
    r = CUR_CHAR(rl);
9619
194k
    if (!IS_CHAR(r)) {
9620
4.31k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9621
4.31k
        goto out;
9622
4.31k
    }
9623
190k
    NEXTL(rl);
9624
190k
    s = CUR_CHAR(sl);
9625
190k
    if (!IS_CHAR(s)) {
9626
4.82k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9627
4.82k
        goto out;
9628
4.82k
    }
9629
185k
    NEXTL(sl);
9630
185k
    cur = CUR_CHAR(l);
9631
185k
    buf = (xmlChar *) xmlMallocAtomic(size);
9632
185k
    if (buf == NULL) {
9633
29
  xmlErrMemory(ctxt);
9634
29
        goto out;
9635
29
    }
9636
32.4M
    while (IS_CHAR(cur) &&
9637
32.4M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9638
32.2M
  if (len + 5 >= size) {
9639
16.8k
      xmlChar *tmp;
9640
9641
16.8k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9642
16.8k
      if (tmp == NULL) {
9643
11
    xmlErrMemory(ctxt);
9644
11
                goto out;
9645
11
      }
9646
16.8k
      buf = tmp;
9647
16.8k
      size *= 2;
9648
16.8k
  }
9649
32.2M
  COPY_BUF(buf, len, r);
9650
32.2M
        if (len > maxLength) {
9651
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9652
0
                           "CData section too big found\n");
9653
0
            goto out;
9654
0
        }
9655
32.2M
  r = s;
9656
32.2M
  rl = sl;
9657
32.2M
  s = cur;
9658
32.2M
  sl = l;
9659
32.2M
  NEXTL(l);
9660
32.2M
  cur = CUR_CHAR(l);
9661
32.2M
    }
9662
185k
    buf[len] = 0;
9663
185k
    if (cur != '>') {
9664
18.8k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9665
18.8k
                       "CData section not finished\n%.50s\n", buf);
9666
18.8k
        goto out;
9667
18.8k
    }
9668
166k
    NEXTL(l);
9669
9670
    /*
9671
     * OK the buffer is to be consumed as cdata.
9672
     */
9673
166k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9674
162k
  if (ctxt->sax->cdataBlock != NULL)
9675
104k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9676
58.4k
  else if (ctxt->sax->characters != NULL)
9677
58.4k
      ctxt->sax->characters(ctxt->userData, buf, len);
9678
162k
    }
9679
9680
194k
out:
9681
194k
    xmlFree(buf);
9682
194k
}
9683
9684
/**
9685
 * xmlParseContentInternal:
9686
 * @ctxt:  an XML parser context
9687
 *
9688
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9689
 * unexpected EOF to the caller.
9690
 */
9691
9692
static void
9693
90.0k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9694
90.0k
    int oldNameNr = ctxt->nameNr;
9695
90.0k
    int oldSpaceNr = ctxt->spaceNr;
9696
90.0k
    int oldNodeNr = ctxt->nodeNr;
9697
9698
90.0k
    GROW;
9699
13.2M
    while ((ctxt->input->cur < ctxt->input->end) &&
9700
13.2M
     (PARSER_STOPPED(ctxt) == 0)) {
9701
13.1M
  const xmlChar *cur = ctxt->input->cur;
9702
9703
  /*
9704
   * First case : a Processing Instruction.
9705
   */
9706
13.1M
  if ((*cur == '<') && (cur[1] == '?')) {
9707
77.7k
      xmlParsePI(ctxt);
9708
77.7k
  }
9709
9710
  /*
9711
   * Second case : a CDSection
9712
   */
9713
  /* 2.6.0 test was *cur not RAW */
9714
13.0M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9715
194k
      xmlParseCDSect(ctxt);
9716
194k
  }
9717
9718
  /*
9719
   * Third case :  a comment
9720
   */
9721
12.8M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9722
12.8M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9723
120k
      xmlParseComment(ctxt);
9724
120k
  }
9725
9726
  /*
9727
   * Fourth case :  a sub-element.
9728
   */
9729
12.7M
  else if (*cur == '<') {
9730
4.13M
            if (NXT(1) == '/') {
9731
701k
                if (ctxt->nameNr <= oldNameNr)
9732
15.3k
                    break;
9733
685k
          xmlParseElementEnd(ctxt);
9734
3.43M
            } else {
9735
3.43M
          xmlParseElementStart(ctxt);
9736
3.43M
            }
9737
4.13M
  }
9738
9739
  /*
9740
   * Fifth case : a reference. If if has not been resolved,
9741
   *    parsing returns it's Name, create the node
9742
   */
9743
9744
8.61M
  else if (*cur == '&') {
9745
553k
      xmlParseReference(ctxt);
9746
553k
  }
9747
9748
  /*
9749
   * Last case, text. Note that References are handled directly.
9750
   */
9751
8.06M
  else {
9752
8.06M
      xmlParseCharDataInternal(ctxt, 0);
9753
8.06M
  }
9754
9755
13.1M
  SHRINK;
9756
13.1M
  GROW;
9757
13.1M
    }
9758
9759
90.0k
    if ((ctxt->nameNr > oldNameNr) &&
9760
90.0k
        (ctxt->input->cur >= ctxt->input->end) &&
9761
90.0k
        (ctxt->wellFormed)) {
9762
3.51k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9763
3.51k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9764
3.51k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9765
3.51k
                "Premature end of data in tag %s line %d\n",
9766
3.51k
                name, line, NULL);
9767
3.51k
    }
9768
9769
    /*
9770
     * Clean up in error case
9771
     */
9772
9773
782k
    while (ctxt->nodeNr > oldNodeNr)
9774
691k
        nodePop(ctxt);
9775
9776
900k
    while (ctxt->nameNr > oldNameNr) {
9777
809k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9778
9779
809k
        if (tag->nsNr != 0)
9780
161k
            xmlParserNsPop(ctxt, tag->nsNr);
9781
9782
809k
        namePop(ctxt);
9783
809k
    }
9784
9785
900k
    while (ctxt->spaceNr > oldSpaceNr)
9786
809k
        spacePop(ctxt);
9787
90.0k
}
9788
9789
/**
9790
 * xmlParseContent:
9791
 * @ctxt:  an XML parser context
9792
 *
9793
 * Parse XML element content. This is useful if you're only interested
9794
 * in custom SAX callbacks. If you want a node list, use
9795
 * xmlParseInNodeContext.
9796
 */
9797
void
9798
0
xmlParseContent(xmlParserCtxtPtr ctxt) {
9799
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9800
0
        return;
9801
9802
0
    xmlCtxtInitializeLate(ctxt);
9803
9804
0
    xmlParseContentInternal(ctxt);
9805
9806
0
    if (ctxt->input->cur < ctxt->input->end)
9807
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9808
0
}
9809
9810
/**
9811
 * xmlParseElement:
9812
 * @ctxt:  an XML parser context
9813
 *
9814
 * DEPRECATED: Internal function, don't use.
9815
 *
9816
 * parse an XML element
9817
 *
9818
 * [39] element ::= EmptyElemTag | STag content ETag
9819
 *
9820
 * [ WFC: Element Type Match ]
9821
 * The Name in an element's end-tag must match the element type in the
9822
 * start-tag.
9823
 *
9824
 */
9825
9826
void
9827
84.1k
xmlParseElement(xmlParserCtxtPtr ctxt) {
9828
84.1k
    if (xmlParseElementStart(ctxt) != 0)
9829
18.4k
        return;
9830
9831
65.6k
    xmlParseContentInternal(ctxt);
9832
9833
65.6k
    if (ctxt->input->cur >= ctxt->input->end) {
9834
45.2k
        if (ctxt->wellFormed) {
9835
3.62k
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9836
3.62k
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9837
3.62k
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9838
3.62k
                    "Premature end of data in tag %s line %d\n",
9839
3.62k
                    name, line, NULL);
9840
3.62k
        }
9841
45.2k
        return;
9842
45.2k
    }
9843
9844
20.4k
    xmlParseElementEnd(ctxt);
9845
20.4k
}
9846
9847
/**
9848
 * xmlParseElementStart:
9849
 * @ctxt:  an XML parser context
9850
 *
9851
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9852
 * opening tag was parsed, 1 if an empty element was parsed.
9853
 *
9854
 * Always consumes '<'.
9855
 */
9856
static int
9857
3.52M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9858
3.52M
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9859
3.52M
    const xmlChar *name;
9860
3.52M
    const xmlChar *prefix = NULL;
9861
3.52M
    const xmlChar *URI = NULL;
9862
3.52M
    xmlParserNodeInfo node_info;
9863
3.52M
    int line;
9864
3.52M
    xmlNodePtr cur;
9865
3.52M
    int nbNs = 0;
9866
9867
3.52M
    if (ctxt->nameNr > maxDepth) {
9868
136
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9869
136
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9870
136
                ctxt->nameNr);
9871
136
  xmlHaltParser(ctxt);
9872
136
  return(-1);
9873
136
    }
9874
9875
    /* Capture start position */
9876
3.52M
    if (ctxt->record_info) {
9877
0
        node_info.begin_pos = ctxt->input->consumed +
9878
0
                          (CUR_PTR - ctxt->input->base);
9879
0
  node_info.begin_line = ctxt->input->line;
9880
0
    }
9881
9882
3.52M
    if (ctxt->spaceNr == 0)
9883
62.3k
  spacePush(ctxt, -1);
9884
3.46M
    else if (*ctxt->space == -2)
9885
264k
  spacePush(ctxt, -1);
9886
3.19M
    else
9887
3.19M
  spacePush(ctxt, *ctxt->space);
9888
9889
3.52M
    line = ctxt->input->line;
9890
3.52M
#ifdef LIBXML_SAX1_ENABLED
9891
3.52M
    if (ctxt->sax2)
9892
3.15M
#endif /* LIBXML_SAX1_ENABLED */
9893
3.15M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9894
363k
#ifdef LIBXML_SAX1_ENABLED
9895
363k
    else
9896
363k
  name = xmlParseStartTag(ctxt);
9897
3.52M
#endif /* LIBXML_SAX1_ENABLED */
9898
3.52M
    if (name == NULL) {
9899
373k
  spacePop(ctxt);
9900
373k
        return(-1);
9901
373k
    }
9902
3.14M
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9903
3.14M
    cur = ctxt->node;
9904
9905
3.14M
#ifdef LIBXML_VALID_ENABLED
9906
    /*
9907
     * [ VC: Root Element Type ]
9908
     * The Name in the document type declaration must match the element
9909
     * type of the root element.
9910
     */
9911
3.14M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9912
3.14M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9913
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9914
3.14M
#endif /* LIBXML_VALID_ENABLED */
9915
9916
    /*
9917
     * Check for an Empty Element.
9918
     */
9919
3.14M
    if ((RAW == '/') && (NXT(1) == '>')) {
9920
621k
        SKIP(2);
9921
621k
  if (ctxt->sax2) {
9922
591k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9923
591k
    (!ctxt->disableSAX))
9924
572k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9925
591k
#ifdef LIBXML_SAX1_ENABLED
9926
591k
  } else {
9927
29.3k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9928
29.3k
    (!ctxt->disableSAX))
9929
28.3k
    ctxt->sax->endElement(ctxt->userData, name);
9930
29.3k
#endif /* LIBXML_SAX1_ENABLED */
9931
29.3k
  }
9932
621k
  namePop(ctxt);
9933
621k
  spacePop(ctxt);
9934
621k
  if (nbNs > 0)
9935
43.9k
      xmlParserNsPop(ctxt, nbNs);
9936
621k
  if (cur != NULL && ctxt->record_info) {
9937
0
            node_info.node = cur;
9938
0
            node_info.end_pos = ctxt->input->consumed +
9939
0
                                (CUR_PTR - ctxt->input->base);
9940
0
            node_info.end_line = ctxt->input->line;
9941
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9942
0
  }
9943
621k
  return(1);
9944
621k
    }
9945
2.52M
    if (RAW == '>') {
9946
1.56M
        NEXT1;
9947
1.56M
        if (cur != NULL && ctxt->record_info) {
9948
0
            node_info.node = cur;
9949
0
            node_info.end_pos = 0;
9950
0
            node_info.end_line = 0;
9951
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9952
0
        }
9953
1.56M
    } else {
9954
966k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9955
966k
         "Couldn't find end of Start Tag %s line %d\n",
9956
966k
                    name, line, NULL);
9957
9958
  /*
9959
   * end of parsing of this node.
9960
   */
9961
966k
  nodePop(ctxt);
9962
966k
  namePop(ctxt);
9963
966k
  spacePop(ctxt);
9964
966k
  if (nbNs > 0)
9965
107k
      xmlParserNsPop(ctxt, nbNs);
9966
966k
  return(-1);
9967
966k
    }
9968
9969
1.56M
    return(0);
9970
2.52M
}
9971
9972
/**
9973
 * xmlParseElementEnd:
9974
 * @ctxt:  an XML parser context
9975
 *
9976
 * Parse the end of an XML element. Always consumes '</'.
9977
 */
9978
static void
9979
706k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9980
706k
    xmlNodePtr cur = ctxt->node;
9981
9982
706k
    if (ctxt->nameNr <= 0) {
9983
77
        if ((RAW == '<') && (NXT(1) == '/'))
9984
15
            SKIP(2);
9985
77
        return;
9986
77
    }
9987
9988
    /*
9989
     * parse the end of tag: '</' should be here.
9990
     */
9991
706k
    if (ctxt->sax2) {
9992
630k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9993
630k
  namePop(ctxt);
9994
630k
    }
9995
75.0k
#ifdef LIBXML_SAX1_ENABLED
9996
75.0k
    else
9997
75.0k
  xmlParseEndTag1(ctxt, 0);
9998
706k
#endif /* LIBXML_SAX1_ENABLED */
9999
10000
    /*
10001
     * Capture end position
10002
     */
10003
706k
    if (cur != NULL && ctxt->record_info) {
10004
0
        xmlParserNodeInfoPtr node_info;
10005
10006
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10007
0
        if (node_info != NULL) {
10008
0
            node_info->end_pos = ctxt->input->consumed +
10009
0
                                 (CUR_PTR - ctxt->input->base);
10010
0
            node_info->end_line = ctxt->input->line;
10011
0
        }
10012
0
    }
10013
706k
}
10014
10015
/**
10016
 * xmlParseVersionNum:
10017
 * @ctxt:  an XML parser context
10018
 *
10019
 * DEPRECATED: Internal function, don't use.
10020
 *
10021
 * parse the XML version value.
10022
 *
10023
 * [26] VersionNum ::= '1.' [0-9]+
10024
 *
10025
 * In practice allow [0-9].[0-9]+ at that level
10026
 *
10027
 * Returns the string giving the XML version number, or NULL
10028
 */
10029
xmlChar *
10030
45.5k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10031
45.5k
    xmlChar *buf = NULL;
10032
45.5k
    int len = 0;
10033
45.5k
    int size = 10;
10034
45.5k
    xmlChar cur;
10035
10036
45.5k
    buf = (xmlChar *) xmlMallocAtomic(size);
10037
45.5k
    if (buf == NULL) {
10038
91
  xmlErrMemory(ctxt);
10039
91
  return(NULL);
10040
91
    }
10041
45.4k
    cur = CUR;
10042
45.4k
    if (!((cur >= '0') && (cur <= '9'))) {
10043
2.98k
  xmlFree(buf);
10044
2.98k
  return(NULL);
10045
2.98k
    }
10046
42.4k
    buf[len++] = cur;
10047
42.4k
    NEXT;
10048
42.4k
    cur=CUR;
10049
42.4k
    if (cur != '.') {
10050
1.48k
  xmlFree(buf);
10051
1.48k
  return(NULL);
10052
1.48k
    }
10053
40.9k
    buf[len++] = cur;
10054
40.9k
    NEXT;
10055
40.9k
    cur=CUR;
10056
148k
    while ((cur >= '0') && (cur <= '9')) {
10057
108k
  if (len + 1 >= size) {
10058
5.66k
      xmlChar *tmp;
10059
10060
5.66k
      size *= 2;
10061
5.66k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10062
5.66k
      if (tmp == NULL) {
10063
6
          xmlFree(buf);
10064
6
    xmlErrMemory(ctxt);
10065
6
    return(NULL);
10066
6
      }
10067
5.65k
      buf = tmp;
10068
5.65k
  }
10069
108k
  buf[len++] = cur;
10070
108k
  NEXT;
10071
108k
  cur=CUR;
10072
108k
    }
10073
40.9k
    buf[len] = 0;
10074
40.9k
    return(buf);
10075
40.9k
}
10076
10077
/**
10078
 * xmlParseVersionInfo:
10079
 * @ctxt:  an XML parser context
10080
 *
10081
 * DEPRECATED: Internal function, don't use.
10082
 *
10083
 * parse the XML version.
10084
 *
10085
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10086
 *
10087
 * [25] Eq ::= S? '=' S?
10088
 *
10089
 * Returns the version string, e.g. "1.0"
10090
 */
10091
10092
xmlChar *
10093
80.7k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10094
80.7k
    xmlChar *version = NULL;
10095
10096
80.7k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10097
50.2k
  SKIP(7);
10098
50.2k
  SKIP_BLANKS;
10099
50.2k
  if (RAW != '=') {
10100
2.86k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10101
2.86k
      return(NULL);
10102
2.86k
        }
10103
47.3k
  NEXT;
10104
47.3k
  SKIP_BLANKS;
10105
47.3k
  if (RAW == '"') {
10106
26.4k
      NEXT;
10107
26.4k
      version = xmlParseVersionNum(ctxt);
10108
26.4k
      if (RAW != '"') {
10109
4.45k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10110
4.45k
      } else
10111
22.0k
          NEXT;
10112
26.4k
  } else if (RAW == '\''){
10113
19.0k
      NEXT;
10114
19.0k
      version = xmlParseVersionNum(ctxt);
10115
19.0k
      if (RAW != '\'') {
10116
3.61k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10117
3.61k
      } else
10118
15.4k
          NEXT;
10119
19.0k
  } else {
10120
1.78k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10121
1.78k
  }
10122
47.3k
    }
10123
77.8k
    return(version);
10124
80.7k
}
10125
10126
/**
10127
 * xmlParseEncName:
10128
 * @ctxt:  an XML parser context
10129
 *
10130
 * DEPRECATED: Internal function, don't use.
10131
 *
10132
 * parse the XML encoding name
10133
 *
10134
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10135
 *
10136
 * Returns the encoding name value or NULL
10137
 */
10138
xmlChar *
10139
37.0k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10140
37.0k
    xmlChar *buf = NULL;
10141
37.0k
    int len = 0;
10142
37.0k
    int size = 10;
10143
37.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10144
7.44k
                    XML_MAX_TEXT_LENGTH :
10145
37.0k
                    XML_MAX_NAME_LENGTH;
10146
37.0k
    xmlChar cur;
10147
10148
37.0k
    cur = CUR;
10149
37.0k
    if (((cur >= 'a') && (cur <= 'z')) ||
10150
37.0k
        ((cur >= 'A') && (cur <= 'Z'))) {
10151
35.6k
  buf = (xmlChar *) xmlMallocAtomic(size);
10152
35.6k
  if (buf == NULL) {
10153
69
      xmlErrMemory(ctxt);
10154
69
      return(NULL);
10155
69
  }
10156
10157
35.5k
  buf[len++] = cur;
10158
35.5k
  NEXT;
10159
35.5k
  cur = CUR;
10160
849k
  while (((cur >= 'a') && (cur <= 'z')) ||
10161
849k
         ((cur >= 'A') && (cur <= 'Z')) ||
10162
849k
         ((cur >= '0') && (cur <= '9')) ||
10163
849k
         (cur == '.') || (cur == '_') ||
10164
849k
         (cur == '-')) {
10165
813k
      if (len + 1 >= size) {
10166
25.5k
          xmlChar *tmp;
10167
10168
25.5k
    size *= 2;
10169
25.5k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10170
25.5k
    if (tmp == NULL) {
10171
55
        xmlErrMemory(ctxt);
10172
55
        xmlFree(buf);
10173
55
        return(NULL);
10174
55
    }
10175
25.5k
    buf = tmp;
10176
25.5k
      }
10177
813k
      buf[len++] = cur;
10178
813k
            if (len > maxLength) {
10179
5
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10180
5
                xmlFree(buf);
10181
5
                return(NULL);
10182
5
            }
10183
813k
      NEXT;
10184
813k
      cur = CUR;
10185
813k
        }
10186
35.5k
  buf[len] = 0;
10187
35.5k
    } else {
10188
1.37k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10189
1.37k
    }
10190
36.8k
    return(buf);
10191
37.0k
}
10192
10193
/**
10194
 * xmlParseEncodingDecl:
10195
 * @ctxt:  an XML parser context
10196
 *
10197
 * DEPRECATED: Internal function, don't use.
10198
 *
10199
 * parse the XML encoding declaration
10200
 *
10201
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10202
 *
10203
 * this setups the conversion filters.
10204
 *
10205
 * Returns the encoding value or NULL
10206
 */
10207
10208
const xmlChar *
10209
76.6k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10210
76.6k
    xmlChar *encoding = NULL;
10211
10212
76.6k
    SKIP_BLANKS;
10213
76.6k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10214
36.9k
        return(NULL);
10215
10216
39.6k
    SKIP(8);
10217
39.6k
    SKIP_BLANKS;
10218
39.6k
    if (RAW != '=') {
10219
1.41k
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10220
1.41k
        return(NULL);
10221
1.41k
    }
10222
38.2k
    NEXT;
10223
38.2k
    SKIP_BLANKS;
10224
38.2k
    if (RAW == '"') {
10225
23.2k
        NEXT;
10226
23.2k
        encoding = xmlParseEncName(ctxt);
10227
23.2k
        if (RAW != '"') {
10228
2.17k
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10229
2.17k
            xmlFree((xmlChar *) encoding);
10230
2.17k
            return(NULL);
10231
2.17k
        } else
10232
21.1k
            NEXT;
10233
23.2k
    } else if (RAW == '\''){
10234
13.7k
        NEXT;
10235
13.7k
        encoding = xmlParseEncName(ctxt);
10236
13.7k
        if (RAW != '\'') {
10237
2.59k
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10238
2.59k
            xmlFree((xmlChar *) encoding);
10239
2.59k
            return(NULL);
10240
2.59k
        } else
10241
11.1k
            NEXT;
10242
13.7k
    } else {
10243
1.25k
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10244
1.25k
    }
10245
10246
33.5k
    if (encoding == NULL)
10247
1.45k
        return(NULL);
10248
10249
32.0k
    xmlSetDeclaredEncoding(ctxt, encoding);
10250
10251
32.0k
    return(ctxt->encoding);
10252
33.5k
}
10253
10254
/**
10255
 * xmlParseSDDecl:
10256
 * @ctxt:  an XML parser context
10257
 *
10258
 * DEPRECATED: Internal function, don't use.
10259
 *
10260
 * parse the XML standalone declaration
10261
 *
10262
 * [32] SDDecl ::= S 'standalone' Eq
10263
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10264
 *
10265
 * [ VC: Standalone Document Declaration ]
10266
 * TODO The standalone document declaration must have the value "no"
10267
 * if any external markup declarations contain declarations of:
10268
 *  - attributes with default values, if elements to which these
10269
 *    attributes apply appear in the document without specifications
10270
 *    of values for these attributes, or
10271
 *  - entities (other than amp, lt, gt, apos, quot), if references
10272
 *    to those entities appear in the document, or
10273
 *  - attributes with values subject to normalization, where the
10274
 *    attribute appears in the document with a value which will change
10275
 *    as a result of normalization, or
10276
 *  - element types with element content, if white space occurs directly
10277
 *    within any instance of those types.
10278
 *
10279
 * Returns:
10280
 *   1 if standalone="yes"
10281
 *   0 if standalone="no"
10282
 *  -2 if standalone attribute is missing or invalid
10283
 *    (A standalone value of -2 means that the XML declaration was found,
10284
 *     but no value was specified for the standalone attribute).
10285
 */
10286
10287
int
10288
19.1k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10289
19.1k
    int standalone = -2;
10290
10291
19.1k
    SKIP_BLANKS;
10292
19.1k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10293
1.69k
  SKIP(10);
10294
1.69k
        SKIP_BLANKS;
10295
1.69k
  if (RAW != '=') {
10296
39
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10297
39
      return(standalone);
10298
39
        }
10299
1.65k
  NEXT;
10300
1.65k
  SKIP_BLANKS;
10301
1.65k
        if (RAW == '\''){
10302
591
      NEXT;
10303
591
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10304
323
          standalone = 0;
10305
323
                SKIP(2);
10306
323
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10307
268
                 (NXT(2) == 's')) {
10308
144
          standalone = 1;
10309
144
    SKIP(3);
10310
144
            } else {
10311
124
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10312
124
      }
10313
591
      if (RAW != '\'') {
10314
279
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10315
279
      } else
10316
312
          NEXT;
10317
1.06k
  } else if (RAW == '"'){
10318
1.04k
      NEXT;
10319
1.04k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10320
72
          standalone = 0;
10321
72
    SKIP(2);
10322
974
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10323
974
                 (NXT(2) == 's')) {
10324
824
          standalone = 1;
10325
824
                SKIP(3);
10326
824
            } else {
10327
150
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10328
150
      }
10329
1.04k
      if (RAW != '"') {
10330
318
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10331
318
      } else
10332
728
          NEXT;
10333
1.04k
  } else {
10334
22
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10335
22
        }
10336
1.65k
    }
10337
19.1k
    return(standalone);
10338
19.1k
}
10339
10340
/**
10341
 * xmlParseXMLDecl:
10342
 * @ctxt:  an XML parser context
10343
 *
10344
 * DEPRECATED: Internal function, don't use.
10345
 *
10346
 * parse an XML declaration header
10347
 *
10348
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10349
 */
10350
10351
void
10352
34.8k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10353
34.8k
    xmlChar *version;
10354
10355
    /*
10356
     * This value for standalone indicates that the document has an
10357
     * XML declaration but it does not have a standalone attribute.
10358
     * It will be overwritten later if a standalone attribute is found.
10359
     */
10360
10361
34.8k
    ctxt->standalone = -2;
10362
10363
    /*
10364
     * We know that '<?xml' is here.
10365
     */
10366
34.8k
    SKIP(5);
10367
10368
34.8k
    if (!IS_BLANK_CH(RAW)) {
10369
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10370
0
                 "Blank needed after '<?xml'\n");
10371
0
    }
10372
34.8k
    SKIP_BLANKS;
10373
10374
    /*
10375
     * We must have the VersionInfo here.
10376
     */
10377
34.8k
    version = xmlParseVersionInfo(ctxt);
10378
34.8k
    if (version == NULL) {
10379
13.6k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10380
21.1k
    } else {
10381
21.1k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10382
      /*
10383
       * Changed here for XML-1.0 5th edition
10384
       */
10385
8.38k
      if (ctxt->options & XML_PARSE_OLD10) {
10386
1.31k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10387
1.31k
                "Unsupported version '%s'\n",
10388
1.31k
                version);
10389
7.07k
      } else {
10390
7.07k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10391
5.00k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10392
5.00k
                      "Unsupported version '%s'\n",
10393
5.00k
          version, NULL);
10394
5.00k
    } else {
10395
2.06k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10396
2.06k
              "Unsupported version '%s'\n",
10397
2.06k
              version);
10398
2.06k
    }
10399
7.07k
      }
10400
8.38k
  }
10401
21.1k
  if (ctxt->version != NULL)
10402
0
      xmlFree((void *) ctxt->version);
10403
21.1k
  ctxt->version = version;
10404
21.1k
    }
10405
10406
    /*
10407
     * We may have the encoding declaration
10408
     */
10409
34.8k
    if (!IS_BLANK_CH(RAW)) {
10410
17.2k
        if ((RAW == '?') && (NXT(1) == '>')) {
10411
4.05k
      SKIP(2);
10412
4.05k
      return;
10413
4.05k
  }
10414
13.1k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10415
13.1k
    }
10416
30.7k
    xmlParseEncodingDecl(ctxt);
10417
10418
    /*
10419
     * We may have the standalone status.
10420
     */
10421
30.7k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10422
17.3k
        if ((RAW == '?') && (NXT(1) == '>')) {
10423
11.6k
      SKIP(2);
10424
11.6k
      return;
10425
11.6k
  }
10426
5.76k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10427
5.76k
    }
10428
10429
    /*
10430
     * We can grow the input buffer freely at that point
10431
     */
10432
19.1k
    GROW;
10433
10434
19.1k
    SKIP_BLANKS;
10435
19.1k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10436
10437
19.1k
    SKIP_BLANKS;
10438
19.1k
    if ((RAW == '?') && (NXT(1) == '>')) {
10439
2.30k
        SKIP(2);
10440
16.8k
    } else if (RAW == '>') {
10441
        /* Deprecated old WD ... */
10442
1.41k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10443
1.41k
  NEXT;
10444
15.4k
    } else {
10445
15.4k
        int c;
10446
10447
15.4k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10448
2.44M
        while ((PARSER_STOPPED(ctxt) == 0) &&
10449
2.44M
               ((c = CUR) != 0)) {
10450
2.43M
            NEXT;
10451
2.43M
            if (c == '>')
10452
9.77k
                break;
10453
2.43M
        }
10454
15.4k
    }
10455
19.1k
}
10456
10457
/**
10458
 * xmlParseMisc:
10459
 * @ctxt:  an XML parser context
10460
 *
10461
 * DEPRECATED: Internal function, don't use.
10462
 *
10463
 * parse an XML Misc* optional field.
10464
 *
10465
 * [27] Misc ::= Comment | PI |  S
10466
 */
10467
10468
void
10469
261k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10470
366k
    while (PARSER_STOPPED(ctxt) == 0) {
10471
343k
        SKIP_BLANKS;
10472
343k
        GROW;
10473
343k
        if ((RAW == '<') && (NXT(1) == '?')) {
10474
20.8k
      xmlParsePI(ctxt);
10475
323k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10476
84.0k
      xmlParseComment(ctxt);
10477
239k
        } else {
10478
239k
            break;
10479
239k
        }
10480
343k
    }
10481
261k
}
10482
10483
static void
10484
180k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10485
180k
    xmlDocPtr doc;
10486
10487
    /*
10488
     * SAX: end of the document processing.
10489
     */
10490
180k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10491
180k
        ctxt->sax->endDocument(ctxt->userData);
10492
10493
180k
    doc = ctxt->myDoc;
10494
180k
    if (doc != NULL) {
10495
175k
        if (ctxt->wellFormed) {
10496
18.0k
            doc->properties |= XML_DOC_WELLFORMED;
10497
18.0k
            if (ctxt->valid)
10498
13.7k
                doc->properties |= XML_DOC_DTDVALID;
10499
18.0k
            if (ctxt->nsWellFormed)
10500
14.7k
                doc->properties |= XML_DOC_NSVALID;
10501
18.0k
        }
10502
10503
175k
        if (ctxt->options & XML_PARSE_OLD10)
10504
34.6k
            doc->properties |= XML_DOC_OLD10;
10505
10506
        /*
10507
         * Remove locally kept entity definitions if the tree was not built
10508
         */
10509
175k
  if (xmlStrEqual(doc->version, SAX_COMPAT_MODE)) {
10510
1.28k
            xmlFreeDoc(doc);
10511
1.28k
            ctxt->myDoc = NULL;
10512
1.28k
        }
10513
175k
    }
10514
180k
}
10515
10516
/**
10517
 * xmlParseDocument:
10518
 * @ctxt:  an XML parser context
10519
 *
10520
 * Parse an XML document and invoke the SAX handlers. This is useful
10521
 * if you're only interested in custom SAX callbacks. If you want a
10522
 * document tree, use xmlCtxtParseDocument.
10523
 *
10524
 * Returns 0, -1 in case of error.
10525
 */
10526
10527
int
10528
125k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10529
125k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10530
0
        return(-1);
10531
10532
125k
    GROW;
10533
10534
    /*
10535
     * SAX: detecting the level.
10536
     */
10537
125k
    xmlCtxtInitializeLate(ctxt);
10538
10539
    /*
10540
     * Document locator is unused. Only for backward compatibility.
10541
     */
10542
125k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10543
125k
        xmlSAXLocator copy = xmlDefaultSAXLocator;
10544
125k
        ctxt->sax->setDocumentLocator(ctxt->userData, &copy);
10545
125k
    }
10546
10547
125k
    xmlDetectEncoding(ctxt);
10548
10549
125k
    if (CUR == 0) {
10550
746
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10551
746
  return(-1);
10552
746
    }
10553
10554
124k
    GROW;
10555
124k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10556
10557
  /*
10558
   * Note that we will switch encoding on the fly.
10559
   */
10560
20.8k
  xmlParseXMLDecl(ctxt);
10561
20.8k
  SKIP_BLANKS;
10562
103k
    } else {
10563
103k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10564
103k
        if (ctxt->version == NULL) {
10565
40
            xmlErrMemory(ctxt);
10566
40
            return(-1);
10567
40
        }
10568
103k
    }
10569
124k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10570
120k
        ctxt->sax->startDocument(ctxt->userData);
10571
124k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10572
124k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10573
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10574
0
    }
10575
10576
    /*
10577
     * The Misc part of the Prolog
10578
     */
10579
124k
    xmlParseMisc(ctxt);
10580
10581
    /*
10582
     * Then possibly doc type declaration(s) and more Misc
10583
     * (doctypedecl Misc*)?
10584
     */
10585
124k
    GROW;
10586
124k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10587
10588
52.6k
  ctxt->inSubset = 1;
10589
52.6k
  xmlParseDocTypeDecl(ctxt);
10590
52.6k
  if (RAW == '[') {
10591
45.4k
      xmlParseInternalSubset(ctxt);
10592
45.4k
  }
10593
10594
  /*
10595
   * Create and update the external subset.
10596
   */
10597
52.6k
  ctxt->inSubset = 2;
10598
52.6k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10599
52.6k
      (!ctxt->disableSAX))
10600
36.2k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10601
36.2k
                                ctxt->extSubSystem, ctxt->extSubURI);
10602
52.6k
  ctxt->inSubset = 0;
10603
10604
52.6k
        xmlCleanSpecialAttr(ctxt);
10605
10606
52.6k
  xmlParseMisc(ctxt);
10607
52.6k
    }
10608
10609
    /*
10610
     * Time to start parsing the tree itself
10611
     */
10612
124k
    GROW;
10613
124k
    if (RAW != '<') {
10614
40.6k
        if (ctxt->wellFormed)
10615
5.88k
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10616
5.88k
                           "Start tag expected, '<' not found\n");
10617
84.1k
    } else {
10618
84.1k
  xmlParseElement(ctxt);
10619
10620
  /*
10621
   * The Misc part at the end
10622
   */
10623
84.1k
  xmlParseMisc(ctxt);
10624
10625
84.1k
        if (ctxt->input->cur < ctxt->input->end) {
10626
16.8k
            if (ctxt->wellFormed)
10627
635
          xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10628
67.2k
        } else if ((ctxt->input->buf != NULL) &&
10629
67.2k
                   (ctxt->input->buf->encoder != NULL) &&
10630
67.2k
                   (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
10631
714
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
10632
714
                           "Truncated multi-byte sequence at EOF\n");
10633
714
        }
10634
84.1k
    }
10635
10636
124k
    ctxt->instate = XML_PARSER_EOF;
10637
124k
    xmlFinishDocument(ctxt);
10638
10639
124k
    if (! ctxt->wellFormed) {
10640
108k
  ctxt->valid = 0;
10641
108k
  return(-1);
10642
108k
    }
10643
10644
16.2k
    return(0);
10645
124k
}
10646
10647
/**
10648
 * xmlParseExtParsedEnt:
10649
 * @ctxt:  an XML parser context
10650
 *
10651
 * parse a general parsed entity
10652
 * An external general parsed entity is well-formed if it matches the
10653
 * production labeled extParsedEnt.
10654
 *
10655
 * [78] extParsedEnt ::= TextDecl? content
10656
 *
10657
 * Returns 0, -1 in case of error. the parser context is augmented
10658
 *                as a result of the parsing.
10659
 */
10660
10661
int
10662
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10663
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10664
0
        return(-1);
10665
10666
0
    xmlCtxtInitializeLate(ctxt);
10667
10668
    /*
10669
     * Document locator is unused. Only for backward compatibility.
10670
     */
10671
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10672
0
        xmlSAXLocator copy = xmlDefaultSAXLocator;
10673
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &copy);
10674
0
    }
10675
10676
0
    xmlDetectEncoding(ctxt);
10677
10678
0
    if (CUR == 0) {
10679
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10680
0
    }
10681
10682
    /*
10683
     * Check for the XMLDecl in the Prolog.
10684
     */
10685
0
    GROW;
10686
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10687
10688
  /*
10689
   * Note that we will switch encoding on the fly.
10690
   */
10691
0
  xmlParseXMLDecl(ctxt);
10692
0
  SKIP_BLANKS;
10693
0
    } else {
10694
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10695
0
    }
10696
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10697
0
        ctxt->sax->startDocument(ctxt->userData);
10698
10699
    /*
10700
     * Doing validity checking on chunk doesn't make sense
10701
     */
10702
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10703
0
    ctxt->validate = 0;
10704
0
    ctxt->depth = 0;
10705
10706
0
    xmlParseContentInternal(ctxt);
10707
10708
0
    if (ctxt->input->cur < ctxt->input->end)
10709
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10710
10711
    /*
10712
     * SAX: end of the document processing.
10713
     */
10714
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10715
0
        ctxt->sax->endDocument(ctxt->userData);
10716
10717
0
    if (! ctxt->wellFormed) return(-1);
10718
0
    return(0);
10719
0
}
10720
10721
#ifdef LIBXML_PUSH_ENABLED
10722
/************************************************************************
10723
 *                  *
10724
 *    Progressive parsing interfaces        *
10725
 *                  *
10726
 ************************************************************************/
10727
10728
/**
10729
 * xmlParseLookupChar:
10730
 * @ctxt:  an XML parser context
10731
 * @c:  character
10732
 *
10733
 * Check whether the input buffer contains a character.
10734
 */
10735
static int
10736
641k
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10737
641k
    const xmlChar *cur;
10738
10739
641k
    if (ctxt->checkIndex == 0) {
10740
461k
        cur = ctxt->input->cur + 1;
10741
461k
    } else {
10742
180k
        cur = ctxt->input->cur + ctxt->checkIndex;
10743
180k
    }
10744
10745
641k
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10746
184k
        size_t index = ctxt->input->end - ctxt->input->cur;
10747
10748
184k
        if (index > LONG_MAX) {
10749
0
            ctxt->checkIndex = 0;
10750
0
            return(1);
10751
0
        }
10752
184k
        ctxt->checkIndex = index;
10753
184k
        return(0);
10754
457k
    } else {
10755
457k
        ctxt->checkIndex = 0;
10756
457k
        return(1);
10757
457k
    }
10758
641k
}
10759
10760
/**
10761
 * xmlParseLookupString:
10762
 * @ctxt:  an XML parser context
10763
 * @startDelta: delta to apply at the start
10764
 * @str:  string
10765
 * @strLen:  length of string
10766
 *
10767
 * Check whether the input buffer contains a string.
10768
 */
10769
static const xmlChar *
10770
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10771
973k
                     const char *str, size_t strLen) {
10772
973k
    const xmlChar *cur, *term;
10773
10774
973k
    if (ctxt->checkIndex == 0) {
10775
457k
        cur = ctxt->input->cur + startDelta;
10776
515k
    } else {
10777
515k
        cur = ctxt->input->cur + ctxt->checkIndex;
10778
515k
    }
10779
10780
973k
    term = BAD_CAST strstr((const char *) cur, str);
10781
973k
    if (term == NULL) {
10782
606k
        const xmlChar *end = ctxt->input->end;
10783
606k
        size_t index;
10784
10785
        /* Rescan (strLen - 1) characters. */
10786
606k
        if ((size_t) (end - cur) < strLen)
10787
3.26k
            end = cur;
10788
603k
        else
10789
603k
            end -= strLen - 1;
10790
606k
        index = end - ctxt->input->cur;
10791
606k
        if (index > LONG_MAX) {
10792
0
            ctxt->checkIndex = 0;
10793
0
            return(ctxt->input->end - strLen);
10794
0
        }
10795
606k
        ctxt->checkIndex = index;
10796
606k
    } else {
10797
366k
        ctxt->checkIndex = 0;
10798
366k
    }
10799
10800
973k
    return(term);
10801
973k
}
10802
10803
/**
10804
 * xmlParseLookupCharData:
10805
 * @ctxt:  an XML parser context
10806
 *
10807
 * Check whether the input buffer contains terminated char data.
10808
 */
10809
static int
10810
430k
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10811
430k
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10812
430k
    const xmlChar *end = ctxt->input->end;
10813
430k
    size_t index;
10814
10815
7.53M
    while (cur < end) {
10816
7.50M
        if ((*cur == '<') || (*cur == '&')) {
10817
400k
            ctxt->checkIndex = 0;
10818
400k
            return(1);
10819
400k
        }
10820
7.10M
        cur++;
10821
7.10M
    }
10822
10823
30.3k
    index = cur - ctxt->input->cur;
10824
30.3k
    if (index > LONG_MAX) {
10825
0
        ctxt->checkIndex = 0;
10826
0
        return(1);
10827
0
    }
10828
30.3k
    ctxt->checkIndex = index;
10829
30.3k
    return(0);
10830
30.3k
}
10831
10832
/**
10833
 * xmlParseLookupGt:
10834
 * @ctxt:  an XML parser context
10835
 *
10836
 * Check whether there's enough data in the input buffer to finish parsing
10837
 * a start tag. This has to take quotes into account.
10838
 */
10839
static int
10840
1.98M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10841
1.98M
    const xmlChar *cur;
10842
1.98M
    const xmlChar *end = ctxt->input->end;
10843
1.98M
    int state = ctxt->endCheckState;
10844
1.98M
    size_t index;
10845
10846
1.98M
    if (ctxt->checkIndex == 0)
10847
1.12M
        cur = ctxt->input->cur + 1;
10848
860k
    else
10849
860k
        cur = ctxt->input->cur + ctxt->checkIndex;
10850
10851
322M
    while (cur < end) {
10852
321M
        if (state) {
10853
245M
            if (*cur == state)
10854
1.71M
                state = 0;
10855
245M
        } else if (*cur == '\'' || *cur == '"') {
10856
1.72M
            state = *cur;
10857
74.7M
        } else if (*cur == '>') {
10858
1.09M
            ctxt->checkIndex = 0;
10859
1.09M
            ctxt->endCheckState = 0;
10860
1.09M
            return(1);
10861
1.09M
        }
10862
320M
        cur++;
10863
320M
    }
10864
10865
890k
    index = cur - ctxt->input->cur;
10866
890k
    if (index > LONG_MAX) {
10867
0
        ctxt->checkIndex = 0;
10868
0
        ctxt->endCheckState = 0;
10869
0
        return(1);
10870
0
    }
10871
890k
    ctxt->checkIndex = index;
10872
890k
    ctxt->endCheckState = state;
10873
890k
    return(0);
10874
890k
}
10875
10876
/**
10877
 * xmlParseLookupInternalSubset:
10878
 * @ctxt:  an XML parser context
10879
 *
10880
 * Check whether there's enough data in the input buffer to finish parsing
10881
 * the internal subset.
10882
 */
10883
static int
10884
445k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10885
    /*
10886
     * Sorry, but progressive parsing of the internal subset is not
10887
     * supported. We first check that the full content of the internal
10888
     * subset is available and parsing is launched only at that point.
10889
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10890
     * not in a ']]>' sequence which are conditional sections.
10891
     */
10892
445k
    const xmlChar *cur, *start;
10893
445k
    const xmlChar *end = ctxt->input->end;
10894
445k
    int state = ctxt->endCheckState;
10895
445k
    size_t index;
10896
10897
445k
    if (ctxt->checkIndex == 0) {
10898
31.1k
        cur = ctxt->input->cur + 1;
10899
414k
    } else {
10900
414k
        cur = ctxt->input->cur + ctxt->checkIndex;
10901
414k
    }
10902
445k
    start = cur;
10903
10904
249M
    while (cur < end) {
10905
248M
        if (state == '-') {
10906
470k
            if ((*cur == '-') &&
10907
470k
                (cur[1] == '-') &&
10908
470k
                (cur[2] == '>')) {
10909
8.91k
                state = 0;
10910
8.91k
                cur += 3;
10911
8.91k
                start = cur;
10912
8.91k
                continue;
10913
8.91k
            }
10914
470k
        }
10915
248M
        else if (state == ']') {
10916
29.1k
            if (*cur == '>') {
10917
19.0k
                ctxt->checkIndex = 0;
10918
19.0k
                ctxt->endCheckState = 0;
10919
19.0k
                return(1);
10920
19.0k
            }
10921
10.1k
            if (IS_BLANK_CH(*cur)) {
10922
5.66k
                state = ' ';
10923
5.66k
            } else if (*cur != ']') {
10924
2.62k
                state = 0;
10925
2.62k
                start = cur;
10926
2.62k
                continue;
10927
2.62k
            }
10928
10.1k
        }
10929
248M
        else if (state == ' ') {
10930
205k
            if (*cur == '>') {
10931
583
                ctxt->checkIndex = 0;
10932
583
                ctxt->endCheckState = 0;
10933
583
                return(1);
10934
583
            }
10935
204k
            if (!IS_BLANK_CH(*cur)) {
10936
5.07k
                state = 0;
10937
5.07k
                start = cur;
10938
5.07k
                continue;
10939
5.07k
            }
10940
204k
        }
10941
247M
        else if (state != 0) {
10942
236M
            if (*cur == state) {
10943
233k
                state = 0;
10944
233k
                start = cur + 1;
10945
233k
            }
10946
236M
        }
10947
11.6M
        else if (*cur == '<') {
10948
181k
            if ((cur[1] == '!') &&
10949
181k
                (cur[2] == '-') &&
10950
181k
                (cur[3] == '-')) {
10951
9.11k
                state = '-';
10952
9.11k
                cur += 4;
10953
                /* Don't treat <!--> as comment */
10954
9.11k
                start = cur;
10955
9.11k
                continue;
10956
9.11k
            }
10957
181k
        }
10958
11.4M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10959
262k
            state = *cur;
10960
262k
        }
10961
10962
248M
        cur++;
10963
248M
    }
10964
10965
    /*
10966
     * Rescan the three last characters to detect "<!--" and "-->"
10967
     * split across chunks.
10968
     */
10969
426k
    if ((state == 0) || (state == '-')) {
10970
26.1k
        if (cur - start < 3)
10971
2.63k
            cur = start;
10972
23.5k
        else
10973
23.5k
            cur -= 3;
10974
26.1k
    }
10975
426k
    index = cur - ctxt->input->cur;
10976
426k
    if (index > LONG_MAX) {
10977
0
        ctxt->checkIndex = 0;
10978
0
        ctxt->endCheckState = 0;
10979
0
        return(1);
10980
0
    }
10981
426k
    ctxt->checkIndex = index;
10982
426k
    ctxt->endCheckState = state;
10983
426k
    return(0);
10984
426k
}
10985
10986
/**
10987
 * xmlCheckCdataPush:
10988
 * @cur: pointer to the block of characters
10989
 * @len: length of the block in bytes
10990
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
10991
 *
10992
 * Check that the block of characters is okay as SCdata content [20]
10993
 *
10994
 * Returns the number of bytes to pass if okay, a negative index where an
10995
 *         UTF-8 error occurred otherwise
10996
 */
10997
static int
10998
257k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
10999
257k
    int ix;
11000
257k
    unsigned char c;
11001
257k
    int codepoint;
11002
11003
257k
    if ((utf == NULL) || (len <= 0))
11004
2.83k
        return(0);
11005
11006
31.6M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11007
31.4M
        c = utf[ix];
11008
31.4M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11009
6.42M
      if (c >= 0x20)
11010
5.75M
    ix++;
11011
664k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11012
650k
          ix++;
11013
13.9k
      else
11014
13.9k
          return(-ix);
11015
25.0M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11016
6.48M
      if (ix + 2 > len) return(complete ? -ix : ix);
11017
6.46M
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11018
725
          return(-ix);
11019
6.46M
      codepoint = (utf[ix] & 0x1f) << 6;
11020
6.46M
      codepoint |= utf[ix+1] & 0x3f;
11021
6.46M
      if (!xmlIsCharQ(codepoint))
11022
1.98k
          return(-ix);
11023
6.46M
      ix += 2;
11024
18.5M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11025
18.5M
      if (ix + 3 > len) return(complete ? -ix : ix);
11026
18.5M
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11027
18.5M
          ((utf[ix+2] & 0xc0) != 0x80))
11028
2.60k
        return(-ix);
11029
18.5M
      codepoint = (utf[ix] & 0xf) << 12;
11030
18.5M
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11031
18.5M
      codepoint |= utf[ix+2] & 0x3f;
11032
18.5M
      if (!xmlIsCharQ(codepoint))
11033
916
          return(-ix);
11034
18.5M
      ix += 3;
11035
18.5M
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11036
22.3k
      if (ix + 4 > len) return(complete ? -ix : ix);
11037
21.8k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11038
21.8k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11039
21.8k
    ((utf[ix+3] & 0xc0) != 0x80))
11040
5.19k
        return(-ix);
11041
16.6k
      codepoint = (utf[ix] & 0x7) << 18;
11042
16.6k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11043
16.6k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11044
16.6k
      codepoint |= utf[ix+3] & 0x3f;
11045
16.6k
      if (!xmlIsCharQ(codepoint))
11046
1.16k
          return(-ix);
11047
15.4k
      ix += 4;
11048
15.4k
  } else       /* unknown encoding */
11049
6.63k
      return(-ix);
11050
31.4M
      }
11051
197k
      return(ix);
11052
254k
}
11053
11054
/**
11055
 * xmlParseTryOrFinish:
11056
 * @ctxt:  an XML parser context
11057
 * @terminate:  last chunk indicator
11058
 *
11059
 * Try to progress on parsing
11060
 *
11061
 * Returns zero if no parsing was possible
11062
 */
11063
static int
11064
2.45M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11065
2.45M
    int ret = 0;
11066
2.45M
    size_t avail;
11067
2.45M
    xmlChar cur, next;
11068
11069
2.45M
    if (ctxt->input == NULL)
11070
0
        return(0);
11071
11072
2.45M
    if ((ctxt->input != NULL) &&
11073
2.45M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11074
11.9k
        xmlParserShrink(ctxt);
11075
11.9k
    }
11076
11077
8.70M
    while (ctxt->disableSAX == 0) {
11078
8.67M
        avail = ctxt->input->end - ctxt->input->cur;
11079
8.67M
        if (avail < 1)
11080
36.6k
      goto done;
11081
8.63M
        switch (ctxt->instate) {
11082
257k
            case XML_PARSER_EOF:
11083
          /*
11084
     * Document parsing is done !
11085
     */
11086
257k
          goto done;
11087
94.5k
            case XML_PARSER_START:
11088
                /*
11089
                 * Very first chars read from the document flow.
11090
                 */
11091
94.5k
                if ((!terminate) && (avail < 4))
11092
1.38k
                    goto done;
11093
11094
                /*
11095
                 * We need more bytes to detect EBCDIC code pages.
11096
                 * See xmlDetectEBCDIC.
11097
                 */
11098
93.1k
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
11099
93.1k
                    (!terminate) && (avail < 200))
11100
809
                    goto done;
11101
11102
92.3k
                xmlDetectEncoding(ctxt);
11103
92.3k
                ctxt->instate = XML_PARSER_XML_DECL;
11104
92.3k
    break;
11105
11106
359k
            case XML_PARSER_XML_DECL:
11107
359k
    if ((!terminate) && (avail < 2))
11108
32
        goto done;
11109
359k
    cur = ctxt->input->cur[0];
11110
359k
    next = ctxt->input->cur[1];
11111
359k
          if ((cur == '<') && (next == '?')) {
11112
        /* PI or XML decl */
11113
287k
        if ((!terminate) &&
11114
287k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11115
267k
      goto done;
11116
20.0k
        if ((ctxt->input->cur[2] == 'x') &&
11117
20.0k
      (ctxt->input->cur[3] == 'm') &&
11118
20.0k
      (ctxt->input->cur[4] == 'l') &&
11119
20.0k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11120
14.0k
      ret += 5;
11121
14.0k
      xmlParseXMLDecl(ctxt);
11122
14.0k
        } else {
11123
5.99k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11124
5.99k
                        if (ctxt->version == NULL) {
11125
48
                            xmlErrMemory(ctxt);
11126
48
                            break;
11127
48
                        }
11128
5.99k
        }
11129
72.1k
    } else {
11130
72.1k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11131
72.1k
        if (ctxt->version == NULL) {
11132
228
            xmlErrMemory(ctxt);
11133
228
      break;
11134
228
        }
11135
72.1k
    }
11136
91.8k
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
11137
91.8k
                    xmlSAXLocator copy = xmlDefaultSAXLocator;
11138
91.8k
                    ctxt->sax->setDocumentLocator(ctxt->userData, &copy);
11139
91.8k
                }
11140
91.8k
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11141
91.8k
                    (!ctxt->disableSAX))
11142
89.9k
                    ctxt->sax->startDocument(ctxt->userData);
11143
91.8k
                ctxt->instate = XML_PARSER_MISC;
11144
91.8k
    break;
11145
1.89M
            case XML_PARSER_START_TAG: {
11146
1.89M
          const xmlChar *name;
11147
1.89M
    const xmlChar *prefix = NULL;
11148
1.89M
    const xmlChar *URI = NULL;
11149
1.89M
                int line = ctxt->input->line;
11150
1.89M
    int nbNs = 0;
11151
11152
1.89M
    if ((!terminate) && (avail < 2))
11153
523
        goto done;
11154
1.89M
    cur = ctxt->input->cur[0];
11155
1.89M
          if (cur != '<') {
11156
8.88k
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11157
8.88k
                                   "Start tag expected, '<' not found");
11158
8.88k
                    ctxt->instate = XML_PARSER_EOF;
11159
8.88k
                    xmlFinishDocument(ctxt);
11160
8.88k
        goto done;
11161
8.88k
    }
11162
1.88M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11163
569k
                    goto done;
11164
1.31M
    if (ctxt->spaceNr == 0)
11165
0
        spacePush(ctxt, -1);
11166
1.31M
    else if (*ctxt->space == -2)
11167
91.7k
        spacePush(ctxt, -1);
11168
1.22M
    else
11169
1.22M
        spacePush(ctxt, *ctxt->space);
11170
1.31M
#ifdef LIBXML_SAX1_ENABLED
11171
1.31M
    if (ctxt->sax2)
11172
1.15M
#endif /* LIBXML_SAX1_ENABLED */
11173
1.15M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
11174
152k
#ifdef LIBXML_SAX1_ENABLED
11175
152k
    else
11176
152k
        name = xmlParseStartTag(ctxt);
11177
1.31M
#endif /* LIBXML_SAX1_ENABLED */
11178
1.31M
    if (name == NULL) {
11179
11.7k
        spacePop(ctxt);
11180
11.7k
                    ctxt->instate = XML_PARSER_EOF;
11181
11.7k
                    xmlFinishDocument(ctxt);
11182
11.7k
        goto done;
11183
11.7k
    }
11184
1.30M
#ifdef LIBXML_VALID_ENABLED
11185
    /*
11186
     * [ VC: Root Element Type ]
11187
     * The Name in the document type declaration must match
11188
     * the element type of the root element.
11189
     */
11190
1.30M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11191
1.30M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11192
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11193
1.30M
#endif /* LIBXML_VALID_ENABLED */
11194
11195
    /*
11196
     * Check for an Empty Element.
11197
     */
11198
1.30M
    if ((RAW == '/') && (NXT(1) == '>')) {
11199
267k
        SKIP(2);
11200
11201
267k
        if (ctxt->sax2) {
11202
250k
      if ((ctxt->sax != NULL) &&
11203
250k
          (ctxt->sax->endElementNs != NULL) &&
11204
250k
          (!ctxt->disableSAX))
11205
250k
          ctxt->sax->endElementNs(ctxt->userData, name,
11206
250k
                                  prefix, URI);
11207
250k
      if (nbNs > 0)
11208
20.1k
          xmlParserNsPop(ctxt, nbNs);
11209
250k
#ifdef LIBXML_SAX1_ENABLED
11210
250k
        } else {
11211
16.9k
      if ((ctxt->sax != NULL) &&
11212
16.9k
          (ctxt->sax->endElement != NULL) &&
11213
16.9k
          (!ctxt->disableSAX))
11214
16.8k
          ctxt->sax->endElement(ctxt->userData, name);
11215
16.9k
#endif /* LIBXML_SAX1_ENABLED */
11216
16.9k
        }
11217
267k
        spacePop(ctxt);
11218
1.03M
    } else if (RAW == '>') {
11219
789k
        NEXT;
11220
789k
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11221
789k
    } else {
11222
243k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11223
243k
           "Couldn't find end of Start Tag %s\n",
11224
243k
           name);
11225
243k
        nodePop(ctxt);
11226
243k
        spacePop(ctxt);
11227
243k
                    if (nbNs > 0)
11228
43.6k
                        xmlParserNsPop(ctxt, nbNs);
11229
243k
    }
11230
11231
1.30M
                if (ctxt->nameNr == 0)
11232
11.5k
                    ctxt->instate = XML_PARSER_EPILOG;
11233
1.28M
                else
11234
1.28M
                    ctxt->instate = XML_PARSER_CONTENT;
11235
1.30M
                break;
11236
1.31M
      }
11237
4.27M
            case XML_PARSER_CONTENT: {
11238
4.27M
    cur = ctxt->input->cur[0];
11239
11240
4.27M
    if (cur == '<') {
11241
2.08M
                    if ((!terminate) && (avail < 2))
11242
5.75k
                        goto done;
11243
2.07M
        next = ctxt->input->cur[1];
11244
11245
2.07M
                    if (next == '/') {
11246
384k
                        ctxt->instate = XML_PARSER_END_TAG;
11247
384k
                        break;
11248
1.69M
                    } else if (next == '?') {
11249
68.2k
                        if ((!terminate) &&
11250
68.2k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11251
37.0k
                            goto done;
11252
31.1k
                        xmlParsePI(ctxt);
11253
31.1k
                        ctxt->instate = XML_PARSER_CONTENT;
11254
31.1k
                        break;
11255
1.62M
                    } else if (next == '!') {
11256
365k
                        if ((!terminate) && (avail < 3))
11257
932
                            goto done;
11258
364k
                        next = ctxt->input->cur[2];
11259
11260
364k
                        if (next == '-') {
11261
189k
                            if ((!terminate) && (avail < 4))
11262
290
                                goto done;
11263
189k
                            if (ctxt->input->cur[3] == '-') {
11264
189k
                                if ((!terminate) &&
11265
189k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11266
118k
                                    goto done;
11267
70.7k
                                xmlParseComment(ctxt);
11268
70.7k
                                ctxt->instate = XML_PARSER_CONTENT;
11269
70.7k
                                break;
11270
189k
                            }
11271
189k
                        } else if (next == '[') {
11272
171k
                            if ((!terminate) && (avail < 9))
11273
3.86k
                                goto done;
11274
168k
                            if ((ctxt->input->cur[2] == '[') &&
11275
168k
                                (ctxt->input->cur[3] == 'C') &&
11276
168k
                                (ctxt->input->cur[4] == 'D') &&
11277
168k
                                (ctxt->input->cur[5] == 'A') &&
11278
168k
                                (ctxt->input->cur[6] == 'T') &&
11279
168k
                                (ctxt->input->cur[7] == 'A') &&
11280
168k
                                (ctxt->input->cur[8] == '[')) {
11281
167k
                                SKIP(9);
11282
167k
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11283
167k
                                break;
11284
167k
                            }
11285
168k
                        }
11286
364k
                    }
11287
2.19M
    } else if (cur == '&') {
11288
323k
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11289
173k
      goto done;
11290
150k
        xmlParseReference(ctxt);
11291
150k
                    break;
11292
1.86M
    } else {
11293
        /* TODO Avoid the extra copy, handle directly !!! */
11294
        /*
11295
         * Goal of the following test is:
11296
         *  - minimize calls to the SAX 'character' callback
11297
         *    when they are mergeable
11298
         *  - handle an problem for isBlank when we only parse
11299
         *    a sequence of blank chars and the next one is
11300
         *    not available to check against '<' presence.
11301
         *  - tries to homogenize the differences in SAX
11302
         *    callbacks between the push and pull versions
11303
         *    of the parser.
11304
         */
11305
1.86M
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11306
497k
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11307
30.3k
          goto done;
11308
497k
                    }
11309
1.83M
                    ctxt->checkIndex = 0;
11310
1.83M
        xmlParseCharDataInternal(ctxt, !terminate);
11311
1.83M
                    break;
11312
1.86M
    }
11313
11314
1.26M
                ctxt->instate = XML_PARSER_START_TAG;
11315
1.26M
    break;
11316
4.27M
      }
11317
394k
            case XML_PARSER_END_TAG:
11318
394k
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11319
10.7k
        goto done;
11320
383k
    if (ctxt->sax2) {
11321
360k
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11322
360k
        nameNsPop(ctxt);
11323
360k
    }
11324
23.5k
#ifdef LIBXML_SAX1_ENABLED
11325
23.5k
      else
11326
23.5k
        xmlParseEndTag1(ctxt, 0);
11327
383k
#endif /* LIBXML_SAX1_ENABLED */
11328
383k
    if (ctxt->nameNr == 0) {
11329
1.53k
        ctxt->instate = XML_PARSER_EPILOG;
11330
382k
    } else {
11331
382k
        ctxt->instate = XML_PARSER_CONTENT;
11332
382k
    }
11333
383k
    break;
11334
310k
            case XML_PARSER_CDATA_SECTION: {
11335
          /*
11336
     * The Push mode needs to have the SAX callback for
11337
     * cdataBlock merge back contiguous callbacks.
11338
     */
11339
310k
    const xmlChar *term;
11340
11341
310k
                if (terminate) {
11342
                    /*
11343
                     * Don't call xmlParseLookupString. If 'terminate'
11344
                     * is set, checkIndex is invalid.
11345
                     */
11346
3.78k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11347
3.78k
                                           "]]>");
11348
306k
                } else {
11349
306k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11350
306k
                }
11351
11352
310k
    if (term == NULL) {
11353
132k
        int tmp, size;
11354
11355
132k
                    if (terminate) {
11356
                        /* Unfinished CDATA section */
11357
1.54k
                        size = ctxt->input->end - ctxt->input->cur;
11358
131k
                    } else {
11359
131k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11360
52.8k
                            goto done;
11361
78.4k
                        ctxt->checkIndex = 0;
11362
                        /* XXX: Why don't we pass the full buffer? */
11363
78.4k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11364
78.4k
                    }
11365
80.0k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11366
80.0k
                    if (tmp <= 0) {
11367
21.8k
                        tmp = -tmp;
11368
21.8k
                        ctxt->input->cur += tmp;
11369
21.8k
                        goto encoding_error;
11370
21.8k
                    }
11371
58.1k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11372
58.1k
                        if (ctxt->sax->cdataBlock != NULL)
11373
25.3k
                            ctxt->sax->cdataBlock(ctxt->userData,
11374
25.3k
                                                  ctxt->input->cur, tmp);
11375
32.8k
                        else if (ctxt->sax->characters != NULL)
11376
32.8k
                            ctxt->sax->characters(ctxt->userData,
11377
32.8k
                                                  ctxt->input->cur, tmp);
11378
58.1k
                    }
11379
58.1k
                    SKIPL(tmp);
11380
177k
    } else {
11381
177k
                    int base = term - CUR_PTR;
11382
177k
        int tmp;
11383
11384
177k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11385
177k
        if ((tmp < 0) || (tmp != base)) {
11386
12.3k
      tmp = -tmp;
11387
12.3k
      ctxt->input->cur += tmp;
11388
12.3k
      goto encoding_error;
11389
12.3k
        }
11390
165k
        if ((ctxt->sax != NULL) && (base == 0) &&
11391
165k
            (ctxt->sax->cdataBlock != NULL) &&
11392
165k
            (!ctxt->disableSAX)) {
11393
      /*
11394
       * Special case to provide identical behaviour
11395
       * between pull and push parsers on empty CDATA
11396
       * sections
11397
       */
11398
2.11k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11399
2.11k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11400
2.11k
                     "<![CDATA[", 9)))
11401
2.10k
           ctxt->sax->cdataBlock(ctxt->userData,
11402
2.10k
                                 BAD_CAST "", 0);
11403
162k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11404
162k
      (!ctxt->disableSAX)) {
11405
162k
      if (ctxt->sax->cdataBlock != NULL)
11406
90.9k
          ctxt->sax->cdataBlock(ctxt->userData,
11407
90.9k
              ctxt->input->cur, base);
11408
71.2k
      else if (ctxt->sax->characters != NULL)
11409
71.2k
          ctxt->sax->characters(ctxt->userData,
11410
71.2k
              ctxt->input->cur, base);
11411
162k
        }
11412
165k
        SKIPL(base + 3);
11413
165k
        ctxt->instate = XML_PARSER_CONTENT;
11414
165k
    }
11415
223k
    break;
11416
310k
      }
11417
547k
            case XML_PARSER_MISC:
11418
584k
            case XML_PARSER_PROLOG:
11419
589k
            case XML_PARSER_EPILOG:
11420
589k
    SKIP_BLANKS;
11421
589k
                avail = ctxt->input->end - ctxt->input->cur;
11422
589k
    if (avail < 1)
11423
1.65k
        goto done;
11424
588k
    if (ctxt->input->cur[0] == '<') {
11425
576k
                    if ((!terminate) && (avail < 2))
11426
357
                        goto done;
11427
576k
                    next = ctxt->input->cur[1];
11428
576k
                    if (next == '?') {
11429
40.9k
                        if ((!terminate) &&
11430
40.9k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11431
25.4k
                            goto done;
11432
15.4k
                        xmlParsePI(ctxt);
11433
15.4k
                        break;
11434
535k
                    } else if (next == '!') {
11435
477k
                        if ((!terminate) && (avail < 3))
11436
240
                            goto done;
11437
11438
477k
                        if (ctxt->input->cur[2] == '-') {
11439
108k
                            if ((!terminate) && (avail < 4))
11440
255
                                goto done;
11441
108k
                            if (ctxt->input->cur[3] == '-') {
11442
108k
                                if ((!terminate) &&
11443
108k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11444
26.6k
                                    goto done;
11445
82.0k
                                xmlParseComment(ctxt);
11446
82.0k
                                break;
11447
108k
                            }
11448
368k
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11449
368k
                            if ((!terminate) && (avail < 9))
11450
106
                                goto done;
11451
368k
                            if ((ctxt->input->cur[2] == 'D') &&
11452
368k
                                (ctxt->input->cur[3] == 'O') &&
11453
368k
                                (ctxt->input->cur[4] == 'C') &&
11454
368k
                                (ctxt->input->cur[5] == 'T') &&
11455
368k
                                (ctxt->input->cur[6] == 'Y') &&
11456
368k
                                (ctxt->input->cur[7] == 'P') &&
11457
368k
                                (ctxt->input->cur[8] == 'E')) {
11458
367k
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11459
321k
                                    goto done;
11460
46.7k
                                ctxt->inSubset = 1;
11461
46.7k
                                xmlParseDocTypeDecl(ctxt);
11462
46.7k
                                if (RAW == '[') {
11463
37.9k
                                    ctxt->instate = XML_PARSER_DTD;
11464
37.9k
                                } else {
11465
                                    /*
11466
                                     * Create and update the external subset.
11467
                                     */
11468
8.77k
                                    ctxt->inSubset = 2;
11469
8.77k
                                    if ((ctxt->sax != NULL) &&
11470
8.77k
                                        (!ctxt->disableSAX) &&
11471
8.77k
                                        (ctxt->sax->externalSubset != NULL))
11472
8.19k
                                        ctxt->sax->externalSubset(
11473
8.19k
                                                ctxt->userData,
11474
8.19k
                                                ctxt->intSubName,
11475
8.19k
                                                ctxt->extSubSystem,
11476
8.19k
                                                ctxt->extSubURI);
11477
8.77k
                                    ctxt->inSubset = 0;
11478
8.77k
                                    xmlCleanSpecialAttr(ctxt);
11479
8.77k
                                    ctxt->instate = XML_PARSER_PROLOG;
11480
8.77k
                                }
11481
46.7k
                                break;
11482
367k
                            }
11483
368k
                        }
11484
477k
                    }
11485
576k
                }
11486
11487
69.6k
                if (ctxt->instate == XML_PARSER_EPILOG) {
11488
3.50k
                    if (ctxt->errNo == XML_ERR_OK)
11489
144
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11490
3.50k
        ctxt->instate = XML_PARSER_EOF;
11491
3.50k
                    xmlFinishDocument(ctxt);
11492
66.1k
                } else {
11493
66.1k
        ctxt->instate = XML_PARSER_START_TAG;
11494
66.1k
    }
11495
69.6k
    break;
11496
460k
            case XML_PARSER_DTD: {
11497
460k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11498
426k
                    goto done;
11499
34.3k
    xmlParseInternalSubset(ctxt);
11500
34.3k
    ctxt->inSubset = 2;
11501
34.3k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11502
34.3k
        (ctxt->sax->externalSubset != NULL))
11503
26.6k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11504
26.6k
          ctxt->extSubSystem, ctxt->extSubURI);
11505
34.3k
    ctxt->inSubset = 0;
11506
34.3k
    xmlCleanSpecialAttr(ctxt);
11507
34.3k
    ctxt->instate = XML_PARSER_PROLOG;
11508
34.3k
                break;
11509
460k
      }
11510
0
            default:
11511
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11512
0
      "PP: internal error\n");
11513
0
    ctxt->instate = XML_PARSER_EOF;
11514
0
    break;
11515
8.63M
  }
11516
8.63M
    }
11517
2.42M
done:
11518
2.42M
    return(ret);
11519
34.2k
encoding_error:
11520
    /* Only report the first error */
11521
34.2k
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
11522
1.12k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
11523
1.12k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
11524
1.12k
    }
11525
34.2k
    return(0);
11526
2.45M
}
11527
11528
/**
11529
 * xmlParseChunk:
11530
 * @ctxt:  an XML parser context
11531
 * @chunk:  chunk of memory
11532
 * @size:  size of chunk in bytes
11533
 * @terminate:  last chunk indicator
11534
 *
11535
 * Parse a chunk of memory in push parser mode.
11536
 *
11537
 * Assumes that the parser context was initialized with
11538
 * xmlCreatePushParserCtxt.
11539
 *
11540
 * The last chunk, which will often be empty, must be marked with
11541
 * the @terminate flag. With the default SAX callbacks, the resulting
11542
 * document will be available in ctxt->myDoc. This pointer will not
11543
 * be freed by the library.
11544
 *
11545
 * If the document isn't well-formed, ctxt->myDoc is set to NULL.
11546
 * The push parser doesn't support recovery mode.
11547
 *
11548
 * Returns an xmlParserErrors code (0 on success).
11549
 */
11550
int
11551
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11552
2.61M
              int terminate) {
11553
2.61M
    size_t curBase;
11554
2.61M
    size_t maxLength;
11555
2.61M
    int end_in_lf = 0;
11556
11557
2.61M
    if ((ctxt == NULL) || (size < 0))
11558
0
        return(XML_ERR_ARGUMENT);
11559
2.61M
    if (ctxt->disableSAX != 0)
11560
160k
        return(ctxt->errNo);
11561
2.45M
    if (ctxt->input == NULL)
11562
0
        return(XML_ERR_INTERNAL_ERROR);
11563
11564
2.45M
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11565
2.45M
    if (ctxt->instate == XML_PARSER_START)
11566
95.3k
        xmlCtxtInitializeLate(ctxt);
11567
2.45M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11568
2.45M
        (chunk[size - 1] == '\r')) {
11569
8.79k
  end_in_lf = 1;
11570
8.79k
  size--;
11571
8.79k
    }
11572
11573
2.45M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11574
2.45M
        (ctxt->input->buf != NULL))  {
11575
2.39M
  size_t pos = ctxt->input->cur - ctxt->input->base;
11576
2.39M
  int res;
11577
11578
2.39M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11579
2.39M
        xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11580
2.39M
  if (res < 0) {
11581
36
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11582
36
      xmlHaltParser(ctxt);
11583
36
      return(ctxt->errNo);
11584
36
  }
11585
2.39M
    }
11586
11587
2.45M
    xmlParseTryOrFinish(ctxt, terminate);
11588
11589
2.45M
    curBase = ctxt->input->cur - ctxt->input->base;
11590
2.45M
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11591
929k
                XML_MAX_HUGE_LENGTH :
11592
2.45M
                XML_MAX_LOOKUP_LIMIT;
11593
2.45M
    if (curBase > maxLength) {
11594
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11595
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11596
0
        xmlHaltParser(ctxt);
11597
0
    }
11598
11599
2.45M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11600
15.7k
        return(ctxt->errNo);
11601
11602
2.44M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11603
2.44M
        (ctxt->input->buf != NULL)) {
11604
8.76k
  size_t pos = ctxt->input->cur - ctxt->input->base;
11605
8.76k
        int res;
11606
11607
8.76k
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11608
8.76k
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11609
8.76k
        if (res < 0) {
11610
25
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11611
25
            xmlHaltParser(ctxt);
11612
25
            return(ctxt->errNo);
11613
25
        }
11614
8.76k
    }
11615
2.44M
    if (terminate) {
11616
  /*
11617
   * Check for termination
11618
   */
11619
48.6k
        if ((ctxt->instate != XML_PARSER_EOF) &&
11620
48.6k
            (ctxt->instate != XML_PARSER_EPILOG)) {
11621
25.3k
            if (ctxt->nameNr > 0) {
11622
13.3k
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11623
13.3k
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11624
13.3k
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11625
13.3k
                        "Premature end of data in tag %s line %d\n",
11626
13.3k
                        name, line, NULL);
11627
13.3k
            } else if (ctxt->instate == XML_PARSER_START) {
11628
341
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11629
11.6k
            } else {
11630
11.6k
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11631
11.6k
                               "Start tag expected, '<' not found\n");
11632
11.6k
            }
11633
25.3k
        } else if ((ctxt->input->buf != NULL) &&
11634
23.2k
                   (ctxt->input->buf->encoder != NULL) &&
11635
23.2k
                   (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
11636
597
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
11637
597
                           "Truncated multi-byte sequence at EOF\n");
11638
597
        }
11639
48.6k
  if (ctxt->instate != XML_PARSER_EOF) {
11640
31.1k
            ctxt->instate = XML_PARSER_EOF;
11641
31.1k
            xmlFinishDocument(ctxt);
11642
31.1k
  }
11643
48.6k
    }
11644
2.44M
    if (ctxt->wellFormed == 0)
11645
1.22M
  return((xmlParserErrors) ctxt->errNo);
11646
1.21M
    else
11647
1.21M
        return(0);
11648
2.44M
}
11649
11650
/************************************************************************
11651
 *                  *
11652
 *    I/O front end functions to the parser     *
11653
 *                  *
11654
 ************************************************************************/
11655
11656
/**
11657
 * xmlCreatePushParserCtxt:
11658
 * @sax:  a SAX handler (optional)
11659
 * @user_data:  user data for SAX callbacks (optional)
11660
 * @chunk:  initial chunk (optional, deprecated)
11661
 * @size:  size of initial chunk in bytes
11662
 * @filename:  file name or URI (optional)
11663
 *
11664
 * Create a parser context for using the XML parser in push mode.
11665
 * See xmlParseChunk.
11666
 *
11667
 * Passing an initial chunk is useless and deprecated.
11668
 *
11669
 * @filename is used as base URI to fetch external entities and for
11670
 * error reports.
11671
 *
11672
 * Returns the new parser context or NULL in case of error.
11673
 */
11674
11675
xmlParserCtxtPtr
11676
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11677
94.1k
                        const char *chunk, int size, const char *filename) {
11678
94.1k
    xmlParserCtxtPtr ctxt;
11679
94.1k
    xmlParserInputPtr input;
11680
11681
94.1k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11682
94.1k
    if (ctxt == NULL)
11683
591
  return(NULL);
11684
11685
93.5k
    ctxt->options &= ~XML_PARSE_NODICT;
11686
93.5k
    ctxt->dictNames = 1;
11687
11688
93.5k
    input = xmlNewInputPush(ctxt, filename, chunk, size, NULL);
11689
93.5k
    if (input == NULL) {
11690
480
  xmlFreeParserCtxt(ctxt);
11691
480
  return(NULL);
11692
480
    }
11693
93.1k
    inputPush(ctxt, input);
11694
11695
93.1k
    return(ctxt);
11696
93.5k
}
11697
#endif /* LIBXML_PUSH_ENABLED */
11698
11699
/**
11700
 * xmlStopParser:
11701
 * @ctxt:  an XML parser context
11702
 *
11703
 * Blocks further parser processing
11704
 */
11705
void
11706
59.0k
xmlStopParser(xmlParserCtxtPtr ctxt) {
11707
59.0k
    if (ctxt == NULL)
11708
0
        return;
11709
59.0k
    xmlHaltParser(ctxt);
11710
59.0k
    if (ctxt->errNo != XML_ERR_NO_MEMORY)
11711
49.8k
        ctxt->errNo = XML_ERR_USER_STOP;
11712
59.0k
}
11713
11714
/**
11715
 * xmlCreateIOParserCtxt:
11716
 * @sax:  a SAX handler (optional)
11717
 * @user_data:  user data for SAX callbacks (optional)
11718
 * @ioread:  an I/O read function
11719
 * @ioclose:  an I/O close function (optional)
11720
 * @ioctx:  an I/O handler
11721
 * @enc:  the charset encoding if known (deprecated)
11722
 *
11723
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadIO.
11724
 *
11725
 * Create a parser context for using the XML parser with an existing
11726
 * I/O stream
11727
 *
11728
 * Returns the new parser context or NULL
11729
 */
11730
xmlParserCtxtPtr
11731
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11732
                      xmlInputReadCallback ioread,
11733
                      xmlInputCloseCallback ioclose,
11734
0
                      void *ioctx, xmlCharEncoding enc) {
11735
0
    xmlParserCtxtPtr ctxt;
11736
0
    xmlParserInputPtr input;
11737
0
    const char *encoding;
11738
11739
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11740
0
    if (ctxt == NULL)
11741
0
  return(NULL);
11742
11743
0
    encoding = xmlGetCharEncodingName(enc);
11744
0
    input = xmlNewInputIO(ctxt, NULL, ioread, ioclose, ioctx, encoding, 0);
11745
0
    if (input == NULL) {
11746
0
  xmlFreeParserCtxt(ctxt);
11747
0
        return (NULL);
11748
0
    }
11749
0
    inputPush(ctxt, input);
11750
11751
0
    return(ctxt);
11752
0
}
11753
11754
#ifdef LIBXML_VALID_ENABLED
11755
/************************************************************************
11756
 *                  *
11757
 *    Front ends when parsing a DTD       *
11758
 *                  *
11759
 ************************************************************************/
11760
11761
/**
11762
 * xmlIOParseDTD:
11763
 * @sax:  the SAX handler block or NULL
11764
 * @input:  an Input Buffer
11765
 * @enc:  the charset encoding if known
11766
 *
11767
 * Load and parse a DTD
11768
 *
11769
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11770
 * @input will be freed by the function in any case.
11771
 */
11772
11773
xmlDtdPtr
11774
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11775
0
        xmlCharEncoding enc) {
11776
0
    xmlDtdPtr ret = NULL;
11777
0
    xmlParserCtxtPtr ctxt;
11778
0
    xmlParserInputPtr pinput = NULL;
11779
11780
0
    if (input == NULL)
11781
0
  return(NULL);
11782
11783
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11784
0
    if (ctxt == NULL) {
11785
0
        xmlFreeParserInputBuffer(input);
11786
0
  return(NULL);
11787
0
    }
11788
11789
    /*
11790
     * generate a parser input from the I/O handler
11791
     */
11792
11793
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11794
0
    if (pinput == NULL) {
11795
0
        xmlFreeParserInputBuffer(input);
11796
0
  xmlFreeParserCtxt(ctxt);
11797
0
  return(NULL);
11798
0
    }
11799
11800
    /*
11801
     * plug some encoding conversion routines here.
11802
     */
11803
0
    if (xmlPushInput(ctxt, pinput) < 0) {
11804
0
  xmlFreeParserCtxt(ctxt);
11805
0
  return(NULL);
11806
0
    }
11807
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11808
0
        xmlSwitchEncoding(ctxt, enc);
11809
0
    }
11810
11811
    /*
11812
     * let's parse that entity knowing it's an external subset.
11813
     */
11814
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11815
0
    if (ctxt->myDoc == NULL) {
11816
0
  xmlErrMemory(ctxt);
11817
0
  return(NULL);
11818
0
    }
11819
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11820
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11821
0
                                 BAD_CAST "none", BAD_CAST "none");
11822
11823
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11824
11825
0
    if (ctxt->myDoc != NULL) {
11826
0
  if (ctxt->wellFormed) {
11827
0
      ret = ctxt->myDoc->extSubset;
11828
0
      ctxt->myDoc->extSubset = NULL;
11829
0
      if (ret != NULL) {
11830
0
    xmlNodePtr tmp;
11831
11832
0
    ret->doc = NULL;
11833
0
    tmp = ret->children;
11834
0
    while (tmp != NULL) {
11835
0
        tmp->doc = NULL;
11836
0
        tmp = tmp->next;
11837
0
    }
11838
0
      }
11839
0
  } else {
11840
0
      ret = NULL;
11841
0
  }
11842
0
        xmlFreeDoc(ctxt->myDoc);
11843
0
        ctxt->myDoc = NULL;
11844
0
    }
11845
0
    xmlFreeParserCtxt(ctxt);
11846
11847
0
    return(ret);
11848
0
}
11849
11850
/**
11851
 * xmlSAXParseDTD:
11852
 * @sax:  the SAX handler block
11853
 * @ExternalID:  a NAME* containing the External ID of the DTD
11854
 * @SystemID:  a NAME* containing the URL to the DTD
11855
 *
11856
 * DEPRECATED: Don't use.
11857
 *
11858
 * Load and parse an external subset.
11859
 *
11860
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11861
 */
11862
11863
xmlDtdPtr
11864
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11865
524
                          const xmlChar *SystemID) {
11866
524
    xmlDtdPtr ret = NULL;
11867
524
    xmlParserCtxtPtr ctxt;
11868
524
    xmlParserInputPtr input = NULL;
11869
524
    xmlChar* systemIdCanonic;
11870
11871
524
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11872
11873
524
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11874
524
    if (ctxt == NULL) {
11875
20
  return(NULL);
11876
20
    }
11877
11878
    /*
11879
     * Canonicalise the system ID
11880
     */
11881
504
    systemIdCanonic = xmlCanonicPath(SystemID);
11882
504
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11883
3
  xmlFreeParserCtxt(ctxt);
11884
3
  return(NULL);
11885
3
    }
11886
11887
    /*
11888
     * Ask the Entity resolver to load the damn thing
11889
     */
11890
11891
501
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11892
501
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11893
501
                                   systemIdCanonic);
11894
501
    if (input == NULL) {
11895
366
  xmlFreeParserCtxt(ctxt);
11896
366
  if (systemIdCanonic != NULL)
11897
364
      xmlFree(systemIdCanonic);
11898
366
  return(NULL);
11899
366
    }
11900
11901
    /*
11902
     * plug some encoding conversion routines here.
11903
     */
11904
135
    if (xmlPushInput(ctxt, input) < 0) {
11905
0
  xmlFreeParserCtxt(ctxt);
11906
0
  if (systemIdCanonic != NULL)
11907
0
      xmlFree(systemIdCanonic);
11908
0
  return(NULL);
11909
0
    }
11910
11911
135
    xmlDetectEncoding(ctxt);
11912
11913
135
    if (input->filename == NULL)
11914
0
  input->filename = (char *) systemIdCanonic;
11915
135
    else
11916
135
  xmlFree(systemIdCanonic);
11917
11918
    /*
11919
     * let's parse that entity knowing it's an external subset.
11920
     */
11921
135
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11922
135
    if (ctxt->myDoc == NULL) {
11923
3
  xmlErrMemory(ctxt);
11924
3
  xmlFreeParserCtxt(ctxt);
11925
3
  return(NULL);
11926
3
    }
11927
132
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11928
132
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11929
132
                                 ExternalID, SystemID);
11930
132
    if (ctxt->myDoc->extSubset == NULL) {
11931
3
        xmlFreeDoc(ctxt->myDoc);
11932
3
        xmlFreeParserCtxt(ctxt);
11933
3
        return(NULL);
11934
3
    }
11935
129
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11936
11937
129
    if (ctxt->myDoc != NULL) {
11938
129
  if (ctxt->wellFormed) {
11939
19
      ret = ctxt->myDoc->extSubset;
11940
19
      ctxt->myDoc->extSubset = NULL;
11941
19
      if (ret != NULL) {
11942
19
    xmlNodePtr tmp;
11943
11944
19
    ret->doc = NULL;
11945
19
    tmp = ret->children;
11946
324
    while (tmp != NULL) {
11947
305
        tmp->doc = NULL;
11948
305
        tmp = tmp->next;
11949
305
    }
11950
19
      }
11951
110
  } else {
11952
110
      ret = NULL;
11953
110
  }
11954
129
        xmlFreeDoc(ctxt->myDoc);
11955
129
        ctxt->myDoc = NULL;
11956
129
    }
11957
129
    xmlFreeParserCtxt(ctxt);
11958
11959
129
    return(ret);
11960
132
}
11961
11962
11963
/**
11964
 * xmlParseDTD:
11965
 * @ExternalID:  a NAME* containing the External ID of the DTD
11966
 * @SystemID:  a NAME* containing the URL to the DTD
11967
 *
11968
 * Load and parse an external subset.
11969
 *
11970
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11971
 */
11972
11973
xmlDtdPtr
11974
524
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11975
524
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11976
524
}
11977
#endif /* LIBXML_VALID_ENABLED */
11978
11979
/************************************************************************
11980
 *                  *
11981
 *    Front ends when parsing an Entity     *
11982
 *                  *
11983
 ************************************************************************/
11984
11985
static xmlNodePtr
11986
xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11987
24.5k
                    int hasTextDecl, int buildTree) {
11988
24.5k
    xmlNodePtr root = NULL;
11989
24.5k
    xmlNodePtr list = NULL;
11990
24.5k
    xmlChar *rootName = BAD_CAST "#root";
11991
24.5k
    int result;
11992
11993
24.5k
    if (buildTree) {
11994
24.5k
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11995
24.5k
        if (root == NULL) {
11996
59
            xmlErrMemory(ctxt);
11997
59
            goto error;
11998
59
        }
11999
24.5k
    }
12000
12001
24.4k
    if (xmlPushInput(ctxt, input) < 0)
12002
18
        goto error;
12003
12004
24.4k
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
12005
24.4k
    spacePush(ctxt, -1);
12006
12007
24.4k
    if (buildTree)
12008
24.4k
        nodePush(ctxt, root);
12009
12010
24.4k
    if (hasTextDecl) {
12011
7.55k
        xmlDetectEncoding(ctxt);
12012
12013
        /*
12014
         * Parse a possible text declaration first
12015
         */
12016
7.55k
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
12017
7.55k
            (IS_BLANK_CH(NXT(5)))) {
12018
674
            xmlParseTextDecl(ctxt);
12019
            /*
12020
             * An XML-1.0 document can't reference an entity not XML-1.0
12021
             */
12022
674
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
12023
674
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12024
18
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12025
18
                               "Version mismatch between document and "
12026
18
                               "entity\n");
12027
18
            }
12028
674
        }
12029
7.55k
    }
12030
12031
24.4k
    xmlParseContentInternal(ctxt);
12032
12033
24.4k
    if (ctxt->input->cur < ctxt->input->end)
12034
2.25k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12035
12036
24.4k
    if ((ctxt->wellFormed) ||
12037
24.4k
        ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
12038
20.7k
        if (root != NULL) {
12039
20.7k
            xmlNodePtr cur;
12040
12041
            /*
12042
             * Return the newly created nodeset after unlinking it from
12043
             * its pseudo parent.
12044
             */
12045
20.7k
            cur = root->children;
12046
20.7k
            list = cur;
12047
247k
            while (cur != NULL) {
12048
226k
                cur->parent = NULL;
12049
226k
                cur = cur->next;
12050
226k
            }
12051
20.7k
            root->children = NULL;
12052
20.7k
            root->last = NULL;
12053
20.7k
        }
12054
20.7k
    }
12055
12056
    /*
12057
     * Read the rest of the stream in case of errors. We want
12058
     * to account for the whole entity size.
12059
     */
12060
25.2k
    do {
12061
25.2k
        ctxt->input->cur = ctxt->input->end;
12062
25.2k
        xmlParserShrink(ctxt);
12063
25.2k
        result = xmlParserGrow(ctxt);
12064
25.2k
    } while (result > 0);
12065
12066
24.4k
    if (buildTree)
12067
24.4k
        nodePop(ctxt);
12068
12069
24.4k
    namePop(ctxt);
12070
24.4k
    spacePop(ctxt);
12071
12072
    /* xmlPopInput would free the stream */
12073
24.4k
    inputPop(ctxt);
12074
12075
24.5k
error:
12076
24.5k
    xmlFreeNode(root);
12077
12078
24.5k
    return(list);
12079
24.4k
}
12080
12081
static void
12082
27.5k
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
12083
27.5k
    xmlParserInputPtr input;
12084
27.5k
    xmlNodePtr list;
12085
27.5k
    unsigned long consumed;
12086
27.5k
    int isExternal;
12087
27.5k
    int buildTree;
12088
27.5k
    int oldMinNsIndex;
12089
27.5k
    int oldNodelen, oldNodemem;
12090
12091
27.5k
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
12092
27.5k
    buildTree = (ctxt->node != NULL);
12093
12094
    /*
12095
     * Recursion check
12096
     */
12097
27.5k
    if (ent->flags & XML_ENT_EXPANDING) {
12098
70
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
12099
70
        xmlHaltParser(ctxt);
12100
70
        goto error;
12101
70
    }
12102
12103
    /*
12104
     * Load entity
12105
     */
12106
27.4k
    input = xmlNewEntityInputStream(ctxt, ent);
12107
27.4k
    if (input == NULL)
12108
2.95k
        goto error;
12109
12110
    /*
12111
     * When building a tree, we need to limit the scope of namespace
12112
     * declarations, so that entities don't reference xmlNs structs
12113
     * from the parent of a reference.
12114
     */
12115
24.5k
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
12116
24.5k
    if (buildTree)
12117
24.5k
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
12118
12119
24.5k
    oldNodelen = ctxt->nodelen;
12120
24.5k
    oldNodemem = ctxt->nodemem;
12121
24.5k
    ctxt->nodelen = 0;
12122
24.5k
    ctxt->nodemem = 0;
12123
12124
    /*
12125
     * Parse content
12126
     *
12127
     * This initiates a recursive call chain:
12128
     *
12129
     * - xmlCtxtParseContent
12130
     * - xmlParseContentInternal
12131
     * - xmlParseReference
12132
     * - xmlCtxtParseEntity
12133
     *
12134
     * The nesting depth is limited by the maximum number of inputs,
12135
     * see xmlPushInput.
12136
     *
12137
     * It's possible to make this non-recursive (minNsIndex must be
12138
     * stored in the input struct) at the expense of code readability.
12139
     */
12140
12141
24.5k
    ent->flags |= XML_ENT_EXPANDING;
12142
12143
24.5k
    list = xmlCtxtParseContent(ctxt, input, isExternal, buildTree);
12144
12145
24.5k
    ent->flags &= ~XML_ENT_EXPANDING;
12146
12147
24.5k
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
12148
24.5k
    ctxt->nodelen = oldNodelen;
12149
24.5k
    ctxt->nodemem = oldNodemem;
12150
12151
    /*
12152
     * Entity size accounting
12153
     */
12154
24.5k
    consumed = input->consumed;
12155
24.5k
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
12156
12157
24.5k
    if ((ent->flags & XML_ENT_CHECKED) == 0)
12158
13.7k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
12159
12160
24.5k
    if ((ent->flags & XML_ENT_PARSED) == 0) {
12161
13.7k
        if (isExternal)
12162
4.90k
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
12163
12164
13.7k
        ent->children = list;
12165
12166
240k
        while (list != NULL) {
12167
226k
            list->parent = (xmlNodePtr) ent;
12168
226k
            if (list->next == NULL)
12169
8.11k
                ent->last = list;
12170
226k
            list = list->next;
12171
226k
        }
12172
13.7k
    } else {
12173
10.7k
        xmlFreeNodeList(list);
12174
10.7k
    }
12175
12176
24.5k
    xmlFreeInputStream(input);
12177
12178
27.5k
error:
12179
27.5k
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
12180
27.5k
}
12181
12182
/**
12183
 * xmlParseCtxtExternalEntity:
12184
 * @ctx:  the existing parsing context
12185
 * @URL:  the URL for the entity to load
12186
 * @ID:  the System ID for the entity to load
12187
 * @lst:  the return value for the set of parsed nodes
12188
 *
12189
 * Parse an external general entity within an existing parsing context
12190
 * An external general parsed entity is well-formed if it matches the
12191
 * production labeled extParsedEnt.
12192
 *
12193
 * [78] extParsedEnt ::= TextDecl? content
12194
 *
12195
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12196
 *    the parser error code otherwise
12197
 */
12198
12199
int
12200
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt, const xmlChar *URL,
12201
0
                           const xmlChar *ID, xmlNodePtr *listOut) {
12202
0
    xmlParserInputPtr input;
12203
0
    xmlNodePtr list;
12204
12205
0
    if (listOut != NULL)
12206
0
        *listOut = NULL;
12207
12208
0
    if (ctxt == NULL)
12209
0
        return(XML_ERR_ARGUMENT);
12210
12211
0
    input = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12212
0
    if (input == NULL)
12213
0
        return(ctxt->errNo);
12214
12215
0
    xmlCtxtInitializeLate(ctxt);
12216
12217
0
    list = xmlCtxtParseContent(ctxt, input, /* hasTextDecl */ 1, 1);
12218
0
    if (*listOut != NULL)
12219
0
        *listOut = list;
12220
0
    else
12221
0
        xmlFreeNodeList(list);
12222
12223
0
    xmlFreeInputStream(input);
12224
0
    return(ctxt->errNo);
12225
0
}
12226
12227
#ifdef LIBXML_SAX1_ENABLED
12228
/**
12229
 * xmlParseExternalEntity:
12230
 * @doc:  the document the chunk pertains to
12231
 * @sax:  the SAX handler block (possibly NULL)
12232
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12233
 * @depth:  Used for loop detection, use 0
12234
 * @URL:  the URL for the entity to load
12235
 * @ID:  the System ID for the entity to load
12236
 * @lst:  the return value for the set of parsed nodes
12237
 *
12238
 * Parse an external general entity
12239
 * An external general parsed entity is well-formed if it matches the
12240
 * production labeled extParsedEnt.
12241
 *
12242
 * [78] extParsedEnt ::= TextDecl? content
12243
 *
12244
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12245
 *    the parser error code otherwise
12246
 */
12247
12248
int
12249
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12250
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
12251
0
    xmlParserCtxtPtr ctxt;
12252
0
    int ret;
12253
12254
0
    if (list != NULL)
12255
0
        *list = NULL;
12256
12257
0
    if (doc == NULL)
12258
0
        return(XML_ERR_ARGUMENT);
12259
12260
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12261
0
    if (ctxt == NULL)
12262
0
        return(XML_ERR_NO_MEMORY);
12263
12264
0
    ctxt->depth = depth;
12265
0
    ctxt->myDoc = doc;
12266
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
12267
12268
0
    xmlFreeParserCtxt(ctxt);
12269
0
    return(ret);
12270
0
}
12271
12272
/**
12273
 * xmlParseBalancedChunkMemory:
12274
 * @doc:  the document the chunk pertains to (must not be NULL)
12275
 * @sax:  the SAX handler block (possibly NULL)
12276
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12277
 * @depth:  Used for loop detection, use 0
12278
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12279
 * @lst:  the return value for the set of parsed nodes
12280
 *
12281
 * Parse a well-balanced chunk of an XML document
12282
 * called by the parser
12283
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12284
 * the content production in the XML grammar:
12285
 *
12286
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12287
 *
12288
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12289
 *    the parser error code otherwise
12290
 */
12291
12292
int
12293
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12294
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12295
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12296
0
                                                depth, string, lst, 0 );
12297
0
}
12298
#endif /* LIBXML_SAX1_ENABLED */
12299
12300
/**
12301
 * xmlParseInNodeContext:
12302
 * @node:  the context node
12303
 * @data:  the input string
12304
 * @datalen:  the input string length in bytes
12305
 * @options:  a combination of xmlParserOption
12306
 * @lst:  the return value for the set of parsed nodes
12307
 *
12308
 * Parse a well-balanced chunk of an XML document
12309
 * within the context (DTD, namespaces, etc ...) of the given node.
12310
 *
12311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12312
 * the content production in the XML grammar:
12313
 *
12314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12315
 *
12316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12317
 * error code otherwise
12318
 */
12319
xmlParserErrors
12320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12321
0
                      int options, xmlNodePtr *lst) {
12322
0
    xmlParserCtxtPtr ctxt;
12323
0
    xmlDocPtr doc = NULL;
12324
0
    xmlNodePtr fake, cur;
12325
0
    int nsnr = 0;
12326
12327
0
    xmlParserErrors ret = XML_ERR_OK;
12328
12329
    /*
12330
     * check all input parameters, grab the document
12331
     */
12332
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12333
0
        return(XML_ERR_ARGUMENT);
12334
0
    switch (node->type) {
12335
0
        case XML_ELEMENT_NODE:
12336
0
        case XML_ATTRIBUTE_NODE:
12337
0
        case XML_TEXT_NODE:
12338
0
        case XML_CDATA_SECTION_NODE:
12339
0
        case XML_ENTITY_REF_NODE:
12340
0
        case XML_PI_NODE:
12341
0
        case XML_COMMENT_NODE:
12342
0
        case XML_DOCUMENT_NODE:
12343
0
        case XML_HTML_DOCUMENT_NODE:
12344
0
      break;
12345
0
  default:
12346
0
      return(XML_ERR_INTERNAL_ERROR);
12347
12348
0
    }
12349
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12350
0
           (node->type != XML_DOCUMENT_NODE) &&
12351
0
     (node->type != XML_HTML_DOCUMENT_NODE))
12352
0
  node = node->parent;
12353
0
    if (node == NULL)
12354
0
  return(XML_ERR_INTERNAL_ERROR);
12355
0
    if (node->type == XML_ELEMENT_NODE)
12356
0
  doc = node->doc;
12357
0
    else
12358
0
        doc = (xmlDocPtr) node;
12359
0
    if (doc == NULL)
12360
0
  return(XML_ERR_INTERNAL_ERROR);
12361
12362
    /*
12363
     * allocate a context and set-up everything not related to the
12364
     * node position in the tree
12365
     */
12366
0
    if (doc->type == XML_DOCUMENT_NODE)
12367
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12368
0
#ifdef LIBXML_HTML_ENABLED
12369
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
12370
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12371
        /*
12372
         * When parsing in context, it makes no sense to add implied
12373
         * elements like html/body/etc...
12374
         */
12375
0
        options |= HTML_PARSE_NOIMPLIED;
12376
0
    }
12377
0
#endif
12378
0
    else
12379
0
        return(XML_ERR_INTERNAL_ERROR);
12380
12381
0
    if (ctxt == NULL)
12382
0
        return(XML_ERR_NO_MEMORY);
12383
12384
    /*
12385
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12386
     * We need a dictionary for xmlCtxtInitializeLate, so if there's no doc dict
12387
     * we must wait until the last moment to free the original one.
12388
     */
12389
0
    if (doc->dict != NULL) {
12390
0
        if (ctxt->dict != NULL)
12391
0
      xmlDictFree(ctxt->dict);
12392
0
  ctxt->dict = doc->dict;
12393
0
    } else {
12394
0
        options |= XML_PARSE_NODICT;
12395
0
        ctxt->dictNames = 0;
12396
0
    }
12397
12398
0
    if (doc->encoding != NULL)
12399
0
        xmlSwitchEncodingName(ctxt, (const char *) doc->encoding);
12400
12401
0
    xmlCtxtUseOptions(ctxt, options);
12402
0
    xmlCtxtInitializeLate(ctxt);
12403
0
    ctxt->myDoc = doc;
12404
    /* parsing in context, i.e. as within existing content */
12405
0
    ctxt->input_id = 2;
12406
12407
    /*
12408
     * TODO: Use xmlCtxtParseContent
12409
     */
12410
12411
0
    fake = xmlNewDocComment(node->doc, NULL);
12412
0
    if (fake == NULL) {
12413
0
        xmlFreeParserCtxt(ctxt);
12414
0
  return(XML_ERR_NO_MEMORY);
12415
0
    }
12416
0
    xmlAddChild(node, fake);
12417
12418
0
    if (node->type == XML_ELEMENT_NODE)
12419
0
  nodePush(ctxt, node);
12420
12421
0
    if ((ctxt->html == 0) && (node->type == XML_ELEMENT_NODE)) {
12422
  /*
12423
   * initialize the SAX2 namespaces stack
12424
   */
12425
0
  cur = node;
12426
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12427
0
      xmlNsPtr ns = cur->nsDef;
12428
0
            xmlHashedString hprefix, huri;
12429
12430
0
      while (ns != NULL) {
12431
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12432
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12433
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12434
0
                    nsnr++;
12435
0
    ns = ns->next;
12436
0
      }
12437
0
      cur = cur->parent;
12438
0
  }
12439
0
    }
12440
12441
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12442
  /*
12443
   * ID/IDREF registration will be done in xmlValidateElement below
12444
   */
12445
0
  ctxt->loadsubset |= XML_SKIP_IDS;
12446
0
    }
12447
12448
0
#ifdef LIBXML_HTML_ENABLED
12449
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
12450
0
        __htmlParseContent(ctxt);
12451
0
    else
12452
0
#endif
12453
0
  xmlParseContentInternal(ctxt);
12454
12455
0
    if (ctxt->input->cur < ctxt->input->end)
12456
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12457
12458
0
    xmlParserNsPop(ctxt, nsnr);
12459
12460
0
    if ((ctxt->wellFormed) ||
12461
0
        ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
12462
0
        ret = XML_ERR_OK;
12463
0
    } else {
12464
0
  ret = (xmlParserErrors) ctxt->errNo;
12465
0
    }
12466
12467
    /*
12468
     * Return the newly created nodeset after unlinking it from
12469
     * the pseudo sibling.
12470
     */
12471
12472
0
    cur = fake->next;
12473
0
    fake->next = NULL;
12474
0
    node->last = fake;
12475
12476
0
    if (cur != NULL) {
12477
0
  cur->prev = NULL;
12478
0
    }
12479
12480
0
    *lst = cur;
12481
12482
0
    while (cur != NULL) {
12483
0
  cur->parent = NULL;
12484
0
  cur = cur->next;
12485
0
    }
12486
12487
0
    xmlUnlinkNode(fake);
12488
0
    xmlFreeNode(fake);
12489
12490
12491
0
    if (ret != XML_ERR_OK) {
12492
0
        xmlFreeNodeList(*lst);
12493
0
  *lst = NULL;
12494
0
    }
12495
12496
0
    if (doc->dict != NULL)
12497
0
        ctxt->dict = NULL;
12498
0
    xmlFreeParserCtxt(ctxt);
12499
12500
0
    return(ret);
12501
0
}
12502
12503
#ifdef LIBXML_SAX1_ENABLED
12504
/**
12505
 * xmlParseBalancedChunkMemoryRecover:
12506
 * @doc:  the document the chunk pertains to (must not be NULL)
12507
 * @sax:  the SAX handler block (possibly NULL)
12508
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12509
 * @depth:  Used for loop detection, use 0
12510
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12511
 * @list:  the return value for the set of parsed nodes
12512
 * @recover: return nodes even if the data is broken (use 0)
12513
 *
12514
 * Parse a well-balanced chunk of an XML document
12515
 *
12516
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12517
 * the content production in the XML grammar:
12518
 *
12519
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12520
 *
12521
 * Returns 0 if the chunk is well balanced, or thehe parser error code
12522
 * otherwise.
12523
 *
12524
 * In case recover is set to 1, the nodelist will not be empty even if
12525
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12526
 * some extent.
12527
 */
12528
int
12529
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12530
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *listOut,
12531
0
     int recover) {
12532
0
    xmlParserCtxtPtr ctxt;
12533
0
    xmlParserInputPtr input;
12534
0
    xmlNodePtr list;
12535
0
    int ret;
12536
12537
0
    if (listOut != NULL)
12538
0
        *listOut = NULL;
12539
12540
0
    if (string == NULL)
12541
0
        return(XML_ERR_ARGUMENT);
12542
12543
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12544
0
    if (ctxt == NULL)
12545
0
        return(XML_ERR_NO_MEMORY);
12546
12547
0
    xmlCtxtInitializeLate(ctxt);
12548
12549
0
    ctxt->depth = depth;
12550
0
    ctxt->myDoc = doc;
12551
0
    if (recover) {
12552
0
        ctxt->options |= XML_PARSE_RECOVER;
12553
0
        ctxt->recovery = 1;
12554
0
    }
12555
12556
0
    input = xmlNewStringInputStream(ctxt, string);
12557
0
    if (input == NULL)
12558
0
        return(ctxt->errNo);
12559
12560
0
    list = xmlCtxtParseContent(ctxt, input, /* hasTextDecl */ 0, 1);
12561
0
    if (listOut != NULL)
12562
0
        *listOut = list;
12563
0
    else
12564
0
        xmlFreeNodeList(list);
12565
12566
0
    ret = ctxt->errNo;
12567
12568
0
    xmlFreeInputStream(input);
12569
0
    xmlFreeParserCtxt(ctxt);
12570
0
    return(ret);
12571
0
}
12572
12573
/**
12574
 * xmlSAXParseEntity:
12575
 * @sax:  the SAX handler block
12576
 * @filename:  the filename
12577
 *
12578
 * DEPRECATED: Don't use.
12579
 *
12580
 * parse an XML external entity out of context and build a tree.
12581
 * It use the given SAX function block to handle the parsing callback.
12582
 * If sax is NULL, fallback to the default DOM tree building routines.
12583
 *
12584
 * [78] extParsedEnt ::= TextDecl? content
12585
 *
12586
 * This correspond to a "Well Balanced" chunk
12587
 *
12588
 * Returns the resulting document tree
12589
 */
12590
12591
xmlDocPtr
12592
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12593
0
    xmlDocPtr ret;
12594
0
    xmlParserCtxtPtr ctxt;
12595
12596
0
    ctxt = xmlCreateFileParserCtxt(filename);
12597
0
    if (ctxt == NULL) {
12598
0
  return(NULL);
12599
0
    }
12600
0
    if (sax != NULL) {
12601
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12602
0
            *ctxt->sax = *sax;
12603
0
        } else {
12604
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12605
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12606
0
        }
12607
0
        ctxt->userData = NULL;
12608
0
    }
12609
12610
0
    xmlParseExtParsedEnt(ctxt);
12611
12612
0
    if (ctxt->wellFormed) {
12613
0
  ret = ctxt->myDoc;
12614
0
    } else {
12615
0
        ret = NULL;
12616
0
        xmlFreeDoc(ctxt->myDoc);
12617
0
    }
12618
12619
0
    xmlFreeParserCtxt(ctxt);
12620
12621
0
    return(ret);
12622
0
}
12623
12624
/**
12625
 * xmlParseEntity:
12626
 * @filename:  the filename
12627
 *
12628
 * parse an XML external entity out of context and build a tree.
12629
 *
12630
 * [78] extParsedEnt ::= TextDecl? content
12631
 *
12632
 * This correspond to a "Well Balanced" chunk
12633
 *
12634
 * Returns the resulting document tree
12635
 */
12636
12637
xmlDocPtr
12638
0
xmlParseEntity(const char *filename) {
12639
0
    return(xmlSAXParseEntity(NULL, filename));
12640
0
}
12641
#endif /* LIBXML_SAX1_ENABLED */
12642
12643
/**
12644
 * xmlCreateEntityParserCtxt:
12645
 * @URL:  the entity URL
12646
 * @ID:  the entity PUBLIC ID
12647
 * @base:  a possible base for the target URI
12648
 *
12649
 * DEPRECATED: Use xmlNewInputURL.
12650
 *
12651
 * Create a parser context for an external entity
12652
 * Automatic support for ZLIB/Compress compressed document is provided
12653
 * by default if found at compile-time.
12654
 *
12655
 * Returns the new parser context or NULL
12656
 */
12657
xmlParserCtxtPtr
12658
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12659
0
                    const xmlChar *base) {
12660
0
    xmlParserCtxtPtr ctxt;
12661
0
    xmlParserInputPtr input;
12662
0
    xmlChar *uri = NULL;
12663
12664
0
    ctxt = xmlNewParserCtxt();
12665
0
    if (ctxt == NULL)
12666
0
  return(NULL);
12667
12668
0
    if (base != NULL) {
12669
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12670
0
            goto error;
12671
0
        if (uri != NULL)
12672
0
            URL = uri;
12673
0
    }
12674
12675
0
    input = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12676
0
    if (input == NULL)
12677
0
        goto error;
12678
12679
0
    if (inputPush(ctxt, input) < 0)
12680
0
        goto error;
12681
12682
0
    xmlFree(uri);
12683
0
    return(ctxt);
12684
12685
0
error:
12686
0
    xmlFree(uri);
12687
0
    xmlFreeParserCtxt(ctxt);
12688
0
    return(NULL);
12689
0
}
12690
12691
/************************************************************************
12692
 *                  *
12693
 *    Front ends when parsing from a file     *
12694
 *                  *
12695
 ************************************************************************/
12696
12697
/**
12698
 * xmlCreateURLParserCtxt:
12699
 * @filename:  the filename or URL
12700
 * @options:  a combination of xmlParserOption
12701
 *
12702
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12703
 *
12704
 * Create a parser context for a file or URL content.
12705
 * Automatic support for ZLIB/Compress compressed document is provided
12706
 * by default if found at compile-time and for file accesses
12707
 *
12708
 * Returns the new parser context or NULL
12709
 */
12710
xmlParserCtxtPtr
12711
xmlCreateURLParserCtxt(const char *filename, int options)
12712
0
{
12713
0
    xmlParserCtxtPtr ctxt;
12714
0
    xmlParserInputPtr input;
12715
12716
0
    ctxt = xmlNewParserCtxt();
12717
0
    if (ctxt == NULL)
12718
0
  return(NULL);
12719
12720
0
    xmlCtxtUseOptions(ctxt, options);
12721
0
    ctxt->linenumbers = 1;
12722
12723
0
    input = xmlLoadExternalEntity(filename, NULL, ctxt);
12724
0
    if (input == NULL) {
12725
0
  xmlFreeParserCtxt(ctxt);
12726
0
  return(NULL);
12727
0
    }
12728
0
    inputPush(ctxt, input);
12729
12730
0
    return(ctxt);
12731
0
}
12732
12733
/**
12734
 * xmlCreateFileParserCtxt:
12735
 * @filename:  the filename
12736
 *
12737
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12738
 *
12739
 * Create a parser context for a file content.
12740
 * Automatic support for ZLIB/Compress compressed document is provided
12741
 * by default if found at compile-time.
12742
 *
12743
 * Returns the new parser context or NULL
12744
 */
12745
xmlParserCtxtPtr
12746
xmlCreateFileParserCtxt(const char *filename)
12747
0
{
12748
0
    return(xmlCreateURLParserCtxt(filename, 0));
12749
0
}
12750
12751
#ifdef LIBXML_SAX1_ENABLED
12752
/**
12753
 * xmlSAXParseFileWithData:
12754
 * @sax:  the SAX handler block
12755
 * @filename:  the filename
12756
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12757
 *             documents
12758
 * @data:  the userdata
12759
 *
12760
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12761
 *
12762
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12763
 * compressed document is provided by default if found at compile-time.
12764
 * It use the given SAX function block to handle the parsing callback.
12765
 * If sax is NULL, fallback to the default DOM tree building routines.
12766
 *
12767
 * User data (void *) is stored within the parser context in the
12768
 * context's _private member, so it is available nearly everywhere in libxml
12769
 *
12770
 * Returns the resulting document tree
12771
 */
12772
12773
xmlDocPtr
12774
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12775
0
                        int recovery, void *data) {
12776
0
    xmlDocPtr ret;
12777
0
    xmlParserCtxtPtr ctxt;
12778
0
    xmlParserInputPtr input;
12779
12780
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12781
0
    if (ctxt == NULL)
12782
0
  return(NULL);
12783
12784
0
    if (data != NULL)
12785
0
  ctxt->_private = data;
12786
12787
0
    if (recovery) {
12788
0
        ctxt->options |= XML_PARSE_RECOVER;
12789
0
        ctxt->recovery = 1;
12790
0
    }
12791
12792
0
    input = xmlNewInputURL(ctxt, filename, NULL, NULL, 0);
12793
12794
0
    ret = xmlCtxtParseDocument(ctxt, input);
12795
12796
0
    xmlFreeParserCtxt(ctxt);
12797
0
    return(ret);
12798
0
}
12799
12800
/**
12801
 * xmlSAXParseFile:
12802
 * @sax:  the SAX handler block
12803
 * @filename:  the filename
12804
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12805
 *             documents
12806
 *
12807
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12808
 *
12809
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12810
 * compressed document is provided by default if found at compile-time.
12811
 * It use the given SAX function block to handle the parsing callback.
12812
 * If sax is NULL, fallback to the default DOM tree building routines.
12813
 *
12814
 * Returns the resulting document tree
12815
 */
12816
12817
xmlDocPtr
12818
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12819
0
                          int recovery) {
12820
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12821
0
}
12822
12823
/**
12824
 * xmlRecoverDoc:
12825
 * @cur:  a pointer to an array of xmlChar
12826
 *
12827
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
12828
 *
12829
 * parse an XML in-memory document and build a tree.
12830
 * In the case the document is not Well Formed, a attempt to build a
12831
 * tree is tried anyway
12832
 *
12833
 * Returns the resulting document tree or NULL in case of failure
12834
 */
12835
12836
xmlDocPtr
12837
0
xmlRecoverDoc(const xmlChar *cur) {
12838
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12839
0
}
12840
12841
/**
12842
 * xmlParseFile:
12843
 * @filename:  the filename
12844
 *
12845
 * DEPRECATED: Use xmlReadFile.
12846
 *
12847
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12848
 * compressed document is provided by default if found at compile-time.
12849
 *
12850
 * Returns the resulting document tree if the file was wellformed,
12851
 * NULL otherwise.
12852
 */
12853
12854
xmlDocPtr
12855
0
xmlParseFile(const char *filename) {
12856
0
    return(xmlSAXParseFile(NULL, filename, 0));
12857
0
}
12858
12859
/**
12860
 * xmlRecoverFile:
12861
 * @filename:  the filename
12862
 *
12863
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
12864
 *
12865
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12866
 * compressed document is provided by default if found at compile-time.
12867
 * In the case the document is not Well Formed, it attempts to build
12868
 * a tree anyway
12869
 *
12870
 * Returns the resulting document tree or NULL in case of failure
12871
 */
12872
12873
xmlDocPtr
12874
0
xmlRecoverFile(const char *filename) {
12875
0
    return(xmlSAXParseFile(NULL, filename, 1));
12876
0
}
12877
12878
12879
/**
12880
 * xmlSetupParserForBuffer:
12881
 * @ctxt:  an XML parser context
12882
 * @buffer:  a xmlChar * buffer
12883
 * @filename:  a file name
12884
 *
12885
 * DEPRECATED: Don't use.
12886
 *
12887
 * Setup the parser context to parse a new buffer; Clears any prior
12888
 * contents from the parser context. The buffer parameter must not be
12889
 * NULL, but the filename parameter can be
12890
 */
12891
void
12892
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12893
                             const char* filename)
12894
0
{
12895
0
    xmlParserInputPtr input;
12896
12897
0
    if ((ctxt == NULL) || (buffer == NULL))
12898
0
        return;
12899
12900
0
    xmlClearParserCtxt(ctxt);
12901
12902
0
    input = xmlNewInputString(ctxt, filename, (const char *) buffer, NULL, 0);
12903
0
    if (input == NULL)
12904
0
        return;
12905
0
    inputPush(ctxt, input);
12906
0
}
12907
12908
/**
12909
 * xmlSAXUserParseFile:
12910
 * @sax:  a SAX handler
12911
 * @user_data:  The user data returned on SAX callbacks
12912
 * @filename:  a file name
12913
 *
12914
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12915
 *
12916
 * parse an XML file and call the given SAX handler routines.
12917
 * Automatic support for ZLIB/Compress compressed document is provided
12918
 *
12919
 * Returns 0 in case of success or a error number otherwise
12920
 */
12921
int
12922
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12923
0
                    const char *filename) {
12924
0
    int ret = 0;
12925
0
    xmlParserCtxtPtr ctxt;
12926
12927
0
    ctxt = xmlCreateFileParserCtxt(filename);
12928
0
    if (ctxt == NULL) return -1;
12929
0
    if (sax != NULL) {
12930
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12931
0
            *ctxt->sax = *sax;
12932
0
        } else {
12933
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12934
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12935
0
        }
12936
0
  ctxt->userData = user_data;
12937
0
    }
12938
12939
0
    xmlParseDocument(ctxt);
12940
12941
0
    if (ctxt->wellFormed)
12942
0
  ret = 0;
12943
0
    else {
12944
0
        if (ctxt->errNo != 0)
12945
0
      ret = ctxt->errNo;
12946
0
  else
12947
0
      ret = -1;
12948
0
    }
12949
0
    if (ctxt->myDoc != NULL) {
12950
0
        xmlFreeDoc(ctxt->myDoc);
12951
0
  ctxt->myDoc = NULL;
12952
0
    }
12953
0
    xmlFreeParserCtxt(ctxt);
12954
12955
0
    return ret;
12956
0
}
12957
#endif /* LIBXML_SAX1_ENABLED */
12958
12959
/************************************************************************
12960
 *                  *
12961
 *    Front ends when parsing from memory     *
12962
 *                  *
12963
 ************************************************************************/
12964
12965
/**
12966
 * xmlCreateMemoryParserCtxt:
12967
 * @buffer:  a pointer to a char array
12968
 * @size:  the size of the array
12969
 *
12970
 * Create a parser context for an XML in-memory document. The input buffer
12971
 * must not contain a terminating null byte.
12972
 *
12973
 * Returns the new parser context or NULL
12974
 */
12975
xmlParserCtxtPtr
12976
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12977
0
    xmlParserCtxtPtr ctxt;
12978
0
    xmlParserInputPtr input;
12979
12980
0
    if (size < 0)
12981
0
  return(NULL);
12982
12983
0
    ctxt = xmlNewParserCtxt();
12984
0
    if (ctxt == NULL)
12985
0
  return(NULL);
12986
12987
0
    input = xmlNewInputMemory(ctxt, NULL, buffer, size, NULL, 0);
12988
0
    if (input == NULL) {
12989
0
  xmlFreeParserCtxt(ctxt);
12990
0
  return(NULL);
12991
0
    }
12992
0
    inputPush(ctxt, input);
12993
12994
0
    return(ctxt);
12995
0
}
12996
12997
#ifdef LIBXML_SAX1_ENABLED
12998
/**
12999
 * xmlSAXParseMemoryWithData:
13000
 * @sax:  the SAX handler block
13001
 * @buffer:  an pointer to a char array
13002
 * @size:  the size of the array
13003
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13004
 *             documents
13005
 * @data:  the userdata
13006
 *
13007
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13008
 *
13009
 * parse an XML in-memory block and use the given SAX function block
13010
 * to handle the parsing callback. If sax is NULL, fallback to the default
13011
 * DOM tree building routines.
13012
 *
13013
 * User data (void *) is stored within the parser context in the
13014
 * context's _private member, so it is available nearly everywhere in libxml
13015
 *
13016
 * Returns the resulting document tree
13017
 */
13018
13019
xmlDocPtr
13020
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13021
0
                          int size, int recovery, void *data) {
13022
0
    xmlDocPtr ret;
13023
0
    xmlParserCtxtPtr ctxt;
13024
0
    xmlParserInputPtr input;
13025
13026
0
    if (size < 0)
13027
0
        return(NULL);
13028
13029
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
13030
0
    if (ctxt == NULL)
13031
0
        return(NULL);
13032
13033
0
    if (data != NULL)
13034
0
  ctxt->_private=data;
13035
13036
0
    if (recovery) {
13037
0
        ctxt->options |= XML_PARSE_RECOVER;
13038
0
        ctxt->recovery = 1;
13039
0
    }
13040
13041
0
    input = xmlNewInputMemory(ctxt, NULL, buffer, size, NULL,
13042
0
                              XML_INPUT_BUF_STATIC);
13043
13044
0
    ret = xmlCtxtParseDocument(ctxt, input);
13045
13046
0
    xmlFreeParserCtxt(ctxt);
13047
0
    return(ret);
13048
0
}
13049
13050
/**
13051
 * xmlSAXParseMemory:
13052
 * @sax:  the SAX handler block
13053
 * @buffer:  an pointer to a char array
13054
 * @size:  the size of the array
13055
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13056
 *             documents
13057
 *
13058
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13059
 *
13060
 * parse an XML in-memory block and use the given SAX function block
13061
 * to handle the parsing callback. If sax is NULL, fallback to the default
13062
 * DOM tree building routines.
13063
 *
13064
 * Returns the resulting document tree
13065
 */
13066
xmlDocPtr
13067
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13068
0
            int size, int recovery) {
13069
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13070
0
}
13071
13072
/**
13073
 * xmlParseMemory:
13074
 * @buffer:  an pointer to a char array
13075
 * @size:  the size of the array
13076
 *
13077
 * DEPRECATED: Use xmlReadMemory.
13078
 *
13079
 * parse an XML in-memory block and build a tree.
13080
 *
13081
 * Returns the resulting document tree
13082
 */
13083
13084
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13085
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
13086
0
}
13087
13088
/**
13089
 * xmlRecoverMemory:
13090
 * @buffer:  an pointer to a char array
13091
 * @size:  the size of the array
13092
 *
13093
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13094
 *
13095
 * parse an XML in-memory block and build a tree.
13096
 * In the case the document is not Well Formed, an attempt to
13097
 * build a tree is tried anyway
13098
 *
13099
 * Returns the resulting document tree or NULL in case of error
13100
 */
13101
13102
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13103
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
13104
0
}
13105
13106
/**
13107
 * xmlSAXUserParseMemory:
13108
 * @sax:  a SAX handler
13109
 * @user_data:  The user data returned on SAX callbacks
13110
 * @buffer:  an in-memory XML document input
13111
 * @size:  the length of the XML document in bytes
13112
 *
13113
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13114
 *
13115
 * parse an XML in-memory buffer and call the given SAX handler routines.
13116
 *
13117
 * Returns 0 in case of success or a error number otherwise
13118
 */
13119
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13120
0
        const char *buffer, int size) {
13121
0
    int ret = 0;
13122
0
    xmlParserCtxtPtr ctxt;
13123
13124
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13125
0
    if (ctxt == NULL) return -1;
13126
0
    if (sax != NULL) {
13127
0
        if (sax->initialized == XML_SAX2_MAGIC) {
13128
0
            *ctxt->sax = *sax;
13129
0
        } else {
13130
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
13131
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
13132
0
        }
13133
0
  ctxt->userData = user_data;
13134
0
    }
13135
13136
0
    xmlParseDocument(ctxt);
13137
13138
0
    if (ctxt->wellFormed)
13139
0
  ret = 0;
13140
0
    else {
13141
0
        if (ctxt->errNo != 0)
13142
0
      ret = ctxt->errNo;
13143
0
  else
13144
0
      ret = -1;
13145
0
    }
13146
0
    if (ctxt->myDoc != NULL) {
13147
0
        xmlFreeDoc(ctxt->myDoc);
13148
0
  ctxt->myDoc = NULL;
13149
0
    }
13150
0
    xmlFreeParserCtxt(ctxt);
13151
13152
0
    return ret;
13153
0
}
13154
#endif /* LIBXML_SAX1_ENABLED */
13155
13156
/**
13157
 * xmlCreateDocParserCtxt:
13158
 * @str:  a pointer to an array of xmlChar
13159
 *
13160
 * Creates a parser context for an XML in-memory document.
13161
 *
13162
 * Returns the new parser context or NULL
13163
 */
13164
xmlParserCtxtPtr
13165
0
xmlCreateDocParserCtxt(const xmlChar *str) {
13166
0
    xmlParserCtxtPtr ctxt;
13167
0
    xmlParserInputPtr input;
13168
13169
0
    ctxt = xmlNewParserCtxt();
13170
0
    if (ctxt == NULL)
13171
0
  return(NULL);
13172
13173
0
    input = xmlNewInputString(ctxt, NULL, (const char *) str, NULL, 0);
13174
0
    if (input == NULL) {
13175
0
  xmlFreeParserCtxt(ctxt);
13176
0
  return(NULL);
13177
0
    }
13178
0
    inputPush(ctxt, input);
13179
13180
0
    return(ctxt);
13181
0
}
13182
13183
#ifdef LIBXML_SAX1_ENABLED
13184
/**
13185
 * xmlSAXParseDoc:
13186
 * @sax:  the SAX handler block
13187
 * @cur:  a pointer to an array of xmlChar
13188
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13189
 *             documents
13190
 *
13191
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13192
 *
13193
 * parse an XML in-memory document and build a tree.
13194
 * It use the given SAX function block to handle the parsing callback.
13195
 * If sax is NULL, fallback to the default DOM tree building routines.
13196
 *
13197
 * Returns the resulting document tree
13198
 */
13199
13200
xmlDocPtr
13201
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13202
0
    xmlDocPtr ret;
13203
0
    xmlParserCtxtPtr ctxt;
13204
0
    xmlSAXHandlerPtr oldsax = NULL;
13205
13206
0
    if (cur == NULL) return(NULL);
13207
13208
13209
0
    ctxt = xmlCreateDocParserCtxt(cur);
13210
0
    if (ctxt == NULL) return(NULL);
13211
0
    if (sax != NULL) {
13212
0
        oldsax = ctxt->sax;
13213
0
        ctxt->sax = sax;
13214
0
        ctxt->userData = NULL;
13215
0
    }
13216
13217
0
    xmlParseDocument(ctxt);
13218
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13219
0
    else {
13220
0
       ret = NULL;
13221
0
       xmlFreeDoc(ctxt->myDoc);
13222
0
       ctxt->myDoc = NULL;
13223
0
    }
13224
0
    if (sax != NULL)
13225
0
  ctxt->sax = oldsax;
13226
0
    xmlFreeParserCtxt(ctxt);
13227
13228
0
    return(ret);
13229
0
}
13230
13231
/**
13232
 * xmlParseDoc:
13233
 * @cur:  a pointer to an array of xmlChar
13234
 *
13235
 * DEPRECATED: Use xmlReadDoc.
13236
 *
13237
 * parse an XML in-memory document and build a tree.
13238
 *
13239
 * Returns the resulting document tree
13240
 */
13241
13242
xmlDocPtr
13243
0
xmlParseDoc(const xmlChar *cur) {
13244
0
    return(xmlSAXParseDoc(NULL, cur, 0));
13245
0
}
13246
#endif /* LIBXML_SAX1_ENABLED */
13247
13248
/************************************************************************
13249
 *                  *
13250
 *  New set (2.6.0) of simpler and more flexible APIs   *
13251
 *                  *
13252
 ************************************************************************/
13253
13254
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope.
 *
 * The expansion is wrapped in do/while(0) so that it forms exactly one
 * statement and stays correct inside unbraced if/else bodies. Call sites
 * must end the invocation with a semicolon.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
13265
13266
/**
13267
 * xmlCtxtReset:
13268
 * @ctxt: an XML parser context
13269
 *
13270
 * Reset a parser context
13271
 */
13272
void
13273
xmlCtxtReset(xmlParserCtxtPtr ctxt)
13274
91.8k
{
13275
91.8k
    xmlParserInputPtr input;
13276
91.8k
    xmlDictPtr dict;
13277
13278
91.8k
    if (ctxt == NULL)
13279
0
        return;
13280
13281
91.8k
    dict = ctxt->dict;
13282
13283
91.8k
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13284
0
        xmlFreeInputStream(input);
13285
0
    }
13286
91.8k
    ctxt->inputNr = 0;
13287
91.8k
    ctxt->input = NULL;
13288
13289
91.8k
    ctxt->spaceNr = 0;
13290
91.8k
    if (ctxt->spaceTab != NULL) {
13291
91.8k
  ctxt->spaceTab[0] = -1;
13292
91.8k
  ctxt->space = &ctxt->spaceTab[0];
13293
91.8k
    } else {
13294
0
        ctxt->space = NULL;
13295
0
    }
13296
13297
13298
91.8k
    ctxt->nodeNr = 0;
13299
91.8k
    ctxt->node = NULL;
13300
13301
91.8k
    ctxt->nameNr = 0;
13302
91.8k
    ctxt->name = NULL;
13303
13304
91.8k
    ctxt->nsNr = 0;
13305
91.8k
    xmlParserNsReset(ctxt->nsdb);
13306
13307
91.8k
    DICT_FREE(ctxt->version);
13308
91.8k
    ctxt->version = NULL;
13309
91.8k
    DICT_FREE(ctxt->encoding);
13310
91.8k
    ctxt->encoding = NULL;
13311
91.8k
    DICT_FREE(ctxt->extSubURI);
13312
91.8k
    ctxt->extSubURI = NULL;
13313
91.8k
    DICT_FREE(ctxt->extSubSystem);
13314
91.8k
    ctxt->extSubSystem = NULL;
13315
91.8k
    if (ctxt->myDoc != NULL)
13316
0
        xmlFreeDoc(ctxt->myDoc);
13317
91.8k
    ctxt->myDoc = NULL;
13318
13319
91.8k
    ctxt->standalone = -1;
13320
91.8k
    ctxt->hasExternalSubset = 0;
13321
91.8k
    ctxt->hasPErefs = 0;
13322
91.8k
    ctxt->html = 0;
13323
91.8k
    ctxt->instate = XML_PARSER_START;
13324
13325
91.8k
    ctxt->wellFormed = 1;
13326
91.8k
    ctxt->nsWellFormed = 1;
13327
91.8k
    ctxt->disableSAX = 0;
13328
91.8k
    ctxt->valid = 1;
13329
#if 0
13330
    ctxt->vctxt.userData = ctxt;
13331
    ctxt->vctxt.error = xmlParserValidityError;
13332
    ctxt->vctxt.warning = xmlParserValidityWarning;
13333
#endif
13334
91.8k
    ctxt->record_info = 0;
13335
91.8k
    ctxt->checkIndex = 0;
13336
91.8k
    ctxt->endCheckState = 0;
13337
91.8k
    ctxt->inSubset = 0;
13338
91.8k
    ctxt->errNo = XML_ERR_OK;
13339
91.8k
    ctxt->depth = 0;
13340
91.8k
    ctxt->catalogs = NULL;
13341
91.8k
    ctxt->sizeentities = 0;
13342
91.8k
    ctxt->sizeentcopy = 0;
13343
91.8k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13344
13345
91.8k
    if (ctxt->attsDefault != NULL) {
13346
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13347
0
        ctxt->attsDefault = NULL;
13348
0
    }
13349
91.8k
    if (ctxt->attsSpecial != NULL) {
13350
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13351
0
        ctxt->attsSpecial = NULL;
13352
0
    }
13353
13354
91.8k
#ifdef LIBXML_CATALOG_ENABLED
13355
91.8k
    if (ctxt->catalogs != NULL)
13356
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13357
91.8k
#endif
13358
91.8k
    ctxt->nbErrors = 0;
13359
91.8k
    ctxt->nbWarnings = 0;
13360
91.8k
    if (ctxt->lastError.code != XML_ERR_OK)
13361
0
        xmlResetError(&ctxt->lastError);
13362
91.8k
}
13363
13364
/**
13365
 * xmlCtxtResetPush:
13366
 * @ctxt: an XML parser context
13367
 * @chunk:  a pointer to an array of chars
13368
 * @size:  number of chars in the array
13369
 * @filename:  an optional file name or URI
13370
 * @encoding:  the document encoding, or NULL
13371
 *
13372
 * Reset a push parser context
13373
 *
13374
 * Returns 0 in case of success and 1 in case of error
13375
 */
13376
int
13377
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13378
                 int size, const char *filename, const char *encoding)
13379
0
{
13380
0
    xmlParserInputPtr input;
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(1);
13384
13385
0
    xmlCtxtReset(ctxt);
13386
13387
0
    input = xmlNewInputPush(ctxt, filename, chunk, size, encoding);
13388
0
    if (input == NULL)
13389
0
        return(1);
13390
0
    inputPush(ctxt, input);
13391
13392
0
    return(0);
13393
0
}
13394
13395
static int
13396
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13397
219k
{
13398
219k
    int allMask;
13399
13400
219k
    if (ctxt == NULL)
13401
0
        return(-1);
13402
13403
    /*
13404
     * XInclude options aren't handled by the parser.
13405
     *
13406
     * XML_PARSE_XINCLUDE
13407
     * XML_PARSE_NOXINCNODE
13408
     * XML_PARSE_NOBASEFIX
13409
     */
13410
219k
    allMask = XML_PARSE_RECOVER |
13411
219k
              XML_PARSE_NOENT |
13412
219k
              XML_PARSE_DTDLOAD |
13413
219k
              XML_PARSE_DTDATTR |
13414
219k
              XML_PARSE_DTDVALID |
13415
219k
              XML_PARSE_NOERROR |
13416
219k
              XML_PARSE_NOWARNING |
13417
219k
              XML_PARSE_PEDANTIC |
13418
219k
              XML_PARSE_NOBLANKS |
13419
219k
#ifdef LIBXML_SAX1_ENABLED
13420
219k
              XML_PARSE_SAX1 |
13421
219k
#endif
13422
219k
              XML_PARSE_NONET |
13423
219k
              XML_PARSE_NODICT |
13424
219k
              XML_PARSE_NSCLEAN |
13425
219k
              XML_PARSE_NOCDATA |
13426
219k
              XML_PARSE_COMPACT |
13427
219k
              XML_PARSE_OLD10 |
13428
219k
              XML_PARSE_HUGE |
13429
219k
              XML_PARSE_OLDSAX |
13430
219k
              XML_PARSE_IGNORE_ENC |
13431
219k
              XML_PARSE_BIG_LINES |
13432
219k
              XML_PARSE_NO_XXE;
13433
13434
219k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13435
13436
    /*
13437
     * For some options, struct members are historically the source
13438
     * of truth. The values are initalized from global variables and
13439
     * old code could also modify them directly. Several older API
13440
     * functions that don't take an options argument rely on these
13441
     * deprecated mechanisms.
13442
     *
13443
     * Once public access to struct members and the globals are
13444
     * disabled, we can use the options bitmask as source of
13445
     * truth, making all these struct members obsolete.
13446
     *
13447
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13448
     * loading of the external subset.
13449
     */
13450
219k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13451
219k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13452
219k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13453
219k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13454
219k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13455
219k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13456
219k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13457
219k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13458
13459
    /*
13460
     * Changing SAX callbacks is a bad idea. This should be fixed.
13461
     */
13462
219k
    if (options & XML_PARSE_NOBLANKS) {
13463
73.0k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13464
73.0k
    }
13465
219k
    if (options & XML_PARSE_NOCDATA) {
13466
58.4k
        ctxt->sax->cdataBlock = NULL;
13467
58.4k
    }
13468
219k
    if (options & XML_PARSE_HUGE) {
13469
59.2k
        if (ctxt->dict != NULL)
13470
59.2k
            xmlDictSetLimit(ctxt->dict, 0);
13471
59.2k
    }
13472
13473
219k
    ctxt->linenumbers = 1;
13474
13475
219k
    return(options & ~allMask);
13476
219k
}
13477
13478
/**
13479
 * xmlCtxtSetOptions:
13480
 * @ctxt: an XML parser context
13481
 * @options:  a bitmask of xmlParserOption values
13482
 *
13483
 * Applies the options to the parser context. Unset options are
13484
 * cleared.
13485
 *
13486
 * Available since 2.13.0. With older versions, you can use
13487
 * xmlCtxtUseOptions.
13488
 *
13489
 * XML_PARSE_RECOVER
13490
 *
13491
 * Enable "recovery" mode which allows non-wellformed documents.
13492
 * How this mode behaves exactly is unspecified and may change
13493
 * without further notice. Use of this feature is DISCOURAGED.
13494
 *
13495
 * XML_PARSE_NOENT
13496
 *
13497
 * Despite the confusing name, this option enables substitution
13498
 * of entities. The resulting tree won't contain any entity
13499
 * reference nodes.
13500
 *
13501
 * This option also enables loading of external entities (both
13502
 * general and parameter entities) which is dangerous. If you
13503
 * process untrusted data, it's recommended to set the
13504
 * XML_PARSE_NO_XXE option to disable loading of external
13505
 * entities.
13506
 *
13507
 * XML_PARSE_DTDLOAD
13508
 *
13509
 * Enables loading of an external DTD and the loading and
13510
 * substitution of external parameter entities. Has no effect
13511
 * if XML_PARSE_NO_XXE is set.
13512
 *
13513
 * XML_PARSE_DTDATTR
13514
 *
13515
 * Adds default attributes from the DTD to the result document.
13516
 *
13517
 * Implies XML_PARSE_DTDLOAD, but loading of external content
13518
 * can be disabled with XML_PARSE_NO_XXE.
13519
 *
13520
 * XML_PARSE_DTDVALID
13521
 *
13522
 * This option enables DTD validation which requires to load
13523
 * external DTDs and external entities (both general and
13524
 * parameter entities) unless XML_PARSE_NO_XXE was set.
13525
 *
13526
 * XML_PARSE_NO_XXE
13527
 *
13528
 * Disables loading of external DTDs or entities.
13529
 *
13530
 * XML_PARSE_NOERROR
13531
 *
13532
 * Disable error and warning reports to the error handlers.
13533
 * Errors are still accessible with xmlCtxtGetLastError.
13534
 *
13535
 * XML_PARSE_NOWARNING
13536
 *
13537
 * Disable warning reports.
13538
 *
13539
 * XML_PARSE_PEDANTIC
13540
 *
13541
 * Enable some pedantic warnings.
13542
 *
13543
 * XML_PARSE_NOBLANKS
13544
 *
13545
 * Remove some text nodes containing only whitespace from the
13546
 * result document. Which nodes are removed depends on DTD
13547
 * element declarations or a conservative heuristic. The
13548
 * reindenting feature of the serialization code relies on this
13549
 * option to be set when parsing. Use of this option is
13550
 * DISCOURAGED.
13551
 *
13552
 * XML_PARSE_SAX1
13553
 *
13554
 * Always invoke the deprecated SAX1 startElement and endElement
13555
 * handlers. This option is DEPRECATED.
13556
 *
13557
 * XML_PARSE_NONET
13558
 *
13559
 * Disable network access with the builtin HTTP and FTP clients.
13560
 *
13561
 * XML_PARSE_NODICT
13562
 *
13563
 * Create a document without interned strings, making all
13564
 * strings separate memory allocations.
13565
 *
13566
 * XML_PARSE_NSCLEAN
13567
 *
13568
 * Remove redundant namespace declarations from the result
13569
 * document.
13570
 *
13571
 * XML_PARSE_NOCDATA
13572
 *
13573
 * Output normal text nodes instead of CDATA nodes.
13574
 *
13575
 * XML_PARSE_COMPACT
13576
 *
13577
 * Store small strings directly in the node struct to save
13578
 * memory.
13579
 *
13580
 * XML_PARSE_OLD10
13581
 *
13582
 * Use old Name productions from before XML 1.0 Fifth Edition.
13583
 * This option is DEPRECATED.
13584
 *
13585
 * XML_PARSE_HUGE
13586
 *
13587
 * Relax some internal limits.
13588
 *
13589
 * Maximum size of text nodes, tags, comments, processing instructions,
13590
 * CDATA sections, entity values
13591
 *
13592
 * normal: 10M
13593
 * huge:    1B
13594
 *
13595
 * Maximum size of names, system literals, pubid literals
13596
 *
13597
 * normal: 50K
13598
 * huge:   10M
13599
 *
13600
 * Maximum nesting depth of elements
13601
 *
13602
 * normal:  256
13603
 * huge:   2048
13604
 *
13605
 * Maximum nesting depth of entities
13606
 *
13607
 * normal: 20
13608
 * huge:   40
13609
 *
13610
 * XML_PARSE_OLDSAX
13611
 *
13612
 * Enable an unspecified legacy mode for SAX parsers. This
13613
 * option is DEPRECATED.
13614
 *
13615
 * XML_PARSE_IGNORE_ENC
13616
 *
13617
 * Ignore the encoding in the XML declaration. This option is
13618
 * mostly unneeded these days. The only effect is to enforce
13619
 * UTF-8 decoding of ASCII-like data.
13620
 *
13621
 * XML_PARSE_BIG_LINES
13622
 *
13623
 * Enable reporting of line numbers larger than 65535.
13624
 *
13625
 * Returns 0 in case of success, the set of unknown or unimplemented options
13626
 *         in case of error.
13627
 */
13628
int
13629
xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
13630
0
{
13631
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13632
0
}
13633
13634
/**
13635
 * xmlCtxtUseOptions:
13636
 * @ctxt: an XML parser context
13637
 * @options:  a combination of xmlParserOption
13638
 *
13639
 * DEPRECATED: Use xmlCtxtSetOptions.
13640
 *
13641
 * Applies the options to the parser context. The following options
13642
 * are never cleared and can only be enabled:
13643
 *
13644
 * XML_PARSE_NOERROR
13645
 * XML_PARSE_NOWARNING
13646
 * XML_PARSE_NONET
13647
 * XML_PARSE_NSCLEAN
13648
 * XML_PARSE_NOCDATA
13649
 * XML_PARSE_COMPACT
13650
 * XML_PARSE_OLD10
13651
 * XML_PARSE_HUGE
13652
 * XML_PARSE_OLDSAX
13653
 * XML_PARSE_IGNORE_ENC
13654
 * XML_PARSE_BIG_LINES
13655
 *
13656
 * Returns 0 in case of success, the set of unknown or unimplemented options
13657
 *         in case of error.
13658
 */
13659
int
13660
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13661
219k
{
13662
219k
    int keepMask;
13663
13664
    /*
13665
     * For historic reasons, some options can only be enabled.
13666
     */
13667
219k
    keepMask = XML_PARSE_NOERROR |
13668
219k
               XML_PARSE_NOWARNING |
13669
219k
               XML_PARSE_NONET |
13670
219k
               XML_PARSE_NSCLEAN |
13671
219k
               XML_PARSE_NOCDATA |
13672
219k
               XML_PARSE_COMPACT |
13673
219k
               XML_PARSE_OLD10 |
13674
219k
               XML_PARSE_HUGE |
13675
219k
               XML_PARSE_OLDSAX |
13676
219k
               XML_PARSE_IGNORE_ENC |
13677
219k
               XML_PARSE_BIG_LINES;
13678
13679
219k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13680
219k
}
13681
13682
/**
13683
 * xmlCtxtSetMaxAmplification:
13684
 * @ctxt: an XML parser context
13685
 * @maxAmpl:  maximum amplification factor
13686
 *
13687
 * To protect against exponential entity expansion ("billion laughs"), the
13688
 * size of serialized output is (roughly) limited to the input size
13689
 * multiplied by this factor. The default value is 5.
13690
 *
13691
 * When working with documents making heavy use of entity expansion, it can
13692
 * be necessary to increase the value. For security reasons, this should only
13693
 * be considered when processing trusted input.
13694
 */
13695
void
13696
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
13697
0
{
13698
0
    ctxt->maxAmpl = maxAmpl;
13699
0
}
13700
13701
/**
13702
 * xmlCtxtParseDocument:
13703
 * @ctxt:  an XML parser context
13704
 * @input:  parser input
13705
 *
13706
 * Parse an XML document and return the resulting document tree.
13707
 * Takes ownership of the input object.
13708
 *
13709
 * Returns the resulting document tree or NULL
13710
 */
13711
xmlDocPtr
13712
xmlCtxtParseDocument(xmlParserCtxtPtr ctxt, xmlParserInputPtr input)
13713
125k
{
13714
125k
    xmlDocPtr ret = NULL;
13715
13716
125k
    if ((ctxt == NULL) || (input == NULL))
13717
227
        return(NULL);
13718
13719
    /* assert(ctxt->inputNr == 0); */
13720
125k
    while (ctxt->inputNr > 0)
13721
0
        xmlFreeInputStream(inputPop(ctxt));
13722
13723
125k
    if (inputPush(ctxt, input) < 0) {
13724
0
        xmlFreeInputStream(input);
13725
0
        return(NULL);
13726
0
    }
13727
13728
125k
    xmlParseDocument(ctxt);
13729
13730
125k
    if ((ctxt->wellFormed) ||
13731
125k
        ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
13732
79.0k
        ret = ctxt->myDoc;
13733
79.0k
    } else {
13734
46.1k
        if (ctxt->errNo == XML_ERR_OK)
13735
0
            xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, "unknown error\n");
13736
13737
46.1k
        ret = NULL;
13738
46.1k
  xmlFreeDoc(ctxt->myDoc);
13739
46.1k
    }
13740
125k
    ctxt->myDoc = NULL;
13741
13742
    /* assert(ctxt->inputNr == 1); */
13743
250k
    while (ctxt->inputNr > 0)
13744
125k
        xmlFreeInputStream(inputPop(ctxt));
13745
13746
125k
    return(ret);
13747
125k
}
13748
13749
/**
13750
 * xmlReadDoc:
13751
 * @cur:  a pointer to a zero terminated string
13752
 * @URL:  base URL (optional)
13753
 * @encoding:  the document encoding (optional)
13754
 * @options:  a combination of xmlParserOption
13755
 *
13756
 * Convenience function to parse an XML document from a
13757
 * zero-terminated string.
13758
 *
13759
 * See xmlCtxtReadDoc for details.
13760
 *
13761
 * Returns the resulting document tree
13762
 */
13763
xmlDocPtr
13764
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13765
           int options)
13766
0
{
13767
0
    xmlParserCtxtPtr ctxt;
13768
0
    xmlParserInputPtr input;
13769
0
    xmlDocPtr doc;
13770
13771
0
    ctxt = xmlNewParserCtxt();
13772
0
    if (ctxt == NULL)
13773
0
        return(NULL);
13774
13775
0
    xmlCtxtUseOptions(ctxt, options);
13776
13777
0
    input = xmlNewInputString(ctxt, URL, (const char *) cur, encoding,
13778
0
                              XML_INPUT_BUF_STATIC);
13779
13780
0
    doc = xmlCtxtParseDocument(ctxt, input);
13781
13782
0
    xmlFreeParserCtxt(ctxt);
13783
0
    return(doc);
13784
0
}
13785
13786
/**
13787
 * xmlReadFile:
13788
 * @filename:  a file or URL
13789
 * @encoding:  the document encoding (optional)
13790
 * @options:  a combination of xmlParserOption
13791
 *
13792
 * Convenience function to parse an XML file from the filesystem,
13793
 * the network or a global user-define resource loader.
13794
 *
13795
 * See xmlCtxtReadFile for details.
13796
 *
13797
 * Returns the resulting document tree
13798
 */
13799
xmlDocPtr
13800
xmlReadFile(const char *filename, const char *encoding, int options)
13801
0
{
13802
0
    xmlParserCtxtPtr ctxt;
13803
0
    xmlParserInputPtr input;
13804
0
    xmlDocPtr doc;
13805
13806
0
    ctxt = xmlNewParserCtxt();
13807
0
    if (ctxt == NULL)
13808
0
        return(NULL);
13809
13810
0
    xmlCtxtUseOptions(ctxt, options);
13811
13812
0
    input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0);
13813
13814
0
    doc = xmlCtxtParseDocument(ctxt, input);
13815
13816
0
    xmlFreeParserCtxt(ctxt);
13817
0
    return(doc);
13818
0
}
13819
13820
/**
13821
 * xmlReadMemory:
13822
 * @buffer:  a pointer to a char array
13823
 * @size:  the size of the array
13824
 * @url:  base URL (optional)
13825
 * @encoding:  the document encoding (optional)
13826
 * @options:  a combination of xmlParserOption
13827
 *
13828
 * Parse an XML in-memory document and build a tree. The input buffer must
13829
 * not contain a terminating null byte.
13830
 *
13831
 * See xmlCtxtReadMemory for details.
13832
 *
13833
 * Returns the resulting document tree
13834
 */
13835
xmlDocPtr
13836
xmlReadMemory(const char *buffer, int size, const char *url,
13837
              const char *encoding, int options)
13838
33.5k
{
13839
33.5k
    xmlParserCtxtPtr ctxt;
13840
33.5k
    xmlParserInputPtr input;
13841
33.5k
    xmlDocPtr doc;
13842
13843
33.5k
    if (size < 0)
13844
0
  return(NULL);
13845
13846
33.5k
    ctxt = xmlNewParserCtxt();
13847
33.5k
    if (ctxt == NULL)
13848
10
        return(NULL);
13849
13850
33.5k
    xmlCtxtUseOptions(ctxt, options);
13851
13852
33.5k
    input = xmlNewInputMemory(ctxt, url, buffer, size, encoding,
13853
33.5k
                              XML_INPUT_BUF_STATIC);
13854
13855
33.5k
    doc = xmlCtxtParseDocument(ctxt, input);
13856
13857
33.5k
    xmlFreeParserCtxt(ctxt);
13858
33.5k
    return(doc);
13859
33.5k
}
13860
13861
/**
13862
 * xmlReadFd:
13863
 * @fd:  an open file descriptor
13864
 * @URL:  base URL (optional)
13865
 * @encoding:  the document encoding (optional)
13866
 * @options:  a combination of xmlParserOption
13867
 *
13868
 * Parse an XML from a file descriptor and build a tree.
13869
 *
13870
 * See xmlCtxtReadFd for details.
13871
 *
13872
 * NOTE that the file descriptor will not be closed when the
13873
 * context is freed or reset.
13874
 *
13875
 * Returns the resulting document tree
13876
 */
13877
xmlDocPtr
13878
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13879
0
{
13880
0
    xmlParserCtxtPtr ctxt;
13881
0
    xmlParserInputPtr input;
13882
0
    xmlDocPtr doc;
13883
13884
0
    ctxt = xmlNewParserCtxt();
13885
0
    if (ctxt == NULL)
13886
0
        return(NULL);
13887
13888
0
    xmlCtxtUseOptions(ctxt, options);
13889
13890
0
    input = xmlNewInputFd(ctxt, URL, fd, encoding, 0);
13891
0
    input->buf->closecallback = NULL;
13892
13893
0
    doc = xmlCtxtParseDocument(ctxt, input);
13894
13895
0
    xmlFreeParserCtxt(ctxt);
13896
0
    return(doc);
13897
0
}
13898
13899
/**
13900
 * xmlReadIO:
13901
 * @ioread:  an I/O read function
13902
 * @ioclose:  an I/O close function (optional)
13903
 * @ioctx:  an I/O handler
13904
 * @URL:  base URL (optional)
13905
 * @encoding:  the document encoding (optional)
13906
 * @options:  a combination of xmlParserOption
13907
 *
13908
 * Parse an XML document from I/O functions and context and build a tree.
13909
 *
13910
 * See xmlCtxtReadIO for details.
13911
 *
13912
 * Returns the resulting document tree
13913
 */
13914
xmlDocPtr
13915
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13916
          void *ioctx, const char *URL, const char *encoding, int options)
13917
0
{
13918
0
    xmlParserCtxtPtr ctxt;
13919
0
    xmlParserInputPtr input;
13920
0
    xmlDocPtr doc;
13921
13922
0
    ctxt = xmlNewParserCtxt();
13923
0
    if (ctxt == NULL)
13924
0
        return(NULL);
13925
13926
0
    xmlCtxtUseOptions(ctxt, options);
13927
13928
0
    input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0);
13929
13930
0
    doc = xmlCtxtParseDocument(ctxt, input);
13931
13932
0
    xmlFreeParserCtxt(ctxt);
13933
0
    return(doc);
13934
0
}
13935
13936
/**
13937
 * xmlCtxtReadDoc:
13938
 * @ctxt:  an XML parser context
13939
 * @str:  a pointer to a zero terminated string
13940
 * @URL:  base URL (optional)
13941
 * @encoding:  the document encoding (optional)
13942
 * @options:  a combination of xmlParserOption
13943
 *
13944
 * Parse an XML in-memory document and build a tree.
13945
 *
13946
 * @URL is used as base to resolve external entities and for error
13947
 * reporting.
13948
 *
13949
 * See xmlCtxtUseOptions for details.
13950
 *
13951
 * Returns the resulting document tree
13952
 */
13953
xmlDocPtr
13954
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
13955
               const char *URL, const char *encoding, int options)
13956
0
{
13957
0
    xmlParserInputPtr input;
13958
13959
0
    if (ctxt == NULL)
13960
0
        return(NULL);
13961
13962
0
    xmlCtxtReset(ctxt);
13963
0
    xmlCtxtUseOptions(ctxt, options);
13964
13965
0
    input = xmlNewInputString(ctxt, URL, (const char *) str, encoding,
13966
0
                              XML_INPUT_BUF_STATIC);
13967
13968
0
    return(xmlCtxtParseDocument(ctxt, input));
13969
0
}
13970
13971
/**
13972
 * xmlCtxtReadFile:
13973
 * @ctxt:  an XML parser context
13974
 * @filename:  a file or URL
13975
 * @encoding:  the document encoding (optional)
13976
 * @options:  a combination of xmlParserOption
13977
 *
13978
 * Parse an XML file from the filesystem, the network or a user-defined
13979
 * resource loader.
13980
 *
13981
 * See xmlNewInputURL and xmlCtxtUseOptions for details.
13982
 *
13983
 * Returns the resulting document tree
13984
 */
13985
xmlDocPtr
13986
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13987
                const char *encoding, int options)
13988
31.7k
{
13989
31.7k
    xmlParserInputPtr input;
13990
13991
31.7k
    if (ctxt == NULL)
13992
0
        return(NULL);
13993
13994
31.7k
    xmlCtxtReset(ctxt);
13995
31.7k
    xmlCtxtUseOptions(ctxt, options);
13996
13997
31.7k
    input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0);
13998
13999
31.7k
    return(xmlCtxtParseDocument(ctxt, input));
14000
31.7k
}
14001
14002
/**
14003
 * xmlCtxtReadMemory:
14004
 * @ctxt:  an XML parser context
14005
 * @buffer:  a pointer to a char array
14006
 * @size:  the size of the array
14007
 * @URL:  base URL (optional)
14008
 * @encoding:  the document encoding (optional)
14009
 * @options:  a combination of xmlParserOption
14010
 *
14011
 * Parse an XML in-memory document and build a tree. The input buffer must
14012
 * not contain a terminating null byte.
14013
 *
14014
 * @URL is used as base to resolve external entities and for error
14015
 * reporting.
14016
 *
14017
 * See xmlCtxtUseOptions for details.
14018
 *
14019
 * Returns the resulting document tree
14020
 */
14021
xmlDocPtr
14022
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14023
                  const char *URL, const char *encoding, int options)
14024
60.0k
{
14025
60.0k
    xmlParserInputPtr input;
14026
14027
60.0k
    if ((ctxt == NULL) || (size < 0))
14028
0
        return(NULL);
14029
14030
60.0k
    xmlCtxtReset(ctxt);
14031
60.0k
    xmlCtxtUseOptions(ctxt, options);
14032
14033
60.0k
    input = xmlNewInputMemory(ctxt, URL, buffer, size, encoding,
14034
60.0k
                              XML_INPUT_BUF_STATIC);
14035
14036
60.0k
    return(xmlCtxtParseDocument(ctxt, input));
14037
60.0k
}
14038
14039
/**
14040
 * xmlCtxtReadFd:
14041
 * @ctxt:  an XML parser context
14042
 * @fd:  an open file descriptor
14043
 * @URL:  base URL (optional)
14044
 * @encoding:  the document encoding (optional)
14045
 * @options:  a combination of xmlParserOption
14046
 *
14047
 * Parse an XML document from a file descriptor and build a tree.
14048
 *
14049
 * NOTE that the file descriptor will not be closed when the
14050
 * context is freed or reset.
14051
 *
14052
 * @URL is used as base to resolve external entities and for error
14053
 * reporting.
14054
 *
14055
 * See xmlCtxtUseOptions for details.
14056
 *
14057
 * Returns the resulting document tree
14058
 */
14059
xmlDocPtr
14060
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14061
              const char *URL, const char *encoding, int options)
14062
0
{
14063
0
    xmlParserInputPtr input;
14064
14065
0
    if (ctxt == NULL)
14066
0
        return(NULL);
14067
14068
0
    xmlCtxtReset(ctxt);
14069
0
    xmlCtxtUseOptions(ctxt, options);
14070
14071
0
    input = xmlNewInputFd(ctxt, URL, fd, encoding, 0);
14072
0
    input->buf->closecallback = NULL;
14073
14074
0
    return(xmlCtxtParseDocument(ctxt, input));
14075
0
}
14076
14077
/**
14078
 * xmlCtxtReadIO:
14079
 * @ctxt:  an XML parser context
14080
 * @ioread:  an I/O read function
14081
 * @ioclose:  an I/O close function
14082
 * @ioctx:  an I/O handler
14083
 * @URL:  the base URL to use for the document
14084
 * @encoding:  the document encoding, or NULL
14085
 * @options:  a combination of xmlParserOption
14086
 *
14087
 * parse an XML document from I/O functions and source and build a tree.
14088
 * This reuses the existing @ctxt parser context
14089
 *
14090
 * @URL is used as base to resolve external entities and for error
14091
 * reporting.
14092
 *
14093
 * See xmlCtxtUseOptions for details.
14094
 *
14095
 * Returns the resulting document tree
14096
 */
14097
xmlDocPtr
14098
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14099
              xmlInputCloseCallback ioclose, void *ioctx,
14100
        const char *URL,
14101
              const char *encoding, int options)
14102
0
{
14103
0
    xmlParserInputPtr input;
14104
14105
0
    if (ctxt == NULL)
14106
0
        return(NULL);
14107
14108
0
    xmlCtxtReset(ctxt);
14109
0
    xmlCtxtUseOptions(ctxt, options);
14110
14111
0
    input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0);
14112
14113
0
    return(xmlCtxtParseDocument(ctxt, input));
14114
0
}
14115