Coverage Report

Created: 2025-12-31 10:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/workdir/UnpackedTarball/libxml2/parser.c
Line
Count
Source
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#include <libxml/HTMLparser.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
70
#include "private/buf.h"
71
#include "private/dict.h"
72
#include "private/entities.h"
73
#include "private/error.h"
74
#include "private/html.h"
75
#include "private/io.h"
76
#include "private/memory.h"
77
#include "private/parser.h"
78
79
34.2M
#define NS_INDEX_EMPTY  INT_MAX
80
6.08M
#define NS_INDEX_XML    (INT_MAX - 1)
81
12.3M
#define URI_HASH_EMPTY  0xD943A04E
82
55.6k
#define URI_HASH_XML    0xF0451F02
83
84
#ifndef STDIN_FILENO
85
0
  #define STDIN_FILENO 0
86
#endif
87
88
#ifndef SIZE_MAX
89
  #define SIZE_MAX ((size_t) -1)
90
#endif
91
92
1.18M
#define XML_MAX_ATTRS 100000000 /* 100 million */
93
94
/*
 * Record describing one start tag. No code in this part of the file reads
 * or writes it, so the per-field notes are inferred from the names only.
 */
struct _xmlStartTag {
95
    /* presumably the namespace prefix of the element -- TODO confirm */
    const xmlChar *prefix;
96
    /* presumably the namespace URI of the element -- TODO confirm */
    const xmlChar *URI;
97
    /* presumably the input line where the tag started -- TODO confirm */
    int line;
98
    /* presumably a namespace-declaration count for this tag -- TODO confirm */
    int nsNr;
99
};
100
101
/*
 * Extra per-namespace bookkeeping; _xmlParserNsData holds a pointer to
 * these. Field usage is not visible in this part of the file, so the
 * notes below are assumptions drawn from the names.
 */
typedef struct {
102
    /* presumably opaque data handed to SAX callbacks -- TODO confirm */
    void *saxData;
103
    /* presumably the hash of the namespace prefix -- TODO confirm */
    unsigned prefixHashValue;
104
    /* presumably the hash of the namespace URI (cf. URI_HASH_EMPTY/URI_HASH_XML) -- TODO confirm */
    unsigned uriHashValue;
105
    /* presumably identifies the declaring element -- TODO confirm */
    unsigned elementId;
106
    /* presumably the index this binding shadowed -- TODO confirm */
    int oldIndex;
107
} xmlParserNsExtra;
108
109
/*
 * One slot of the hash table referenced by _xmlParserNsData.hash.
 * NOTE(review): slot semantics are not visible in this part of the file.
 */
typedef struct {
110
    /* hash value stored in the slot */
    unsigned hashValue;
111
    /* presumably an index into the namespace table (cf. NS_INDEX_EMPTY) -- TODO confirm */
    int index;
112
} xmlParserNsBucket;
113
114
/*
 * Namespace bookkeeping data: an array of xmlParserNsExtra records plus a
 * hash table of xmlParserNsBucket slots. The code that maintains these
 * fields is outside this part of the file; notes below are hedged.
 */
struct _xmlParserNsData {
115
    /* array of per-namespace extra records */
    xmlParserNsExtra *extra;
116
117
    /* presumably the number of slots in "hash" -- TODO confirm */
    unsigned hashSize;
118
    /* presumably the number of occupied slots -- TODO confirm */
    unsigned hashElems;
119
    /* hash table slots */
    xmlParserNsBucket *hash;
120
121
    /* presumably a running element counter -- TODO confirm */
    unsigned elementId;
122
    /* presumably the index of the default namespace (cf. NS_INDEX_EMPTY) -- TODO confirm */
    int defaultNsIndex;
123
    /* NOTE(review): meaning not derivable from this chunk */
    int minNsIndex;
124
};
125
126
static int
127
xmlParseElementStart(xmlParserCtxtPtr ctxt);
128
129
static void
130
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
131
132
static xmlEntityPtr
133
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
134
135
static const xmlChar *
136
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
137
138
/************************************************************************
139
 *                  *
140
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
141
 *                  *
142
 ************************************************************************/
143
144
#define XML_PARSER_BIG_ENTITY 1000
145
#define XML_PARSER_LOT_ENTITY 5000
146
147
/*
148
 * Constants for protection against abusive entity expansion
149
 * ("billion laughs").
150
 */
151
152
/*
153
 * A certain amount of entity expansion which is always allowed.
154
 */
155
951k
#define XML_PARSER_ALLOWED_EXPANSION 1000000
156
157
/*
158
 * Fixed cost for each entity reference. This crudely models processing time
159
 * as well to protect, for example, against exponential expansion of empty
160
 * or very short entities.
161
 */
162
951k
#define XML_ENT_FIXED_COST 20
163
164
88.6M
#define XML_PARSER_BIG_BUFFER_SIZE 300
165
265k
#define XML_PARSER_BUFFER_SIZE 100
166
48.8k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
167
168
/**
169
 * XML_PARSER_CHUNK_SIZE
170
 *
171
 * When calling GROW that's the minimal amount of data
172
 * the parser expected to have received. It is not a hard
173
 * limit but an optimization when reading strings like Names
174
 * It is not strictly needed as long as inputs available characters
175
 * are followed by 0, which should be provided by the I/O level
176
 */
177
#define XML_PARSER_CHUNK_SIZE 100
178
179
/**
180
 * xmlParserVersion:
181
 *
182
 * Constant string describing the internal version of the library
183
 */
184
const char *const
185
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
186
187
/*
188
 * List of XML prefixed PI allowed by W3C specs
189
 */
190
191
static const char* const xmlW3CPIs[] = {
192
    "xml-stylesheet",
193
    "xml-model",
194
    NULL
195
};
196
197
198
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
199
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
200
                                              const xmlChar **str);
201
202
static void
203
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
204
205
static int
206
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
207
208
/************************************************************************
209
 *                  *
210
 *    Some factorized error routines        *
211
 *                  *
212
 ************************************************************************/
213
214
static void
215
0
xmlErrMemory(xmlParserCtxtPtr ctxt) {
216
0
    xmlCtxtErrMemory(ctxt);
217
0
}
218
219
/**
220
 * xmlErrAttributeDup:
221
 * @ctxt:  an XML parser context
222
 * @prefix:  the attribute prefix
223
 * @localname:  the attribute localname
224
 *
225
 * Handle a redefinition of attribute error
226
 */
227
static void
228
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
229
                   const xmlChar * localname)
230
245k
{
231
245k
    if (prefix == NULL)
232
180k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
233
180k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
234
180k
                   "Attribute %s redefined\n", localname);
235
65.2k
    else
236
65.2k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
237
65.2k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
238
65.2k
                   "Attribute %s:%s redefined\n", prefix, localname);
239
245k
}
240
241
/**
242
 * xmlFatalErrMsg:
243
 * @ctxt:  an XML parser context
244
 * @error:  the error number
245
 * @msg:  the error message
246
 *
247
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
248
 */
249
static void LIBXML_ATTR_FORMAT(3,0)
250
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
251
               const char *msg)
252
1.94M
{
253
1.94M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
254
1.94M
               NULL, NULL, NULL, 0, "%s", msg);
255
1.94M
}
256
257
/**
258
 * xmlWarningMsg:
259
 * @ctxt:  an XML parser context
260
 * @error:  the error number
261
 * @msg:  the error message
262
 * @str1:  extra data
263
 * @str2:  extra data
264
 *
265
 * Handle a warning.
266
 */
267
void LIBXML_ATTR_FORMAT(3,0)
268
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
269
              const char *msg, const xmlChar *str1, const xmlChar *str2)
270
37.2k
{
271
37.2k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
272
37.2k
               str1, str2, NULL, 0, msg, str1, str2);
273
37.2k
}
274
275
/**
276
 * xmlValidityError:
277
 * @ctxt:  an XML parser context
278
 * @error:  the error number
279
 * @msg:  the error message
280
 * @str1:  extra data
281
 *
282
 * Handle a validity error.
283
 */
284
static void LIBXML_ATTR_FORMAT(3,0)
285
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
286
              const char *msg, const xmlChar *str1, const xmlChar *str2)
287
0
{
288
0
    ctxt->valid = 0;
289
290
0
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
291
0
               str1, str2, NULL, 0, msg, str1, str2);
292
0
}
293
294
/**
295
 * xmlFatalErrMsgInt:
296
 * @ctxt:  an XML parser context
297
 * @error:  the error number
298
 * @msg:  the error message
299
 * @val:  an integer value
300
 *
301
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
302
 */
303
static void LIBXML_ATTR_FORMAT(3,0)
304
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
305
                  const char *msg, int val)
306
12.8k
{
307
12.8k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
308
12.8k
               NULL, NULL, NULL, val, msg, val);
309
12.8k
}
310
311
/**
312
 * xmlFatalErrMsgStrIntStr:
313
 * @ctxt:  an XML parser context
314
 * @error:  the error number
315
 * @msg:  the error message
316
 * @str1:  an string info
317
 * @val:  an integer value
318
 * @str2:  an string info
319
 *
320
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
321
 */
322
static void LIBXML_ATTR_FORMAT(3,0)
323
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
324
                  const char *msg, const xmlChar *str1, int val,
325
      const xmlChar *str2)
326
78.4k
{
327
78.4k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
328
78.4k
               str1, str2, NULL, val, msg, str1, val, str2);
329
78.4k
}
330
331
/**
332
 * xmlFatalErrMsgStr:
333
 * @ctxt:  an XML parser context
334
 * @error:  the error number
335
 * @msg:  the error message
336
 * @val:  a string value
337
 *
338
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
339
 */
340
static void LIBXML_ATTR_FORMAT(3,0)
341
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
342
                  const char *msg, const xmlChar * val)
343
136k
{
344
136k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
345
136k
               val, NULL, NULL, 0, msg, val);
346
136k
}
347
348
/**
349
 * xmlErrMsgStr:
350
 * @ctxt:  an XML parser context
351
 * @error:  the error number
352
 * @msg:  the error message
353
 * @val:  a string value
354
 *
355
 * Handle a non fatal parser error
356
 */
357
static void LIBXML_ATTR_FORMAT(3,0)
358
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
359
                  const char *msg, const xmlChar * val)
360
9.99k
{
361
9.99k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
362
9.99k
               val, NULL, NULL, 0, msg, val);
363
9.99k
}
364
365
/**
366
 * xmlNsErr:
367
 * @ctxt:  an XML parser context
368
 * @error:  the error number
369
 * @msg:  the message
370
 * @info1:  extra information string
371
 * @info2:  extra information string
372
 *
373
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
374
 */
375
static void LIBXML_ATTR_FORMAT(3,0)
376
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
377
         const char *msg,
378
         const xmlChar * info1, const xmlChar * info2,
379
         const xmlChar * info3)
380
2.09M
{
381
2.09M
    ctxt->nsWellFormed = 0;
382
383
2.09M
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
384
2.09M
               info1, info2, info3, 0, msg, info1, info2, info3);
385
2.09M
}
386
387
/**
388
 * xmlNsWarn
389
 * @ctxt:  an XML parser context
390
 * @error:  the error number
391
 * @msg:  the message
392
 * @info1:  extra information string
393
 * @info2:  extra information string
394
 *
395
 * Handle a namespace warning error
396
 */
397
static void LIBXML_ATTR_FORMAT(3,0)
398
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
399
         const char *msg,
400
         const xmlChar * info1, const xmlChar * info2,
401
         const xmlChar * info3)
402
80.3k
{
403
80.3k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
404
80.3k
               info1, info2, info3, 0, msg, info1, info2, info3);
405
80.3k
}
406
407
/**
 * xmlSaturatedAdd:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around on overflow.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without wrapping. */
    unsigned long headroom = ULONG_MAX - *dst;

    *dst = (val > headroom) ? ULONG_MAX : (*dst + val);
}
414
415
/**
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator, updated in place
 * @val:  amount to add, as a size_t
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around on overflow.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    /* Largest amount that can still be added without wrapping. */
    unsigned long headroom = ULONG_MAX - *dst;

    if (val > headroom) {
        *dst = ULONG_MAX;
        return;
    }

    *dst += val;
}
422
423
/**
424
 * xmlParserEntityCheck:
425
 * @ctxt:  parser context
426
 * @extra:  sum of unexpanded entity sizes
427
 *
428
 * Check for non-linear entity expansion behaviour.
429
 *
430
 * In some cases like xmlExpandEntityInAttValue, this function is called
431
 * for each, possibly nested entity and its unexpanded content length.
432
 *
433
 * In other cases like xmlParseReference, it's only called for each
434
 * top-level entity with its unexpanded content length plus the sum of
435
 * the unexpanded content lengths (plus fixed cost) of all nested
436
 * entities.
437
 *
438
 * Summing the unexpanded lengths also adds the length of the reference.
439
 * This is by design. Taking the length of the entity name into account
440
 * discourages attacks that try to waste CPU time with abusively long
441
 * entity names. See test/recurse/lol6.xml for example. Each call also
442
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
443
 * short entities.
444
 *
445
 * Returns 1 on error, 0 on success.
446
 */
447
static int
448
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
449
951k
{
450
951k
    unsigned long consumed;
451
951k
    unsigned long *expandedSize;
452
951k
    xmlParserInputPtr input = ctxt->input;
453
951k
    xmlEntityPtr entity = input->entity;
454
455
951k
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
456
0
        return(0);
457
458
    /*
459
     * Compute total consumed bytes so far, including input streams of
460
     * external entities.
461
     */
462
951k
    consumed = input->consumed;
463
951k
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
464
951k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
465
466
951k
    if (entity)
467
0
        expandedSize = &entity->expandedSize;
468
951k
    else
469
951k
        expandedSize = &ctxt->sizeentcopy;
470
471
    /*
472
     * Add extra cost and some fixed cost.
473
     */
474
951k
    xmlSaturatedAdd(expandedSize, extra);
475
951k
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
476
477
    /*
478
     * It's important to always use saturation arithmetic when tracking
479
     * entity sizes to make the size checks reliable. If "sizeentcopy"
480
     * overflows, we have to abort.
481
     */
482
951k
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
483
121
        ((*expandedSize >= ULONG_MAX) ||
484
121
         (*expandedSize / ctxt->maxAmpl > consumed))) {
485
121
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
486
121
                       "Maximum entity amplification factor exceeded, see "
487
121
                       "xmlCtxtSetMaxAmplification.\n");
488
121
        xmlHaltParser(ctxt);
489
121
        return(1);
490
121
    }
491
492
951k
    return(0);
493
951k
}
494
495
/************************************************************************
496
 *                  *
497
 *    Library wide options          *
498
 *                  *
499
 ************************************************************************/
500
501
/**
502
  * xmlHasFeature:
503
  * @feature: the feature to be examined
504
  *
505
  * Examines if the library has been compiled with a given feature.
506
  *
507
  * Returns a non-zero value if the feature exist, otherwise zero.
508
  * Returns zero (0) if the feature does not exist or an unknown
509
  * unknown feature is requested, non-zero otherwise.
510
  */
511
int
512
xmlHasFeature(xmlFeature feature)
513
0
{
514
0
    switch (feature) {
515
0
  case XML_WITH_THREAD:
516
0
#ifdef LIBXML_THREAD_ENABLED
517
0
      return(1);
518
#else
519
      return(0);
520
#endif
521
0
        case XML_WITH_TREE:
522
0
            return(1);
523
0
        case XML_WITH_OUTPUT:
524
0
#ifdef LIBXML_OUTPUT_ENABLED
525
0
            return(1);
526
#else
527
            return(0);
528
#endif
529
0
        case XML_WITH_PUSH:
530
0
#ifdef LIBXML_PUSH_ENABLED
531
0
            return(1);
532
#else
533
            return(0);
534
#endif
535
0
        case XML_WITH_READER:
536
0
#ifdef LIBXML_READER_ENABLED
537
0
            return(1);
538
#else
539
            return(0);
540
#endif
541
0
        case XML_WITH_PATTERN:
542
0
#ifdef LIBXML_PATTERN_ENABLED
543
0
            return(1);
544
#else
545
            return(0);
546
#endif
547
0
        case XML_WITH_WRITER:
548
0
#ifdef LIBXML_WRITER_ENABLED
549
0
            return(1);
550
#else
551
            return(0);
552
#endif
553
0
        case XML_WITH_SAX1:
554
0
#ifdef LIBXML_SAX1_ENABLED
555
0
            return(1);
556
#else
557
            return(0);
558
#endif
559
0
        case XML_WITH_HTTP:
560
#ifdef LIBXML_HTTP_ENABLED
561
            return(1);
562
#else
563
0
            return(0);
564
0
#endif
565
0
        case XML_WITH_VALID:
566
0
#ifdef LIBXML_VALID_ENABLED
567
0
            return(1);
568
#else
569
            return(0);
570
#endif
571
0
        case XML_WITH_HTML:
572
0
#ifdef LIBXML_HTML_ENABLED
573
0
            return(1);
574
#else
575
            return(0);
576
#endif
577
0
        case XML_WITH_LEGACY:
578
0
            return(0);
579
0
        case XML_WITH_C14N:
580
0
#ifdef LIBXML_C14N_ENABLED
581
0
            return(1);
582
#else
583
            return(0);
584
#endif
585
0
        case XML_WITH_CATALOG:
586
0
#ifdef LIBXML_CATALOG_ENABLED
587
0
            return(1);
588
#else
589
            return(0);
590
#endif
591
0
        case XML_WITH_XPATH:
592
0
#ifdef LIBXML_XPATH_ENABLED
593
0
            return(1);
594
#else
595
            return(0);
596
#endif
597
0
        case XML_WITH_XPTR:
598
0
#ifdef LIBXML_XPTR_ENABLED
599
0
            return(1);
600
#else
601
            return(0);
602
#endif
603
0
        case XML_WITH_XINCLUDE:
604
0
#ifdef LIBXML_XINCLUDE_ENABLED
605
0
            return(1);
606
#else
607
            return(0);
608
#endif
609
0
        case XML_WITH_ICONV:
610
#ifdef LIBXML_ICONV_ENABLED
611
            return(1);
612
#else
613
0
            return(0);
614
0
#endif
615
0
        case XML_WITH_ISO8859X:
616
0
#ifdef LIBXML_ISO8859X_ENABLED
617
0
            return(1);
618
#else
619
            return(0);
620
#endif
621
0
        case XML_WITH_UNICODE:
622
0
            return(0);
623
0
        case XML_WITH_REGEXP:
624
0
#ifdef LIBXML_REGEXP_ENABLED
625
0
            return(1);
626
#else
627
            return(0);
628
#endif
629
0
        case XML_WITH_AUTOMATA:
630
0
#ifdef LIBXML_REGEXP_ENABLED
631
0
            return(1);
632
#else
633
            return(0);
634
#endif
635
0
        case XML_WITH_EXPR:
636
#ifdef LIBXML_EXPR_ENABLED
637
            return(1);
638
#else
639
0
            return(0);
640
0
#endif
641
0
        case XML_WITH_RELAXNG:
642
0
#ifdef LIBXML_RELAXNG_ENABLED
643
0
            return(1);
644
#else
645
            return(0);
646
#endif
647
0
        case XML_WITH_SCHEMAS:
648
0
#ifdef LIBXML_SCHEMAS_ENABLED
649
0
            return(1);
650
#else
651
            return(0);
652
#endif
653
0
        case XML_WITH_SCHEMATRON:
654
0
#ifdef LIBXML_SCHEMATRON_ENABLED
655
0
            return(1);
656
#else
657
            return(0);
658
#endif
659
0
        case XML_WITH_MODULES:
660
0
#ifdef LIBXML_MODULES_ENABLED
661
0
            return(1);
662
#else
663
            return(0);
664
#endif
665
0
        case XML_WITH_DEBUG:
666
0
#ifdef LIBXML_DEBUG_ENABLED
667
0
            return(1);
668
#else
669
            return(0);
670
#endif
671
0
        case XML_WITH_DEBUG_MEM:
672
0
            return(0);
673
0
        case XML_WITH_ZLIB:
674
#ifdef LIBXML_ZLIB_ENABLED
675
            return(1);
676
#else
677
0
            return(0);
678
0
#endif
679
0
        case XML_WITH_LZMA:
680
#ifdef LIBXML_LZMA_ENABLED
681
            return(1);
682
#else
683
0
            return(0);
684
0
#endif
685
0
        case XML_WITH_ICU:
686
#ifdef LIBXML_ICU_ENABLED
687
            return(1);
688
#else
689
0
            return(0);
690
0
#endif
691
0
        default:
692
0
      break;
693
0
     }
694
0
     return(0);
695
0
}
696
697
/************************************************************************
698
 *                  *
699
 *      Simple string buffer        *
700
 *                  *
701
 ************************************************************************/
702
703
/*
 * Growable byte buffer with a hard size limit and a sticky error code.
 * Errors are recorded in "code" instead of being reported immediately;
 * xmlSBufFinish and xmlSBufCleanup report them.
 */
typedef struct {
704
    /* buffer storage, NULL until xmlSBufGrow allocates it */
    xmlChar *mem;
705
    /* number of bytes currently stored */
    unsigned size;
706
    /* allocated size of "mem"; kept larger than "size" so a terminator fits */
    unsigned cap; /* size < cap */
707
    /* limit on "size"; appends that would exceed it set XML_ERR_RESOURCE_LIMIT */
    unsigned max; /* size <= max */
708
    /* XML_ERR_OK, or the error recorded by a failed append/grow */
    xmlParserErrors code;
709
} xmlSBuf;
710
711
static void
712
24.9M
xmlSBufInit(xmlSBuf *buf, unsigned max) {
713
24.9M
    buf->mem = NULL;
714
24.9M
    buf->size = 0;
715
24.9M
    buf->cap = 0;
716
24.9M
    buf->max = max;
717
24.9M
    buf->code = XML_ERR_OK;
718
24.9M
}
719
720
static int
721
332k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
722
332k
    xmlChar *mem;
723
332k
    unsigned cap;
724
725
332k
    if (len >= UINT_MAX / 2 - buf->size) {
726
0
        if (buf->code == XML_ERR_OK)
727
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
728
0
        return(-1);
729
0
    }
730
731
332k
    cap = (buf->size + len) * 2;
732
332k
    if (cap < 240)
733
275k
        cap = 240;
734
735
332k
    mem = xmlRealloc(buf->mem, cap);
736
332k
    if (mem == NULL) {
737
0
        buf->code = XML_ERR_NO_MEMORY;
738
0
        return(-1);
739
0
    }
740
741
332k
    buf->mem = mem;
742
332k
    buf->cap = cap;
743
744
332k
    return(0);
745
332k
}
746
747
static void
748
6.01M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
749
6.01M
    if (buf->max - buf->size < len) {
750
0
        if (buf->code == XML_ERR_OK)
751
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
752
0
        return;
753
0
    }
754
755
6.01M
    if (buf->cap - buf->size <= len) {
756
329k
        if (xmlSBufGrow(buf, len) < 0)
757
0
            return;
758
329k
    }
759
760
6.01M
    if (len > 0)
761
6.01M
        memcpy(buf->mem + buf->size, str, len);
762
6.01M
    buf->size += len;
763
6.01M
}
764
765
static void
766
4.65M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
767
4.65M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
768
4.65M
}
769
770
static void
771
65.8k
xmlSBufAddChar(xmlSBuf *buf, int c) {
772
65.8k
    xmlChar *end;
773
774
65.8k
    if (buf->max - buf->size < 4) {
775
0
        if (buf->code == XML_ERR_OK)
776
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
777
0
        return;
778
0
    }
779
780
65.8k
    if (buf->cap - buf->size <= 4) {
781
2.74k
        if (xmlSBufGrow(buf, 4) < 0)
782
0
            return;
783
2.74k
    }
784
785
65.8k
    end = buf->mem + buf->size;
786
787
65.8k
    if (c < 0x80) {
788
19.2k
        *end = (xmlChar) c;
789
19.2k
        buf->size += 1;
790
46.6k
    } else {
791
46.6k
        buf->size += xmlCopyCharMultiByte(end, c);
792
46.6k
    }
793
65.8k
}
794
795
static void
796
3.84M
xmlSBufAddReplChar(xmlSBuf *buf) {
797
3.84M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
798
3.84M
}
799
800
static void
801
0
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
802
0
    if (buf->code == XML_ERR_NO_MEMORY)
803
0
        xmlCtxtErrMemory(ctxt);
804
0
    else
805
0
        xmlFatalErr(ctxt, buf->code, errMsg);
806
0
}
807
808
static xmlChar *
809
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
810
314k
              const char *errMsg) {
811
314k
    if (buf->mem == NULL) {
812
28.4k
        buf->mem = xmlMalloc(1);
813
28.4k
        if (buf->mem == NULL) {
814
0
            buf->code = XML_ERR_NO_MEMORY;
815
28.4k
        } else {
816
28.4k
            buf->mem[0] = 0;
817
28.4k
        }
818
286k
    } else {
819
286k
        buf->mem[buf->size] = 0;
820
286k
    }
821
822
314k
    if (buf->code == XML_ERR_OK) {
823
314k
        if (sizeOut != NULL)
824
226k
            *sizeOut = buf->size;
825
314k
        return(buf->mem);
826
314k
    }
827
828
0
    xmlSBufReportError(buf, ctxt, errMsg);
829
830
0
    xmlFree(buf->mem);
831
832
0
    if (sizeOut != NULL)
833
0
        *sizeOut = 0;
834
0
    return(NULL);
835
314k
}
836
837
static void
838
24.6M
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
839
24.6M
    if (buf->code != XML_ERR_OK)
840
0
        xmlSBufReportError(buf, ctxt, errMsg);
841
842
24.6M
    xmlFree(buf->mem);
843
24.6M
}
844
845
static int
846
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
847
7.67M
                    const char *errMsg) {
848
7.67M
    int c = str[0];
849
7.67M
    int c1 = str[1];
850
851
7.67M
    if ((c1 & 0xC0) != 0x80)
852
1.73M
        goto encoding_error;
853
854
5.93M
    if (c < 0xE0) {
855
        /* 2-byte sequence */
856
1.79M
        if (c < 0xC2)
857
574k
            goto encoding_error;
858
859
1.22M
        return(2);
860
4.13M
    } else {
861
4.13M
        int c2 = str[2];
862
863
4.13M
        if ((c2 & 0xC0) != 0x80)
864
23.3k
            goto encoding_error;
865
866
4.11M
        if (c < 0xF0) {
867
            /* 3-byte sequence */
868
3.98M
            if (c == 0xE0) {
869
                /* overlong */
870
159k
                if (c1 < 0xA0)
871
1.70k
                    goto encoding_error;
872
3.82M
            } else if (c == 0xED) {
873
                /* surrogate */
874
15.7k
                if (c1 >= 0xA0)
875
7.23k
                    goto encoding_error;
876
3.81M
            } else if (c == 0xEF) {
877
                /* U+FFFE and U+FFFF are invalid Chars */
878
1.90M
                if ((c1 == 0xBF) && (c2 >= 0xBE))
879
36.8k
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
880
1.90M
            }
881
882
3.97M
            return(3);
883
3.98M
        } else {
884
            /* 4-byte sequence */
885
130k
            if ((str[3] & 0xC0) != 0x80)
886
6.85k
                goto encoding_error;
887
124k
            if (c == 0xF0) {
888
                /* overlong */
889
4.11k
                if (c1 < 0x90)
890
1.57k
                    goto encoding_error;
891
120k
            } else if (c >= 0xF4) {
892
                /* greater than 0x10FFFF */
893
12.0k
                if ((c > 0xF4) || (c1 >= 0x90))
894
3.88k
                    goto encoding_error;
895
12.0k
            }
896
897
118k
            return(4);
898
124k
        }
899
4.11M
    }
900
901
2.35M
encoding_error:
902
    /* Only report the first error */
903
2.35M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
904
5.60k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
905
5.60k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
906
5.60k
    }
907
908
2.35M
    return(0);
909
5.93M
}
910
911
/************************************************************************
912
 *                  *
913
 *    SAX2 defaulted attributes handling      *
914
 *                  *
915
 ************************************************************************/
916
917
/**
918
 * xmlCtxtInitializeLate:
919
 * @ctxt:  an XML parser context
920
 *
921
 * Final initialization of the parser context before starting to parse.
922
 *
923
 * This accounts for users modifying struct members of parser context
924
 * directly.
925
 */
926
static void
927
256k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
928
256k
    xmlSAXHandlerPtr sax;
929
930
    /* Avoid unused variable warning if features are disabled. */
931
256k
    (void) sax;
932
933
    /*
934
     * Changing the SAX struct directly is still widespread practice
935
     * in internal and external code.
936
     */
937
256k
    if (ctxt == NULL) return;
938
256k
    sax = ctxt->sax;
939
256k
#ifdef LIBXML_SAX1_ENABLED
940
    /*
941
     * Only enable SAX2 if there SAX2 element handlers, except when there
942
     * are no element handlers at all.
943
     */
944
256k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
945
256k
        (sax) &&
946
256k
        (sax->initialized == XML_SAX2_MAGIC) &&
947
256k
        ((sax->startElementNs != NULL) ||
948
0
         (sax->endElementNs != NULL) ||
949
0
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
950
256k
        ctxt->sax2 = 1;
951
#else
952
    ctxt->sax2 = 1;
953
#endif /* LIBXML_SAX1_ENABLED */
954
955
    /*
956
     * Some users replace the dictionary directly in the context struct.
957
     * We really need an API function to do that cleanly.
958
     */
959
256k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
960
256k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
961
256k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
962
256k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
963
256k
    (ctxt->str_xml_ns == NULL)) {
964
0
        xmlErrMemory(ctxt);
965
0
    }
966
967
256k
    xmlDictSetLimit(ctxt->dict,
968
256k
                    (ctxt->options & XML_PARSE_HUGE) ?
969
249k
                        0 :
970
256k
                        XML_MAX_DICTIONARY_LIMIT);
971
256k
}
972
973
/*
 * One defaulted attribute, stored in the attrs array of xmlDefAttrs.
 * The code filling these fields is outside this part of the file, so the
 * per-field notes are assumptions drawn from the names.
 */
typedef struct {
974
    /* presumably the attribute prefix -- TODO confirm */
    xmlHashedString prefix;
975
    /* presumably the attribute local name -- TODO confirm */
    xmlHashedString name;
976
    /* presumably the default value -- TODO confirm */
    xmlHashedString value;
977
    /* presumably points at the end of the value string -- TODO confirm */
    const xmlChar *valueEnd;
978
    /* presumably set when the declaration is external -- TODO confirm */
    int external;
979
    /* NOTE(review): looks related to entity expansion accounting -- confirm */
    int expandedSize;
980
} xmlDefAttr;
981
982
/*
 * Variable-length list of defaulted attributes for one element. The
 * attrs array is a flexible array member when compiling as C99 or later
 * and a one-element array otherwise.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
983
typedef xmlDefAttrs *xmlDefAttrsPtr;
984
struct _xmlDefAttrs {
985
    int nbAttrs;  /* number of defaulted attributes on that element */
986
    int maxAttrs;       /* the size of the array */
987
#if __STDC_VERSION__ >= 199901L
988
    /* Using a C99 flexible array member avoids UBSan errors. */
989
    xmlDefAttr attrs[] ATTRIBUTE_COUNTED_BY(maxAttrs);
990
#else
991
    /* pre-C99 fallback: trailing array declared with a single element */
    xmlDefAttr attrs[1];
992
#endif
993
};
994
995
/**
996
 * xmlAttrNormalizeSpace:
997
 * @src: the source string
998
 * @dst: the target string
999
 *
1000
 * Normalize the space in non CDATA attribute values:
1001
 * If the attribute type is not CDATA, then the XML processor MUST further
1002
 * process the normalized attribute value by discarding any leading and
1003
 * trailing space (#x20) characters, and by replacing sequences of space
1004
 * (#x20) characters by a single space (#x20) character.
1005
 * Note that the size of dst need to be at least src, and if one doesn't need
1006
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1007
 * passing src as dst is just fine.
1008
 *
1009
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1010
 *         is needed.
1011
 */
1012
static xmlChar *
1013
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1014
38.2k
{
1015
38.2k
    if ((src == NULL) || (dst == NULL))
1016
0
        return(NULL);
1017
1018
53.4k
    while (*src == 0x20) src++;
1019
753k
    while (*src != 0) {
1020
715k
  if (*src == 0x20) {
1021
126k
      while (*src == 0x20) src++;
1022
35.3k
      if (*src != 0)
1023
28.6k
    *dst++ = 0x20;
1024
680k
  } else {
1025
680k
      *dst++ = *src++;
1026
680k
  }
1027
715k
    }
1028
38.2k
    *dst = 0;
1029
38.2k
    if (dst == src)
1030
24.6k
       return(NULL);
1031
13.5k
    return(dst);
1032
38.2k
}
1033
1034
/**
1035
 * xmlAddDefAttrs:
1036
 * @ctxt:  an XML parser context
1037
 * @fullname:  the element fullname
1038
 * @fullattr:  the attribute fullname
1039
 * @value:  the attribute value
1040
 *
1041
 * Add a defaulted attribute for an element
1042
 */
1043
static void
1044
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1045
               const xmlChar *fullname,
1046
               const xmlChar *fullattr,
1047
41.9k
               const xmlChar *value) {
1048
41.9k
    xmlDefAttrsPtr defaults;
1049
41.9k
    xmlDefAttr *attr;
1050
41.9k
    int len, expandedSize;
1051
41.9k
    xmlHashedString name;
1052
41.9k
    xmlHashedString prefix;
1053
41.9k
    xmlHashedString hvalue;
1054
41.9k
    const xmlChar *localname;
1055
1056
    /*
1057
     * Allows to detect attribute redefinitions
1058
     */
1059
41.9k
    if (ctxt->attsSpecial != NULL) {
1060
38.4k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1061
10.3k
      return;
1062
38.4k
    }
1063
1064
31.6k
    if (ctxt->attsDefault == NULL) {
1065
3.50k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1066
3.50k
  if (ctxt->attsDefault == NULL)
1067
0
      goto mem_error;
1068
3.50k
    }
1069
1070
    /*
1071
     * split the element name into prefix:localname , the string found
1072
     * are within the DTD and then not associated to namespace names.
1073
     */
1074
31.6k
    localname = xmlSplitQName3(fullname, &len);
1075
31.6k
    if (localname == NULL) {
1076
29.1k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1077
29.1k
  prefix.name = NULL;
1078
29.1k
    } else {
1079
2.47k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1080
2.47k
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1081
2.47k
        if (prefix.name == NULL)
1082
0
            goto mem_error;
1083
2.47k
    }
1084
31.6k
    if (name.name == NULL)
1085
0
        goto mem_error;
1086
1087
    /*
1088
     * make sure there is some storage
1089
     */
1090
31.6k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1091
31.6k
    if ((defaults == NULL) ||
1092
25.1k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1093
10.2k
        xmlDefAttrsPtr temp;
1094
10.2k
        int newSize;
1095
1096
10.2k
        if (defaults == NULL) {
1097
6.42k
            newSize = 4;
1098
6.42k
        } else {
1099
3.82k
            if ((defaults->maxAttrs >= XML_MAX_ATTRS) ||
1100
3.82k
                ((size_t) defaults->maxAttrs >
1101
3.82k
                     SIZE_MAX / 2 / sizeof(temp[0]) - sizeof(*defaults)))
1102
0
                goto mem_error;
1103
1104
3.82k
            if (defaults->maxAttrs > XML_MAX_ATTRS / 2)
1105
0
                newSize = XML_MAX_ATTRS;
1106
3.82k
            else
1107
3.82k
                newSize = defaults->maxAttrs * 2;
1108
3.82k
        }
1109
10.2k
        temp = xmlRealloc(defaults,
1110
10.2k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1111
10.2k
  if (temp == NULL)
1112
0
      goto mem_error;
1113
10.2k
        if (defaults == NULL)
1114
6.42k
            temp->nbAttrs = 0;
1115
10.2k
  temp->maxAttrs = newSize;
1116
10.2k
        defaults = temp;
1117
10.2k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1118
10.2k
                          defaults, NULL) < 0) {
1119
0
      xmlFree(defaults);
1120
0
      goto mem_error;
1121
0
  }
1122
10.2k
    }
1123
1124
    /*
1125
     * Split the attribute name into prefix:localname , the string found
1126
     * are within the DTD and hen not associated to namespace names.
1127
     */
1128
31.6k
    localname = xmlSplitQName3(fullattr, &len);
1129
31.6k
    if (localname == NULL) {
1130
14.4k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1131
14.4k
  prefix.name = NULL;
1132
17.1k
    } else {
1133
17.1k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1134
17.1k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1135
17.1k
        if (prefix.name == NULL)
1136
0
            goto mem_error;
1137
17.1k
    }
1138
31.6k
    if (name.name == NULL)
1139
0
        goto mem_error;
1140
1141
    /* intern the string and precompute the end */
1142
31.6k
    len = strlen((const char *) value);
1143
31.6k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1144
31.6k
    if (hvalue.name == NULL)
1145
0
        goto mem_error;
1146
1147
31.6k
    expandedSize = strlen((const char *) name.name);
1148
31.6k
    if (prefix.name != NULL)
1149
17.1k
        expandedSize += strlen((const char *) prefix.name);
1150
31.6k
    expandedSize += len;
1151
1152
31.6k
    attr = &defaults->attrs[defaults->nbAttrs++];
1153
31.6k
    attr->name = name;
1154
31.6k
    attr->prefix = prefix;
1155
31.6k
    attr->value = hvalue;
1156
31.6k
    attr->valueEnd = hvalue.name + len;
1157
31.6k
    attr->external = PARSER_EXTERNAL(ctxt);
1158
31.6k
    attr->expandedSize = expandedSize;
1159
1160
31.6k
    return;
1161
1162
0
mem_error:
1163
0
    xmlErrMemory(ctxt);
1164
0
}
1165
1166
/**
1167
 * xmlAddSpecialAttr:
1168
 * @ctxt:  an XML parser context
1169
 * @fullname:  the element fullname
1170
 * @fullattr:  the attribute fullname
1171
 * @type:  the attribute type
1172
 *
1173
 * Register this attribute type
1174
 */
1175
static void
1176
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1177
      const xmlChar *fullname,
1178
      const xmlChar *fullattr,
1179
      int type)
1180
48.4k
{
1181
48.4k
    if (ctxt->attsSpecial == NULL) {
1182
3.85k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1183
3.85k
  if (ctxt->attsSpecial == NULL)
1184
0
      goto mem_error;
1185
3.85k
    }
1186
1187
48.4k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1188
48.4k
                    XML_INT_TO_PTR(type)) < 0)
1189
0
        goto mem_error;
1190
48.4k
    return;
1191
1192
48.4k
mem_error:
1193
0
    xmlErrMemory(ctxt);
1194
0
}
1195
1196
/**
1197
 * xmlCleanSpecialAttrCallback:
1198
 *
1199
 * Removes CDATA attributes from the special attribute table
1200
 */
1201
static void
1202
xmlCleanSpecialAttrCallback(void *payload, void *data,
1203
                            const xmlChar *fullname, const xmlChar *fullattr,
1204
34.9k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1205
34.9k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1206
1207
34.9k
    if (XML_PTR_TO_INT(payload) == XML_ATTRIBUTE_CDATA) {
1208
3.04k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1209
3.04k
    }
1210
34.9k
}
1211
1212
/**
1213
 * xmlCleanSpecialAttr:
1214
 * @ctxt:  an XML parser context
1215
 *
1216
 * Trim the list of attributes defined to remove all those of type
1217
 * CDATA as they are not special. This call should be done when finishing
1218
 * to parse the DTD and before starting to parse the document root.
1219
 */
1220
static void
1221
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1222
8.66k
{
1223
8.66k
    if (ctxt->attsSpecial == NULL)
1224
4.80k
        return;
1225
1226
3.85k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1227
1228
3.85k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1229
277
        xmlHashFree(ctxt->attsSpecial, NULL);
1230
277
        ctxt->attsSpecial = NULL;
1231
277
    }
1232
3.85k
}
1233
1234
/**
1235
 * xmlCheckLanguageID:
1236
 * @lang:  pointer to the string value
1237
 *
1238
 * DEPRECATED: Internal function, do not use.
1239
 *
1240
 * Checks that the value conforms to the LanguageID production:
1241
 *
1242
 * NOTE: this is somewhat deprecated, those productions were removed from
1243
 *       the XML Second edition.
1244
 *
1245
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1246
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1247
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1248
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1249
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1250
 * [38] Subcode ::= ([a-z] | [A-Z])+
1251
 *
1252
 * The current REC reference the successors of RFC 1766, currently 5646
1253
 *
1254
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1255
 * langtag       = language
1256
 *                 ["-" script]
1257
 *                 ["-" region]
1258
 *                 *("-" variant)
1259
 *                 *("-" extension)
1260
 *                 ["-" privateuse]
1261
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1262
 *                 ["-" extlang]       ; sometimes followed by
1263
 *                                     ; extended language subtags
1264
 *               / 4ALPHA              ; or reserved for future use
1265
 *               / 5*8ALPHA            ; or registered language subtag
1266
 *
1267
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1268
 *                 *2("-" 3ALPHA)      ; permanently reserved
1269
 *
1270
 * script        = 4ALPHA              ; ISO 15924 code
1271
 *
1272
 * region        = 2ALPHA              ; ISO 3166-1 code
1273
 *               / 3DIGIT              ; UN M.49 code
1274
 *
1275
 * variant       = 5*8alphanum         ; registered variants
1276
 *               / (DIGIT 3alphanum)
1277
 *
1278
 * extension     = singleton 1*("-" (2*8alphanum))
1279
 *
1280
 *                                     ; Single alphanumerics
1281
 *                                     ; "x" reserved for private use
1282
 * singleton     = DIGIT               ; 0 - 9
1283
 *               / %x41-57             ; A - W
1284
 *               / %x59-5A             ; Y - Z
1285
 *               / %x61-77             ; a - w
1286
 *               / %x79-7A             ; y - z
1287
 *
1288
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1289
 * The parser below doesn't try to cope with extension or privateuse
1290
 * that could be added but that's not interoperable anyway
1291
 *
1292
 * Returns 1 if correct 0 otherwise
1293
 **/
1294
int
1295
xmlCheckLanguageID(const xmlChar * lang)
1296
0
{
1297
0
    const xmlChar *cur = lang, *nxt;
1298
1299
0
    if (cur == NULL)
1300
0
        return (0);
1301
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1302
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1303
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1304
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1305
        /*
1306
         * Still allow IANA code and user code which were coming
1307
         * from the previous version of the XML-1.0 specification
1308
         * it's deprecated but we should not fail
1309
         */
1310
0
        cur += 2;
1311
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1312
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1313
0
            cur++;
1314
0
        return(cur[0] == 0);
1315
0
    }
1316
0
    nxt = cur;
1317
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1318
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1319
0
           nxt++;
1320
0
    if (nxt - cur >= 4) {
1321
        /*
1322
         * Reserved
1323
         */
1324
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1325
0
            return(0);
1326
0
        return(1);
1327
0
    }
1328
0
    if (nxt - cur < 2)
1329
0
        return(0);
1330
    /* we got an ISO 639 code */
1331
0
    if (nxt[0] == 0)
1332
0
        return(1);
1333
0
    if (nxt[0] != '-')
1334
0
        return(0);
1335
1336
0
    nxt++;
1337
0
    cur = nxt;
1338
    /* now we can have extlang or script or region or variant */
1339
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1340
0
        goto region_m49;
1341
1342
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1343
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1344
0
           nxt++;
1345
0
    if (nxt - cur == 4)
1346
0
        goto script;
1347
0
    if (nxt - cur == 2)
1348
0
        goto region;
1349
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1350
0
        goto variant;
1351
0
    if (nxt - cur != 3)
1352
0
        return(0);
1353
    /* we parsed an extlang */
1354
0
    if (nxt[0] == 0)
1355
0
        return(1);
1356
0
    if (nxt[0] != '-')
1357
0
        return(0);
1358
1359
0
    nxt++;
1360
0
    cur = nxt;
1361
    /* now we can have script or region or variant */
1362
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1363
0
        goto region_m49;
1364
1365
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1366
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1367
0
           nxt++;
1368
0
    if (nxt - cur == 2)
1369
0
        goto region;
1370
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1371
0
        goto variant;
1372
0
    if (nxt - cur != 4)
1373
0
        return(0);
1374
    /* we parsed a script */
1375
0
script:
1376
0
    if (nxt[0] == 0)
1377
0
        return(1);
1378
0
    if (nxt[0] != '-')
1379
0
        return(0);
1380
1381
0
    nxt++;
1382
0
    cur = nxt;
1383
    /* now we can have region or variant */
1384
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1385
0
        goto region_m49;
1386
1387
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1388
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1389
0
           nxt++;
1390
1391
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1392
0
        goto variant;
1393
0
    if (nxt - cur != 2)
1394
0
        return(0);
1395
    /* we parsed a region */
1396
0
region:
1397
0
    if (nxt[0] == 0)
1398
0
        return(1);
1399
0
    if (nxt[0] != '-')
1400
0
        return(0);
1401
1402
0
    nxt++;
1403
0
    cur = nxt;
1404
    /* now we can just have a variant */
1405
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1406
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1407
0
           nxt++;
1408
1409
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1410
0
        return(0);
1411
1412
    /* we parsed a variant */
1413
0
variant:
1414
0
    if (nxt[0] == 0)
1415
0
        return(1);
1416
0
    if (nxt[0] != '-')
1417
0
        return(0);
1418
    /* extensions and private use subtags not checked */
1419
0
    return (1);
1420
1421
0
region_m49:
1422
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1423
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1424
0
        nxt += 3;
1425
0
        goto region;
1426
0
    }
1427
0
    return(0);
1428
0
}
1429
1430
/************************************************************************
1431
 *                  *
1432
 *    Parser stacks related functions and macros    *
1433
 *                  *
1434
 ************************************************************************/
1435
1436
static xmlChar *
1437
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1438
1439
/**
1440
 * xmlParserNsCreate:
1441
 *
1442
 * Create a new namespace database.
1443
 *
1444
 * Returns the new obejct.
1445
 */
1446
xmlParserNsData *
1447
256k
xmlParserNsCreate(void) {
1448
256k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1449
1450
256k
    if (nsdb == NULL)
1451
0
        return(NULL);
1452
256k
    memset(nsdb, 0, sizeof(*nsdb));
1453
256k
    nsdb->defaultNsIndex = INT_MAX;
1454
1455
256k
    return(nsdb);
1456
256k
}
1457
1458
/**
1459
 * xmlParserNsFree:
1460
 * @nsdb: namespace database
1461
 *
1462
 * Free a namespace database.
1463
 */
1464
void
1465
256k
xmlParserNsFree(xmlParserNsData *nsdb) {
1466
256k
    if (nsdb == NULL)
1467
0
        return;
1468
1469
256k
    xmlFree(nsdb->extra);
1470
256k
    xmlFree(nsdb->hash);
1471
256k
    xmlFree(nsdb);
1472
256k
}
1473
1474
/**
1475
 * xmlParserNsReset:
1476
 * @nsdb: namespace database
1477
 *
1478
 * Reset a namespace database.
1479
 */
1480
static void
1481
6.46k
xmlParserNsReset(xmlParserNsData *nsdb) {
1482
6.46k
    if (nsdb == NULL)
1483
0
        return;
1484
1485
6.46k
    nsdb->hashElems = 0;
1486
6.46k
    nsdb->elementId = 0;
1487
6.46k
    nsdb->defaultNsIndex = INT_MAX;
1488
1489
6.46k
    if (nsdb->hash)
1490
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1491
6.46k
}
1492
1493
/**
1494
 * xmlParserStartElement:
1495
 * @nsdb: namespace database
1496
 *
1497
 * Signal that a new element has started.
1498
 *
1499
 * Returns 0 on success, -1 if the element counter overflowed.
1500
 */
1501
static int
1502
39.0M
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1503
39.0M
    if (nsdb->elementId == UINT_MAX)
1504
0
        return(-1);
1505
39.0M
    nsdb->elementId++;
1506
1507
39.0M
    return(0);
1508
39.0M
}
1509
1510
/**
1511
 * xmlParserNsLookup:
1512
 * @ctxt: parser context
1513
 * @prefix: namespace prefix
1514
 * @bucketPtr: optional bucket (return value)
1515
 *
1516
 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1517
 * be set to the matching bucket, or the first empty bucket if no match
1518
 * was found.
1519
 *
1520
 * Returns the namespace index on success, INT_MAX if no namespace was
1521
 * found.
1522
 */
1523
static int
1524
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1525
50.8M
                  xmlParserNsBucket **bucketPtr) {
1526
50.8M
    xmlParserNsBucket *bucket, *tombstone;
1527
50.8M
    unsigned index, hashValue;
1528
1529
50.8M
    if (prefix->name == NULL)
1530
25.2M
        return(ctxt->nsdb->defaultNsIndex);
1531
1532
25.6M
    if (ctxt->nsdb->hashSize == 0)
1533
526k
        return(INT_MAX);
1534
1535
25.0M
    hashValue = prefix->hashValue;
1536
25.0M
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1537
25.0M
    bucket = &ctxt->nsdb->hash[index];
1538
25.0M
    tombstone = NULL;
1539
1540
26.6M
    while (bucket->hashValue) {
1541
24.7M
        if (bucket->index == INT_MAX) {
1542
321k
            if (tombstone == NULL)
1543
313k
                tombstone = bucket;
1544
24.4M
        } else if (bucket->hashValue == hashValue) {
1545
23.2M
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1546
23.2M
                if (bucketPtr != NULL)
1547
1.12M
                    *bucketPtr = bucket;
1548
23.2M
                return(bucket->index);
1549
23.2M
            }
1550
23.2M
        }
1551
1552
1.58M
        index++;
1553
1.58M
        bucket++;
1554
1.58M
        if (index == ctxt->nsdb->hashSize) {
1555
20.4k
            index = 0;
1556
20.4k
            bucket = ctxt->nsdb->hash;
1557
20.4k
        }
1558
1.58M
    }
1559
1560
1.87M
    if (bucketPtr != NULL)
1561
1.00M
        *bucketPtr = tombstone ? tombstone : bucket;
1562
1.87M
    return(INT_MAX);
1563
25.0M
}
1564
1565
/**
1566
 * xmlParserNsLookupUri:
1567
 * @ctxt: parser context
1568
 * @prefix: namespace prefix
1569
 *
1570
 * Lookup namespace URI with given prefix.
1571
 *
1572
 * Returns the namespace URI on success, NULL if no namespace was found.
1573
 */
1574
static const xmlChar *
1575
39.0M
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1576
39.0M
    const xmlChar *ret;
1577
39.0M
    int nsIndex;
1578
1579
39.0M
    if (prefix->name == ctxt->str_xml)
1580
39.5k
        return(ctxt->str_xml_ns);
1581
1582
    /*
1583
     * minNsIndex is used when building an entity tree. We must
1584
     * ignore namespaces declared outside the entity.
1585
     */
1586
39.0M
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1587
39.0M
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1588
20.4M
        return(NULL);
1589
1590
18.5M
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1591
18.5M
    if (ret[0] == 0)
1592
36.7k
        ret = NULL;
1593
18.5M
    return(ret);
1594
39.0M
}
1595
1596
/**
1597
 * xmlParserNsLookupSax:
1598
 * @ctxt: parser context
1599
 * @prefix: namespace prefix
1600
 *
1601
 * Lookup extra data for the given prefix. This returns data stored
1602
 * with xmlParserNsUdpateSax.
1603
 *
1604
 * Returns the data on success, NULL if no namespace was found.
1605
 */
1606
void *
1607
1.00M
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1608
1.00M
    xmlHashedString hprefix;
1609
1.00M
    int nsIndex;
1610
1611
1.00M
    if (prefix == ctxt->str_xml)
1612
0
        return(NULL);
1613
1614
1.00M
    hprefix.name = prefix;
1615
1.00M
    if (prefix != NULL)
1616
1.00M
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1617
1.32k
    else
1618
1.32k
        hprefix.hashValue = 0;
1619
1.00M
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1620
1.00M
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1621
0
        return(NULL);
1622
1623
1.00M
    return(ctxt->nsdb->extra[nsIndex].saxData);
1624
1.00M
}
1625
1626
/**
1627
 * xmlParserNsUpdateSax:
1628
 * @ctxt: parser context
1629
 * @prefix: namespace prefix
1630
 * @saxData: extra data for SAX handler
1631
 *
1632
 * Sets or updates extra data for the given prefix. This value will be
1633
 * returned by xmlParserNsLookupSax as long as the namespace with the
1634
 * given prefix is in scope.
1635
 *
1636
 * Returns the data on success, NULL if no namespace was found.
1637
 */
1638
int
1639
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1640
12.6k
                     void *saxData) {
1641
12.6k
    xmlHashedString hprefix;
1642
12.6k
    int nsIndex;
1643
1644
12.6k
    if (prefix == ctxt->str_xml)
1645
0
        return(-1);
1646
1647
12.6k
    hprefix.name = prefix;
1648
12.6k
    if (prefix != NULL)
1649
11.8k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1650
825
    else
1651
825
        hprefix.hashValue = 0;
1652
12.6k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1653
12.6k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1654
0
        return(-1);
1655
1656
12.6k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1657
12.6k
    return(0);
1658
12.6k
}
1659
1660
/**
1661
 * xmlParserNsGrow:
1662
 * @ctxt: parser context
1663
 *
1664
 * Grows the namespace tables.
1665
 *
1666
 * Returns 0 on success, -1 if a memory allocation failed.
1667
 */
1668
static int
1669
618k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1670
618k
    const xmlChar **table;
1671
618k
    xmlParserNsExtra *extra;
1672
618k
    int newSize;
1673
1674
618k
    newSize = xmlGrowCapacity(ctxt->nsMax,
1675
618k
                              sizeof(table[0]) + sizeof(extra[0]),
1676
618k
                              16, XML_MAX_ITEMS);
1677
618k
    if (newSize < 0)
1678
0
        goto error;
1679
1680
618k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1681
618k
    if (table == NULL)
1682
0
        goto error;
1683
618k
    ctxt->nsTab = table;
1684
1685
618k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1686
618k
    if (extra == NULL)
1687
0
        goto error;
1688
618k
    ctxt->nsdb->extra = extra;
1689
1690
618k
    ctxt->nsMax = newSize;
1691
618k
    return(0);
1692
1693
0
error:
1694
0
    xmlErrMemory(ctxt);
1695
0
    return(-1);
1696
618k
}
1697
1698
/**
1699
 * xmlParserNsPush:
1700
 * @ctxt: parser context
1701
 * @prefix: prefix with hash value
1702
 * @uri: uri with hash value
1703
 * @saxData: extra data for SAX handler
1704
 * @defAttr: whether the namespace comes from a default attribute
1705
 *
1706
 * Push a new namespace on the table.
1707
 *
1708
 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1709
 * -1 if a memory allocation failed.
1710
 */
1711
static int
1712
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1713
1.82M
                const xmlHashedString *uri, void *saxData, int defAttr) {
1714
1.82M
    xmlParserNsBucket *bucket = NULL;
1715
1.82M
    xmlParserNsExtra *extra;
1716
1.82M
    const xmlChar **ns;
1717
1.82M
    unsigned hashValue, nsIndex, oldIndex;
1718
1719
1.82M
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1720
80
        return(0);
1721
1722
1.82M
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1723
0
        xmlErrMemory(ctxt);
1724
0
        return(-1);
1725
0
    }
1726
1727
    /*
1728
     * Default namespace and 'xml' namespace
1729
     */
1730
1.82M
    if ((prefix == NULL) || (prefix->name == NULL)) {
1731
405k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1732
1733
405k
        if (oldIndex != INT_MAX) {
1734
238k
            extra = &ctxt->nsdb->extra[oldIndex];
1735
1736
238k
            if (extra->elementId == ctxt->nsdb->elementId) {
1737
129k
                if (defAttr == 0)
1738
125k
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1739
129k
                return(0);
1740
129k
            }
1741
1742
108k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1743
0
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1744
0
                return(0);
1745
108k
        }
1746
1747
276k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1748
276k
        goto populate_entry;
1749
405k
    }
1750
1751
    /*
1752
     * Hash table lookup
1753
     */
1754
1.41M
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1755
1.41M
    if (oldIndex != INT_MAX) {
1756
267k
        extra = &ctxt->nsdb->extra[oldIndex];
1757
1758
        /*
1759
         * Check for duplicate definitions on the same element.
1760
         */
1761
267k
        if (extra->elementId == ctxt->nsdb->elementId) {
1762
63.4k
            if (defAttr == 0)
1763
63.4k
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1764
63.4k
            return(0);
1765
63.4k
        }
1766
1767
204k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1768
0
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1769
0
            return(0);
1770
1771
204k
        bucket->index = ctxt->nsNr;
1772
204k
        goto populate_entry;
1773
204k
    }
1774
1775
    /*
1776
     * Insert new bucket
1777
     */
1778
1779
1.14M
    hashValue = prefix->hashValue;
1780
1781
    /*
1782
     * Grow hash table, 50% fill factor
1783
     */
1784
1.14M
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1785
182k
        xmlParserNsBucket *newHash;
1786
182k
        unsigned newSize, i, index;
1787
1788
182k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1789
0
            xmlErrMemory(ctxt);
1790
0
            return(-1);
1791
0
        }
1792
182k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1793
182k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1794
182k
        if (newHash == NULL) {
1795
0
            xmlErrMemory(ctxt);
1796
0
            return(-1);
1797
0
        }
1798
182k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1799
1800
2.03M
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1801
1.85M
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1802
1.85M
            unsigned newIndex;
1803
1804
1.85M
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1805
1.36M
                continue;
1806
490k
            newIndex = hv & (newSize - 1);
1807
1808
507k
            while (newHash[newIndex].hashValue != 0) {
1809
16.3k
                newIndex++;
1810
16.3k
                if (newIndex == newSize)
1811
87
                    newIndex = 0;
1812
16.3k
            }
1813
1814
490k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1815
490k
        }
1816
1817
182k
        xmlFree(ctxt->nsdb->hash);
1818
182k
        ctxt->nsdb->hash = newHash;
1819
182k
        ctxt->nsdb->hashSize = newSize;
1820
1821
        /*
1822
         * Relookup
1823
         */
1824
182k
        index = hashValue & (newSize - 1);
1825
1826
185k
        while (newHash[index].hashValue != 0) {
1827
3.05k
            index++;
1828
3.05k
            if (index == newSize)
1829
38
                index = 0;
1830
3.05k
        }
1831
1832
182k
        bucket = &newHash[index];
1833
182k
    }
1834
1835
1.14M
    bucket->hashValue = hashValue;
1836
1.14M
    bucket->index = ctxt->nsNr;
1837
1.14M
    ctxt->nsdb->hashElems++;
1838
1.14M
    oldIndex = INT_MAX;
1839
1840
1.62M
populate_entry:
1841
1.62M
    nsIndex = ctxt->nsNr;
1842
1843
1.62M
    ns = &ctxt->nsTab[nsIndex * 2];
1844
1.62M
    ns[0] = prefix ? prefix->name : NULL;
1845
1.62M
    ns[1] = uri->name;
1846
1847
1.62M
    extra = &ctxt->nsdb->extra[nsIndex];
1848
1.62M
    extra->saxData = saxData;
1849
1.62M
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1850
1.62M
    extra->uriHashValue = uri->hashValue;
1851
1.62M
    extra->elementId = ctxt->nsdb->elementId;
1852
1.62M
    extra->oldIndex = oldIndex;
1853
1854
1.62M
    ctxt->nsNr++;
1855
1856
1.62M
    return(1);
1857
1.14M
}
1858
1859
/**
1860
 * xmlParserNsPop:
1861
 * @ctxt: an XML parser context
1862
 * @nr:  the number to pop
1863
 *
1864
 * Pops the top @nr namespaces and restores the hash table.
1865
 *
1866
 * Returns the number of namespaces popped.
1867
 */
1868
static int
1869
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1870
379k
{
1871
379k
    int i;
1872
1873
    /* assert(nr <= ctxt->nsNr); */
1874
1875
1.42M
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1876
1.04M
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1877
1.04M
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1878
1879
1.04M
        if (prefix == NULL) {
1880
185k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1881
854k
        } else {
1882
854k
            xmlHashedString hprefix;
1883
854k
            xmlParserNsBucket *bucket = NULL;
1884
1885
854k
            hprefix.name = prefix;
1886
854k
            hprefix.hashValue = extra->prefixHashValue;
1887
854k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1888
            /* assert(bucket && bucket->hashValue); */
1889
854k
            bucket->index = extra->oldIndex;
1890
854k
        }
1891
1.04M
    }
1892
1893
379k
    ctxt->nsNr -= nr;
1894
379k
    return(nr);
1895
379k
}
1896
1897
static int
1898
660k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt) {
1899
660k
    const xmlChar **atts;
1900
660k
    unsigned *attallocs;
1901
660k
    int newSize;
1902
1903
660k
    newSize = xmlGrowCapacity(ctxt->maxatts / 5,
1904
660k
                              sizeof(atts[0]) * 5 + sizeof(attallocs[0]),
1905
660k
                              10, XML_MAX_ATTRS);
1906
660k
    if (newSize < 0) {
1907
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
1908
0
                    "Maximum number of attributes exceeded");
1909
0
        return(-1);
1910
0
    }
1911
1912
660k
    atts = xmlRealloc(ctxt->atts, newSize * sizeof(atts[0]) * 5);
1913
660k
    if (atts == NULL)
1914
0
        goto mem_error;
1915
660k
    ctxt->atts = atts;
1916
1917
660k
    attallocs = xmlRealloc(ctxt->attallocs,
1918
660k
                           newSize * sizeof(attallocs[0]));
1919
660k
    if (attallocs == NULL)
1920
0
        goto mem_error;
1921
660k
    ctxt->attallocs = attallocs;
1922
1923
660k
    ctxt->maxatts = newSize * 5;
1924
1925
660k
    return(0);
1926
1927
0
mem_error:
1928
0
    xmlErrMemory(ctxt);
1929
0
    return(-1);
1930
660k
}
1931
1932
/**
1933
 * xmlCtxtPushInput:
1934
 * @ctxt:  an XML parser context
1935
 * @value:  the parser input
1936
 *
1937
 * Pushes a new parser input on top of the input stack
1938
 *
1939
 * Returns -1 in case of error, the index in the stack otherwise
1940
 */
1941
int
1942
xmlCtxtPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1943
256k
{
1944
256k
    char *directory = NULL;
1945
256k
    int maxDepth;
1946
1947
256k
    if ((ctxt == NULL) || (value == NULL))
1948
0
        return(-1);
1949
1950
256k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
1951
1952
256k
    if (ctxt->inputNr >= ctxt->inputMax) {
1953
0
        xmlParserInputPtr *tmp;
1954
0
        int newSize;
1955
1956
0
        newSize = xmlGrowCapacity(ctxt->inputMax, sizeof(tmp[0]),
1957
0
                                  5, maxDepth);
1958
0
        if (newSize < 0) {
1959
0
            xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
1960
0
                           "Maximum entity nesting depth exceeded");
1961
0
            xmlHaltParser(ctxt);
1962
0
            return(-1);
1963
0
        }
1964
0
        tmp = xmlRealloc(ctxt->inputTab, newSize * sizeof(tmp[0]));
1965
0
        if (tmp == NULL) {
1966
0
            xmlErrMemory(ctxt);
1967
0
            return(-1);
1968
0
        }
1969
0
        ctxt->inputTab = tmp;
1970
0
        ctxt->inputMax = newSize;
1971
0
    }
1972
1973
256k
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1974
0
        directory = xmlParserGetDirectory(value->filename);
1975
0
        if (directory == NULL) {
1976
0
            xmlErrMemory(ctxt);
1977
0
            return(-1);
1978
0
        }
1979
0
    }
1980
1981
256k
    if (ctxt->input_id >= INT_MAX) {
1982
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1983
0
        return(-1);
1984
0
    }
1985
1986
256k
    ctxt->inputTab[ctxt->inputNr] = value;
1987
256k
    ctxt->input = value;
1988
1989
256k
    if (ctxt->inputNr == 0) {
1990
256k
        xmlFree(ctxt->directory);
1991
256k
        ctxt->directory = directory;
1992
256k
    }
1993
1994
    /*
1995
     * Internally, the input ID is only used to detect parameter entity
1996
     * boundaries. But there are entity loaders in downstream code that
1997
     * detect the main document by checking for "input_id == 1".
1998
     */
1999
256k
    value->id = ctxt->input_id++;
2000
2001
256k
    return(ctxt->inputNr++);
2002
256k
}
2003
2004
/**
2005
 * xmlCtxtPopInput:
2006
 * @ctxt: an XML parser context
2007
 *
2008
 * Pops the top parser input from the input stack
2009
 *
2010
 * Returns the input just removed
2011
 */
2012
xmlParserInputPtr
2013
xmlCtxtPopInput(xmlParserCtxtPtr ctxt)
2014
775k
{
2015
775k
    xmlParserInputPtr ret;
2016
2017
775k
    if (ctxt == NULL)
2018
0
        return(NULL);
2019
775k
    if (ctxt->inputNr <= 0)
2020
518k
        return (NULL);
2021
256k
    ctxt->inputNr--;
2022
256k
    if (ctxt->inputNr > 0)
2023
0
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
2024
256k
    else
2025
256k
        ctxt->input = NULL;
2026
256k
    ret = ctxt->inputTab[ctxt->inputNr];
2027
256k
    ctxt->inputTab[ctxt->inputNr] = NULL;
2028
256k
    return (ret);
2029
775k
}
2030
2031
/**
2032
 * nodePush:
2033
 * @ctxt:  an XML parser context
2034
 * @value:  the element node
2035
 *
2036
 * DEPRECATED: Internal function, do not use.
2037
 *
2038
 * Pushes a new element node on top of the node stack
2039
 *
2040
 * Returns -1 in case of error, the index in the stack otherwise
2041
 */
2042
int
2043
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
2044
1.00M
{
2045
1.00M
    if (ctxt == NULL)
2046
0
        return(0);
2047
2048
1.00M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
2049
25.9k
        int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2050
25.9k
        xmlNodePtr *tmp;
2051
25.9k
        int newSize;
2052
2053
25.9k
        newSize = xmlGrowCapacity(ctxt->nodeMax, sizeof(tmp[0]),
2054
25.9k
                                  10, maxDepth);
2055
25.9k
        if (newSize < 0) {
2056
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2057
0
                    "Excessive depth in document: %d,"
2058
0
                    " use XML_PARSE_HUGE option\n",
2059
0
                    ctxt->nodeNr);
2060
0
            xmlHaltParser(ctxt);
2061
0
            return(-1);
2062
0
        }
2063
2064
25.9k
  tmp = xmlRealloc(ctxt->nodeTab, newSize * sizeof(tmp[0]));
2065
25.9k
        if (tmp == NULL) {
2066
0
            xmlErrMemory(ctxt);
2067
0
            return (-1);
2068
0
        }
2069
25.9k
        ctxt->nodeTab = tmp;
2070
25.9k
  ctxt->nodeMax = newSize;
2071
25.9k
    }
2072
2073
1.00M
    ctxt->nodeTab[ctxt->nodeNr] = value;
2074
1.00M
    ctxt->node = value;
2075
1.00M
    return (ctxt->nodeNr++);
2076
1.00M
}
2077
2078
/**
2079
 * nodePop:
2080
 * @ctxt: an XML parser context
2081
 *
2082
 * DEPRECATED: Internal function, do not use.
2083
 *
2084
 * Pops the top element node from the node stack
2085
 *
2086
 * Returns the node just removed
2087
 */
2088
xmlNodePtr
2089
nodePop(xmlParserCtxtPtr ctxt)
2090
1.11M
{
2091
1.11M
    xmlNodePtr ret;
2092
2093
1.11M
    if (ctxt == NULL) return(NULL);
2094
1.11M
    if (ctxt->nodeNr <= 0)
2095
109k
        return (NULL);
2096
1.00M
    ctxt->nodeNr--;
2097
1.00M
    if (ctxt->nodeNr > 0)
2098
1.00M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2099
6.36k
    else
2100
6.36k
        ctxt->node = NULL;
2101
1.00M
    ret = ctxt->nodeTab[ctxt->nodeNr];
2102
1.00M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2103
1.00M
    return (ret);
2104
1.11M
}
2105
2106
/**
2107
 * nameNsPush:
2108
 * @ctxt:  an XML parser context
2109
 * @value:  the element name
2110
 * @prefix:  the element prefix
2111
 * @URI:  the element namespace name
2112
 * @line:  the current line number for error messages
2113
 * @nsNr:  the number of namespaces pushed on the namespace table
2114
 *
2115
 * Pushes a new element name/prefix/URL on top of the name stack
2116
 *
2117
 * Returns -1 in case of error, the index in the stack otherwise
2118
 */
2119
static int
2120
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2121
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2122
29.6M
{
2123
29.6M
    xmlStartTag *tag;
2124
2125
29.6M
    if (ctxt->nameNr >= ctxt->nameMax) {
2126
908k
        const xmlChar **tmp;
2127
908k
        xmlStartTag *tmp2;
2128
908k
        int newSize;
2129
2130
908k
        newSize = xmlGrowCapacity(ctxt->nameMax,
2131
908k
                                  sizeof(tmp[0]) + sizeof(tmp2[0]),
2132
908k
                                  10, XML_MAX_ITEMS);
2133
908k
        if (newSize < 0)
2134
0
            goto mem_error;
2135
2136
908k
        tmp = xmlRealloc(ctxt->nameTab, newSize * sizeof(tmp[0]));
2137
908k
        if (tmp == NULL)
2138
0
      goto mem_error;
2139
908k
  ctxt->nameTab = tmp;
2140
2141
908k
        tmp2 = xmlRealloc(ctxt->pushTab, newSize * sizeof(tmp2[0]));
2142
908k
        if (tmp2 == NULL)
2143
0
      goto mem_error;
2144
908k
  ctxt->pushTab = tmp2;
2145
2146
908k
        ctxt->nameMax = newSize;
2147
28.7M
    } else if (ctxt->pushTab == NULL) {
2148
234k
        ctxt->pushTab = xmlMalloc(ctxt->nameMax * sizeof(ctxt->pushTab[0]));
2149
234k
        if (ctxt->pushTab == NULL)
2150
0
            goto mem_error;
2151
234k
    }
2152
29.6M
    ctxt->nameTab[ctxt->nameNr] = value;
2153
29.6M
    ctxt->name = value;
2154
29.6M
    tag = &ctxt->pushTab[ctxt->nameNr];
2155
29.6M
    tag->prefix = prefix;
2156
29.6M
    tag->URI = URI;
2157
29.6M
    tag->line = line;
2158
29.6M
    tag->nsNr = nsNr;
2159
29.6M
    return (ctxt->nameNr++);
2160
0
mem_error:
2161
0
    xmlErrMemory(ctxt);
2162
0
    return (-1);
2163
29.6M
}
2164
#ifdef LIBXML_PUSH_ENABLED
2165
/**
2166
 * nameNsPop:
2167
 * @ctxt: an XML parser context
2168
 *
2169
 * Pops the top element/prefix/URI name from the name stack
2170
 *
2171
 * Returns the name just removed
2172
 */
2173
static const xmlChar *
2174
nameNsPop(xmlParserCtxtPtr ctxt)
2175
8.76M
{
2176
8.76M
    const xmlChar *ret;
2177
2178
8.76M
    if (ctxt->nameNr <= 0)
2179
0
        return (NULL);
2180
8.76M
    ctxt->nameNr--;
2181
8.76M
    if (ctxt->nameNr > 0)
2182
8.61M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2183
143k
    else
2184
143k
        ctxt->name = NULL;
2185
8.76M
    ret = ctxt->nameTab[ctxt->nameNr];
2186
8.76M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2187
8.76M
    return (ret);
2188
8.76M
}
2189
#endif /* LIBXML_PUSH_ENABLED */
2190
2191
/**
2192
 * namePop:
2193
 * @ctxt: an XML parser context
2194
 *
2195
 * DEPRECATED: Internal function, do not use.
2196
 *
2197
 * Pops the top element name from the name stack
2198
 *
2199
 * Returns the name just removed
2200
 */
2201
static const xmlChar *
2202
namePop(xmlParserCtxtPtr ctxt)
2203
1.10M
{
2204
1.10M
    const xmlChar *ret;
2205
2206
1.10M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2207
0
        return (NULL);
2208
1.10M
    ctxt->nameNr--;
2209
1.10M
    if (ctxt->nameNr > 0)
2210
1.09M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2211
6.20k
    else
2212
6.20k
        ctxt->name = NULL;
2213
1.10M
    ret = ctxt->nameTab[ctxt->nameNr];
2214
1.10M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2215
1.10M
    return (ret);
2216
1.10M
}
2217
2218
39.0M
/*
 * Pushes @val on the ctxt->spaceTab stack, growing the table when it is
 * full, and points ctxt->space at the new top entry.
 *
 * Returns the index of the new entry, or -1 if the table could not be
 * grown (reported through xmlErrMemory).
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown = NULL;
        int capacity;

        capacity = xmlGrowCapacity(ctxt->spaceMax, sizeof(grown[0]),
                                   10, XML_MAX_ITEMS);
        if (capacity >= 0)
            grown = xmlRealloc(ctxt->spaceTab, capacity * sizeof(grown[0]));

        /* covers both a rejected capacity and a failed reallocation */
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        ctxt->spaceTab = grown;
        ctxt->spaceMax = capacity;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
2243
2244
19.2M
/*
 * Pops the top value of the ctxt->spaceTab stack. ctxt->space is moved
 * to the entry below, or to the first table slot when the stack becomes
 * empty; the vacated slot is overwritten with -1.
 *
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2256
2257
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named "ctxt" in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     the current xmlChar value, i.e. a single 8 bit unit of the
 *           input. This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     expands to the same expression as CUR
 *   NXT(n)  the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
 *   CMPn(s, c1, ..., cn) true if the first n bytes at s equal c1..cn.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   SKIPL(n) Skip n xmlChar while keeping line/column counters correct
 *           across newlines.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character by calling xmlNextChar.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * Macro arguments are parenthesized so that expression arguments keep
 * their meaning. SHRINK, GROW and NEXT1 are statement fragments rather
 * than do/while(0) wrappers; only use them as complete statements.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SHRINK \
    if (!PARSER_PROGRESSIVE(ctxt)) \
	xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);					\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &(l))

#define COPY_BUF(b, i, v) do {						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v));			\
  } while (0)
2366
2367
static int
2368
152M
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2369
152M
    int c = xmlCurrentChar(ctxt, len);
2370
2371
152M
    if (c == XML_INVALID_CHAR)
2372
607k
        c = 0xFFFD; /* replacement character */
2373
2374
152M
    return(c);
2375
152M
}
2376
2377
/**
2378
 * xmlSkipBlankChars:
2379
 * @ctxt:  the XML parser context
2380
 *
2381
 * DEPRECATED: Internal function, do not use.
2382
 *
2383
 * Skip whitespace in the input stream.
2384
 *
2385
 * Returns the number of space chars skipped
2386
 */
2387
int
2388
114M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2389
114M
    const xmlChar *cur;
2390
114M
    int res = 0;
2391
2392
114M
    cur = ctxt->input->cur;
2393
114M
    while (IS_BLANK_CH(*cur)) {
2394
33.7M
        if (*cur == '\n') {
2395
916k
            ctxt->input->line++; ctxt->input->col = 1;
2396
32.8M
        } else {
2397
32.8M
            ctxt->input->col++;
2398
32.8M
        }
2399
33.7M
        cur++;
2400
33.7M
        if (res < INT_MAX)
2401
33.7M
            res++;
2402
33.7M
        if (*cur == 0) {
2403
13.1k
            ctxt->input->cur = cur;
2404
13.1k
            xmlParserGrow(ctxt);
2405
13.1k
            cur = ctxt->input->cur;
2406
13.1k
        }
2407
33.7M
    }
2408
114M
    ctxt->input->cur = cur;
2409
2410
114M
    if (res > 4)
2411
394k
        GROW;
2412
2413
114M
    return(res);
2414
114M
}
2415
2416
static void
2417
0
xmlPopPE(xmlParserCtxtPtr ctxt) {
2418
0
    unsigned long consumed;
2419
0
    xmlEntityPtr ent;
2420
2421
0
    ent = ctxt->input->entity;
2422
2423
0
    ent->flags &= ~XML_ENT_EXPANDING;
2424
2425
0
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2426
0
        int result;
2427
2428
        /*
2429
         * Read the rest of the stream in case of errors. We want
2430
         * to account for the whole entity size.
2431
         */
2432
0
        do {
2433
0
            ctxt->input->cur = ctxt->input->end;
2434
0
            xmlParserShrink(ctxt);
2435
0
            result = xmlParserGrow(ctxt);
2436
0
        } while (result > 0);
2437
2438
0
        consumed = ctxt->input->consumed;
2439
0
        xmlSaturatedAddSizeT(&consumed,
2440
0
                             ctxt->input->end - ctxt->input->base);
2441
2442
0
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2443
2444
        /*
2445
         * Add to sizeentities when parsing an external entity
2446
         * for the first time.
2447
         */
2448
0
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2449
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2450
0
        }
2451
2452
0
        ent->flags |= XML_ENT_CHECKED;
2453
0
    }
2454
2455
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
2456
2457
0
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2458
2459
0
    GROW;
2460
0
}
2461
2462
/**
2463
 * xmlSkipBlankCharsPE:
2464
 * @ctxt:  the XML parser context
2465
 *
2466
 * Skip whitespace in the input stream, also handling parameter
2467
 * entities.
2468
 *
2469
 * Returns the number of space chars skipped
2470
 */
2471
static int
2472
1.01M
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2473
1.01M
    int res = 0;
2474
1.01M
    int inParam;
2475
1.01M
    int expandParam;
2476
2477
1.01M
    inParam = PARSER_IN_PE(ctxt);
2478
1.01M
    expandParam = PARSER_EXTERNAL(ctxt);
2479
2480
1.01M
    if (!inParam && !expandParam)
2481
1.01M
        return(xmlSkipBlankChars(ctxt));
2482
2483
    /*
2484
     * It's Okay to use CUR/NEXT here since all the blanks are on
2485
     * the ASCII range.
2486
     */
2487
0
    while (PARSER_STOPPED(ctxt) == 0) {
2488
0
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2489
0
            NEXT;
2490
0
        } else if (CUR == '%') {
2491
0
            if ((expandParam == 0) ||
2492
0
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2493
0
                break;
2494
2495
            /*
2496
             * Expand parameter entity. We continue to consume
2497
             * whitespace at the start of the entity and possible
2498
             * even consume the whole entity and pop it. We might
2499
             * even pop multiple PEs in this loop.
2500
             */
2501
0
            xmlParsePEReference(ctxt);
2502
2503
0
            inParam = PARSER_IN_PE(ctxt);
2504
0
            expandParam = PARSER_EXTERNAL(ctxt);
2505
0
        } else if (CUR == 0) {
2506
0
            if (inParam == 0)
2507
0
                break;
2508
2509
0
            xmlPopPE(ctxt);
2510
2511
0
            inParam = PARSER_IN_PE(ctxt);
2512
0
            expandParam = PARSER_EXTERNAL(ctxt);
2513
0
        } else {
2514
0
            break;
2515
0
        }
2516
2517
        /*
2518
         * Also increase the counter when entering or exiting a PERef.
2519
         * The spec says: "When a parameter-entity reference is recognized
2520
         * in the DTD and included, its replacement text MUST be enlarged
2521
         * by the attachment of one leading and one following space (#x20)
2522
         * character."
2523
         */
2524
0
        if (res < INT_MAX)
2525
0
            res++;
2526
0
    }
2527
2528
0
    return(res);
2529
1.01M
}
2530
2531
/************************************************************************
2532
 *                  *
2533
 *    Commodity functions to handle entities      *
2534
 *                  *
2535
 ************************************************************************/
2536
2537
/**
2538
 * xmlPopInput:
2539
 * @ctxt:  an XML parser context
2540
 *
2541
 * DEPRECATED: Internal function, don't use.
2542
 *
2543
 * Returns the current xmlChar in the parser context
2544
 */
2545
xmlChar
2546
0
xmlPopInput(xmlParserCtxtPtr ctxt) {
2547
0
    xmlParserInputPtr input;
2548
2549
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2550
0
    input = xmlCtxtPopInput(ctxt);
2551
0
    xmlFreeInputStream(input);
2552
0
    if (*ctxt->input->cur == 0)
2553
0
        xmlParserGrow(ctxt);
2554
0
    return(CUR);
2555
0
}
2556
2557
/**
2558
 * xmlPushInput:
2559
 * @ctxt:  an XML parser context
2560
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2561
 *
2562
 * DEPRECATED: Internal function, don't use.
2563
 *
2564
 * Push an input stream onto the stack.
2565
 *
2566
 * Returns -1 in case of error or the index in the input stack
2567
 */
2568
int
2569
0
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2570
0
    int ret;
2571
2572
0
    if ((ctxt == NULL) || (input == NULL))
2573
0
        return(-1);
2574
2575
0
    ret = xmlCtxtPushInput(ctxt, input);
2576
0
    if (ret >= 0)
2577
0
        GROW;
2578
0
    return(ret);
2579
0
}
2580
2581
/**
2582
 * xmlParseCharRef:
2583
 * @ctxt:  an XML parser context
2584
 *
2585
 * DEPRECATED: Internal function, don't use.
2586
 *
2587
 * Parse a numeric character reference. Always consumes '&'.
2588
 *
2589
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2590
 *                  '&#x' [0-9a-fA-F]+ ';'
2591
 *
2592
 * [ WFC: Legal Character ]
2593
 * Characters referred to using character references must match the
2594
 * production for Char.
2595
 *
2596
 * Returns the value parsed (as an int), 0 in case of error
2597
 */
2598
int
2599
148k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2600
148k
    int val = 0;
2601
148k
    int count = 0;
2602
2603
    /*
2604
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2605
     */
2606
148k
    if ((RAW == '&') && (NXT(1) == '#') &&
2607
148k
        (NXT(2) == 'x')) {
2608
88.9k
  SKIP(3);
2609
88.9k
  GROW;
2610
435k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2611
346k
      if (count++ > 20) {
2612
228
    count = 0;
2613
228
    GROW;
2614
228
      }
2615
346k
      if ((RAW >= '0') && (RAW <= '9'))
2616
221k
          val = val * 16 + (CUR - '0');
2617
125k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2618
88.0k
          val = val * 16 + (CUR - 'a') + 10;
2619
37.3k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2620
36.9k
          val = val * 16 + (CUR - 'A') + 10;
2621
344
      else {
2622
344
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2623
344
    val = 0;
2624
344
    break;
2625
344
      }
2626
346k
      if (val > 0x110000)
2627
2.82k
          val = 0x110000;
2628
2629
346k
      NEXT;
2630
346k
      count++;
2631
346k
  }
2632
88.9k
  if (RAW == ';') {
2633
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2634
88.6k
      ctxt->input->col++;
2635
88.6k
      ctxt->input->cur++;
2636
88.6k
  }
2637
88.9k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2638
59.3k
  SKIP(2);
2639
59.3k
  GROW;
2640
197k
  while (RAW != ';') { /* loop blocked by count */
2641
138k
      if (count++ > 20) {
2642
344
    count = 0;
2643
344
    GROW;
2644
344
      }
2645
138k
      if ((RAW >= '0') && (RAW <= '9'))
2646
138k
          val = val * 10 + (CUR - '0');
2647
332
      else {
2648
332
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2649
332
    val = 0;
2650
332
    break;
2651
332
      }
2652
138k
      if (val > 0x110000)
2653
3.35k
          val = 0x110000;
2654
2655
138k
      NEXT;
2656
138k
      count++;
2657
138k
  }
2658
59.3k
  if (RAW == ';') {
2659
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2660
59.0k
      ctxt->input->col++;
2661
59.0k
      ctxt->input->cur++;
2662
59.0k
  }
2663
59.3k
    } else {
2664
0
        if (RAW == '&')
2665
0
            SKIP(1);
2666
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2667
0
    }
2668
2669
    /*
2670
     * [ WFC: Legal Character ]
2671
     * Characters referred to using character references must match the
2672
     * production for Char.
2673
     */
2674
148k
    if (val >= 0x110000) {
2675
212
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2676
212
                "xmlParseCharRef: character reference out of bounds\n",
2677
212
          val);
2678
212
        val = 0xFFFD;
2679
148k
    } else if (!IS_CHAR(val)) {
2680
1.65k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2681
1.65k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2682
1.65k
                    val);
2683
1.65k
    }
2684
148k
    return(val);
2685
148k
}
2686
2687
/**
2688
 * xmlParseStringCharRef:
2689
 * @ctxt:  an XML parser context
2690
 * @str:  a pointer to an index in the string
2691
 *
2692
 * parse Reference declarations, variant parsing from a string rather
2693
 * than an an input flow.
2694
 *
2695
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2696
 *                  '&#x' [0-9a-fA-F]+ ';'
2697
 *
2698
 * [ WFC: Legal Character ]
2699
 * Characters referred to using character references must match the
2700
 * production for Char.
2701
 *
2702
 * Returns the value parsed (as an int), 0 in case of error, str will be
2703
 *         updated to the current value of the index
2704
 */
2705
static int
2706
45.4k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2707
45.4k
    const xmlChar *ptr;
2708
45.4k
    xmlChar cur;
2709
45.4k
    int val = 0;
2710
2711
45.4k
    if ((str == NULL) || (*str == NULL)) return(0);
2712
45.4k
    ptr = *str;
2713
45.4k
    cur = *ptr;
2714
45.4k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2715
18.4k
  ptr += 3;
2716
18.4k
  cur = *ptr;
2717
84.3k
  while (cur != ';') { /* Non input consuming loop */
2718
69.4k
      if ((cur >= '0') && (cur <= '9'))
2719
40.5k
          val = val * 16 + (cur - '0');
2720
28.9k
      else if ((cur >= 'a') && (cur <= 'f'))
2721
12.1k
          val = val * 16 + (cur - 'a') + 10;
2722
16.7k
      else if ((cur >= 'A') && (cur <= 'F'))
2723
13.2k
          val = val * 16 + (cur - 'A') + 10;
2724
3.53k
      else {
2725
3.53k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2726
3.53k
    val = 0;
2727
3.53k
    break;
2728
3.53k
      }
2729
65.9k
      if (val > 0x110000)
2730
9.77k
          val = 0x110000;
2731
2732
65.9k
      ptr++;
2733
65.9k
      cur = *ptr;
2734
65.9k
  }
2735
18.4k
  if (cur == ';')
2736
14.9k
      ptr++;
2737
27.0k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2738
27.0k
  ptr += 2;
2739
27.0k
  cur = *ptr;
2740
154k
  while (cur != ';') { /* Non input consuming loops */
2741
129k
      if ((cur >= '0') && (cur <= '9'))
2742
127k
          val = val * 10 + (cur - '0');
2743
2.52k
      else {
2744
2.52k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2745
2.52k
    val = 0;
2746
2.52k
    break;
2747
2.52k
      }
2748
127k
      if (val > 0x110000)
2749
2.74k
          val = 0x110000;
2750
2751
127k
      ptr++;
2752
127k
      cur = *ptr;
2753
127k
  }
2754
27.0k
  if (cur == ';')
2755
24.5k
      ptr++;
2756
27.0k
    } else {
2757
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2758
0
  return(0);
2759
0
    }
2760
45.4k
    *str = ptr;
2761
2762
    /*
2763
     * [ WFC: Legal Character ]
2764
     * Characters referred to using character references must match the
2765
     * production for Char.
2766
     */
2767
45.4k
    if (val >= 0x110000) {
2768
176
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2769
176
                "xmlParseStringCharRef: character reference out of bounds\n",
2770
176
                val);
2771
45.3k
    } else if (IS_CHAR(val)) {
2772
37.9k
        return(val);
2773
37.9k
    } else {
2774
7.33k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2775
7.33k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2776
7.33k
        val);
2777
7.33k
    }
2778
7.51k
    return(0);
2779
45.4k
}
2780
2781
/**
2782
 * xmlParserHandlePEReference:
2783
 * @ctxt:  the parser context
2784
 *
2785
 * DEPRECATED: Internal function, do not use.
2786
 *
2787
 * [69] PEReference ::= '%' Name ';'
2788
 *
2789
 * [ WFC: No Recursion ]
2790
 * A parsed entity must not contain a recursive
2791
 * reference to itself, either directly or indirectly.
2792
 *
2793
 * [ WFC: Entity Declared ]
2794
 * In a document without any DTD, a document with only an internal DTD
2795
 * subset which contains no parameter entity references, or a document
2796
 * with "standalone='yes'", ...  ... The declaration of a parameter
2797
 * entity must precede any reference to it...
2798
 *
2799
 * [ VC: Entity Declared ]
2800
 * In a document with an external subset or external parameter entities
2801
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2802
 * must precede any reference to it...
2803
 *
2804
 * [ WFC: In DTD ]
2805
 * Parameter-entity references may only appear in the DTD.
2806
 * NOTE: misleading but this is handled.
2807
 *
2808
 * A PEReference may have been detected in the current input stream
2809
 * the handling is done accordingly to
2810
 *      http://www.w3.org/TR/REC-xml#entproc
2811
 * i.e.
2812
 *   - Included in literal in entity values
2813
 *   - Included as Parameter Entity reference within DTDs
2814
 */
2815
void
2816
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2817
0
    xmlParsePEReference(ctxt);
2818
0
}
2819
2820
/**
2821
 * xmlStringLenDecodeEntities:
2822
 * @ctxt:  the parser context
2823
 * @str:  the input string
2824
 * @len: the string length
2825
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2826
 * @end:  an end marker xmlChar, 0 if none
2827
 * @end2:  an end marker xmlChar, 0 if none
2828
 * @end3:  an end marker xmlChar, 0 if none
2829
 *
2830
 * DEPRECATED: Internal function, don't use.
2831
 *
2832
 * Returns A newly allocated string with the substitution done. The caller
2833
 *      must deallocate it !
2834
 */
2835
xmlChar *
2836
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2837
                           int what ATTRIBUTE_UNUSED,
2838
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2839
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2840
0
        return(NULL);
2841
2842
0
    if ((str[len] != 0) ||
2843
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2844
0
        return(NULL);
2845
2846
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2847
0
}
2848
2849
/**
2850
 * xmlStringDecodeEntities:
2851
 * @ctxt:  the parser context
2852
 * @str:  the input string
2853
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2854
 * @end:  an end marker xmlChar, 0 if none
2855
 * @end2:  an end marker xmlChar, 0 if none
2856
 * @end3:  an end marker xmlChar, 0 if none
2857
 *
2858
 * DEPRECATED: Internal function, don't use.
2859
 *
2860
 * Returns A newly allocated string with the substitution done. The caller
2861
 *      must deallocate it !
2862
 */
2863
xmlChar *
2864
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2865
                        int what ATTRIBUTE_UNUSED,
2866
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2867
0
    if ((ctxt == NULL) || (str == NULL))
2868
0
        return(NULL);
2869
2870
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2871
0
        return(NULL);
2872
2873
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2874
0
}
2875
2876
/************************************************************************
2877
 *                  *
2878
 *    Commodity functions, cleanup needed ?     *
2879
 *                  *
2880
 ************************************************************************/
2881
2882
/**
2883
 * areBlanks:
2884
 * @ctxt:  an XML parser context
2885
 * @str:  a xmlChar *
2886
 * @len:  the size of @str
2887
 * @blank_chars: we know the chars are blanks
2888
 *
2889
 * Is this a sequence of blank chars that one can ignore ?
2890
 *
2891
 * Returns 1 if ignorable 0 otherwise.
2892
 */
2893
2894
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2895
13.9M
                     int blank_chars) {
2896
13.9M
    int i;
2897
13.9M
    xmlNodePtr lastChild;
2898
2899
    /*
2900
     * Check for xml:space value.
2901
     */
2902
13.9M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2903
13.8M
        (*(ctxt->space) == -2))
2904
5.60M
  return(0);
2905
2906
    /*
2907
     * Check that the string is made of blanks
2908
     */
2909
8.34M
    if (blank_chars == 0) {
2910
8.86M
  for (i = 0;i < len;i++)
2911
8.71M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2912
7.35M
    }
2913
2914
    /*
2915
     * Look if the element is mixed content in the DTD if available
2916
     */
2917
1.14M
    if (ctxt->node == NULL) return(0);
2918
18.4E
    if (ctxt->myDoc != NULL) {
2919
0
        xmlElementPtr elemDecl = NULL;
2920
0
        xmlDocPtr doc = ctxt->myDoc;
2921
0
        const xmlChar *prefix = NULL;
2922
2923
0
        if (ctxt->node->ns)
2924
0
            prefix = ctxt->node->ns->prefix;
2925
0
        if (doc->intSubset != NULL)
2926
0
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2927
0
                                      prefix);
2928
0
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2929
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2930
0
                                      prefix);
2931
0
        if (elemDecl != NULL) {
2932
0
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2933
0
                return(1);
2934
0
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2935
0
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2936
0
                return(0);
2937
0
        }
2938
0
    }
2939
2940
    /*
2941
     * Otherwise, heuristic :-\
2942
     *
2943
     * When push parsing, we could be at the end of a chunk.
2944
     * This makes the look-ahead and consequently the NOBLANKS
2945
     * option unreliable.
2946
     */
2947
18.4E
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2948
18.4E
    if ((ctxt->node->children == NULL) &&
2949
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2950
2951
18.4E
    lastChild = xmlGetLastChild(ctxt->node);
2952
18.4E
    if (lastChild == NULL) {
2953
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2954
0
            (ctxt->node->content != NULL)) return(0);
2955
18.4E
    } else if (xmlNodeIsText(lastChild))
2956
0
        return(0);
2957
18.4E
    else if ((ctxt->node->children != NULL) &&
2958
0
             (xmlNodeIsText(ctxt->node->children)))
2959
0
        return(0);
2960
18.4E
    return(1);
2961
18.4E
}
2962
2963
/************************************************************************
2964
 *                  *
2965
 *    Extra stuff for namespace support     *
2966
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2967
 *                  *
2968
 ************************************************************************/
2969
2970
/**
2971
 * xmlSplitQName:
2972
 * @ctxt:  an XML parser context
2973
 * @name:  an XML parser context
2974
 * @prefixOut:  a xmlChar **
2975
 *
2976
 * DEPRECATED: Don't use.
2977
 *
2978
 * parse an UTF8 encoded XML qualified name string
2979
 *
2980
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2981
 *
2982
 * [NS 6] Prefix ::= NCName
2983
 *
2984
 * [NS 7] LocalPart ::= NCName
2985
 *
2986
 * Returns the local part, and prefix is updated
2987
 *   to get the Prefix if any.
2988
 */
2989
2990
xmlChar *
2991
0
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
2992
0
    xmlChar buf[XML_MAX_NAMELEN + 5];
2993
0
    xmlChar *buffer = NULL;
2994
0
    int len = 0;
2995
0
    int max = XML_MAX_NAMELEN;
2996
0
    xmlChar *ret = NULL;
2997
0
    xmlChar *prefix;
2998
0
    const xmlChar *cur = name;
2999
0
    int c;
3000
3001
0
    if (prefixOut == NULL) return(NULL);
3002
0
    *prefixOut = NULL;
3003
3004
0
    if (cur == NULL) return(NULL);
3005
3006
    /* nasty but well=formed */
3007
0
    if (cur[0] == ':')
3008
0
  return(xmlStrdup(name));
3009
3010
0
    c = *cur++;
3011
0
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3012
0
  buf[len++] = c;
3013
0
  c = *cur++;
3014
0
    }
3015
0
    if (len >= max) {
3016
  /*
3017
   * Okay someone managed to make a huge name, so he's ready to pay
3018
   * for the processing speed.
3019
   */
3020
0
  max = len * 2;
3021
3022
0
  buffer = xmlMalloc(max);
3023
0
  if (buffer == NULL) {
3024
0
      xmlErrMemory(ctxt);
3025
0
      return(NULL);
3026
0
  }
3027
0
  memcpy(buffer, buf, len);
3028
0
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3029
0
      if (len + 10 > max) {
3030
0
          xmlChar *tmp;
3031
0
                int newSize;
3032
3033
0
                newSize = xmlGrowCapacity(max, 1, 1, XML_MAX_ITEMS);
3034
0
                if (newSize < 0) {
3035
0
        xmlErrMemory(ctxt);
3036
0
        xmlFree(buffer);
3037
0
        return(NULL);
3038
0
                }
3039
0
    tmp = xmlRealloc(buffer, newSize);
3040
0
    if (tmp == NULL) {
3041
0
        xmlErrMemory(ctxt);
3042
0
        xmlFree(buffer);
3043
0
        return(NULL);
3044
0
    }
3045
0
    buffer = tmp;
3046
0
    max = newSize;
3047
0
      }
3048
0
      buffer[len++] = c;
3049
0
      c = *cur++;
3050
0
  }
3051
0
  buffer[len] = 0;
3052
0
    }
3053
3054
0
    if ((c == ':') && (*cur == 0)) {
3055
0
        if (buffer != NULL)
3056
0
      xmlFree(buffer);
3057
0
  return(xmlStrdup(name));
3058
0
    }
3059
3060
0
    if (buffer == NULL) {
3061
0
  ret = xmlStrndup(buf, len);
3062
0
        if (ret == NULL) {
3063
0
      xmlErrMemory(ctxt);
3064
0
      return(NULL);
3065
0
        }
3066
0
    } else {
3067
0
  ret = buffer;
3068
0
  buffer = NULL;
3069
0
  max = XML_MAX_NAMELEN;
3070
0
    }
3071
3072
3073
0
    if (c == ':') {
3074
0
  c = *cur;
3075
0
        prefix = ret;
3076
0
  if (c == 0) {
3077
0
      ret = xmlStrndup(BAD_CAST "", 0);
3078
0
            if (ret == NULL) {
3079
0
                xmlFree(prefix);
3080
0
                return(NULL);
3081
0
            }
3082
0
            *prefixOut = prefix;
3083
0
            return(ret);
3084
0
  }
3085
0
  len = 0;
3086
3087
  /*
3088
   * Check that the first character is proper to start
3089
   * a new name
3090
   */
3091
0
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3092
0
        ((c >= 0x41) && (c <= 0x5A)) ||
3093
0
        (c == '_') || (c == ':'))) {
3094
0
      int l;
3095
0
      int first = CUR_SCHAR(cur, l);
3096
3097
0
      if (!IS_LETTER(first) && (first != '_')) {
3098
0
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3099
0
          "Name %s is not XML Namespace compliant\n",
3100
0
          name);
3101
0
      }
3102
0
  }
3103
0
  cur++;
3104
3105
0
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3106
0
      buf[len++] = c;
3107
0
      c = *cur++;
3108
0
  }
3109
0
  if (len >= max) {
3110
      /*
3111
       * Okay someone managed to make a huge name, so he's ready to pay
3112
       * for the processing speed.
3113
       */
3114
0
      max = len * 2;
3115
3116
0
      buffer = xmlMalloc(max);
3117
0
      if (buffer == NULL) {
3118
0
          xmlErrMemory(ctxt);
3119
0
                xmlFree(prefix);
3120
0
    return(NULL);
3121
0
      }
3122
0
      memcpy(buffer, buf, len);
3123
0
      while (c != 0) { /* tested bigname2.xml */
3124
0
    if (len + 10 > max) {
3125
0
        xmlChar *tmp;
3126
0
                    int newSize;
3127
3128
0
                    newSize = xmlGrowCapacity(max, 1, 1, XML_MAX_ITEMS);
3129
0
                    if (newSize < 0) {
3130
0
                        xmlErrMemory(ctxt);
3131
0
                        xmlFree(buffer);
3132
0
                        return(NULL);
3133
0
                    }
3134
0
        tmp = xmlRealloc(buffer, newSize);
3135
0
        if (tmp == NULL) {
3136
0
      xmlErrMemory(ctxt);
3137
0
                        xmlFree(prefix);
3138
0
      xmlFree(buffer);
3139
0
      return(NULL);
3140
0
        }
3141
0
        buffer = tmp;
3142
0
                    max = newSize;
3143
0
    }
3144
0
    buffer[len++] = c;
3145
0
    c = *cur++;
3146
0
      }
3147
0
      buffer[len] = 0;
3148
0
  }
3149
3150
0
  if (buffer == NULL) {
3151
0
      ret = xmlStrndup(buf, len);
3152
0
            if (ret == NULL) {
3153
0
                xmlFree(prefix);
3154
0
                return(NULL);
3155
0
            }
3156
0
  } else {
3157
0
      ret = buffer;
3158
0
  }
3159
3160
0
        *prefixOut = prefix;
3161
0
    }
3162
3163
0
    return(ret);
3164
0
}
3165
3166
/************************************************************************
3167
 *                  *
3168
 *      The parser itself       *
3169
 *  Relates to http://www.w3.org/TR/REC-xml       *
3170
 *                  *
3171
 ************************************************************************/
3172
3173
/************************************************************************
3174
 *                  *
3175
 *  Routines to parse Name, NCName and NmToken      *
3176
 *                  *
3177
 ************************************************************************/
3178
3179
/*
3180
 * The two following functions are related to the change of accepted
3181
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3182
 * They correspond to the modified production [4] and the new production [4a]
3183
 * changes in that revision. Also note that the macros used for the
3184
 * productions Letter, Digit, CombiningChar and Extender are not needed
3185
 * anymore.
3186
 * We still keep compatibility to pre-revision5 parsing semantic if the
3187
 * new XML_PARSE_OLD10 option is given to the parser.
3188
 */
3189
static int
3190
1.86M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3191
1.86M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3192
        /*
3193
   * Use the new checks of production [4] [4a] amd [5] of the
3194
   * Update 5 of XML-1.0
3195
   */
3196
1.86M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3197
1.86M
      (((c >= 'a') && (c <= 'z')) ||
3198
871k
       ((c >= 'A') && (c <= 'Z')) ||
3199
531k
       (c == '_') || (c == ':') ||
3200
307k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3201
304k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3202
303k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3203
298k
       ((c >= 0x370) && (c <= 0x37D)) ||
3204
298k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3205
296k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3206
296k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3207
296k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3208
296k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3209
286k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3210
286k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3211
148k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3212
1.71M
      return(1);
3213
1.86M
    } else {
3214
0
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3215
0
      return(1);
3216
0
    }
3217
149k
    return(0);
3218
1.86M
}
3219
3220
static int
3221
41.0M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3222
41.0M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3223
        /*
3224
   * Use the new checks of production [4] [4a] amd [5] of the
3225
   * Update 5 of XML-1.0
3226
   */
3227
41.0M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3228
40.5M
      (((c >= 'a') && (c <= 'z')) ||
3229
22.9M
       ((c >= 'A') && (c <= 'Z')) ||
3230
17.5M
       ((c >= '0') && (c <= '9')) || /* !start */
3231
15.4M
       (c == '_') || (c == ':') ||
3232
14.6M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3233
14.0M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3234
13.9M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3235
13.9M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3236
13.7M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3237
13.7M
       ((c >= 0x370) && (c <= 0x37D)) ||
3238
13.7M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3239
13.6M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3240
13.6M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3241
13.6M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3242
13.6M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3243
13.6M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3244
13.5M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3245
13.5M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3246
242k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3247
40.3M
       return(1);
3248
41.0M
    } else {
3249
0
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3250
0
            (c == '.') || (c == '-') ||
3251
0
      (c == '_') || (c == ':') ||
3252
0
      (IS_COMBINING(c)) ||
3253
0
      (IS_EXTENDER(c)))
3254
0
      return(1);
3255
0
    }
3256
695k
    return(0);
3257
41.0M
}
3258
3259
static const xmlChar *
3260
445k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3261
445k
    const xmlChar *ret;
3262
445k
    int len = 0, l;
3263
445k
    int c;
3264
445k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3265
444k
                    XML_MAX_TEXT_LENGTH :
3266
445k
                    XML_MAX_NAME_LENGTH;
3267
3268
    /*
3269
     * Handler for more complex cases
3270
     */
3271
445k
    c = xmlCurrentChar(ctxt, &l);
3272
445k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3273
        /*
3274
   * Use the new checks of production [4] [4a] amd [5] of the
3275
   * Update 5 of XML-1.0
3276
   */
3277
445k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3278
437k
      (!(((c >= 'a') && (c <= 'z')) ||
3279
379k
         ((c >= 'A') && (c <= 'Z')) ||
3280
372k
         (c == '_') || (c == ':') ||
3281
358k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3282
357k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3283
332k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3284
318k
         ((c >= 0x370) && (c <= 0x37D)) ||
3285
315k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3286
307k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3287
302k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3288
284k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3289
276k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3290
266k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3291
263k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3292
252k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3293
252k
      return(NULL);
3294
252k
  }
3295
192k
  len += l;
3296
192k
  NEXTL(l);
3297
192k
  c = xmlCurrentChar(ctxt, &l);
3298
1.15M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3299
1.14M
         (((c >= 'a') && (c <= 'z')) ||
3300
821k
          ((c >= 'A') && (c <= 'Z')) ||
3301
767k
          ((c >= '0') && (c <= '9')) || /* !start */
3302
707k
          (c == '_') || (c == ':') ||
3303
678k
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3304
653k
          ((c >= 0xC0) && (c <= 0xD6)) ||
3305
644k
          ((c >= 0xD8) && (c <= 0xF6)) ||
3306
624k
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3307
590k
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3308
588k
          ((c >= 0x370) && (c <= 0x37D)) ||
3309
586k
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3310
568k
          ((c >= 0x200C) && (c <= 0x200D)) ||
3311
566k
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3312
565k
          ((c >= 0x2070) && (c <= 0x218F)) ||
3313
547k
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3314
526k
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3315
389k
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3316
388k
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3317
183k
          ((c >= 0x10000) && (c <= 0xEFFFF))
3318
1.14M
    )) {
3319
963k
            if (len <= INT_MAX - l)
3320
963k
          len += l;
3321
963k
      NEXTL(l);
3322
963k
      c = xmlCurrentChar(ctxt, &l);
3323
963k
  }
3324
192k
    } else {
3325
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3326
0
      (!IS_LETTER(c) && (c != '_') &&
3327
0
       (c != ':'))) {
3328
0
      return(NULL);
3329
0
  }
3330
0
  len += l;
3331
0
  NEXTL(l);
3332
0
  c = xmlCurrentChar(ctxt, &l);
3333
3334
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3335
0
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3336
0
    (c == '.') || (c == '-') ||
3337
0
    (c == '_') || (c == ':') ||
3338
0
    (IS_COMBINING(c)) ||
3339
0
    (IS_EXTENDER(c)))) {
3340
0
            if (len <= INT_MAX - l)
3341
0
          len += l;
3342
0
      NEXTL(l);
3343
0
      c = xmlCurrentChar(ctxt, &l);
3344
0
  }
3345
0
    }
3346
192k
    if (len > maxLength) {
3347
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3348
0
        return(NULL);
3349
0
    }
3350
192k
    if (ctxt->input->cur - ctxt->input->base < len) {
3351
        /*
3352
         * There were a couple of bugs where PERefs lead to to a change
3353
         * of the buffer. Check the buffer size to avoid passing an invalid
3354
         * pointer to xmlDictLookup.
3355
         */
3356
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3357
0
                    "unexpected change of input buffer");
3358
0
        return (NULL);
3359
0
    }
3360
192k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3361
743
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3362
191k
    else
3363
191k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3364
192k
    if (ret == NULL)
3365
0
        xmlErrMemory(ctxt);
3366
192k
    return(ret);
3367
192k
}
3368
3369
/**
3370
 * xmlParseName:
3371
 * @ctxt:  an XML parser context
3372
 *
3373
 * DEPRECATED: Internal function, don't use.
3374
 *
3375
 * parse an XML name.
3376
 *
3377
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3378
 *                  CombiningChar | Extender
3379
 *
3380
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3381
 *
3382
 * [6] Names ::= Name (#x20 Name)*
3383
 *
3384
 * Returns the Name parsed or NULL
3385
 */
3386
3387
const xmlChar *
3388
1.32M
xmlParseName(xmlParserCtxtPtr ctxt) {
3389
1.32M
    const xmlChar *in;
3390
1.32M
    const xmlChar *ret;
3391
1.32M
    size_t count = 0;
3392
1.32M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3393
1.32M
                       XML_MAX_TEXT_LENGTH :
3394
1.32M
                       XML_MAX_NAME_LENGTH;
3395
3396
1.32M
    GROW;
3397
3398
    /*
3399
     * Accelerator for simple ASCII names
3400
     */
3401
1.32M
    in = ctxt->input->cur;
3402
1.32M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3403
461k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3404
959k
  (*in == '_') || (*in == ':')) {
3405
959k
  in++;
3406
4.41M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3407
1.71M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3408
1.40M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3409
1.15M
         (*in == '_') || (*in == '-') ||
3410
1.03M
         (*in == ':') || (*in == '.'))
3411
3.45M
      in++;
3412
959k
  if ((*in > 0) && (*in < 0x80)) {
3413
881k
      count = in - ctxt->input->cur;
3414
881k
            if (count > maxLength) {
3415
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3416
0
                return(NULL);
3417
0
            }
3418
881k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3419
881k
      ctxt->input->cur = in;
3420
881k
      ctxt->input->col += count;
3421
881k
      if (ret == NULL)
3422
0
          xmlErrMemory(ctxt);
3423
881k
      return(ret);
3424
881k
  }
3425
959k
    }
3426
    /* accelerator for special cases */
3427
445k
    return(xmlParseNameComplex(ctxt));
3428
1.32M
}
3429
3430
static xmlHashedString
3431
1.89M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3432
1.89M
    xmlHashedString ret;
3433
1.89M
    int len = 0, l;
3434
1.89M
    int c;
3435
1.89M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3436
1.84M
                    XML_MAX_TEXT_LENGTH :
3437
1.89M
                    XML_MAX_NAME_LENGTH;
3438
1.89M
    size_t startPosition = 0;
3439
3440
1.89M
    ret.name = NULL;
3441
1.89M
    ret.hashValue = 0;
3442
3443
    /*
3444
     * Handler for more complex cases
3445
     */
3446
1.89M
    startPosition = CUR_PTR - BASE_PTR;
3447
1.89M
    c = xmlCurrentChar(ctxt, &l);
3448
1.89M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3449
1.79M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3450
455k
  return(ret);
3451
455k
    }
3452
3453
36.2M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3454
34.9M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3455
34.8M
        if (len <= INT_MAX - l)
3456
34.8M
      len += l;
3457
34.8M
  NEXTL(l);
3458
34.8M
  c = xmlCurrentChar(ctxt, &l);
3459
34.8M
    }
3460
1.43M
    if (len > maxLength) {
3461
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3462
0
        return(ret);
3463
0
    }
3464
1.43M
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3465
1.43M
    if (ret.name == NULL)
3466
0
        xmlErrMemory(ctxt);
3467
1.43M
    return(ret);
3468
1.43M
}
3469
3470
/**
3471
 * xmlParseNCName:
3472
 * @ctxt:  an XML parser context
3473
 * @len:  length of the string parsed
3474
 *
3475
 * parse an XML name.
3476
 *
3477
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3478
 *                      CombiningChar | Extender
3479
 *
3480
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3481
 *
3482
 * Returns the Name parsed or NULL
3483
 */
3484
3485
static xmlHashedString
3486
87.7M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3487
87.7M
    const xmlChar *in, *e;
3488
87.7M
    xmlHashedString ret;
3489
87.7M
    size_t count = 0;
3490
87.7M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3491
84.1M
                       XML_MAX_TEXT_LENGTH :
3492
87.7M
                       XML_MAX_NAME_LENGTH;
3493
3494
87.7M
    ret.name = NULL;
3495
3496
    /*
3497
     * Accelerator for simple ASCII names
3498
     */
3499
87.7M
    in = ctxt->input->cur;
3500
87.7M
    e = ctxt->input->end;
3501
87.7M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3502
6.74M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3503
87.1M
   (*in == '_')) && (in < e)) {
3504
87.1M
  in++;
3505
498M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3506
163M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3507
110M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3508
96.1M
          (*in == '_') || (*in == '-') ||
3509
410M
          (*in == '.')) && (in < e))
3510
410M
      in++;
3511
87.1M
  if (in >= e)
3512
16.7k
      goto complex;
3513
87.1M
  if ((*in > 0) && (*in < 0x80)) {
3514
85.8M
      count = in - ctxt->input->cur;
3515
85.8M
            if (count > maxLength) {
3516
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3517
0
                return(ret);
3518
0
            }
3519
85.8M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3520
85.8M
      ctxt->input->cur = in;
3521
85.8M
      ctxt->input->col += count;
3522
85.8M
      if (ret.name == NULL) {
3523
0
          xmlErrMemory(ctxt);
3524
0
      }
3525
85.8M
      return(ret);
3526
85.8M
  }
3527
87.1M
    }
3528
1.89M
complex:
3529
1.89M
    return(xmlParseNCNameComplex(ctxt));
3530
87.7M
}
3531
3532
/**
3533
 * xmlParseNameAndCompare:
3534
 * @ctxt:  an XML parser context
3535
 *
3536
 * parse an XML name and compares for match
3537
 * (specialized for endtag parsing)
3538
 *
3539
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3540
 * and the name for mismatch
3541
 */
3542
3543
static const xmlChar *
3544
2.97M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3545
2.97M
    register const xmlChar *cmp = other;
3546
2.97M
    register const xmlChar *in;
3547
2.97M
    const xmlChar *ret;
3548
3549
2.97M
    GROW;
3550
3551
2.97M
    in = ctxt->input->cur;
3552
9.73M
    while (*in != 0 && *in == *cmp) {
3553
6.76M
  ++in;
3554
6.76M
  ++cmp;
3555
6.76M
    }
3556
2.97M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3557
  /* success */
3558
2.96M
  ctxt->input->col += in - ctxt->input->cur;
3559
2.96M
  ctxt->input->cur = in;
3560
2.96M
  return (const xmlChar*) 1;
3561
2.96M
    }
3562
    /* failure (or end of input buffer), check with full function */
3563
5.48k
    ret = xmlParseName (ctxt);
3564
    /* strings coming from the dictionary direct compare possible */
3565
5.48k
    if (ret == other) {
3566
62
  return (const xmlChar*) 1;
3567
62
    }
3568
5.42k
    return ret;
3569
5.48k
}
3570
3571
/**
3572
 * xmlParseStringName:
3573
 * @ctxt:  an XML parser context
3574
 * @str:  a pointer to the string pointer (IN/OUT)
3575
 *
3576
 * parse an XML name.
3577
 *
3578
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3579
 *                  CombiningChar | Extender
3580
 *
3581
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3582
 *
3583
 * [6] Names ::= Name (#x20 Name)*
3584
 *
3585
 * Returns the Name parsed or NULL. The @str pointer
3586
 * is updated to the current location in the string.
3587
 */
3588
3589
static xmlChar *
3590
70.6k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3591
70.6k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3592
70.6k
    xmlChar *ret;
3593
70.6k
    const xmlChar *cur = *str;
3594
70.6k
    int len = 0, l;
3595
70.6k
    int c;
3596
70.6k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3597
70.6k
                    XML_MAX_TEXT_LENGTH :
3598
70.6k
                    XML_MAX_NAME_LENGTH;
3599
3600
70.6k
    c = CUR_SCHAR(cur, l);
3601
70.6k
    if (!xmlIsNameStartChar(ctxt, c)) {
3602
12.8k
  return(NULL);
3603
12.8k
    }
3604
3605
57.7k
    COPY_BUF(buf, len, c);
3606
57.7k
    cur += l;
3607
57.7k
    c = CUR_SCHAR(cur, l);
3608
292k
    while (xmlIsNameChar(ctxt, c)) {
3609
237k
  COPY_BUF(buf, len, c);
3610
237k
  cur += l;
3611
237k
  c = CUR_SCHAR(cur, l);
3612
237k
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3613
      /*
3614
       * Okay someone managed to make a huge name, so he's ready to pay
3615
       * for the processing speed.
3616
       */
3617
2.61k
      xmlChar *buffer;
3618
2.61k
      int max = len * 2;
3619
3620
2.61k
      buffer = xmlMalloc(max);
3621
2.61k
      if (buffer == NULL) {
3622
0
          xmlErrMemory(ctxt);
3623
0
    return(NULL);
3624
0
      }
3625
2.61k
      memcpy(buffer, buf, len);
3626
104k
      while (xmlIsNameChar(ctxt, c)) {
3627
101k
    if (len + 10 > max) {
3628
896
        xmlChar *tmp;
3629
896
                    int newSize;
3630
3631
896
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3632
896
                    if (newSize < 0) {
3633
0
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3634
0
                        xmlFree(buffer);
3635
0
                        return(NULL);
3636
0
                    }
3637
896
        tmp = xmlRealloc(buffer, newSize);
3638
896
        if (tmp == NULL) {
3639
0
      xmlErrMemory(ctxt);
3640
0
      xmlFree(buffer);
3641
0
      return(NULL);
3642
0
        }
3643
896
        buffer = tmp;
3644
896
                    max = newSize;
3645
896
    }
3646
101k
    COPY_BUF(buffer, len, c);
3647
101k
    cur += l;
3648
101k
    c = CUR_SCHAR(cur, l);
3649
101k
      }
3650
2.61k
      buffer[len] = 0;
3651
2.61k
      *str = cur;
3652
2.61k
      return(buffer);
3653
2.61k
  }
3654
237k
    }
3655
55.1k
    if (len > maxLength) {
3656
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3657
0
        return(NULL);
3658
0
    }
3659
55.1k
    *str = cur;
3660
55.1k
    ret = xmlStrndup(buf, len);
3661
55.1k
    if (ret == NULL)
3662
0
        xmlErrMemory(ctxt);
3663
55.1k
    return(ret);
3664
55.1k
}
3665
3666
/**
3667
 * xmlParseNmtoken:
3668
 * @ctxt:  an XML parser context
3669
 *
3670
 * DEPRECATED: Internal function, don't use.
3671
 *
3672
 * parse an XML Nmtoken.
3673
 *
3674
 * [7] Nmtoken ::= (NameChar)+
3675
 *
3676
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3677
 *
3678
 * Returns the Nmtoken parsed or NULL
3679
 */
3680
3681
xmlChar *
3682
584k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3683
584k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3684
584k
    xmlChar *ret;
3685
584k
    int len = 0, l;
3686
584k
    int c;
3687
584k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3688
577k
                    XML_MAX_TEXT_LENGTH :
3689
584k
                    XML_MAX_NAME_LENGTH;
3690
3691
584k
    c = xmlCurrentChar(ctxt, &l);
3692
3693
5.32M
    while (xmlIsNameChar(ctxt, c)) {
3694
4.74M
  COPY_BUF(buf, len, c);
3695
4.74M
  NEXTL(l);
3696
4.74M
  c = xmlCurrentChar(ctxt, &l);
3697
4.74M
  if (len >= XML_MAX_NAMELEN) {
3698
      /*
3699
       * Okay someone managed to make a huge token, so he's ready to pay
3700
       * for the processing speed.
3701
       */
3702
5.06k
      xmlChar *buffer;
3703
5.06k
      int max = len * 2;
3704
3705
5.06k
      buffer = xmlMalloc(max);
3706
5.06k
      if (buffer == NULL) {
3707
0
          xmlErrMemory(ctxt);
3708
0
    return(NULL);
3709
0
      }
3710
5.06k
      memcpy(buffer, buf, len);
3711
372k
      while (xmlIsNameChar(ctxt, c)) {
3712
367k
    if (len + 10 > max) {
3713
1.50k
        xmlChar *tmp;
3714
1.50k
                    int newSize;
3715
3716
1.50k
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3717
1.50k
                    if (newSize < 0) {
3718
0
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3719
0
                        xmlFree(buffer);
3720
0
                        return(NULL);
3721
0
                    }
3722
1.50k
        tmp = xmlRealloc(buffer, newSize);
3723
1.50k
        if (tmp == NULL) {
3724
0
      xmlErrMemory(ctxt);
3725
0
      xmlFree(buffer);
3726
0
      return(NULL);
3727
0
        }
3728
1.50k
        buffer = tmp;
3729
1.50k
                    max = newSize;
3730
1.50k
    }
3731
367k
    COPY_BUF(buffer, len, c);
3732
367k
    NEXTL(l);
3733
367k
    c = xmlCurrentChar(ctxt, &l);
3734
367k
      }
3735
5.06k
      buffer[len] = 0;
3736
5.06k
      return(buffer);
3737
5.06k
  }
3738
4.74M
    }
3739
579k
    if (len == 0)
3740
94.1k
        return(NULL);
3741
485k
    if (len > maxLength) {
3742
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3743
0
        return(NULL);
3744
0
    }
3745
485k
    ret = xmlStrndup(buf, len);
3746
485k
    if (ret == NULL)
3747
0
        xmlErrMemory(ctxt);
3748
485k
    return(ret);
3749
485k
}
3750
3751
/**
3752
 * xmlExpandPEsInEntityValue:
3753
 * @ctxt:  parser context
3754
 * @buf:  string buffer
3755
 * @str:  entity value
3756
 * @length:  size of entity value
3757
 * @depth:  nesting depth
3758
 *
3759
 * Validate an entity value and expand parameter entities.
3760
 */
3761
static void
3762
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3763
46.4k
                          const xmlChar *str, int length, int depth) {
3764
46.4k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3765
46.4k
    const xmlChar *end, *chunk;
3766
46.4k
    int c, l;
3767
3768
46.4k
    if (str == NULL)
3769
0
        return;
3770
3771
46.4k
    depth += 1;
3772
46.4k
    if (depth > maxDepth) {
3773
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3774
0
                       "Maximum entity nesting depth exceeded");
3775
0
  return;
3776
0
    }
3777
3778
46.4k
    end = str + length;
3779
46.4k
    chunk = str;
3780
3781
3.32M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3782
3.31M
        c = *str;
3783
3784
3.31M
        if (c >= 0x80) {
3785
584k
            l = xmlUTF8MultibyteLen(ctxt, str,
3786
584k
                    "invalid character in entity value\n");
3787
584k
            if (l == 0) {
3788
271k
                if (chunk < str)
3789
30.4k
                    xmlSBufAddString(buf, chunk, str - chunk);
3790
271k
                xmlSBufAddReplChar(buf);
3791
271k
                str += 1;
3792
271k
                chunk = str;
3793
313k
            } else {
3794
313k
                str += l;
3795
313k
            }
3796
2.72M
        } else if (c == '&') {
3797
112k
            if (str[1] == '#') {
3798
45.4k
                if (chunk < str)
3799
31.3k
                    xmlSBufAddString(buf, chunk, str - chunk);
3800
3801
45.4k
                c = xmlParseStringCharRef(ctxt, &str);
3802
45.4k
                if (c == 0)
3803
7.51k
                    return;
3804
3805
37.9k
                xmlSBufAddChar(buf, c);
3806
3807
37.9k
                chunk = str;
3808
66.7k
            } else {
3809
66.7k
                xmlChar *name;
3810
3811
                /*
3812
                 * General entity references are checked for
3813
                 * syntactic validity.
3814
                 */
3815
66.7k
                str++;
3816
66.7k
                name = xmlParseStringName(ctxt, &str);
3817
3818
66.7k
                if ((name == NULL) || (*str++ != ';')) {
3819
20.6k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3820
20.6k
                            "EntityValue: '&' forbidden except for entities "
3821
20.6k
                            "references\n");
3822
20.6k
                    xmlFree(name);
3823
20.6k
                    return;
3824
20.6k
                }
3825
3826
46.0k
                xmlFree(name);
3827
46.0k
            }
3828
2.61M
        } else if (c == '%') {
3829
3.91k
            xmlEntityPtr ent;
3830
3831
3.91k
            if (chunk < str)
3832
3.40k
                xmlSBufAddString(buf, chunk, str - chunk);
3833
3834
3.91k
            ent = xmlParseStringPEReference(ctxt, &str);
3835
3.91k
            if (ent == NULL)
3836
3.91k
                return;
3837
3838
0
            if (!PARSER_EXTERNAL(ctxt)) {
3839
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3840
0
                return;
3841
0
            }
3842
3843
0
            if (ent->content == NULL) {
3844
                /*
3845
                 * Note: external parsed entities will not be loaded,
3846
                 * it is not required for a non-validating parser to
3847
                 * complete external PEReferences coming from the
3848
                 * internal subset
3849
                 */
3850
0
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3851
0
                    ((ctxt->replaceEntities) ||
3852
0
                     (ctxt->validate))) {
3853
0
                    xmlLoadEntityContent(ctxt, ent);
3854
0
                } else {
3855
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3856
0
                                  "not validating will not read content for "
3857
0
                                  "PE entity %s\n", ent->name, NULL);
3858
0
                }
3859
0
            }
3860
3861
            /*
3862
             * TODO: Skip if ent->content is still NULL.
3863
             */
3864
3865
0
            if (xmlParserEntityCheck(ctxt, ent->length))
3866
0
                return;
3867
3868
0
            if (ent->flags & XML_ENT_EXPANDING) {
3869
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3870
0
                xmlHaltParser(ctxt);
3871
0
                return;
3872
0
            }
3873
3874
0
            ent->flags |= XML_ENT_EXPANDING;
3875
0
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3876
0
                                      depth);
3877
0
            ent->flags &= ~XML_ENT_EXPANDING;
3878
3879
0
            chunk = str;
3880
2.61M
        } else {
3881
            /* Normal ASCII char */
3882
2.61M
            if (!IS_BYTE_CHAR(c)) {
3883
250k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3884
250k
                        "invalid character in entity value\n");
3885
250k
                if (chunk < str)
3886
11.1k
                    xmlSBufAddString(buf, chunk, str - chunk);
3887
250k
                xmlSBufAddReplChar(buf);
3888
250k
                str += 1;
3889
250k
                chunk = str;
3890
2.36M
            } else {
3891
2.36M
                str += 1;
3892
2.36M
            }
3893
2.61M
        }
3894
3.31M
    }
3895
3896
14.3k
    if (chunk < str)
3897
10.6k
        xmlSBufAddString(buf, chunk, str - chunk);
3898
14.3k
}
3899
3900
/**
3901
 * xmlParseEntityValue:
3902
 * @ctxt:  an XML parser context
3903
 * @orig:  if non-NULL store a copy of the original entity value
3904
 *
3905
 * DEPRECATED: Internal function, don't use.
3906
 *
3907
 * parse a value for ENTITY declarations
3908
 *
3909
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3910
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3911
 *
3912
 * Returns the EntityValue parsed with reference substituted or NULL
3913
 */
3914
xmlChar *
3915
46.9k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3916
46.9k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3917
46.9k
                         XML_MAX_HUGE_LENGTH :
3918
46.9k
                         XML_MAX_TEXT_LENGTH;
3919
46.9k
    xmlSBuf buf;
3920
46.9k
    const xmlChar *start;
3921
46.9k
    int quote, length;
3922
3923
46.9k
    xmlSBufInit(&buf, maxLength);
3924
3925
46.9k
    GROW;
3926
3927
46.9k
    quote = CUR;
3928
46.9k
    if ((quote != '"') && (quote != '\'')) {
3929
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3930
0
  return(NULL);
3931
0
    }
3932
46.9k
    CUR_PTR++;
3933
3934
46.9k
    length = 0;
3935
3936
    /*
3937
     * Copy raw content of the entity into a buffer
3938
     */
3939
8.56M
    while (1) {
3940
8.56M
        int c;
3941
3942
8.56M
        if (PARSER_STOPPED(ctxt))
3943
0
            goto error;
3944
3945
8.56M
        if (CUR_PTR >= ctxt->input->end) {
3946
313
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3947
313
            goto error;
3948
313
        }
3949
3950
8.56M
        c = CUR;
3951
3952
8.56M
        if (c == 0) {
3953
129
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3954
129
                    "invalid character in entity value\n");
3955
129
            goto error;
3956
129
        }
3957
8.56M
        if (c == quote)
3958
46.4k
            break;
3959
8.51M
        NEXTL(1);
3960
8.51M
        length += 1;
3961
3962
        /*
3963
         * TODO: Check growth threshold
3964
         */
3965
8.51M
        if (ctxt->input->end - CUR_PTR < 10)
3966
3.93k
            GROW;
3967
8.51M
    }
3968
3969
46.4k
    start = CUR_PTR - length;
3970
3971
46.4k
    if (orig != NULL) {
3972
46.4k
        *orig = xmlStrndup(start, length);
3973
46.4k
        if (*orig == NULL)
3974
0
            xmlErrMemory(ctxt);
3975
46.4k
    }
3976
3977
46.4k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3978
3979
46.4k
    NEXTL(1);
3980
3981
46.4k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3982
3983
442
error:
3984
442
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3985
442
    return(NULL);
3986
46.9k
}
3987
3988
/**
3989
 * xmlCheckEntityInAttValue:
3990
 * @ctxt:  parser context
3991
 * @pent:  entity
3992
 * @depth:  nesting depth
3993
 *
3994
 * Check an entity reference in an attribute value for validity
3995
 * without expanding it.
3996
 */
3997
static void
3998
0
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3999
0
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4000
0
    const xmlChar *str;
4001
0
    unsigned long expandedSize = pent->length;
4002
0
    int c, flags;
4003
4004
0
    depth += 1;
4005
0
    if (depth > maxDepth) {
4006
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4007
0
                       "Maximum entity nesting depth exceeded");
4008
0
  return;
4009
0
    }
4010
4011
0
    if (pent->flags & XML_ENT_EXPANDING) {
4012
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4013
0
        xmlHaltParser(ctxt);
4014
0
        return;
4015
0
    }
4016
4017
    /*
4018
     * If we're parsing a default attribute value in DTD content,
4019
     * the entity might reference other entities which weren't
4020
     * defined yet, so the check isn't reliable.
4021
     */
4022
0
    if (ctxt->inSubset == 0)
4023
0
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4024
0
    else
4025
0
        flags = XML_ENT_VALIDATED;
4026
4027
0
    str = pent->content;
4028
0
    if (str == NULL)
4029
0
        goto done;
4030
4031
    /*
4032
     * Note that entity values are already validated. We only check
4033
     * for illegal less-than signs and compute the expanded size
4034
     * of the entity. No special handling for multi-byte characters
4035
     * is needed.
4036
     */
4037
0
    while (!PARSER_STOPPED(ctxt)) {
4038
0
        c = *str;
4039
4040
0
  if (c != '&') {
4041
0
            if (c == 0)
4042
0
                break;
4043
4044
0
            if (c == '<')
4045
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4046
0
                        "'<' in entity '%s' is not allowed in attributes "
4047
0
                        "values\n", pent->name);
4048
4049
0
            str += 1;
4050
0
        } else if (str[1] == '#') {
4051
0
            int val;
4052
4053
0
      val = xmlParseStringCharRef(ctxt, &str);
4054
0
      if (val == 0) {
4055
0
                pent->content[0] = 0;
4056
0
                break;
4057
0
            }
4058
0
  } else {
4059
0
            xmlChar *name;
4060
0
            xmlEntityPtr ent;
4061
4062
0
      name = xmlParseStringEntityRef(ctxt, &str);
4063
0
      if (name == NULL) {
4064
0
                pent->content[0] = 0;
4065
0
                break;
4066
0
            }
4067
4068
0
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4069
0
            xmlFree(name);
4070
4071
0
            if ((ent != NULL) &&
4072
0
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4073
0
                if ((ent->flags & flags) != flags) {
4074
0
                    pent->flags |= XML_ENT_EXPANDING;
4075
0
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
4076
0
                    pent->flags &= ~XML_ENT_EXPANDING;
4077
0
                }
4078
4079
0
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4080
0
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4081
0
            }
4082
0
        }
4083
0
    }
4084
4085
0
done:
4086
0
    if (ctxt->inSubset == 0)
4087
0
        pent->expandedSize = expandedSize;
4088
4089
0
    pent->flags |= flags;
4090
0
}
4091
4092
/**
4093
 * xmlExpandEntityInAttValue:
4094
 * @ctxt:  parser context
4095
 * @buf:  string buffer
4096
 * @str:  entity or attribute value
4097
 * @pent:  entity for entity value, NULL for attribute values
4098
 * @normalize:  whether to collapse whitespace
4099
 * @inSpace:  whitespace state
4100
 * @depth:  nesting depth
4101
 * @check:  whether to check for amplification
4102
 *
4103
 * Expand general entity references in an entity or attribute value.
4104
 * Perform attribute value normalization.
4105
 */
4106
static void
4107
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4108
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
4109
0
                          int *inSpace, int depth, int check) {
4110
0
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4111
0
    int c, chunkSize;
4112
4113
0
    if (str == NULL)
4114
0
        return;
4115
4116
0
    depth += 1;
4117
0
    if (depth > maxDepth) {
4118
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4119
0
                       "Maximum entity nesting depth exceeded");
4120
0
  return;
4121
0
    }
4122
4123
0
    if (pent != NULL) {
4124
0
        if (pent->flags & XML_ENT_EXPANDING) {
4125
0
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4126
0
            xmlHaltParser(ctxt);
4127
0
            return;
4128
0
        }
4129
4130
0
        if (check) {
4131
0
            if (xmlParserEntityCheck(ctxt, pent->length))
4132
0
                return;
4133
0
        }
4134
0
    }
4135
4136
0
    chunkSize = 0;
4137
4138
    /*
4139
     * Note that entity values are already validated. No special
4140
     * handling for multi-byte characters is needed.
4141
     */
4142
0
    while (!PARSER_STOPPED(ctxt)) {
4143
0
        c = *str;
4144
4145
0
  if (c != '&') {
4146
0
            if (c == 0)
4147
0
                break;
4148
4149
            /*
4150
             * If this function is called without an entity, it is used to
4151
             * expand entities in an attribute content where less-than was
4152
             * already unescaped and is allowed.
4153
             */
4154
0
            if ((pent != NULL) && (c == '<')) {
4155
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4156
0
                        "'<' in entity '%s' is not allowed in attributes "
4157
0
                        "values\n", pent->name);
4158
0
                break;
4159
0
            }
4160
4161
0
            if (c <= 0x20) {
4162
0
                if ((normalize) && (*inSpace)) {
4163
                    /* Skip char */
4164
0
                    if (chunkSize > 0) {
4165
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4166
0
                        chunkSize = 0;
4167
0
                    }
4168
0
                } else if (c < 0x20) {
4169
0
                    if (chunkSize > 0) {
4170
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4171
0
                        chunkSize = 0;
4172
0
                    }
4173
4174
0
                    xmlSBufAddCString(buf, " ", 1);
4175
0
                } else {
4176
0
                    chunkSize += 1;
4177
0
                }
4178
4179
0
                *inSpace = 1;
4180
0
            } else {
4181
0
                chunkSize += 1;
4182
0
                *inSpace = 0;
4183
0
            }
4184
4185
0
            str += 1;
4186
0
        } else if (str[1] == '#') {
4187
0
            int val;
4188
4189
0
            if (chunkSize > 0) {
4190
0
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4191
0
                chunkSize = 0;
4192
0
            }
4193
4194
0
      val = xmlParseStringCharRef(ctxt, &str);
4195
0
      if (val == 0) {
4196
0
                if (pent != NULL)
4197
0
                    pent->content[0] = 0;
4198
0
                break;
4199
0
            }
4200
4201
0
            if (val == ' ') {
4202
0
                if ((!normalize) || (!*inSpace))
4203
0
                    xmlSBufAddCString(buf, " ", 1);
4204
0
                *inSpace = 1;
4205
0
            } else {
4206
0
                xmlSBufAddChar(buf, val);
4207
0
                *inSpace = 0;
4208
0
            }
4209
0
  } else {
4210
0
            xmlChar *name;
4211
0
            xmlEntityPtr ent;
4212
4213
0
            if (chunkSize > 0) {
4214
0
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4215
0
                chunkSize = 0;
4216
0
            }
4217
4218
0
      name = xmlParseStringEntityRef(ctxt, &str);
4219
0
            if (name == NULL) {
4220
0
                if (pent != NULL)
4221
0
                    pent->content[0] = 0;
4222
0
                break;
4223
0
            }
4224
4225
0
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4226
0
            xmlFree(name);
4227
4228
0
      if ((ent != NULL) &&
4229
0
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4230
0
    if (ent->content == NULL) {
4231
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4232
0
          "predefined entity has no content\n");
4233
0
                    break;
4234
0
                }
4235
4236
0
                xmlSBufAddString(buf, ent->content, ent->length);
4237
4238
0
                *inSpace = 0;
4239
0
      } else if ((ent != NULL) && (ent->content != NULL)) {
4240
0
                if (pent != NULL)
4241
0
                    pent->flags |= XML_ENT_EXPANDING;
4242
0
    xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4243
0
                                          normalize, inSpace, depth, check);
4244
0
                if (pent != NULL)
4245
0
                    pent->flags &= ~XML_ENT_EXPANDING;
4246
0
      }
4247
0
        }
4248
0
    }
4249
4250
0
    if (chunkSize > 0)
4251
0
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4252
0
}
4253
4254
/**
4255
 * xmlExpandEntitiesInAttValue:
4256
 * @ctxt:  parser context
4257
 * @str:  entity or attribute value
4258
 * @normalize:  whether to collapse whitespace
4259
 *
4260
 * Expand general entity references in an entity or attribute value.
4261
 * Perform attribute value normalization.
4262
 *
4263
 * Returns the expanded attribute value.
4264
 */
4265
xmlChar *
4266
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4267
0
                            int normalize) {
4268
0
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4269
0
                         XML_MAX_HUGE_LENGTH :
4270
0
                         XML_MAX_TEXT_LENGTH;
4271
0
    xmlSBuf buf;
4272
0
    int inSpace = 1;
4273
4274
0
    xmlSBufInit(&buf, maxLength);
4275
4276
0
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4277
0
                              ctxt->inputNr, /* check */ 0);
4278
4279
0
    if ((normalize) && (inSpace) && (buf.size > 0))
4280
0
        buf.size--;
4281
4282
0
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4283
0
}
4284
4285
/**
4286
 * xmlParseAttValueInternal:
4287
 * @ctxt:  an XML parser context
4288
 * @attlen:  attribute len result
4289
 * @alloc:  whether the attribute was reallocated as a new string
4290
 * @normalize:  if 1 then further non-CDATA normalization must be done
4291
 *
4292
 * parse a value for an attribute.
4293
 * NOTE: if no normalization is needed, the routine will return pointers
4294
 *       directly from the data buffer.
4295
 *
4296
 * 3.3.3 Attribute-Value Normalization:
4297
 * Before the value of an attribute is passed to the application or
4298
 * checked for validity, the XML processor must normalize it as follows:
4299
 * - a character reference is processed by appending the referenced
4300
 *   character to the attribute value
4301
 * - an entity reference is processed by recursively processing the
4302
 *   replacement text of the entity
4303
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4304
 *   appending #x20 to the normalized value, except that only a single
4305
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4306
 *   parsed entity or the literal entity value of an internal parsed entity
4307
 * - other characters are processed by appending them to the normalized value
4308
 * If the declared value is not CDATA, then the XML processor must further
4309
 * process the normalized attribute value by discarding any leading and
4310
 * trailing space (#x20) characters, and by replacing sequences of space
4311
 * (#x20) characters by a single space (#x20) character.
4312
 * All attributes for which no declaration has been read should be treated
4313
 * by a non-validating parser as if declared CDATA.
4314
 *
4315
 * Returns the AttValue parsed or NULL. The value has to be freed by the
4316
 *     caller if it was copied, this can be detected by val[*attlen] == 0.
4317
 */
4318
static xmlChar *
4319
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4320
24.9M
                         int normalize, int isNamespace) {
4321
24.9M
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4322
23.5M
                         XML_MAX_HUGE_LENGTH :
4323
24.9M
                         XML_MAX_TEXT_LENGTH;
4324
24.9M
    xmlSBuf buf;
4325
24.9M
    xmlChar *ret;
4326
24.9M
    int c, l, quote, flags, chunkSize;
4327
24.9M
    int inSpace = 1;
4328
24.9M
    int replaceEntities;
4329
4330
    /* Always expand namespace URIs */
4331
24.9M
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4332
4333
24.9M
    xmlSBufInit(&buf, maxLength);
4334
4335
24.9M
    GROW;
4336
4337
24.9M
    quote = CUR;
4338
24.9M
    if ((quote != '"') && (quote != '\'')) {
4339
3.49k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4340
3.49k
  return(NULL);
4341
3.49k
    }
4342
24.9M
    NEXTL(1);
4343
4344
24.9M
    if (ctxt->inSubset == 0)
4345
24.8M
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4346
42.2k
    else
4347
42.2k
        flags = XML_ENT_VALIDATED;
4348
4349
24.9M
    inSpace = 1;
4350
24.9M
    chunkSize = 0;
4351
4352
288M
    while (1) {
4353
288M
        if (PARSER_STOPPED(ctxt))
4354
0
            goto error;
4355
4356
288M
        if (CUR_PTR >= ctxt->input->end) {
4357
5.29k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4358
5.29k
                           "AttValue: ' expected\n");
4359
5.29k
            goto error;
4360
5.29k
        }
4361
4362
        /*
4363
         * TODO: Check growth threshold
4364
         */
4365
288M
        if (ctxt->input->end - CUR_PTR < 10)
4366
56.0k
            GROW;
4367
4368
288M
        c = CUR;
4369
4370
288M
        if (c >= 0x80) {
4371
7.09M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4372
7.09M
                    "invalid character in attribute value\n");
4373
7.09M
            if (l == 0) {
4374
2.08M
                if (chunkSize > 0) {
4375
239k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4376
239k
                    chunkSize = 0;
4377
239k
                }
4378
2.08M
                xmlSBufAddReplChar(&buf);
4379
2.08M
                NEXTL(1);
4380
5.00M
            } else {
4381
5.00M
                chunkSize += l;
4382
5.00M
                NEXTL(l);
4383
5.00M
            }
4384
4385
7.09M
            inSpace = 0;
4386
281M
        } else if (c != '&') {
4387
280M
            if (c > 0x20) {
4388
276M
                if (c == quote)
4389
24.8M
                    break;
4390
4391
251M
                if (c == '<')
4392
206k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4393
4394
251M
                chunkSize += 1;
4395
251M
                inSpace = 0;
4396
251M
            } else if (!IS_BYTE_CHAR(c)) {
4397
1.23M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4398
1.23M
                        "invalid character in attribute value\n");
4399
1.23M
                if (chunkSize > 0) {
4400
62.3k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4401
62.3k
                    chunkSize = 0;
4402
62.3k
                }
4403
1.23M
                xmlSBufAddReplChar(&buf);
4404
1.23M
                inSpace = 0;
4405
2.93M
            } else {
4406
                /* Whitespace */
4407
2.93M
                if ((normalize) && (inSpace)) {
4408
                    /* Skip char */
4409
31.8k
                    if (chunkSize > 0) {
4410
2.30k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4411
2.30k
                        chunkSize = 0;
4412
2.30k
                    }
4413
2.90M
                } else if (c < 0x20) {
4414
                    /* Convert to space */
4415
778k
                    if (chunkSize > 0) {
4416
191k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4417
191k
                        chunkSize = 0;
4418
191k
                    }
4419
4420
778k
                    xmlSBufAddCString(&buf, " ", 1);
4421
2.12M
                } else {
4422
2.12M
                    chunkSize += 1;
4423
2.12M
                }
4424
4425
2.93M
                inSpace = 1;
4426
4427
2.93M
                if ((c == 0xD) && (NXT(1) == 0xA))
4428
32.7k
                    CUR_PTR++;
4429
2.93M
            }
4430
4431
255M
            NEXTL(1);
4432
255M
        } else if (NXT(1) == '#') {
4433
70.9k
            int val;
4434
4435
70.9k
            if (chunkSize > 0) {
4436
34.1k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4437
34.1k
                chunkSize = 0;
4438
34.1k
            }
4439
4440
70.9k
            val = xmlParseCharRef(ctxt);
4441
70.9k
            if (val == 0)
4442
676
                goto error;
4443
4444
70.2k
            if ((val == '&') && (!replaceEntities)) {
4445
                /*
4446
                 * The reparsing will be done in xmlNodeParseContent()
4447
                 * called from SAX2.c
4448
                 */
4449
0
                xmlSBufAddCString(&buf, "&#38;", 5);
4450
0
                inSpace = 0;
4451
70.2k
            } else if (val == ' ') {
4452
42.3k
                if ((!normalize) || (!inSpace))
4453
36.1k
                    xmlSBufAddCString(&buf, " ", 1);
4454
42.3k
                inSpace = 1;
4455
42.3k
            } else {
4456
27.8k
                xmlSBufAddChar(&buf, val);
4457
27.8k
                inSpace = 0;
4458
27.8k
            }
4459
663k
        } else {
4460
663k
            const xmlChar *name;
4461
663k
            xmlEntityPtr ent;
4462
4463
663k
            if (chunkSize > 0) {
4464
361k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4465
361k
                chunkSize = 0;
4466
361k
            }
4467
4468
663k
            name = xmlParseEntityRefInternal(ctxt);
4469
663k
            if (name == NULL) {
4470
                /*
4471
                 * Probably a literal '&' which wasn't escaped.
4472
                 * TODO: Handle gracefully in recovery mode.
4473
                 */
4474
448k
                continue;
4475
448k
            }
4476
4477
214k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4478
214k
            if (ent == NULL)
4479
21.7k
                continue;
4480
4481
193k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4482
193k
                if ((ent->content[0] == '&') && (!replaceEntities))
4483
0
                    xmlSBufAddCString(&buf, "&#38;", 5);
4484
193k
                else
4485
193k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4486
193k
                inSpace = 0;
4487
193k
            } else if (replaceEntities) {
4488
0
                xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4489
0
                                          normalize, &inSpace, ctxt->inputNr,
4490
0
                                          /* check */ 1);
4491
68
            } else {
4492
68
                if ((ent->flags & flags) != flags)
4493
0
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4494
4495
68
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4496
0
                    ent->content[0] = 0;
4497
0
                    goto error;
4498
0
                }
4499
4500
                /*
4501
                 * Just output the reference
4502
                 */
4503
68
                xmlSBufAddCString(&buf, "&", 1);
4504
68
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4505
68
                xmlSBufAddCString(&buf, ";", 1);
4506
4507
68
                inSpace = 0;
4508
68
            }
4509
193k
  }
4510
288M
    }
4511
4512
24.8M
    if ((buf.mem == NULL) && (alloc != NULL)) {
4513
24.6M
        ret = (xmlChar *) CUR_PTR - chunkSize;
4514
4515
24.6M
        if (attlen != NULL)
4516
24.6M
            *attlen = chunkSize;
4517
24.6M
        if ((normalize) && (inSpace) && (chunkSize > 0))
4518
385
            *attlen -= 1;
4519
24.6M
        *alloc = 0;
4520
4521
        /* Report potential error */
4522
24.6M
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4523
24.6M
    } else {
4524
269k
        if (chunkSize > 0)
4525
186k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4526
4527
269k
        if ((normalize) && (inSpace) && (buf.size > 0))
4528
3.63k
            buf.size--;
4529
4530
269k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4531
4532
269k
        if (ret != NULL) {
4533
268k
            if (attlen != NULL)
4534
226k
                *attlen = buf.size;
4535
268k
            if (alloc != NULL)
4536
226k
                *alloc = 1;
4537
268k
        }
4538
269k
    }
4539
4540
24.8M
    NEXTL(1);
4541
4542
24.8M
    return(ret);
4543
4544
5.96k
error:
4545
5.96k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4546
5.96k
    return(NULL);
4547
24.9M
}
4548
4549
/**
4550
 * xmlParseAttValue:
4551
 * @ctxt:  an XML parser context
4552
 *
4553
 * DEPRECATED: Internal function, don't use.
4554
 *
4555
 * parse a value for an attribute
4556
 * Note: the parser won't do substitution of entities here, this
4557
 * will be handled later in xmlStringGetNodeList
4558
 *
4559
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4560
 *                   "'" ([^<&'] | Reference)* "'"
4561
 *
4562
 * 3.3.3 Attribute-Value Normalization:
4563
 * Before the value of an attribute is passed to the application or
4564
 * checked for validity, the XML processor must normalize it as follows:
4565
 * - a character reference is processed by appending the referenced
4566
 *   character to the attribute value
4567
 * - an entity reference is processed by recursively processing the
4568
 *   replacement text of the entity
4569
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4570
 *   appending #x20 to the normalized value, except that only a single
4571
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4572
 *   parsed entity or the literal entity value of an internal parsed entity
4573
 * - other characters are processed by appending them to the normalized value
4574
 * If the declared value is not CDATA, then the XML processor must further
4575
 * process the normalized attribute value by discarding any leading and
4576
 * trailing space (#x20) characters, and by replacing sequences of space
4577
 * (#x20) characters by a single space (#x20) character.
4578
 * All attributes for which no declaration has been read should be treated
4579
 * by a non-validating parser as if declared CDATA.
4580
 *
4581
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4582
 */
4583
4584
4585
xmlChar *
4586
42.5k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4587
42.5k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4588
42.5k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4589
42.5k
}
4590
4591
/**
4592
 * xmlParseSystemLiteral:
4593
 * @ctxt:  an XML parser context
4594
 *
4595
 * DEPRECATED: Internal function, don't use.
4596
 *
4597
 * parse an XML Literal
4598
 *
4599
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4600
 *
4601
 * Returns the SystemLiteral parsed or NULL
4602
 */
4603
4604
xmlChar *
4605
8.14k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4606
8.14k
    xmlChar *buf = NULL;
4607
8.14k
    int len = 0;
4608
8.14k
    int size = XML_PARSER_BUFFER_SIZE;
4609
8.14k
    int cur, l;
4610
8.14k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4611
8.14k
                    XML_MAX_TEXT_LENGTH :
4612
8.14k
                    XML_MAX_NAME_LENGTH;
4613
8.14k
    xmlChar stop;
4614
4615
8.14k
    if (RAW == '"') {
4616
6.07k
        NEXT;
4617
6.07k
  stop = '"';
4618
6.07k
    } else if (RAW == '\'') {
4619
1.49k
        NEXT;
4620
1.49k
  stop = '\'';
4621
1.49k
    } else {
4622
569
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4623
569
  return(NULL);
4624
569
    }
4625
4626
7.57k
    buf = xmlMalloc(size);
4627
7.57k
    if (buf == NULL) {
4628
0
        xmlErrMemory(ctxt);
4629
0
  return(NULL);
4630
0
    }
4631
7.57k
    cur = xmlCurrentCharRecover(ctxt, &l);
4632
401k
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4633
393k
  if (len + 5 >= size) {
4634
2.43k
      xmlChar *tmp;
4635
2.43k
            int newSize;
4636
4637
2.43k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4638
2.43k
            if (newSize < 0) {
4639
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4640
0
                xmlFree(buf);
4641
0
                return(NULL);
4642
0
            }
4643
2.43k
      tmp = xmlRealloc(buf, newSize);
4644
2.43k
      if (tmp == NULL) {
4645
0
          xmlFree(buf);
4646
0
    xmlErrMemory(ctxt);
4647
0
    return(NULL);
4648
0
      }
4649
2.43k
      buf = tmp;
4650
2.43k
            size = newSize;
4651
2.43k
  }
4652
393k
  COPY_BUF(buf, len, cur);
4653
393k
  NEXTL(l);
4654
393k
  cur = xmlCurrentCharRecover(ctxt, &l);
4655
393k
    }
4656
7.57k
    buf[len] = 0;
4657
7.57k
    if (!IS_CHAR(cur)) {
4658
170
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4659
7.40k
    } else {
4660
7.40k
  NEXT;
4661
7.40k
    }
4662
7.57k
    return(buf);
4663
7.57k
}
4664
4665
/**
4666
 * xmlParsePubidLiteral:
4667
 * @ctxt:  an XML parser context
4668
 *
4669
 * DEPRECATED: Internal function, don't use.
4670
 *
4671
 * parse an XML public literal
4672
 *
4673
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4674
 *
4675
 * Returns the PubidLiteral parsed or NULL.
4676
 */
4677
4678
xmlChar *
4679
4.58k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4680
4.58k
    xmlChar *buf = NULL;
4681
4.58k
    int len = 0;
4682
4.58k
    int size = XML_PARSER_BUFFER_SIZE;
4683
4.58k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4684
4.58k
                    XML_MAX_TEXT_LENGTH :
4685
4.58k
                    XML_MAX_NAME_LENGTH;
4686
4.58k
    xmlChar cur;
4687
4.58k
    xmlChar stop;
4688
4689
4.58k
    if (RAW == '"') {
4690
2.11k
        NEXT;
4691
2.11k
  stop = '"';
4692
2.47k
    } else if (RAW == '\'') {
4693
1.99k
        NEXT;
4694
1.99k
  stop = '\'';
4695
1.99k
    } else {
4696
481
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4697
481
  return(NULL);
4698
481
    }
4699
4.10k
    buf = xmlMalloc(size);
4700
4.10k
    if (buf == NULL) {
4701
0
  xmlErrMemory(ctxt);
4702
0
  return(NULL);
4703
0
    }
4704
4.10k
    cur = CUR;
4705
172k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4706
168k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4707
168k
  if (len + 1 >= size) {
4708
127
      xmlChar *tmp;
4709
127
            int newSize;
4710
4711
127
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4712
127
            if (newSize < 0) {
4713
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4714
0
                xmlFree(buf);
4715
0
                return(NULL);
4716
0
            }
4717
127
      tmp = xmlRealloc(buf, newSize);
4718
127
      if (tmp == NULL) {
4719
0
    xmlErrMemory(ctxt);
4720
0
    xmlFree(buf);
4721
0
    return(NULL);
4722
0
      }
4723
127
      buf = tmp;
4724
127
            size = newSize;
4725
127
  }
4726
168k
  buf[len++] = cur;
4727
168k
  NEXT;
4728
168k
  cur = CUR;
4729
168k
    }
4730
4.10k
    buf[len] = 0;
4731
4.10k
    if (cur != stop) {
4732
120
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4733
3.98k
    } else {
4734
3.98k
  NEXTL(1);
4735
3.98k
    }
4736
4.10k
    return(buf);
4737
4.10k
}
4738
4739
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4740
4741
/*
4742
 * used for the test in the inner loop of the char data testing
4743
 */
4744
static const unsigned char test_char_data[256] = {
4745
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4746
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4747
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4748
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4749
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4750
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4751
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4752
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4753
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4754
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4755
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4756
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4757
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4758
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4759
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4760
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4761
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4762
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4763
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4764
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4765
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4766
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4767
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4768
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4769
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4770
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4771
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4772
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4773
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4774
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4775
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4776
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4777
};
4778
4779
static void
4780
xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size,
4781
14.3M
              int isBlank) {
4782
14.3M
    int checkBlanks;
4783
4784
14.3M
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
4785
69.6k
        return;
4786
4787
14.2M
    checkBlanks = (!ctxt->keepBlanks) ||
4788
14.2M
                  (ctxt->sax->ignorableWhitespace != ctxt->sax->characters);
4789
4790
    /*
4791
     * Calling areBlanks with only parts of a text node
4792
     * is fundamentally broken, making the NOBLANKS option
4793
     * essentially unusable.
4794
     */
4795
14.2M
    if ((checkBlanks) &&
4796
13.9M
        (areBlanks(ctxt, buf, size, isBlank))) {
4797
0
        if ((ctxt->sax->ignorableWhitespace != NULL) &&
4798
0
            (ctxt->keepBlanks))
4799
0
            ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
4800
14.2M
    } else {
4801
14.2M
        if (ctxt->sax->characters != NULL)
4802
14.2M
            ctxt->sax->characters(ctxt->userData, buf, size);
4803
4804
        /*
4805
         * The old code used to update this value for "complex" data
4806
         * even if checkBlanks was false. This was probably a bug.
4807
         */
4808
14.2M
        if ((checkBlanks) && (*ctxt->space == -1))
4809
8.34M
            *ctxt->space = -2;
4810
14.2M
    }
4811
14.2M
}
4812
4813
/**
4814
 * xmlParseCharDataInternal:
4815
 * @ctxt:  an XML parser context
4816
 * @partial:  buffer may contain partial UTF-8 sequences
4817
 *
4818
 * Parse character data. Always makes progress if the first char isn't
4819
 * '<' or '&'.
4820
 *
4821
 * The right angle bracket (>) may be represented using the string "&gt;",
4822
 * and must, for compatibility, be escaped using "&gt;" or a character
4823
 * reference when it appears in the string "]]>" in content, when that
4824
 * string is not marking the end of a CDATA section.
4825
 *
4826
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4827
 */
4828
static void
4829
13.4M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4830
13.4M
    const xmlChar *in;
4831
13.4M
    int nbchar = 0;
4832
13.4M
    int line = ctxt->input->line;
4833
13.4M
    int col = ctxt->input->col;
4834
13.4M
    int ccol;
4835
4836
13.4M
    GROW;
4837
    /*
4838
     * Accelerated common case where input doesn't need to be
4839
     * modified before passing it to the handler.
4840
     */
4841
13.4M
    in = ctxt->input->cur;
4842
13.8M
    do {
4843
18.0M
get_more_space:
4844
42.4M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4845
18.0M
        if (*in == 0xA) {
4846
4.39M
            do {
4847
4.39M
                ctxt->input->line++; ctxt->input->col = 1;
4848
4.39M
                in++;
4849
4.39M
            } while (*in == 0xA);
4850
4.15M
            goto get_more_space;
4851
4.15M
        }
4852
13.8M
        if (*in == '<') {
4853
4.17M
            nbchar = in - ctxt->input->cur;
4854
4.17M
            if (nbchar > 0) {
4855
4.17M
                const xmlChar *tmp = ctxt->input->cur;
4856
4.17M
                ctxt->input->cur = in;
4857
4858
4.17M
                xmlCharacters(ctxt, tmp, nbchar, 1);
4859
4.17M
            }
4860
4.17M
            return;
4861
4.17M
        }
4862
4863
10.7M
get_more:
4864
10.7M
        ccol = ctxt->input->col;
4865
140M
        while (test_char_data[*in]) {
4866
129M
            in++;
4867
129M
            ccol++;
4868
129M
        }
4869
10.7M
        ctxt->input->col = ccol;
4870
10.7M
        if (*in == 0xA) {
4871
1.06M
            do {
4872
1.06M
                ctxt->input->line++; ctxt->input->col = 1;
4873
1.06M
                in++;
4874
1.06M
            } while (*in == 0xA);
4875
825k
            goto get_more;
4876
825k
        }
4877
9.90M
        if (*in == ']') {
4878
216k
            if ((in[1] == ']') && (in[2] == '>')) {
4879
9
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4880
9
                ctxt->input->cur = in + 1;
4881
9
                return;
4882
9
            }
4883
216k
            if ((!partial) || (ctxt->input->end - in >= 2)) {
4884
216k
                in++;
4885
216k
                ctxt->input->col++;
4886
216k
                goto get_more;
4887
216k
            }
4888
216k
        }
4889
9.68M
        nbchar = in - ctxt->input->cur;
4890
9.68M
        if (nbchar > 0) {
4891
8.86M
            const xmlChar *tmp = ctxt->input->cur;
4892
8.86M
            ctxt->input->cur = in;
4893
4894
8.86M
            xmlCharacters(ctxt, tmp, nbchar, 0);
4895
4896
8.86M
            line = ctxt->input->line;
4897
8.86M
            col = ctxt->input->col;
4898
8.86M
        }
4899
9.68M
        ctxt->input->cur = in;
4900
9.68M
        if (*in == 0xD) {
4901
526k
            in++;
4902
526k
            if (*in == 0xA) {
4903
485k
                ctxt->input->cur = in;
4904
485k
                in++;
4905
485k
                ctxt->input->line++; ctxt->input->col = 1;
4906
485k
                continue; /* while */
4907
485k
            }
4908
40.9k
            in--;
4909
40.9k
        }
4910
9.20M
        if (*in == '<') {
4911
7.86M
            return;
4912
7.86M
        }
4913
1.33M
        if (*in == '&') {
4914
254k
            return;
4915
254k
        }
4916
1.08M
        if ((partial) && (*in == ']') && (ctxt->input->end - in < 2)) {
4917
0
            return;
4918
0
        }
4919
1.08M
        SHRINK;
4920
1.08M
        GROW;
4921
1.08M
        in = ctxt->input->cur;
4922
1.56M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4923
1.40M
             (*in == 0x09) || (*in == 0x0a));
4924
1.10M
    ctxt->input->line = line;
4925
1.10M
    ctxt->input->col = col;
4926
1.10M
    xmlParseCharDataComplex(ctxt, partial);
4927
1.10M
}
4928
4929
/**
4930
 * xmlParseCharDataComplex:
4931
 * @ctxt:  an XML parser context
4932
 * @partial:  buffer may contain partial UTF-8 sequences
4933
 *
4934
 * Always makes progress if the first char isn't '<' or '&'.
4935
 *
4936
 * Parse a CharData section. This is the fallback function
4937
 * of xmlParseCharDataInternal() when the parsing requires handling
4938
 * of non-ASCII characters.
4939
 */
4940
static void
4941
1.10M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4942
1.10M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4943
1.10M
    int nbchar = 0;
4944
1.10M
    int cur, l;
4945
4946
1.10M
    cur = xmlCurrentCharRecover(ctxt, &l);
4947
76.7M
    while ((cur != '<') && /* checked */
4948
75.6M
           (cur != '&') &&
4949
75.6M
           ((!partial) || (cur != ']') ||
4950
24.6k
            (ctxt->input->end - ctxt->input->cur >= 2)) &&
4951
75.6M
     (IS_CHAR(cur))) {
4952
75.6M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4953
365
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4954
365
  }
4955
75.6M
  COPY_BUF(buf, nbchar, cur);
4956
  /* move current position before possible calling of ctxt->sax->characters */
4957
75.6M
  NEXTL(l);
4958
75.6M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4959
208k
      buf[nbchar] = 0;
4960
4961
208k
            xmlCharacters(ctxt, buf, nbchar, 0);
4962
208k
      nbchar = 0;
4963
208k
            SHRINK;
4964
208k
  }
4965
75.6M
  cur = xmlCurrentCharRecover(ctxt, &l);
4966
75.6M
    }
4967
1.10M
    if (nbchar != 0) {
4968
1.09M
        buf[nbchar] = 0;
4969
4970
1.09M
        xmlCharacters(ctxt, buf, nbchar, 0);
4971
1.09M
    }
4972
    /*
4973
     * cur == 0 can mean
4974
     *
4975
     * - End of buffer.
4976
     * - An actual 0 character.
4977
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4978
     */
4979
1.10M
    if (ctxt->input->cur < ctxt->input->end) {
4980
1.09M
        if ((cur == 0) && (CUR != 0)) {
4981
265
            if (partial == 0) {
4982
259
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4983
259
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4984
259
                NEXTL(1);
4985
259
            }
4986
1.09M
        } else if ((cur != '<') && (cur != '&') && (cur != ']')) {
4987
            /* Generate the error and skip the offending character */
4988
3.02k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4989
3.02k
                              "PCDATA invalid Char value %d\n", cur);
4990
3.02k
            NEXTL(l);
4991
3.02k
        }
4992
1.09M
    }
4993
1.10M
}
4994
4995
/**
4996
 * xmlParseCharData:
4997
 * @ctxt:  an XML parser context
4998
 * @cdata:  unused
4999
 *
5000
 * DEPRECATED: Internal function, don't use.
5001
 */
5002
void
5003
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
5004
0
    xmlParseCharDataInternal(ctxt, 0);
5005
0
}
5006
5007
/**
5008
 * xmlParseExternalID:
5009
 * @ctxt:  an XML parser context
5010
 * @publicID:  a xmlChar** receiving PubidLiteral
5011
 * @strict: indicate whether we should restrict parsing to only
5012
 *          production [75], see NOTE below
5013
 *
5014
 * DEPRECATED: Internal function, don't use.
5015
 *
5016
 * Parse an External ID or a Public ID
5017
 *
5018
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
5019
 *       'PUBLIC' S PubidLiteral S SystemLiteral
5020
 *
5021
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5022
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
5023
 *
5024
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5025
 *
5026
 * Returns the SystemLiteral; in the PUBLIC case @publicID also
5027
 *                receives the PubidLiteral. If strict is off
5028
 *                it is possible to return NULL and have publicID set.
5029
 */
5030
5031
xmlChar *
5032
18.6k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5033
18.6k
    xmlChar *URI = NULL;
5034
5035
18.6k
    *publicID = NULL;
5036
18.6k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5037
5.18k
        SKIP(6);
5038
5.18k
  if (SKIP_BLANKS == 0) {
5039
503
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5040
503
                     "Space required after 'SYSTEM'\n");
5041
503
  }
5042
5.18k
  URI = xmlParseSystemLiteral(ctxt);
5043
5.18k
  if (URI == NULL) {
5044
135
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5045
135
        }
5046
13.4k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5047
4.58k
        SKIP(6);
5048
4.58k
  if (SKIP_BLANKS == 0) {
5049
28
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5050
28
        "Space required after 'PUBLIC'\n");
5051
28
  }
5052
4.58k
  *publicID = xmlParsePubidLiteral(ctxt);
5053
4.58k
  if (*publicID == NULL) {
5054
481
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5055
481
  }
5056
4.58k
  if (strict) {
5057
      /*
5058
       * We don't handle [83] so "S SystemLiteral" is required.
5059
       */
5060
2.48k
      if (SKIP_BLANKS == 0) {
5061
461
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5062
461
      "Space required after the Public Identifier\n");
5063
461
      }
5064
2.48k
  } else {
5065
      /*
5066
       * We handle [83] so we return immediately, if
5067
       * "S SystemLiteral" is not detected. We skip blanks if no
5068
             * system literal was found, but this is harmless since we must
5069
             * be at the end of a NotationDecl.
5070
       */
5071
2.10k
      if (SKIP_BLANKS == 0) return(NULL);
5072
1.04k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
5073
1.04k
  }
5074
2.96k
  URI = xmlParseSystemLiteral(ctxt);
5075
2.96k
  if (URI == NULL) {
5076
434
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5077
434
        }
5078
2.96k
    }
5079
16.9k
    return(URI);
5080
18.6k
}
5081
5082
/**
5083
 * xmlParseCommentComplex:
5084
 * @ctxt:  an XML parser context
5085
 * @buf:  the already parsed part of the buffer
5086
 * @len:  number of bytes in the buffer
5087
 * @size:  allocated size of the buffer
5088
 *
5089
 * Parse an XML (SGML) comment <!-- .... -->
5090
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5091
 *  must not occur within comments. "
5092
 * This is the slow routine in case the accelerator for ascii didn't work
5093
 *
5094
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5095
 */
5096
static void
5097
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5098
41.8k
                       size_t len, size_t size) {
5099
41.8k
    int q, ql;
5100
41.8k
    int r, rl;
5101
41.8k
    int cur, l;
5102
41.8k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5103
41.8k
                    XML_MAX_HUGE_LENGTH :
5104
41.8k
                    XML_MAX_TEXT_LENGTH;
5105
5106
41.8k
    if (buf == NULL) {
5107
9.78k
        len = 0;
5108
9.78k
  size = XML_PARSER_BUFFER_SIZE;
5109
9.78k
  buf = xmlMalloc(size);
5110
9.78k
  if (buf == NULL) {
5111
0
      xmlErrMemory(ctxt);
5112
0
      return;
5113
0
  }
5114
9.78k
    }
5115
41.8k
    q = xmlCurrentCharRecover(ctxt, &ql);
5116
41.8k
    if (q == 0)
5117
307
        goto not_terminated;
5118
41.5k
    if (!IS_CHAR(q)) {
5119
45
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5120
45
                          "xmlParseComment: invalid xmlChar value %d\n",
5121
45
                    q);
5122
45
  xmlFree (buf);
5123
45
  return;
5124
45
    }
5125
41.5k
    NEXTL(ql);
5126
41.5k
    r = xmlCurrentCharRecover(ctxt, &rl);
5127
41.5k
    if (r == 0)
5128
26
        goto not_terminated;
5129
41.5k
    if (!IS_CHAR(r)) {
5130
27
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5131
27
                          "xmlParseComment: invalid xmlChar value %d\n",
5132
27
                    r);
5133
27
  xmlFree (buf);
5134
27
  return;
5135
27
    }
5136
41.4k
    NEXTL(rl);
5137
41.4k
    cur = xmlCurrentCharRecover(ctxt, &l);
5138
41.4k
    if (cur == 0)
5139
24
        goto not_terminated;
5140
3.17M
    while (IS_CHAR(cur) && /* checked */
5141
3.17M
           ((cur != '>') ||
5142
3.13M
      (r != '-') || (q != '-'))) {
5143
3.13M
  if ((r == '-') && (q == '-')) {
5144
21.3k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5145
21.3k
  }
5146
3.13M
  if (len + 5 >= size) {
5147
17.4k
      xmlChar *tmp;
5148
17.4k
            int newSize;
5149
5150
17.4k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5151
17.4k
            if (newSize < 0) {
5152
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5153
0
                             "Comment too big found", NULL);
5154
0
                xmlFree (buf);
5155
0
                return;
5156
0
            }
5157
17.4k
      tmp = xmlRealloc(buf, newSize);
5158
17.4k
      if (tmp == NULL) {
5159
0
    xmlErrMemory(ctxt);
5160
0
    xmlFree(buf);
5161
0
    return;
5162
0
      }
5163
17.4k
      buf = tmp;
5164
17.4k
            size = newSize;
5165
17.4k
  }
5166
3.13M
  COPY_BUF(buf, len, q);
5167
5168
3.13M
  q = r;
5169
3.13M
  ql = rl;
5170
3.13M
  r = cur;
5171
3.13M
  rl = l;
5172
5173
3.13M
  NEXTL(l);
5174
3.13M
  cur = xmlCurrentCharRecover(ctxt, &l);
5175
5176
3.13M
    }
5177
41.4k
    buf[len] = 0;
5178
41.4k
    if (cur == 0) {
5179
558
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5180
558
                       "Comment not terminated \n<!--%.50s\n", buf);
5181
40.8k
    } else if (!IS_CHAR(cur)) {
5182
106
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5183
106
                          "xmlParseComment: invalid xmlChar value %d\n",
5184
106
                    cur);
5185
40.7k
    } else {
5186
40.7k
        NEXT;
5187
40.7k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5188
0
      (!ctxt->disableSAX))
5189
0
      ctxt->sax->comment(ctxt->userData, buf);
5190
40.7k
    }
5191
41.4k
    xmlFree(buf);
5192
41.4k
    return;
5193
357
not_terminated:
5194
357
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5195
357
       "Comment not terminated\n", NULL);
5196
357
    xmlFree(buf);
5197
357
}
5198
5199
/**
5200
 * xmlParseComment:
5201
 * @ctxt:  an XML parser context
5202
 *
5203
 * DEPRECATED: Internal function, don't use.
5204
 *
5205
 * Parse an XML (SGML) comment. Always consumes '<!'.
5206
 *
5207
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5208
 *  must not occur within comments. "
5209
 *
5210
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5211
 */
5212
void
5213
85.4k
xmlParseComment(xmlParserCtxtPtr ctxt) {
5214
85.4k
    xmlChar *buf = NULL;
5215
85.4k
    size_t size = XML_PARSER_BUFFER_SIZE;
5216
85.4k
    size_t len = 0;
5217
85.4k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5218
85.4k
                       XML_MAX_HUGE_LENGTH :
5219
85.4k
                       XML_MAX_TEXT_LENGTH;
5220
85.4k
    const xmlChar *in;
5221
85.4k
    size_t nbchar = 0;
5222
85.4k
    int ccol;
5223
5224
    /*
5225
     * Check that there is a comment right here.
5226
     */
5227
85.4k
    if ((RAW != '<') || (NXT(1) != '!'))
5228
0
        return;
5229
85.4k
    SKIP(2);
5230
85.4k
    if ((RAW != '-') || (NXT(1) != '-'))
5231
29
        return;
5232
85.3k
    SKIP(2);
5233
85.3k
    GROW;
5234
5235
    /*
5236
     * Accelerated common case where input doesn't need to be
5237
     * modified before passing it to the handler.
5238
     */
5239
85.3k
    in = ctxt->input->cur;
5240
85.3k
    do {
5241
85.3k
  if (*in == 0xA) {
5242
10.8k
      do {
5243
10.8k
    ctxt->input->line++; ctxt->input->col = 1;
5244
10.8k
    in++;
5245
10.8k
      } while (*in == 0xA);
5246
4.61k
  }
5247
158k
get_more:
5248
158k
        ccol = ctxt->input->col;
5249
1.73M
  while (((*in > '-') && (*in <= 0x7F)) ||
5250
371k
         ((*in >= 0x20) && (*in < '-')) ||
5251
1.57M
         (*in == 0x09)) {
5252
1.57M
        in++;
5253
1.57M
        ccol++;
5254
1.57M
  }
5255
158k
  ctxt->input->col = ccol;
5256
158k
  if (*in == 0xA) {
5257
23.5k
      do {
5258
23.5k
    ctxt->input->line++; ctxt->input->col = 1;
5259
23.5k
    in++;
5260
23.5k
      } while (*in == 0xA);
5261
14.0k
      goto get_more;
5262
14.0k
  }
5263
144k
  nbchar = in - ctxt->input->cur;
5264
  /*
5265
   * save current set of data
5266
   */
5267
144k
  if (nbchar > 0) {
5268
112k
            if (nbchar > maxLength - len) {
5269
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5270
0
                                  "Comment too big found", NULL);
5271
0
                xmlFree(buf);
5272
0
                return;
5273
0
            }
5274
112k
            if (buf == NULL) {
5275
68.5k
                if ((*in == '-') && (in[1] == '-'))
5276
32.0k
                    size = nbchar + 1;
5277
36.5k
                else
5278
36.5k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5279
68.5k
                buf = xmlMalloc(size);
5280
68.5k
                if (buf == NULL) {
5281
0
                    xmlErrMemory(ctxt);
5282
0
                    return;
5283
0
                }
5284
68.5k
                len = 0;
5285
68.5k
            } else if (len + nbchar + 1 >= size) {
5286
3.72k
                xmlChar *new_buf;
5287
3.72k
                size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5288
3.72k
                new_buf = xmlRealloc(buf, size);
5289
3.72k
                if (new_buf == NULL) {
5290
0
                    xmlErrMemory(ctxt);
5291
0
                    xmlFree(buf);
5292
0
                    return;
5293
0
                }
5294
3.72k
                buf = new_buf;
5295
3.72k
            }
5296
112k
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5297
112k
            len += nbchar;
5298
112k
            buf[len] = 0;
5299
112k
  }
5300
144k
  ctxt->input->cur = in;
5301
144k
  if (*in == 0xA) {
5302
0
      in++;
5303
0
      ctxt->input->line++; ctxt->input->col = 1;
5304
0
  }
5305
144k
  if (*in == 0xD) {
5306
14.4k
      in++;
5307
14.4k
      if (*in == 0xA) {
5308
7.00k
    ctxt->input->cur = in;
5309
7.00k
    in++;
5310
7.00k
    ctxt->input->line++; ctxt->input->col = 1;
5311
7.00k
    goto get_more;
5312
7.00k
      }
5313
7.49k
      in--;
5314
7.49k
  }
5315
137k
  SHRINK;
5316
137k
  GROW;
5317
137k
  in = ctxt->input->cur;
5318
137k
  if (*in == '-') {
5319
95.9k
      if (in[1] == '-') {
5320
59.8k
          if (in[2] == '>') {
5321
43.5k
        SKIP(3);
5322
43.5k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5323
0
            (!ctxt->disableSAX)) {
5324
0
      if (buf != NULL)
5325
0
          ctxt->sax->comment(ctxt->userData, buf);
5326
0
      else
5327
0
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5328
0
        }
5329
43.5k
        if (buf != NULL)
5330
36.4k
            xmlFree(buf);
5331
43.5k
        return;
5332
43.5k
    }
5333
16.3k
    if (buf != NULL) {
5334
11.1k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5335
11.1k
                          "Double hyphen within comment: "
5336
11.1k
                                      "<!--%.50s\n",
5337
11.1k
              buf);
5338
11.1k
    } else
5339
5.22k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5340
5.22k
                          "Double hyphen within comment\n", NULL);
5341
16.3k
    in++;
5342
16.3k
    ctxt->input->col++;
5343
16.3k
      }
5344
52.4k
      in++;
5345
52.4k
      ctxt->input->col++;
5346
52.4k
      goto get_more;
5347
95.9k
  }
5348
137k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5349
41.8k
    xmlParseCommentComplex(ctxt, buf, len, size);
5350
41.8k
}
5351
5352
5353
/**
5354
 * xmlParsePITarget:
5355
 * @ctxt:  an XML parser context
5356
 *
5357
 * DEPRECATED: Internal function, don't use.
5358
 *
5359
 * parse the name of a PI
5360
 *
5361
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5362
 *
5363
 * Returns the PITarget name or NULL
5364
 */
5365
5366
const xmlChar *
5367
97.6k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5368
97.6k
    const xmlChar *name;
5369
5370
97.6k
    name = xmlParseName(ctxt);
5371
97.6k
    if ((name != NULL) &&
5372
95.9k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5373
52.7k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5374
44.7k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5375
33.6k
  int i;
5376
33.6k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5377
22.7k
      (name[2] == 'l') && (name[3] == 0)) {
5378
1.75k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5379
1.75k
     "XML declaration allowed only at the start of the document\n");
5380
1.75k
      return(name);
5381
31.8k
  } else if (name[3] == 0) {
5382
2.45k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5383
2.45k
      return(name);
5384
2.45k
  }
5385
85.7k
  for (i = 0;;i++) {
5386
85.7k
      if (xmlW3CPIs[i] == NULL) break;
5387
57.7k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5388
1.46k
          return(name);
5389
57.7k
  }
5390
27.9k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5391
27.9k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5392
27.9k
          NULL, NULL);
5393
27.9k
    }
5394
91.9k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5395
8.32k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5396
8.32k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5397
8.32k
    }
5398
91.9k
    return(name);
5399
97.6k
}
5400
5401
#ifdef LIBXML_CATALOG_ENABLED
5402
/**
5403
 * xmlParseCatalogPI:
5404
 * @ctxt:  an XML parser context
5405
 * @catalog:  the PI value string
5406
 *
5407
 * parse an XML Catalog Processing Instruction.
5408
 *
5409
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5410
 *
5411
 * Occurs only if allowed by the user and if happening in the Misc
5412
 * part of the document before any doctype information
5413
 * This will add the given catalog to the parsing context in order
5414
 * to be used if resolution is needed further down in the document
5415
 */
5416
5417
static void
5418
0
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5419
0
    xmlChar *URL = NULL;
5420
0
    const xmlChar *tmp, *base;
5421
0
    xmlChar marker;
5422
5423
0
    tmp = catalog;
5424
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5425
0
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5426
0
  goto error;
5427
0
    tmp += 7;
5428
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5429
0
    if (*tmp != '=') {
5430
0
  return;
5431
0
    }
5432
0
    tmp++;
5433
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5434
0
    marker = *tmp;
5435
0
    if ((marker != '\'') && (marker != '"'))
5436
0
  goto error;
5437
0
    tmp++;
5438
0
    base = tmp;
5439
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5440
0
    if (*tmp == 0)
5441
0
  goto error;
5442
0
    URL = xmlStrndup(base, tmp - base);
5443
0
    tmp++;
5444
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5445
0
    if (*tmp != 0)
5446
0
  goto error;
5447
5448
0
    if (URL != NULL) {
5449
        /*
5450
         * Unfortunately, the catalog API doesn't report OOM errors.
5451
         * xmlGetLastError isn't very helpful since we don't know
5452
         * where the last error came from. We'd have to reset it
5453
         * before this call and restore it afterwards.
5454
         */
5455
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5456
0
  xmlFree(URL);
5457
0
    }
5458
0
    return;
5459
5460
0
error:
5461
0
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5462
0
            "Catalog PI syntax error: %s\n",
5463
0
      catalog, NULL);
5464
0
    if (URL != NULL)
5465
0
  xmlFree(URL);
5466
0
}
5467
#endif
5468
5469
/**
5470
 * xmlParsePI:
5471
 * @ctxt:  an XML parser context
5472
 *
5473
 * DEPRECATED: Internal function, don't use.
5474
 *
5475
 * parse an XML Processing Instruction.
5476
 *
5477
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5478
 *
5479
 * The processing is transferred to SAX once parsed.
5480
 */
5481
5482
void
5483
97.6k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5484
97.6k
    xmlChar *buf = NULL;
5485
97.6k
    size_t len = 0;
5486
97.6k
    size_t size = XML_PARSER_BUFFER_SIZE;
5487
97.6k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5488
97.4k
                       XML_MAX_HUGE_LENGTH :
5489
97.6k
                       XML_MAX_TEXT_LENGTH;
5490
97.6k
    int cur, l;
5491
97.6k
    const xmlChar *target;
5492
5493
97.6k
    if ((RAW == '<') && (NXT(1) == '?')) {
5494
  /*
5495
   * this is a Processing Instruction.
5496
   */
5497
97.6k
  SKIP(2);
5498
5499
  /*
5500
   * Parse the target name and check for special support like
5501
   * namespace.
5502
   */
5503
97.6k
        target = xmlParsePITarget(ctxt);
5504
97.6k
  if (target != NULL) {
5505
95.9k
      if ((RAW == '?') && (NXT(1) == '>')) {
5506
19.4k
    SKIP(2);
5507
5508
    /*
5509
     * SAX: PI detected.
5510
     */
5511
19.4k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5512
6.77k
        (ctxt->sax->processingInstruction != NULL))
5513
6.77k
        ctxt->sax->processingInstruction(ctxt->userData,
5514
6.77k
                                         target, NULL);
5515
19.4k
    return;
5516
19.4k
      }
5517
76.5k
      buf = xmlMalloc(size);
5518
76.5k
      if (buf == NULL) {
5519
0
    xmlErrMemory(ctxt);
5520
0
    return;
5521
0
      }
5522
76.5k
      if (SKIP_BLANKS == 0) {
5523
15.3k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5524
15.3k
        "ParsePI: PI %s space expected\n", target);
5525
15.3k
      }
5526
76.5k
      cur = xmlCurrentCharRecover(ctxt, &l);
5527
7.72M
      while (IS_CHAR(cur) && /* checked */
5528
7.72M
       ((cur != '?') || (NXT(1) != '>'))) {
5529
7.64M
    if (len + 5 >= size) {
5530
47.8k
        xmlChar *tmp;
5531
47.8k
                    int newSize;
5532
5533
47.8k
                    newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5534
47.8k
                    if (newSize < 0) {
5535
0
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5536
0
                                          "PI %s too big found", target);
5537
0
                        xmlFree(buf);
5538
0
                        return;
5539
0
                    }
5540
47.8k
        tmp = xmlRealloc(buf, newSize);
5541
47.8k
        if (tmp == NULL) {
5542
0
      xmlErrMemory(ctxt);
5543
0
      xmlFree(buf);
5544
0
      return;
5545
0
        }
5546
47.8k
        buf = tmp;
5547
47.8k
                    size = newSize;
5548
47.8k
    }
5549
7.64M
    COPY_BUF(buf, len, cur);
5550
7.64M
    NEXTL(l);
5551
7.64M
    cur = xmlCurrentCharRecover(ctxt, &l);
5552
7.64M
      }
5553
76.5k
      buf[len] = 0;
5554
76.5k
      if (cur != '?') {
5555
2.32k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5556
2.32k
          "ParsePI: PI %s never end ...\n", target);
5557
74.2k
      } else {
5558
74.2k
    SKIP(2);
5559
5560
74.2k
#ifdef LIBXML_CATALOG_ENABLED
5561
74.2k
    if ((ctxt->inSubset == 0) &&
5562
57.0k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5563
16.4k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5564
5565
16.4k
        if ((ctxt->options & XML_PARSE_CATALOG_PI) &&
5566
0
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5567
0
       (allow == XML_CATA_ALLOW_ALL)))
5568
0
      xmlParseCatalogPI(ctxt, buf);
5569
16.4k
    }
5570
74.2k
#endif
5571
5572
    /*
5573
     * SAX: PI detected.
5574
     */
5575
74.2k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5576
58.2k
        (ctxt->sax->processingInstruction != NULL))
5577
58.2k
        ctxt->sax->processingInstruction(ctxt->userData,
5578
58.2k
                                         target, buf);
5579
74.2k
      }
5580
76.5k
      xmlFree(buf);
5581
76.5k
  } else {
5582
1.63k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5583
1.63k
  }
5584
97.6k
    }
5585
97.6k
}
5586
5587
/**
5588
 * xmlParseNotationDecl:
5589
 * @ctxt:  an XML parser context
5590
 *
5591
 * DEPRECATED: Internal function, don't use.
5592
 *
5593
 * Parse a notation declaration. Always consumes '<!'.
5594
 *
5595
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5596
 *
5597
 * Hence there is actually 3 choices:
5598
 *     'PUBLIC' S PubidLiteral
5599
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5600
 * and 'SYSTEM' S SystemLiteral
5601
 *
5602
 * See the NOTE on xmlParseExternalID().
5603
 */
5604
5605
void
5606
2.93k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5607
2.93k
    const xmlChar *name;
5608
2.93k
    xmlChar *Pubid;
5609
2.93k
    xmlChar *Systemid;
5610
5611
2.93k
    if ((CUR != '<') || (NXT(1) != '!'))
5612
0
        return;
5613
2.93k
    SKIP(2);
5614
5615
2.93k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5616
2.85k
  int inputid = ctxt->input->id;
5617
2.85k
  SKIP(8);
5618
2.85k
  if (SKIP_BLANKS_PE == 0) {
5619
38
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5620
38
         "Space required after '<!NOTATION'\n");
5621
38
      return;
5622
38
  }
5623
5624
2.81k
        name = xmlParseName(ctxt);
5625
2.81k
  if (name == NULL) {
5626
60
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5627
60
      return;
5628
60
  }
5629
2.75k
  if (xmlStrchr(name, ':') != NULL) {
5630
6
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5631
6
         "colons are forbidden from notation names '%s'\n",
5632
6
         name, NULL, NULL);
5633
6
  }
5634
2.75k
  if (SKIP_BLANKS_PE == 0) {
5635
21
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5636
21
         "Space required after the NOTATION name'\n");
5637
21
      return;
5638
21
  }
5639
5640
  /*
5641
   * Parse the IDs.
5642
   */
5643
2.73k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5644
2.73k
  SKIP_BLANKS_PE;
5645
5646
2.73k
  if (RAW == '>') {
5647
2.48k
      if (inputid != ctxt->input->id) {
5648
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5649
0
                         "Notation declaration doesn't start and stop"
5650
0
                               " in the same entity\n");
5651
0
      }
5652
2.48k
      NEXT;
5653
2.48k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5654
1.84k
    (ctxt->sax->notationDecl != NULL))
5655
0
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5656
2.48k
  } else {
5657
254
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5658
254
  }
5659
2.73k
  if (Systemid != NULL) xmlFree(Systemid);
5660
2.73k
  if (Pubid != NULL) xmlFree(Pubid);
5661
2.73k
    }
5662
2.93k
}
5663
5664
/**
5665
 * xmlParseEntityDecl:
5666
 * @ctxt:  an XML parser context
5667
 *
5668
 * DEPRECATED: Internal function, don't use.
5669
 *
5670
 * Parse an entity declaration. Always consumes '<!'.
5671
 *
5672
 * [70] EntityDecl ::= GEDecl | PEDecl
5673
 *
5674
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5675
 *
5676
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5677
 *
5678
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5679
 *
5680
 * [74] PEDef ::= EntityValue | ExternalID
5681
 *
5682
 * [76] NDataDecl ::= S 'NDATA' S Name
5683
 *
5684
 * [ VC: Notation Declared ]
5685
 * The Name must match the declared name of a notation.
5686
 */
5687
5688
void
5689
54.4k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5690
54.4k
    const xmlChar *name = NULL;
5691
54.4k
    xmlChar *value = NULL;
5692
54.4k
    xmlChar *URI = NULL, *literal = NULL;
5693
54.4k
    const xmlChar *ndata = NULL;
5694
54.4k
    int isParameter = 0;
5695
54.4k
    xmlChar *orig = NULL;
5696
5697
54.4k
    if ((CUR != '<') || (NXT(1) != '!'))
5698
0
        return;
5699
54.4k
    SKIP(2);
5700
5701
    /* GROW; done in the caller */
5702
54.4k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5703
54.2k
  int inputid = ctxt->input->id;
5704
54.2k
  SKIP(6);
5705
54.2k
  if (SKIP_BLANKS_PE == 0) {
5706
11.3k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5707
11.3k
         "Space required after '<!ENTITY'\n");
5708
11.3k
  }
5709
5710
54.2k
  if (RAW == '%') {
5711
9.75k
      NEXT;
5712
9.75k
      if (SKIP_BLANKS_PE == 0) {
5713
4.61k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5714
4.61k
             "Space required after '%%'\n");
5715
4.61k
      }
5716
9.75k
      isParameter = 1;
5717
9.75k
  }
5718
5719
54.2k
        name = xmlParseName(ctxt);
5720
54.2k
  if (name == NULL) {
5721
191
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5722
191
                     "xmlParseEntityDecl: no name\n");
5723
191
            return;
5724
191
  }
5725
54.0k
  if (xmlStrchr(name, ':') != NULL) {
5726
1.17k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5727
1.17k
         "colons are forbidden from entities names '%s'\n",
5728
1.17k
         name, NULL, NULL);
5729
1.17k
  }
5730
54.0k
  if (SKIP_BLANKS_PE == 0) {
5731
20.0k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5732
20.0k
         "Space required after the entity name\n");
5733
20.0k
  }
5734
5735
  /*
5736
   * handle the various case of definitions...
5737
   */
5738
54.0k
  if (isParameter) {
5739
9.73k
      if ((RAW == '"') || (RAW == '\'')) {
5740
8.70k
          value = xmlParseEntityValue(ctxt, &orig);
5741
8.70k
    if (value) {
5742
8.65k
        if ((ctxt->sax != NULL) &&
5743
8.65k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5744
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5745
0
                        XML_INTERNAL_PARAMETER_ENTITY,
5746
0
            NULL, NULL, value);
5747
8.65k
    }
5748
8.70k
      } else {
5749
1.02k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5750
1.02k
    if ((URI == NULL) && (literal == NULL)) {
5751
54
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5752
54
    }
5753
1.02k
    if (URI) {
5754
949
                    if (xmlStrchr(URI, '#')) {
5755
4
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5756
945
                    } else {
5757
945
                        if ((ctxt->sax != NULL) &&
5758
945
                            (!ctxt->disableSAX) &&
5759
520
                            (ctxt->sax->entityDecl != NULL))
5760
0
                            ctxt->sax->entityDecl(ctxt->userData, name,
5761
0
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5762
0
                                        literal, URI, NULL);
5763
945
                    }
5764
949
    }
5765
1.02k
      }
5766
44.3k
  } else {
5767
44.3k
      if ((RAW == '"') || (RAW == '\'')) {
5768
38.2k
          value = xmlParseEntityValue(ctxt, &orig);
5769
38.2k
    if ((ctxt->sax != NULL) &&
5770
38.2k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5771
0
        ctxt->sax->entityDecl(ctxt->userData, name,
5772
0
        XML_INTERNAL_GENERAL_ENTITY,
5773
0
        NULL, NULL, value);
5774
    /*
5775
     * For expat compatibility in SAX mode.
5776
     */
5777
38.2k
    if ((ctxt->myDoc == NULL) ||
5778
38.2k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5779
38.2k
        if (ctxt->myDoc == NULL) {
5780
1.46k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5781
1.46k
      if (ctxt->myDoc == NULL) {
5782
0
          xmlErrMemory(ctxt);
5783
0
          goto done;
5784
0
      }
5785
1.46k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5786
1.46k
        }
5787
38.2k
        if (ctxt->myDoc->intSubset == NULL) {
5788
1.46k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5789
1.46k
              BAD_CAST "fake", NULL, NULL);
5790
1.46k
                        if (ctxt->myDoc->intSubset == NULL) {
5791
0
                            xmlErrMemory(ctxt);
5792
0
                            goto done;
5793
0
                        }
5794
1.46k
                    }
5795
5796
38.2k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5797
38.2k
                    NULL, NULL, value);
5798
38.2k
    }
5799
38.2k
      } else {
5800
6.10k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5801
6.10k
    if ((URI == NULL) && (literal == NULL)) {
5802
832
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5803
832
    }
5804
6.10k
    if (URI) {
5805
5.22k
                    if (xmlStrchr(URI, '#')) {
5806
256
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5807
256
                    }
5808
5.22k
    }
5809
6.10k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5810
366
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5811
366
           "Space required before 'NDATA'\n");
5812
366
    }
5813
6.10k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5814
1.91k
        SKIP(5);
5815
1.91k
        if (SKIP_BLANKS_PE == 0) {
5816
7
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5817
7
               "Space required after 'NDATA'\n");
5818
7
        }
5819
1.91k
        ndata = xmlParseName(ctxt);
5820
1.91k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5821
750
            (ctxt->sax->unparsedEntityDecl != NULL))
5822
0
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5823
0
            literal, URI, ndata);
5824
4.18k
    } else {
5825
4.18k
        if ((ctxt->sax != NULL) &&
5826
4.18k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5827
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5828
0
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5829
0
            literal, URI, NULL);
5830
        /*
5831
         * For expat compatibility in SAX mode.
5832
         * assuming the entity replacement was asked for
5833
         */
5834
4.18k
        if ((ctxt->replaceEntities != 0) &&
5835
4.18k
      ((ctxt->myDoc == NULL) ||
5836
4.18k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5837
4.18k
      if (ctxt->myDoc == NULL) {
5838
308
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5839
308
          if (ctxt->myDoc == NULL) {
5840
0
              xmlErrMemory(ctxt);
5841
0
        goto done;
5842
0
          }
5843
308
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5844
308
      }
5845
5846
4.18k
      if (ctxt->myDoc->intSubset == NULL) {
5847
308
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5848
308
            BAD_CAST "fake", NULL, NULL);
5849
308
                            if (ctxt->myDoc->intSubset == NULL) {
5850
0
                                xmlErrMemory(ctxt);
5851
0
                                goto done;
5852
0
                            }
5853
308
                        }
5854
4.18k
      xmlSAX2EntityDecl(ctxt, name,
5855
4.18k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5856
4.18k
                  literal, URI, NULL);
5857
4.18k
        }
5858
4.18k
    }
5859
6.10k
      }
5860
44.3k
  }
5861
54.0k
  SKIP_BLANKS_PE;
5862
54.0k
  if (RAW != '>') {
5863
1.33k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5864
1.33k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5865
1.33k
      xmlHaltParser(ctxt);
5866
52.7k
  } else {
5867
52.7k
      if (inputid != ctxt->input->id) {
5868
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5869
0
                         "Entity declaration doesn't start and stop in"
5870
0
                               " the same entity\n");
5871
0
      }
5872
52.7k
      NEXT;
5873
52.7k
  }
5874
54.0k
  if (orig != NULL) {
5875
      /*
5876
       * Ugly mechanism to save the raw entity value.
5877
       */
5878
46.4k
      xmlEntityPtr cur = NULL;
5879
5880
46.4k
      if (isParameter) {
5881
8.65k
          if ((ctxt->sax != NULL) &&
5882
8.65k
        (ctxt->sax->getParameterEntity != NULL))
5883
0
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5884
37.8k
      } else {
5885
37.8k
          if ((ctxt->sax != NULL) &&
5886
37.8k
        (ctxt->sax->getEntity != NULL))
5887
37.8k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5888
37.8k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5889
0
        cur = xmlSAX2GetEntity(ctxt, name);
5890
0
    }
5891
37.8k
      }
5892
46.4k
            if ((cur != NULL) && (cur->orig == NULL)) {
5893
0
    cur->orig = orig;
5894
0
                orig = NULL;
5895
0
      }
5896
46.4k
  }
5897
5898
54.0k
done:
5899
54.0k
  if (value != NULL) xmlFree(value);
5900
54.0k
  if (URI != NULL) xmlFree(URI);
5901
54.0k
  if (literal != NULL) xmlFree(literal);
5902
54.0k
        if (orig != NULL) xmlFree(orig);
5903
54.0k
    }
5904
54.4k
}
5905
5906
/**
5907
 * xmlParseDefaultDecl:
5908
 * @ctxt:  an XML parser context
5909
 * @value:  Receive a possible fixed default value for the attribute
5910
 *
5911
 * DEPRECATED: Internal function, don't use.
5912
 *
5913
 * Parse an attribute default declaration
5914
 *
5915
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5916
 *
5917
 * [ VC: Required Attribute ]
5918
 * if the default declaration is the keyword #REQUIRED, then the
5919
 * attribute must be specified for all elements of the type in the
5920
 * attribute-list declaration.
5921
 *
5922
 * [ VC: Attribute Default Legal ]
5923
 * The declared default value must meet the lexical constraints of
5924
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5925
 *
5926
 * [ VC: Fixed Attribute Default ]
5927
 * if an attribute has a default value declared with the #FIXED
5928
 * keyword, instances of that attribute must match the default value.
5929
 *
5930
 * [ WFC: No < in Attribute Values ]
5931
 * handled in xmlParseAttValue()
5932
 *
5933
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5934
 *          or XML_ATTRIBUTE_FIXED.
5935
 */
5936
5937
int
5938
49.0k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5939
49.0k
    int val;
5940
49.0k
    xmlChar *ret;
5941
5942
49.0k
    *value = NULL;
5943
49.0k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5944
2.43k
  SKIP(9);
5945
2.43k
  return(XML_ATTRIBUTE_REQUIRED);
5946
2.43k
    }
5947
46.6k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5948
4.06k
  SKIP(8);
5949
4.06k
  return(XML_ATTRIBUTE_IMPLIED);
5950
4.06k
    }
5951
42.5k
    val = XML_ATTRIBUTE_NONE;
5952
42.5k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5953
1.09k
  SKIP(6);
5954
1.09k
  val = XML_ATTRIBUTE_FIXED;
5955
1.09k
  if (SKIP_BLANKS_PE == 0) {
5956
52
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5957
52
         "Space required after '#FIXED'\n");
5958
52
  }
5959
1.09k
    }
5960
42.5k
    ret = xmlParseAttValue(ctxt);
5961
42.5k
    if (ret == NULL) {
5962
536
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5963
536
           "Attribute default value declaration error\n");
5964
536
    } else
5965
42.0k
        *value = ret;
5966
42.5k
    return(val);
5967
46.6k
}
5968
5969
/**
5970
 * xmlParseNotationType:
5971
 * @ctxt:  an XML parser context
5972
 *
5973
 * DEPRECATED: Internal function, don't use.
5974
 *
5975
 * parse an Notation attribute type.
5976
 *
5977
 * Note: the leading 'NOTATION' S part has already being parsed...
5978
 *
5979
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5980
 *
5981
 * [ VC: Notation Attributes ]
5982
 * Values of this type must match one of the notation names included
5983
 * in the declaration; all notation names in the declaration must be declared.
5984
 *
5985
 * Returns: the notation attribute tree built while parsing
5986
 */
5987
5988
xmlEnumerationPtr
5989
715
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5990
715
    const xmlChar *name;
5991
715
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5992
5993
715
    if (RAW != '(') {
5994
8
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5995
8
  return(NULL);
5996
8
    }
5997
1.56k
    do {
5998
1.56k
        NEXT;
5999
1.56k
  SKIP_BLANKS_PE;
6000
1.56k
        name = xmlParseName(ctxt);
6001
1.56k
  if (name == NULL) {
6002
26
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6003
26
         "Name expected in NOTATION declaration\n");
6004
26
            xmlFreeEnumeration(ret);
6005
26
      return(NULL);
6006
26
  }
6007
1.53k
        tmp = NULL;
6008
1.53k
#ifdef LIBXML_VALID_ENABLED
6009
1.53k
        if (ctxt->validate) {
6010
0
            tmp = ret;
6011
0
            while (tmp != NULL) {
6012
0
                if (xmlStrEqual(name, tmp->name)) {
6013
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6014
0
              "standalone: attribute notation value token %s duplicated\n",
6015
0
                                     name, NULL);
6016
0
                    if (!xmlDictOwns(ctxt->dict, name))
6017
0
                        xmlFree((xmlChar *) name);
6018
0
                    break;
6019
0
                }
6020
0
                tmp = tmp->next;
6021
0
            }
6022
0
        }
6023
1.53k
#endif /* LIBXML_VALID_ENABLED */
6024
1.53k
  if (tmp == NULL) {
6025
1.53k
      cur = xmlCreateEnumeration(name);
6026
1.53k
      if (cur == NULL) {
6027
0
                xmlErrMemory(ctxt);
6028
0
                xmlFreeEnumeration(ret);
6029
0
                return(NULL);
6030
0
            }
6031
1.53k
      if (last == NULL) ret = last = cur;
6032
839
      else {
6033
839
    last->next = cur;
6034
839
    last = cur;
6035
839
      }
6036
1.53k
  }
6037
1.53k
  SKIP_BLANKS_PE;
6038
1.53k
    } while (RAW == '|');
6039
681
    if (RAW != ')') {
6040
45
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
6041
45
        xmlFreeEnumeration(ret);
6042
45
  return(NULL);
6043
45
    }
6044
636
    NEXT;
6045
636
    return(ret);
6046
681
}
6047
6048
/**
6049
 * xmlParseEnumerationType:
6050
 * @ctxt:  an XML parser context
6051
 *
6052
 * DEPRECATED: Internal function, don't use.
6053
 *
6054
 * parse an Enumeration attribute type.
6055
 *
6056
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
6057
 *
6058
 * [ VC: Enumeration ]
6059
 * Values of this type must match one of the Nmtoken tokens in
6060
 * the declaration
6061
 *
6062
 * Returns: the enumeration attribute tree built while parsing
6063
 */
6064
6065
xmlEnumerationPtr
6066
6.87k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
6067
6.87k
    xmlChar *name;
6068
6.87k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6069
6070
6.87k
    if (RAW != '(') {
6071
288
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
6072
288
  return(NULL);
6073
288
    }
6074
8.64k
    do {
6075
8.64k
        NEXT;
6076
8.64k
  SKIP_BLANKS_PE;
6077
8.64k
        name = xmlParseNmtoken(ctxt);
6078
8.64k
  if (name == NULL) {
6079
64
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
6080
64
      return(ret);
6081
64
  }
6082
8.58k
        tmp = NULL;
6083
8.58k
#ifdef LIBXML_VALID_ENABLED
6084
8.58k
        if (ctxt->validate) {
6085
0
            tmp = ret;
6086
0
            while (tmp != NULL) {
6087
0
                if (xmlStrEqual(name, tmp->name)) {
6088
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6089
0
              "standalone: attribute enumeration value token %s duplicated\n",
6090
0
                                     name, NULL);
6091
0
                    if (!xmlDictOwns(ctxt->dict, name))
6092
0
                        xmlFree(name);
6093
0
                    break;
6094
0
                }
6095
0
                tmp = tmp->next;
6096
0
            }
6097
0
        }
6098
8.58k
#endif /* LIBXML_VALID_ENABLED */
6099
8.58k
  if (tmp == NULL) {
6100
8.58k
      cur = xmlCreateEnumeration(name);
6101
8.58k
      if (!xmlDictOwns(ctxt->dict, name))
6102
8.58k
    xmlFree(name);
6103
8.58k
      if (cur == NULL) {
6104
0
                xmlErrMemory(ctxt);
6105
0
                xmlFreeEnumeration(ret);
6106
0
                return(NULL);
6107
0
            }
6108
8.58k
      if (last == NULL) ret = last = cur;
6109
2.00k
      else {
6110
2.00k
    last->next = cur;
6111
2.00k
    last = cur;
6112
2.00k
      }
6113
8.58k
  }
6114
8.58k
  SKIP_BLANKS_PE;
6115
8.58k
    } while (RAW == '|');
6116
6.52k
    if (RAW != ')') {
6117
85
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
6118
85
  return(ret);
6119
85
    }
6120
6.43k
    NEXT;
6121
6.43k
    return(ret);
6122
6.52k
}
6123
6124
/**
6125
 * xmlParseEnumeratedType:
6126
 * @ctxt:  an XML parser context
6127
 * @tree:  the enumeration tree built while parsing
6128
 *
6129
 * DEPRECATED: Internal function, don't use.
6130
 *
6131
 * parse an Enumerated attribute type.
6132
 *
6133
 * [57] EnumeratedType ::= NotationType | Enumeration
6134
 *
6135
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6136
 *
6137
 *
6138
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6139
 */
6140
6141
int
6142
7.59k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6143
7.59k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6144
722
  SKIP(8);
6145
722
  if (SKIP_BLANKS_PE == 0) {
6146
7
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6147
7
         "Space required after 'NOTATION'\n");
6148
7
      return(0);
6149
7
  }
6150
715
  *tree = xmlParseNotationType(ctxt);
6151
715
  if (*tree == NULL) return(0);
6152
636
  return(XML_ATTRIBUTE_NOTATION);
6153
715
    }
6154
6.87k
    *tree = xmlParseEnumerationType(ctxt);
6155
6.87k
    if (*tree == NULL) return(0);
6156
6.57k
    return(XML_ATTRIBUTE_ENUMERATION);
6157
6.87k
}
6158
6159
/**
6160
 * xmlParseAttributeType:
6161
 * @ctxt:  an XML parser context
6162
 * @tree:  the enumeration tree built while parsing
6163
 *
6164
 * DEPRECATED: Internal function, don't use.
6165
 *
6166
 * parse the Attribute list def for an element
6167
 *
6168
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6169
 *
6170
 * [55] StringType ::= 'CDATA'
6171
 *
6172
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6173
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6174
 *
6175
 * Validity constraints for attribute values syntax are checked in
6176
 * xmlValidateAttributeValue()
6177
 *
6178
 * [ VC: ID ]
6179
 * Values of type ID must match the Name production. A name must not
6180
 * appear more than once in an XML document as a value of this type;
6181
 * i.e., ID values must uniquely identify the elements which bear them.
6182
 *
6183
 * [ VC: One ID per Element Type ]
6184
 * No element type may have more than one ID attribute specified.
6185
 *
6186
 * [ VC: ID Attribute Default ]
6187
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6188
 *
6189
 * [ VC: IDREF ]
6190
 * Values of type IDREF must match the Name production, and values
6191
 * of type IDREFS must match Names; each IDREF Name must match the value
6192
 * of an ID attribute on some element in the XML document; i.e. IDREF
6193
 * values must match the value of some ID attribute.
6194
 *
6195
 * [ VC: Entity Name ]
6196
 * Values of type ENTITY must match the Name production, values
6197
 * of type ENTITIES must match Names; each Entity Name must match the
6198
 * name of an unparsed entity declared in the DTD.
6199
 *
6200
 * [ VC: Name Token ]
6201
 * Values of type NMTOKEN must match the Nmtoken production; values
6202
 * of type NMTOKENS must match Nmtokens.
6203
 *
6204
 * Returns the attribute type
6205
 */
6206
int
6207
49.7k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6208
49.7k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6209
4.30k
  SKIP(5);
6210
4.30k
  return(XML_ATTRIBUTE_CDATA);
6211
45.4k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6212
669
  SKIP(6);
6213
669
  return(XML_ATTRIBUTE_IDREFS);
6214
44.7k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6215
654
  SKIP(5);
6216
654
  return(XML_ATTRIBUTE_IDREF);
6217
44.0k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6218
29.7k
        SKIP(2);
6219
29.7k
  return(XML_ATTRIBUTE_ID);
6220
29.7k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6221
579
  SKIP(6);
6222
579
  return(XML_ATTRIBUTE_ENTITY);
6223
13.7k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6224
5.06k
  SKIP(8);
6225
5.06k
  return(XML_ATTRIBUTE_ENTITIES);
6226
8.65k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6227
414
  SKIP(8);
6228
414
  return(XML_ATTRIBUTE_NMTOKENS);
6229
8.24k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6230
647
  SKIP(7);
6231
647
  return(XML_ATTRIBUTE_NMTOKEN);
6232
647
     }
6233
7.59k
     return(xmlParseEnumeratedType(ctxt, tree));
6234
49.7k
}
6235
6236
/**
6237
 * xmlParseAttributeListDecl:
6238
 * @ctxt:  an XML parser context
6239
 *
6240
 * DEPRECATED: Internal function, don't use.
6241
 *
6242
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6243
 *
6244
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6245
 *
6246
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6247
 *
6248
 */
6249
void
6250
12.4k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6251
12.4k
    const xmlChar *elemName;
6252
12.4k
    const xmlChar *attrName;
6253
12.4k
    xmlEnumerationPtr tree;
6254
6255
12.4k
    if ((CUR != '<') || (NXT(1) != '!'))
6256
0
        return;
6257
12.4k
    SKIP(2);
6258
6259
12.4k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6260
12.4k
  int inputid = ctxt->input->id;
6261
6262
12.4k
  SKIP(7);
6263
12.4k
  if (SKIP_BLANKS_PE == 0) {
6264
205
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6265
205
                     "Space required after '<!ATTLIST'\n");
6266
205
  }
6267
12.4k
        elemName = xmlParseName(ctxt);
6268
12.4k
  if (elemName == NULL) {
6269
48
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6270
48
         "ATTLIST: no name for Element\n");
6271
48
      return;
6272
48
  }
6273
12.3k
  SKIP_BLANKS_PE;
6274
12.3k
  GROW;
6275
60.7k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6276
50.2k
      int type;
6277
50.2k
      int def;
6278
50.2k
      xmlChar *defaultValue = NULL;
6279
6280
50.2k
      GROW;
6281
50.2k
            tree = NULL;
6282
50.2k
      attrName = xmlParseName(ctxt);
6283
50.2k
      if (attrName == NULL) {
6284
165
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6285
165
             "ATTLIST: no name for Attribute\n");
6286
165
    break;
6287
165
      }
6288
50.0k
      GROW;
6289
50.0k
      if (SKIP_BLANKS_PE == 0) {
6290
369
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6291
369
            "Space required after the attribute name\n");
6292
369
    break;
6293
369
      }
6294
6295
49.7k
      type = xmlParseAttributeType(ctxt, &tree);
6296
49.7k
      if (type <= 0) {
6297
381
          break;
6298
381
      }
6299
6300
49.3k
      GROW;
6301
49.3k
      if (SKIP_BLANKS_PE == 0) {
6302
235
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6303
235
             "Space required after the attribute type\n");
6304
235
          if (tree != NULL)
6305
160
        xmlFreeEnumeration(tree);
6306
235
    break;
6307
235
      }
6308
6309
49.0k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6310
49.0k
      if (def <= 0) {
6311
0
                if (defaultValue != NULL)
6312
0
        xmlFree(defaultValue);
6313
0
          if (tree != NULL)
6314
0
        xmlFreeEnumeration(tree);
6315
0
          break;
6316
0
      }
6317
49.0k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6318
38.2k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6319
6320
49.0k
      GROW;
6321
49.0k
            if (RAW != '>') {
6322
41.2k
    if (SKIP_BLANKS_PE == 0) {
6323
688
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6324
688
      "Space required after the attribute default value\n");
6325
688
        if (defaultValue != NULL)
6326
145
      xmlFree(defaultValue);
6327
688
        if (tree != NULL)
6328
71
      xmlFreeEnumeration(tree);
6329
688
        break;
6330
688
    }
6331
41.2k
      }
6332
48.4k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6333
40.8k
    (ctxt->sax->attributeDecl != NULL))
6334
0
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6335
0
                          type, def, defaultValue, tree);
6336
48.4k
      else if (tree != NULL)
6337
6.98k
    xmlFreeEnumeration(tree);
6338
6339
48.4k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6340
41.9k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6341
41.9k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6342
41.9k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6343
41.9k
      }
6344
48.4k
      if (ctxt->sax2) {
6345
48.4k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6346
48.4k
      }
6347
48.4k
      if (defaultValue != NULL)
6348
41.9k
          xmlFree(defaultValue);
6349
48.4k
      GROW;
6350
48.4k
  }
6351
12.3k
  if (RAW == '>') {
6352
10.5k
      if (inputid != ctxt->input->id) {
6353
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6354
0
                               "Attribute list declaration doesn't start and"
6355
0
                               " stop in the same entity\n");
6356
0
      }
6357
10.5k
      NEXT;
6358
10.5k
  }
6359
12.3k
    }
6360
12.4k
}
6361
6362
/**
6363
 * xmlParseElementMixedContentDecl:
6364
 * @ctxt:  an XML parser context
6365
 * @inputchk:  the input used for the current entity, needed for boundary checks
6366
 *
6367
 * DEPRECATED: Internal function, don't use.
6368
 *
6369
 * parse the declaration for a Mixed Element content
6370
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6371
 *
6372
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6373
 *                '(' S? '#PCDATA' S? ')'
6374
 *
6375
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6376
 *
6377
 * [ VC: No Duplicate Types ]
6378
 * The same name must not appear more than once in a single
6379
 * mixed-content declaration.
6380
 *
6381
 * returns: the list of the xmlElementContentPtr describing the element choices
6382
 */
6383
xmlElementContentPtr
6384
6.10k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6385
6.10k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6386
6.10k
    const xmlChar *elem = NULL;
6387
6388
6.10k
    GROW;
6389
6.10k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6390
6.10k
  SKIP(7);
6391
6.10k
  SKIP_BLANKS_PE;
6392
6.10k
  if (RAW == ')') {
6393
2.55k
      if (ctxt->input->id != inputchk) {
6394
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6395
0
                               "Element content declaration doesn't start and"
6396
0
                               " stop in the same entity\n");
6397
0
      }
6398
2.55k
      NEXT;
6399
2.55k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6400
2.55k
      if (ret == NULL)
6401
0
                goto mem_error;
6402
2.55k
      if (RAW == '*') {
6403
557
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6404
557
    NEXT;
6405
557
      }
6406
2.55k
      return(ret);
6407
2.55k
  }
6408
3.54k
  if ((RAW == '(') || (RAW == '|')) {
6409
3.41k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6410
3.41k
      if (ret == NULL)
6411
0
                goto mem_error;
6412
3.41k
  }
6413
10.7k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6414
7.30k
      NEXT;
6415
7.30k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6416
7.30k
            if (n == NULL)
6417
0
                goto mem_error;
6418
7.30k
      if (elem == NULL) {
6419
3.41k
    n->c1 = cur;
6420
3.41k
    if (cur != NULL)
6421
3.41k
        cur->parent = n;
6422
3.41k
    ret = cur = n;
6423
3.88k
      } else {
6424
3.88k
          cur->c2 = n;
6425
3.88k
    n->parent = cur;
6426
3.88k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6427
3.88k
                if (n->c1 == NULL)
6428
0
                    goto mem_error;
6429
3.88k
    n->c1->parent = n;
6430
3.88k
    cur = n;
6431
3.88k
      }
6432
7.30k
      SKIP_BLANKS_PE;
6433
7.30k
      elem = xmlParseName(ctxt);
6434
7.30k
      if (elem == NULL) {
6435
105
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6436
105
      "xmlParseElementMixedContentDecl : Name expected\n");
6437
105
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6438
105
    return(NULL);
6439
105
      }
6440
7.19k
      SKIP_BLANKS_PE;
6441
7.19k
      GROW;
6442
7.19k
  }
6443
3.43k
  if ((RAW == ')') && (NXT(1) == '*')) {
6444
2.34k
      if (elem != NULL) {
6445
2.34k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6446
2.34k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6447
2.34k
    if (cur->c2 == NULL)
6448
0
                    goto mem_error;
6449
2.34k
    cur->c2->parent = cur;
6450
2.34k
            }
6451
2.34k
            if (ret != NULL)
6452
2.34k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6453
2.34k
      if (ctxt->input->id != inputchk) {
6454
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6455
0
                               "Element content declaration doesn't start and"
6456
0
                               " stop in the same entity\n");
6457
0
      }
6458
2.34k
      SKIP(2);
6459
2.34k
  } else {
6460
1.09k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6461
1.09k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6462
1.09k
      return(NULL);
6463
1.09k
  }
6464
6465
3.43k
    } else {
6466
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6467
0
    }
6468
2.34k
    return(ret);
6469
6470
0
mem_error:
6471
0
    xmlErrMemory(ctxt);
6472
0
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6473
0
    return(NULL);
6474
6.10k
}
6475
6476
/**
6477
 * xmlParseElementChildrenContentDeclPriv:
6478
 * @ctxt:  an XML parser context
6479
 * @inputchk:  the input used for the current entity, needed for boundary checks
6480
 * @depth: the level of recursion
6481
 *
6482
 * parse the declaration for a Mixed Element content
6483
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6484
 *
6485
 *
6486
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6487
 *
6488
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6489
 *
6490
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6491
 *
6492
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6493
 *
6494
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6495
 * TODO Parameter-entity replacement text must be properly nested
6496
 *  with parenthesized groups. That is to say, if either of the
6497
 *  opening or closing parentheses in a choice, seq, or Mixed
6498
 *  construct is contained in the replacement text for a parameter
6499
 *  entity, both must be contained in the same replacement text. For
6500
 *  interoperability, if a parameter-entity reference appears in a
6501
 *  choice, seq, or Mixed construct, its replacement text should not
6502
 *  be empty, and neither the first nor last non-blank character of
6503
 *  the replacement text should be a connector (| or ,).
6504
 *
6505
 * Returns the tree of xmlElementContentPtr describing the element
6506
 *          hierarchy.
6507
 */
6508
static xmlElementContentPtr
6509
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6510
116k
                                       int depth) {
6511
116k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6512
116k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6513
116k
    const xmlChar *elem;
6514
116k
    xmlChar type = 0;
6515
6516
116k
    if (depth > maxDepth) {
6517
4
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6518
4
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6519
4
                "use XML_PARSE_HUGE\n", depth);
6520
4
  return(NULL);
6521
4
    }
6522
116k
    SKIP_BLANKS_PE;
6523
116k
    GROW;
6524
116k
    if (RAW == '(') {
6525
101k
  int inputid = ctxt->input->id;
6526
6527
        /* Recurse on first child */
6528
101k
  NEXT;
6529
101k
  SKIP_BLANKS_PE;
6530
101k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6531
101k
                                                           depth + 1);
6532
101k
        if (cur == NULL)
6533
80.1k
            return(NULL);
6534
21.3k
  SKIP_BLANKS_PE;
6535
21.3k
  GROW;
6536
21.3k
    } else {
6537
14.7k
  elem = xmlParseName(ctxt);
6538
14.7k
  if (elem == NULL) {
6539
188
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6540
188
      return(NULL);
6541
188
  }
6542
14.5k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6543
14.5k
  if (cur == NULL) {
6544
0
      xmlErrMemory(ctxt);
6545
0
      return(NULL);
6546
0
  }
6547
14.5k
  GROW;
6548
14.5k
  if (RAW == '?') {
6549
1.46k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6550
1.46k
      NEXT;
6551
13.0k
  } else if (RAW == '*') {
6552
830
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6553
830
      NEXT;
6554
12.2k
  } else if (RAW == '+') {
6555
2.14k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6556
2.14k
      NEXT;
6557
10.0k
  } else {
6558
10.0k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6559
10.0k
  }
6560
14.5k
  GROW;
6561
14.5k
    }
6562
35.8k
    SKIP_BLANKS_PE;
6563
92.0k
    while ((RAW != ')') && (PARSER_STOPPED(ctxt) == 0)) {
6564
        /*
6565
   * Each loop we parse one separator and one element.
6566
   */
6567
63.1k
        if (RAW == ',') {
6568
47.1k
      if (type == 0) type = CUR;
6569
6570
      /*
6571
       * Detect "Name | Name , Name" error
6572
       */
6573
42.5k
      else if (type != CUR) {
6574
6
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6575
6
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6576
6
                      type);
6577
6
    if ((last != NULL) && (last != ret))
6578
6
        xmlFreeDocElementContent(ctxt->myDoc, last);
6579
6
    if (ret != NULL)
6580
6
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6581
6
    return(NULL);
6582
6
      }
6583
47.1k
      NEXT;
6584
6585
47.1k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6586
47.1k
      if (op == NULL) {
6587
0
                xmlErrMemory(ctxt);
6588
0
    if ((last != NULL) && (last != ret))
6589
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6590
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6591
0
    return(NULL);
6592
0
      }
6593
47.1k
      if (last == NULL) {
6594
4.59k
    op->c1 = ret;
6595
4.59k
    if (ret != NULL)
6596
4.59k
        ret->parent = op;
6597
4.59k
    ret = cur = op;
6598
42.5k
      } else {
6599
42.5k
          cur->c2 = op;
6600
42.5k
    if (op != NULL)
6601
42.5k
        op->parent = cur;
6602
42.5k
    op->c1 = last;
6603
42.5k
    if (last != NULL)
6604
42.5k
        last->parent = op;
6605
42.5k
    cur =op;
6606
42.5k
    last = NULL;
6607
42.5k
      }
6608
47.1k
  } else if (RAW == '|') {
6609
14.8k
      if (type == 0) type = CUR;
6610
6611
      /*
6612
       * Detect "Name , Name | Name" error
6613
       */
6614
6.85k
      else if (type != CUR) {
6615
3
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6616
3
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6617
3
          type);
6618
3
    if ((last != NULL) && (last != ret))
6619
3
        xmlFreeDocElementContent(ctxt->myDoc, last);
6620
3
    if (ret != NULL)
6621
3
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6622
3
    return(NULL);
6623
3
      }
6624
14.8k
      NEXT;
6625
6626
14.8k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6627
14.8k
      if (op == NULL) {
6628
0
                xmlErrMemory(ctxt);
6629
0
    if ((last != NULL) && (last != ret))
6630
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6631
0
    if (ret != NULL)
6632
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6633
0
    return(NULL);
6634
0
      }
6635
14.8k
      if (last == NULL) {
6636
8.03k
    op->c1 = ret;
6637
8.03k
    if (ret != NULL)
6638
8.03k
        ret->parent = op;
6639
8.03k
    ret = cur = op;
6640
8.03k
      } else {
6641
6.85k
          cur->c2 = op;
6642
6.85k
    if (op != NULL)
6643
6.85k
        op->parent = cur;
6644
6.85k
    op->c1 = last;
6645
6.85k
    if (last != NULL)
6646
6.85k
        last->parent = op;
6647
6.85k
    cur =op;
6648
6.85k
    last = NULL;
6649
6.85k
      }
6650
14.8k
  } else {
6651
1.13k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6652
1.13k
      if ((last != NULL) && (last != ret))
6653
809
          xmlFreeDocElementContent(ctxt->myDoc, last);
6654
1.13k
      if (ret != NULL)
6655
1.13k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6656
1.13k
      return(NULL);
6657
1.13k
  }
6658
62.0k
  GROW;
6659
62.0k
  SKIP_BLANKS_PE;
6660
62.0k
  GROW;
6661
62.0k
  if (RAW == '(') {
6662
9.52k
      int inputid = ctxt->input->id;
6663
      /* Recurse on second child */
6664
9.52k
      NEXT;
6665
9.52k
      SKIP_BLANKS_PE;
6666
9.52k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6667
9.52k
                                                          depth + 1);
6668
9.52k
            if (last == NULL) {
6669
5.81k
    if (ret != NULL)
6670
5.81k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6671
5.81k
    return(NULL);
6672
5.81k
            }
6673
3.70k
      SKIP_BLANKS_PE;
6674
52.5k
  } else {
6675
52.5k
      elem = xmlParseName(ctxt);
6676
52.5k
      if (elem == NULL) {
6677
83
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6678
83
    if (ret != NULL)
6679
83
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6680
83
    return(NULL);
6681
83
      }
6682
52.4k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6683
52.4k
      if (last == NULL) {
6684
0
                xmlErrMemory(ctxt);
6685
0
    if (ret != NULL)
6686
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6687
0
    return(NULL);
6688
0
      }
6689
52.4k
      if (RAW == '?') {
6690
413
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6691
413
    NEXT;
6692
52.0k
      } else if (RAW == '*') {
6693
605
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6694
605
    NEXT;
6695
51.4k
      } else if (RAW == '+') {
6696
831
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6697
831
    NEXT;
6698
50.5k
      } else {
6699
50.5k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6700
50.5k
      }
6701
52.4k
  }
6702
56.1k
  SKIP_BLANKS_PE;
6703
56.1k
  GROW;
6704
56.1k
    }
6705
28.8k
    if ((cur != NULL) && (last != NULL)) {
6706
5.91k
        cur->c2 = last;
6707
5.91k
  if (last != NULL)
6708
5.91k
      last->parent = cur;
6709
5.91k
    }
6710
28.8k
    if (ctxt->input->id != inputchk) {
6711
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6712
0
                       "Element content declaration doesn't start and stop in"
6713
0
                       " the same entity\n");
6714
0
    }
6715
28.8k
    NEXT;
6716
28.8k
    if (RAW == '?') {
6717
3.36k
  if (ret != NULL) {
6718
3.36k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6719
3.21k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6720
823
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6721
2.54k
      else
6722
2.54k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6723
3.36k
  }
6724
3.36k
  NEXT;
6725
25.4k
    } else if (RAW == '*') {
6726
2.92k
  if (ret != NULL) {
6727
2.92k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6728
2.92k
      cur = ret;
6729
      /*
6730
       * Some normalization:
6731
       * (a | b* | c?)* == (a | b | c)*
6732
       */
6733
5.83k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6734
2.90k
    if ((cur->c1 != NULL) &&
6735
2.90k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6736
2.88k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6737
908
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6738
2.90k
    if ((cur->c2 != NULL) &&
6739
2.90k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6740
2.89k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6741
133
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6742
2.90k
    cur = cur->c2;
6743
2.90k
      }
6744
2.92k
  }
6745
2.92k
  NEXT;
6746
22.5k
    } else if (RAW == '+') {
6747
6.89k
  if (ret != NULL) {
6748
6.89k
      int found = 0;
6749
6750
6.89k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6751
5.36k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6752
3.06k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6753
3.82k
      else
6754
3.82k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6755
      /*
6756
       * Some normalization:
6757
       * (a | b*)+ == (a | b)*
6758
       * (a | b?)+ == (a | b)*
6759
       */
6760
11.2k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6761
4.36k
    if ((cur->c1 != NULL) &&
6762
4.36k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6763
4.24k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6764
1.13k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6765
1.13k
        found = 1;
6766
1.13k
    }
6767
4.36k
    if ((cur->c2 != NULL) &&
6768
4.36k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6769
4.17k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6770
325
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6771
325
        found = 1;
6772
325
    }
6773
4.36k
    cur = cur->c2;
6774
4.36k
      }
6775
6.89k
      if (found)
6776
1.24k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6777
6.89k
  }
6778
6.89k
  NEXT;
6779
6.89k
    }
6780
28.8k
    return(ret);
6781
35.8k
}
6782
6783
/**
6784
 * xmlParseElementChildrenContentDecl:
6785
 * @ctxt:  an XML parser context
6786
 * @inputchk:  the input used for the current entity, needed for boundary checks
6787
 *
6788
 * DEPRECATED: Internal function, don't use.
6789
 *
6790
 * parse the declaration for a Mixed Element content
6791
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6792
 *
6793
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6794
 *
6795
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6796
 *
6797
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6798
 *
6799
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6800
 *
6801
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6802
 * TODO Parameter-entity replacement text must be properly nested
6803
 *  with parenthesized groups. That is to say, if either of the
6804
 *  opening or closing parentheses in a choice, seq, or Mixed
6805
 *  construct is contained in the replacement text for a parameter
6806
 *  entity, both must be contained in the same replacement text. For
6807
 *  interoperability, if a parameter-entity reference appears in a
6808
 *  choice, seq, or Mixed construct, its replacement text should not
6809
 *  be empty, and neither the first nor last non-blank character of
6810
 *  the replacement text should be a connector (| or ,).
6811
 *
6812
 * Returns the tree of xmlElementContentPtr describing the element
6813
 *          hierarchy.
6814
 */
6815
xmlElementContentPtr
6816
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6817
    /* stub left for API/ABI compat */
6818
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6819
0
}
6820
6821
/**
6822
 * xmlParseElementContentDecl:
6823
 * @ctxt:  an XML parser context
6824
 * @name:  the name of the element being defined.
6825
 * @result:  the Element Content pointer will be stored here if any
6826
 *
6827
 * DEPRECATED: Internal function, don't use.
6828
 *
6829
 * parse the declaration for an Element content either Mixed or Children,
6830
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6831
 *
6832
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6833
 *
6834
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6835
 */
6836
6837
int
6838
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6839
11.3k
                           xmlElementContentPtr *result) {
6840
6841
11.3k
    xmlElementContentPtr tree = NULL;
6842
11.3k
    int inputid = ctxt->input->id;
6843
11.3k
    int res;
6844
6845
11.3k
    *result = NULL;
6846
6847
11.3k
    if (RAW != '(') {
6848
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6849
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6850
0
  return(-1);
6851
0
    }
6852
11.3k
    NEXT;
6853
11.3k
    GROW;
6854
11.3k
    SKIP_BLANKS_PE;
6855
11.3k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6856
6.10k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6857
6.10k
  res = XML_ELEMENT_TYPE_MIXED;
6858
6.10k
    } else {
6859
5.20k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6860
5.20k
  res = XML_ELEMENT_TYPE_ELEMENT;
6861
5.20k
    }
6862
11.3k
    SKIP_BLANKS_PE;
6863
11.3k
    *result = tree;
6864
11.3k
    return(res);
6865
11.3k
}
6866
6867
/**
6868
 * xmlParseElementDecl:
6869
 * @ctxt:  an XML parser context
6870
 *
6871
 * DEPRECATED: Internal function, don't use.
6872
 *
6873
 * Parse an element declaration. Always consumes '<!'.
6874
 *
6875
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6876
 *
6877
 * [ VC: Unique Element Type Declaration ]
6878
 * No element type may be declared more than once
6879
 *
6880
 * Returns the type of the element, or -1 in case of error
6881
 */
6882
int
6883
13.4k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6884
13.4k
    const xmlChar *name;
6885
13.4k
    int ret = -1;
6886
13.4k
    xmlElementContentPtr content  = NULL;
6887
6888
13.4k
    if ((CUR != '<') || (NXT(1) != '!'))
6889
0
        return(ret);
6890
13.4k
    SKIP(2);
6891
6892
    /* GROW; done in the caller */
6893
13.4k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6894
13.3k
  int inputid = ctxt->input->id;
6895
6896
13.3k
  SKIP(7);
6897
13.3k
  if (SKIP_BLANKS_PE == 0) {
6898
20
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6899
20
               "Space required after 'ELEMENT'\n");
6900
20
      return(-1);
6901
20
  }
6902
13.3k
        name = xmlParseName(ctxt);
6903
13.3k
  if (name == NULL) {
6904
28
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6905
28
         "xmlParseElementDecl: no name for Element\n");
6906
28
      return(-1);
6907
28
  }
6908
13.3k
  if (SKIP_BLANKS_PE == 0) {
6909
149
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6910
149
         "Space required after the element name\n");
6911
149
  }
6912
13.3k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6913
623
      SKIP(5);
6914
      /*
6915
       * Element must always be empty.
6916
       */
6917
623
      ret = XML_ELEMENT_TYPE_EMPTY;
6918
12.7k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6919
578
             (NXT(2) == 'Y')) {
6920
573
      SKIP(3);
6921
      /*
6922
       * Element is a generic container.
6923
       */
6924
573
      ret = XML_ELEMENT_TYPE_ANY;
6925
12.1k
  } else if (RAW == '(') {
6926
11.3k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6927
11.3k
  } else {
6928
      /*
6929
       * [ WFC: PEs in Internal Subset ] error handling.
6930
       */
6931
829
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6932
829
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6933
829
      return(-1);
6934
829
  }
6935
6936
12.5k
  SKIP_BLANKS_PE;
6937
6938
12.5k
  if (RAW != '>') {
6939
1.19k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6940
1.19k
      if (content != NULL) {
6941
66
    xmlFreeDocElementContent(ctxt->myDoc, content);
6942
66
      }
6943
11.3k
  } else {
6944
11.3k
      if (inputid != ctxt->input->id) {
6945
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6946
0
                               "Element declaration doesn't start and stop in"
6947
0
                               " the same entity\n");
6948
0
      }
6949
6950
11.3k
      NEXT;
6951
11.3k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6952
5.23k
    (ctxt->sax->elementDecl != NULL)) {
6953
0
    if (content != NULL)
6954
0
        content->parent = NULL;
6955
0
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6956
0
                           content);
6957
0
    if ((content != NULL) && (content->parent == NULL)) {
6958
        /*
6959
         * this is a trick: if xmlAddElementDecl is called,
6960
         * instead of copying the full tree it is plugged directly
6961
         * if called from the parser. Avoid duplicating the
6962
         * interfaces or change the API/ABI
6963
         */
6964
0
        xmlFreeDocElementContent(ctxt->myDoc, content);
6965
0
    }
6966
11.3k
      } else if (content != NULL) {
6967
8.61k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6968
8.61k
      }
6969
11.3k
  }
6970
12.5k
    }
6971
12.5k
    return(ret);
6972
13.4k
}
6973
6974
/**
6975
 * xmlParseConditionalSections
6976
 * @ctxt:  an XML parser context
6977
 *
6978
 * Parse a conditional section. Always consumes '<!['.
6979
 *
6980
 * [61] conditionalSect ::= includeSect | ignoreSect
6981
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6982
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6983
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6984
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6985
 */
6986
6987
static void
6988
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6989
0
    int *inputIds = NULL;
6990
0
    size_t inputIdsSize = 0;
6991
0
    size_t depth = 0;
6992
6993
0
    while (PARSER_STOPPED(ctxt) == 0) {
6994
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6995
0
            int id = ctxt->input->id;
6996
6997
0
            SKIP(3);
6998
0
            SKIP_BLANKS_PE;
6999
7000
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
7001
0
                SKIP(7);
7002
0
                SKIP_BLANKS_PE;
7003
0
                if (RAW != '[') {
7004
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7005
0
                    xmlHaltParser(ctxt);
7006
0
                    goto error;
7007
0
                }
7008
0
                if (ctxt->input->id != id) {
7009
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7010
0
                                   "All markup of the conditional section is"
7011
0
                                   " not in the same entity\n");
7012
0
                }
7013
0
                NEXT;
7014
7015
0
                if (inputIdsSize <= depth) {
7016
0
                    int *tmp;
7017
0
                    int newSize;
7018
7019
0
                    newSize = xmlGrowCapacity(inputIdsSize, sizeof(tmp[0]),
7020
0
                                              4, 1000);
7021
0
                    if (newSize < 0) {
7022
0
                        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
7023
0
                                       "Maximum conditional section nesting"
7024
0
                                       " depth exceeded\n");
7025
0
                        goto error;
7026
0
                    }
7027
0
                    tmp = xmlRealloc(inputIds, newSize * sizeof(tmp[0]));
7028
0
                    if (tmp == NULL) {
7029
0
                        xmlErrMemory(ctxt);
7030
0
                        goto error;
7031
0
                    }
7032
0
                    inputIds = tmp;
7033
0
                    inputIdsSize = newSize;
7034
0
                }
7035
0
                inputIds[depth] = id;
7036
0
                depth++;
7037
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
7038
0
                size_t ignoreDepth = 0;
7039
7040
0
                SKIP(6);
7041
0
                SKIP_BLANKS_PE;
7042
0
                if (RAW != '[') {
7043
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7044
0
                    xmlHaltParser(ctxt);
7045
0
                    goto error;
7046
0
                }
7047
0
                if (ctxt->input->id != id) {
7048
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7049
0
                                   "All markup of the conditional section is"
7050
0
                                   " not in the same entity\n");
7051
0
                }
7052
0
                NEXT;
7053
7054
0
                while (PARSER_STOPPED(ctxt) == 0) {
7055
0
                    if (RAW == 0) {
7056
0
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
7057
0
                        goto error;
7058
0
                    }
7059
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7060
0
                        SKIP(3);
7061
0
                        ignoreDepth++;
7062
                        /* Check for integer overflow */
7063
0
                        if (ignoreDepth == 0) {
7064
0
                            xmlErrMemory(ctxt);
7065
0
                            goto error;
7066
0
                        }
7067
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
7068
0
                               (NXT(2) == '>')) {
7069
0
                        SKIP(3);
7070
0
                        if (ignoreDepth == 0)
7071
0
                            break;
7072
0
                        ignoreDepth--;
7073
0
                    } else {
7074
0
                        NEXT;
7075
0
                    }
7076
0
                }
7077
7078
0
                if (ctxt->input->id != id) {
7079
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7080
0
                                   "All markup of the conditional section is"
7081
0
                                   " not in the same entity\n");
7082
0
                }
7083
0
            } else {
7084
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
7085
0
                xmlHaltParser(ctxt);
7086
0
                goto error;
7087
0
            }
7088
0
        } else if ((depth > 0) &&
7089
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
7090
0
            depth--;
7091
0
            if (ctxt->input->id != inputIds[depth]) {
7092
0
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7093
0
                               "All markup of the conditional section is not"
7094
0
                               " in the same entity\n");
7095
0
            }
7096
0
            SKIP(3);
7097
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7098
0
            xmlParseMarkupDecl(ctxt);
7099
0
        } else {
7100
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7101
0
            xmlHaltParser(ctxt);
7102
0
            goto error;
7103
0
        }
7104
7105
0
        if (depth == 0)
7106
0
            break;
7107
7108
0
        SKIP_BLANKS_PE;
7109
0
        SHRINK;
7110
0
        GROW;
7111
0
    }
7112
7113
0
error:
7114
0
    xmlFree(inputIds);
7115
0
}
7116
7117
/**
7118
 * xmlParseMarkupDecl:
7119
 * @ctxt:  an XML parser context
7120
 *
7121
 * DEPRECATED: Internal function, don't use.
7122
 *
7123
 * Parse markup declarations. Always consumes '<!' or '<?'.
7124
 *
7125
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7126
 *                     NotationDecl | PI | Comment
7127
 *
7128
 * [ VC: Proper Declaration/PE Nesting ]
7129
 * Parameter-entity replacement text must be properly nested with
7130
 * markup declarations. That is to say, if either the first character
7131
 * or the last character of a markup declaration (markupdecl above) is
7132
 * contained in the replacement text for a parameter-entity reference,
7133
 * both must be contained in the same replacement text.
7134
 *
7135
 * [ WFC: PEs in Internal Subset ]
7136
 * In the internal DTD subset, parameter-entity references can occur
7137
 * only where markup declarations can occur, not within markup declarations.
7138
 * (This does not apply to references that occur in external parameter
7139
 * entities or to the external subset.)
7140
 */
7141
void
7142
141k
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7143
141k
    GROW;
7144
141k
    if (CUR == '<') {
7145
141k
        if (NXT(1) == '!') {
7146
108k
      switch (NXT(2)) {
7147
67.9k
          case 'E':
7148
67.9k
        if (NXT(3) == 'L')
7149
13.4k
      xmlParseElementDecl(ctxt);
7150
54.4k
        else if (NXT(3) == 'N')
7151
54.4k
      xmlParseEntityDecl(ctxt);
7152
55
                    else
7153
55
                        SKIP(2);
7154
67.9k
        break;
7155
12.4k
          case 'A':
7156
12.4k
        xmlParseAttributeListDecl(ctxt);
7157
12.4k
        break;
7158
2.93k
          case 'N':
7159
2.93k
        xmlParseNotationDecl(ctxt);
7160
2.93k
        break;
7161
25.3k
          case '-':
7162
25.3k
        xmlParseComment(ctxt);
7163
25.3k
        break;
7164
161
    default:
7165
161
                    xmlFatalErr(ctxt,
7166
161
                                ctxt->inSubset == 2 ?
7167
0
                                    XML_ERR_EXT_SUBSET_NOT_FINISHED :
7168
161
                                    XML_ERR_INT_SUBSET_NOT_FINISHED,
7169
161
                                NULL);
7170
161
                    SKIP(2);
7171
161
        break;
7172
108k
      }
7173
108k
  } else if (NXT(1) == '?') {
7174
32.4k
      xmlParsePI(ctxt);
7175
32.4k
  }
7176
141k
    }
7177
141k
}
7178
7179
/**
7180
 * xmlParseTextDecl:
7181
 * @ctxt:  an XML parser context
7182
 *
7183
 * DEPRECATED: Internal function, don't use.
7184
 *
7185
 * parse an XML declaration header for external entities
7186
 *
7187
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7188
 */
7189
7190
void
7191
0
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7192
0
    xmlChar *version;
7193
7194
    /*
7195
     * We know that '<?xml' is here.
7196
     */
7197
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7198
0
  SKIP(5);
7199
0
    } else {
7200
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7201
0
  return;
7202
0
    }
7203
7204
0
    if (SKIP_BLANKS == 0) {
7205
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7206
0
           "Space needed after '<?xml'\n");
7207
0
    }
7208
7209
    /*
7210
     * We may have the VersionInfo here.
7211
     */
7212
0
    version = xmlParseVersionInfo(ctxt);
7213
0
    if (version == NULL) {
7214
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7215
0
        if (version == NULL) {
7216
0
            xmlErrMemory(ctxt);
7217
0
            return;
7218
0
        }
7219
0
    } else {
7220
0
  if (SKIP_BLANKS == 0) {
7221
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7222
0
               "Space needed here\n");
7223
0
  }
7224
0
    }
7225
0
    ctxt->input->version = version;
7226
7227
    /*
7228
     * We must have the encoding declaration
7229
     */
7230
0
    xmlParseEncodingDecl(ctxt);
7231
7232
0
    SKIP_BLANKS;
7233
0
    if ((RAW == '?') && (NXT(1) == '>')) {
7234
0
        SKIP(2);
7235
0
    } else if (RAW == '>') {
7236
        /* Deprecated old WD ... */
7237
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7238
0
  NEXT;
7239
0
    } else {
7240
0
        int c;
7241
7242
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7243
0
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7244
0
            NEXT;
7245
0
            if (c == '>')
7246
0
                break;
7247
0
        }
7248
0
    }
7249
0
}
7250
7251
/**
7252
 * xmlParseExternalSubset:
7253
 * @ctxt:  an XML parser context
7254
 * @ExternalID: the external identifier
7255
 * @SystemID: the system identifier (or URL)
7256
 *
7257
 * DEPRECATED: Internal function, don't use.
7258
 *
7259
 * parse Markup declarations from an external subset
7260
 *
7261
 * [30] extSubset ::= textDecl? extSubsetDecl
7262
 *
7263
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7264
 */
7265
void
7266
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7267
0
                       const xmlChar *SystemID) {
7268
0
    int oldInputNr;
7269
7270
0
    xmlCtxtInitializeLate(ctxt);
7271
7272
0
    xmlDetectEncoding(ctxt);
7273
7274
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7275
0
  xmlParseTextDecl(ctxt);
7276
0
    }
7277
0
    if (ctxt->myDoc == NULL) {
7278
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7279
0
  if (ctxt->myDoc == NULL) {
7280
0
      xmlErrMemory(ctxt);
7281
0
      return;
7282
0
  }
7283
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7284
0
    }
7285
0
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) &&
7286
0
        (xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID) == NULL)) {
7287
0
        xmlErrMemory(ctxt);
7288
0
    }
7289
7290
0
    ctxt->inSubset = 2;
7291
0
    oldInputNr = ctxt->inputNr;
7292
7293
0
    SKIP_BLANKS_PE;
7294
0
    while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) &&
7295
0
           (!PARSER_STOPPED(ctxt))) {
7296
0
  GROW;
7297
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7298
0
            xmlParseConditionalSections(ctxt);
7299
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7300
0
            xmlParseMarkupDecl(ctxt);
7301
0
        } else {
7302
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7303
0
            xmlHaltParser(ctxt);
7304
0
            return;
7305
0
        }
7306
0
        SKIP_BLANKS_PE;
7307
0
        SHRINK;
7308
0
    }
7309
7310
0
    while (ctxt->inputNr > oldInputNr)
7311
0
        xmlPopPE(ctxt);
7312
7313
0
    xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
7314
0
}
7315
7316
/**
7317
 * xmlParseReference:
7318
 * @ctxt:  an XML parser context
7319
 *
7320
 * DEPRECATED: Internal function, don't use.
7321
 *
7322
 * parse and handle entity references in content, depending on the SAX
7323
 * interface, this may end-up in a call to character() if this is a
7324
 * CharRef, a predefined entity, if there is no reference() callback.
7325
 * or if the parser was asked to switch to that mode.
7326
 *
7327
 * Always consumes '&'.
7328
 *
7329
 * [67] Reference ::= EntityRef | CharRef
7330
 */
7331
void
7332
407k
xmlParseReference(xmlParserCtxtPtr ctxt) {
7333
407k
    xmlEntityPtr ent = NULL;
7334
407k
    const xmlChar *name;
7335
407k
    xmlChar *val;
7336
7337
407k
    if (RAW != '&')
7338
0
        return;
7339
7340
    /*
7341
     * Simple case of a CharRef
7342
     */
7343
407k
    if (NXT(1) == '#') {
7344
77.4k
  int i = 0;
7345
77.4k
  xmlChar out[16];
7346
77.4k
  int value = xmlParseCharRef(ctxt);
7347
7348
77.4k
  if (value == 0)
7349
203
      return;
7350
7351
        /*
7352
         * Just encode the value in UTF-8
7353
         */
7354
77.2k
        COPY_BUF(out, i, value);
7355
77.2k
        out[i] = 0;
7356
77.2k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7357
77.2k
            (!ctxt->disableSAX))
7358
77.1k
            ctxt->sax->characters(ctxt->userData, out, i);
7359
77.2k
  return;
7360
77.4k
    }
7361
7362
    /*
7363
     * We are seeing an entity reference
7364
     */
7365
330k
    name = xmlParseEntityRefInternal(ctxt);
7366
330k
    if (name == NULL)
7367
298
        return;
7368
330k
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7369
330k
    if (ent == NULL) {
7370
        /*
7371
         * Create a reference for undeclared entities.
7372
         */
7373
1.27k
        if ((ctxt->replaceEntities == 0) &&
7374
0
            (ctxt->sax != NULL) &&
7375
0
            (ctxt->disableSAX == 0) &&
7376
0
            (ctxt->sax->reference != NULL)) {
7377
0
            ctxt->sax->reference(ctxt->userData, name);
7378
0
        }
7379
1.27k
        return;
7380
1.27k
    }
7381
328k
    if (!ctxt->wellFormed)
7382
0
  return;
7383
7384
    /* special case of predefined entities */
7385
328k
    if ((ent->name == NULL) ||
7386
328k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7387
328k
  val = ent->content;
7388
328k
  if (val == NULL) return;
7389
  /*
7390
   * inline the entity.
7391
   */
7392
328k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7393
328k
      (!ctxt->disableSAX))
7394
328k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7395
328k
  return;
7396
328k
    }
7397
7398
    /*
7399
     * Some users try to parse entities on their own and used to set
7400
     * the renamed "checked" member. Fix the flags to cover this
7401
     * case.
7402
     */
7403
1
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7404
0
        ent->flags |= XML_ENT_PARSED;
7405
7406
    /*
7407
     * The first reference to the entity trigger a parsing phase
7408
     * where the ent->children is filled with the result from
7409
     * the parsing.
7410
     * Note: external parsed entities will not be loaded, it is not
7411
     * required for a non-validating parser, unless the parsing option
7412
     * of validating, or substituting entities were given. Doing so is
7413
     * far more secure as the parser will only process data coming from
7414
     * the document entity by default.
7415
     *
7416
     * FIXME: This doesn't work correctly since entities can be
7417
     * expanded with different namespace declarations in scope.
7418
     * For example:
7419
     *
7420
     * <!DOCTYPE doc [
7421
     *   <!ENTITY ent "<ns:elem/>">
7422
     * ]>
7423
     * <doc>
7424
     *   <decl1 xmlns:ns="urn:ns1">
7425
     *     &ent;
7426
     *   </decl1>
7427
     *   <decl2 xmlns:ns="urn:ns2">
7428
     *     &ent;
7429
     *   </decl2>
7430
     * </doc>
7431
     *
7432
     * Proposed fix:
7433
     *
7434
     * - Ignore current namespace declarations when parsing the
7435
     *   entity. If a prefix can't be resolved, don't report an error
7436
     *   but mark it as unresolved.
7437
     * - Try to resolve these prefixes when expanding the entity.
7438
     *   This will require a specialized version of xmlStaticCopyNode
7439
     *   which can also make use of the namespace hash table to avoid
7440
     *   quadratic behavior.
7441
     *
7442
     * Alternatively, we could simply reparse the entity on each
7443
     * expansion like we already do with custom SAX callbacks.
7444
     * External entity content should be cached in this case.
7445
     */
7446
1
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7447
0
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7448
0
         ((ctxt->replaceEntities) ||
7449
0
          (ctxt->validate)))) {
7450
0
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7451
0
            xmlCtxtParseEntity(ctxt, ent);
7452
0
        } else if (ent->children == NULL) {
7453
            /*
7454
             * Probably running in SAX mode and the callbacks don't
7455
             * build the entity content. Parse the entity again.
7456
             *
7457
             * This will also be triggered in normal tree builder mode
7458
             * if an entity happens to be empty, causing unnecessary
7459
             * reloads. It's hard to come up with a reliable check in
7460
             * which mode we're running.
7461
             */
7462
0
            xmlCtxtParseEntity(ctxt, ent);
7463
0
        }
7464
0
    }
7465
7466
    /*
7467
     * We also check for amplification if entities aren't substituted.
7468
     * They might be expanded later.
7469
     */
7470
1
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7471
0
        return;
7472
7473
1
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7474
0
        return;
7475
7476
1
    if (ctxt->replaceEntities == 0) {
7477
  /*
7478
   * Create a reference
7479
   */
7480
0
        if (ctxt->sax->reference != NULL)
7481
0
      ctxt->sax->reference(ctxt->userData, ent->name);
7482
1
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7483
0
        xmlNodePtr copy, cur;
7484
7485
        /*
7486
         * Seems we are generating the DOM content, copy the tree
7487
   */
7488
0
        cur = ent->children;
7489
7490
        /*
7491
         * Handle first text node with SAX to coalesce text efficiently
7492
         */
7493
0
        if ((cur->type == XML_TEXT_NODE) ||
7494
0
            (cur->type == XML_CDATA_SECTION_NODE)) {
7495
0
            int len = xmlStrlen(cur->content);
7496
7497
0
            if ((cur->type == XML_TEXT_NODE) ||
7498
0
                (ctxt->options & XML_PARSE_NOCDATA)) {
7499
0
                if (ctxt->sax->characters != NULL)
7500
0
                    ctxt->sax->characters(ctxt, cur->content, len);
7501
0
            } else {
7502
0
                if (ctxt->sax->cdataBlock != NULL)
7503
0
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7504
0
            }
7505
7506
0
            cur = cur->next;
7507
0
        }
7508
7509
0
        while (cur != NULL) {
7510
0
            xmlNodePtr last;
7511
7512
            /*
7513
             * Handle last text node with SAX to coalesce text efficiently
7514
             */
7515
0
            if ((cur->next == NULL) &&
7516
0
                ((cur->type == XML_TEXT_NODE) ||
7517
0
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7518
0
                int len = xmlStrlen(cur->content);
7519
7520
0
                if ((cur->type == XML_TEXT_NODE) ||
7521
0
                    (ctxt->options & XML_PARSE_NOCDATA)) {
7522
0
                    if (ctxt->sax->characters != NULL)
7523
0
                        ctxt->sax->characters(ctxt, cur->content, len);
7524
0
                } else {
7525
0
                    if (ctxt->sax->cdataBlock != NULL)
7526
0
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7527
0
                }
7528
7529
0
                break;
7530
0
            }
7531
7532
            /*
7533
             * Reset coalesce buffer stats only for non-text nodes.
7534
             */
7535
0
            ctxt->nodemem = 0;
7536
0
            ctxt->nodelen = 0;
7537
7538
0
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7539
7540
0
            if (copy == NULL) {
7541
0
                xmlErrMemory(ctxt);
7542
0
                break;
7543
0
            }
7544
7545
0
            if (ctxt->parseMode == XML_PARSE_READER) {
7546
                /* Needed for reader */
7547
0
                copy->extra = cur->extra;
7548
                /* Maybe needed for reader */
7549
0
                copy->_private = cur->_private;
7550
0
            }
7551
7552
0
            copy->parent = ctxt->node;
7553
0
            last = ctxt->node->last;
7554
0
            if (last == NULL) {
7555
0
                ctxt->node->children = copy;
7556
0
            } else {
7557
0
                last->next = copy;
7558
0
                copy->prev = last;
7559
0
            }
7560
0
            ctxt->node->last = copy;
7561
7562
0
            cur = cur->next;
7563
0
        }
7564
0
    }
7565
1
}
7566
7567
static void
7568
31.5k
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7569
    /*
7570
     * [ WFC: Entity Declared ]
7571
     * In a document without any DTD, a document with only an
7572
     * internal DTD subset which contains no parameter entity
7573
     * references, or a document with "standalone='yes'", the
7574
     * Name given in the entity reference must match that in an
7575
     * entity declaration, except that well-formed documents
7576
     * need not declare any of the following entities: amp, lt,
7577
     * gt, apos, quot.
7578
     * The declaration of a parameter entity must precede any
7579
     * reference to it.
7580
     * Similarly, the declaration of a general entity must
7581
     * precede any reference to it which appears in a default
7582
     * value in an attribute-list declaration. Note that if
7583
     * entities are declared in the external subset or in
7584
     * external parameter entities, a non-validating processor
7585
     * is not obligated to read and process their declarations;
7586
     * for such documents, the rule that an entity must be
7587
     * declared is a well-formedness constraint only if
7588
     * standalone='yes'.
7589
     */
7590
31.5k
    if ((ctxt->standalone == 1) ||
7591
30.2k
        ((ctxt->hasExternalSubset == 0) &&
7592
29.4k
         (ctxt->hasPErefs == 0))) {
7593
21.5k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7594
21.5k
                          "Entity '%s' not defined\n", name);
7595
21.5k
    } else if (ctxt->validate) {
7596
        /*
7597
         * [ VC: Entity Declared ]
7598
         * In a document with an external subset or external
7599
         * parameter entities with "standalone='no'", ...
7600
         * ... The declaration of a parameter entity must
7601
         * precede any reference to it...
7602
         */
7603
0
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7604
0
                         "Entity '%s' not defined\n", name, NULL);
7605
9.99k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7606
9.99k
               ((ctxt->replaceEntities) &&
7607
9.99k
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7608
        /*
7609
         * Also raise a non-fatal error
7610
         *
7611
         * - if the external subset is loaded and all entity declarations
7612
         *   should be available, or
7613
         * - entity substition was requested without restricting
7614
         *   external entity access.
7615
         */
7616
9.99k
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7617
9.99k
                     "Entity '%s' not defined\n", name);
7618
9.99k
    } else {
7619
0
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7620
0
                      "Entity '%s' not defined\n", name, NULL);
7621
0
    }
7622
7623
31.5k
    ctxt->valid = 0;
7624
31.5k
}
7625
7626
static xmlEntityPtr
7627
544k
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7628
544k
    xmlEntityPtr ent = NULL;
7629
7630
    /*
7631
     * Predefined entities override any extra definition
7632
     */
7633
544k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7634
544k
        ent = xmlGetPredefinedEntity(name);
7635
544k
        if (ent != NULL)
7636
521k
            return(ent);
7637
544k
    }
7638
7639
    /*
7640
     * Ask first SAX for entity resolution, otherwise try the
7641
     * entities which may have stored in the parser context.
7642
     */
7643
22.9k
    if (ctxt->sax != NULL) {
7644
22.9k
  if (ctxt->sax->getEntity != NULL)
7645
22.9k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7646
22.9k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7647
1.63k
      (ctxt->options & XML_PARSE_OLDSAX))
7648
0
      ent = xmlGetPredefinedEntity(name);
7649
22.9k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7650
1.63k
      (ctxt->userData==ctxt)) {
7651
0
      ent = xmlSAX2GetEntity(ctxt, name);
7652
0
  }
7653
22.9k
    }
7654
7655
22.9k
    if (ent == NULL) {
7656
22.9k
        xmlHandleUndeclaredEntity(ctxt, name);
7657
22.9k
    }
7658
7659
    /*
7660
     * [ WFC: Parsed Entity ]
7661
     * An entity reference must not contain the name of an
7662
     * unparsed entity
7663
     */
7664
0
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7665
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7666
0
     "Entity reference to unparsed entity %s\n", name);
7667
0
        ent = NULL;
7668
0
    }
7669
7670
    /*
7671
     * [ WFC: No External Entity References ]
7672
     * Attribute values cannot contain direct or indirect
7673
     * entity references to external entities.
7674
     */
7675
0
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7676
0
        if (inAttr) {
7677
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7678
0
                 "Attribute references external entity '%s'\n", name);
7679
0
            ent = NULL;
7680
0
        }
7681
0
    }
7682
7683
22.9k
    return(ent);
7684
544k
}
7685
7686
/**
7687
 * xmlParseEntityRefInternal:
7688
 * @ctxt:  an XML parser context
7689
 * @inAttr:  whether we are in an attribute value
7690
 *
7691
 * Parse an entity reference. Always consumes '&'.
7692
 *
7693
 * [68] EntityRef ::= '&' Name ';'
7694
 *
7695
 * Returns the name, or NULL in case of error.
7696
 */
7697
static const xmlChar *
7698
994k
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7699
994k
    const xmlChar *name;
7700
7701
994k
    GROW;
7702
7703
994k
    if (RAW != '&')
7704
0
        return(NULL);
7705
994k
    NEXT;
7706
994k
    name = xmlParseName(ctxt);
7707
994k
    if (name == NULL) {
7708
247k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7709
247k
           "xmlParseEntityRef: no name\n");
7710
247k
        return(NULL);
7711
247k
    }
7712
746k
    if (RAW != ';') {
7713
201k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7714
201k
  return(NULL);
7715
201k
    }
7716
544k
    NEXT;
7717
7718
544k
    return(name);
7719
746k
}
7720
7721
/**
7722
 * xmlParseEntityRef:
7723
 * @ctxt:  an XML parser context
7724
 *
7725
 * DEPRECATED: Internal function, don't use.
7726
 *
7727
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7728
 */
7729
xmlEntityPtr
7730
0
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7731
0
    const xmlChar *name;
7732
7733
0
    if (ctxt == NULL)
7734
0
        return(NULL);
7735
7736
0
    name = xmlParseEntityRefInternal(ctxt);
7737
0
    if (name == NULL)
7738
0
        return(NULL);
7739
7740
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7741
0
}
7742
7743
/**
7744
 * xmlParseStringEntityRef:
7745
 * @ctxt:  an XML parser context
7746
 * @str:  a pointer to an index in the string
7747
 *
7748
 * parse ENTITY references declarations, but this version parses it from
7749
 * a string value.
7750
 *
7751
 * [68] EntityRef ::= '&' Name ';'
7752
 *
7753
 * [ WFC: Entity Declared ]
7754
 * In a document without any DTD, a document with only an internal DTD
7755
 * subset which contains no parameter entity references, or a document
7756
 * with "standalone='yes'", the Name given in the entity reference
7757
 * must match that in an entity declaration, except that well-formed
7758
 * documents need not declare any of the following entities: amp, lt,
7759
 * gt, apos, quot.  The declaration of a parameter entity must precede
7760
 * any reference to it.  Similarly, the declaration of a general entity
7761
 * must precede any reference to it which appears in a default value in an
7762
 * attribute-list declaration. Note that if entities are declared in the
7763
 * external subset or in external parameter entities, a non-validating
7764
 * processor is not obligated to read and process their declarations;
7765
 * for such documents, the rule that an entity must be declared is a
7766
 * well-formedness constraint only if standalone='yes'.
7767
 *
7768
 * [ WFC: Parsed Entity ]
7769
 * An entity reference must not contain the name of an unparsed entity
7770
 *
7771
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7772
 * is updated to the current location in the string.
7773
 */
7774
static xmlChar *
7775
0
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7776
0
    xmlChar *name;
7777
0
    const xmlChar *ptr;
7778
0
    xmlChar cur;
7779
7780
0
    if ((str == NULL) || (*str == NULL))
7781
0
        return(NULL);
7782
0
    ptr = *str;
7783
0
    cur = *ptr;
7784
0
    if (cur != '&')
7785
0
  return(NULL);
7786
7787
0
    ptr++;
7788
0
    name = xmlParseStringName(ctxt, &ptr);
7789
0
    if (name == NULL) {
7790
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7791
0
           "xmlParseStringEntityRef: no name\n");
7792
0
  *str = ptr;
7793
0
  return(NULL);
7794
0
    }
7795
0
    if (*ptr != ';') {
7796
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7797
0
        xmlFree(name);
7798
0
  *str = ptr;
7799
0
  return(NULL);
7800
0
    }
7801
0
    ptr++;
7802
7803
0
    *str = ptr;
7804
0
    return(name);
7805
0
}
7806
7807
/**
7808
 * xmlParsePEReference:
7809
 * @ctxt:  an XML parser context
7810
 *
7811
 * DEPRECATED: Internal function, don't use.
7812
 *
7813
 * Parse a parameter entity reference. Always consumes '%'.
7814
 *
7815
 * The entity content is handled directly by pushing its content as
7816
 * a new input stream.
7817
 *
7818
 * [69] PEReference ::= '%' Name ';'
7819
 *
7820
 * [ WFC: No Recursion ]
7821
 * A parsed entity must not contain a recursive
7822
 * reference to itself, either directly or indirectly.
7823
 *
7824
 * [ WFC: Entity Declared ]
7825
 * In a document without any DTD, a document with only an internal DTD
7826
 * subset which contains no parameter entity references, or a document
7827
 * with "standalone='yes'", ...  ... The declaration of a parameter
7828
 * entity must precede any reference to it...
7829
 *
7830
 * [ VC: Entity Declared ]
7831
 * In a document with an external subset or external parameter entities
7832
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7833
 * must precede any reference to it...
7834
 *
7835
 * [ WFC: In DTD ]
7836
 * Parameter-entity references may only appear in the DTD.
7837
 * NOTE: misleading but this is handled.
7838
 */
7839
void
7840
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7841
9.22k
{
7842
9.22k
    const xmlChar *name;
7843
9.22k
    xmlEntityPtr entity = NULL;
7844
9.22k
    xmlParserInputPtr input;
7845
7846
9.22k
    if (RAW != '%')
7847
0
        return;
7848
9.22k
    NEXT;
7849
9.22k
    name = xmlParseName(ctxt);
7850
9.22k
    if (name == NULL) {
7851
1.99k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7852
1.99k
  return;
7853
1.99k
    }
7854
7.22k
    if (RAW != ';') {
7855
59
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7856
59
        return;
7857
59
    }
7858
7859
7.16k
    NEXT;
7860
7861
    /* Must be set before xmlHandleUndeclaredEntity */
7862
7.16k
    ctxt->hasPErefs = 1;
7863
7864
    /*
7865
     * Request the entity from SAX
7866
     */
7867
7.16k
    if ((ctxt->sax != NULL) &&
7868
7.16k
  (ctxt->sax->getParameterEntity != NULL))
7869
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7870
7871
7.16k
    if (entity == NULL) {
7872
7.16k
        xmlHandleUndeclaredEntity(ctxt, name);
7873
7.16k
    } else {
7874
  /*
7875
   * Internal checking in case the entity quest barfed
7876
   */
7877
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7878
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7879
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7880
0
      "Internal: %%%s; is not a parameter entity\n",
7881
0
        name, NULL);
7882
0
  } else {
7883
0
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7884
0
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7885
0
     ((ctxt->loadsubset == 0) &&
7886
0
      (ctxt->replaceEntities == 0) &&
7887
0
      (ctxt->validate == 0))))
7888
0
    return;
7889
7890
0
            if (entity->flags & XML_ENT_EXPANDING) {
7891
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7892
0
                xmlHaltParser(ctxt);
7893
0
                return;
7894
0
            }
7895
7896
0
      input = xmlNewEntityInputStream(ctxt, entity);
7897
0
      if (xmlCtxtPushInput(ctxt, input) < 0) {
7898
0
                xmlFreeInputStream(input);
7899
0
    return;
7900
0
            }
7901
7902
0
            entity->flags |= XML_ENT_EXPANDING;
7903
7904
0
            GROW;
7905
7906
0
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7907
0
                xmlDetectEncoding(ctxt);
7908
7909
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7910
0
                    (IS_BLANK_CH(NXT(5)))) {
7911
0
                    xmlParseTextDecl(ctxt);
7912
0
                }
7913
0
            }
7914
0
  }
7915
0
    }
7916
7.16k
}
7917
7918
/**
7919
 * xmlLoadEntityContent:
7920
 * @ctxt:  an XML parser context
7921
 * @entity: an unloaded system entity
7922
 *
7923
 * Load the content of an entity.
7924
 *
7925
 * Returns 0 in case of success and -1 in case of failure
7926
 */
7927
static int
7928
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7929
0
    xmlParserInputPtr oldinput, input = NULL;
7930
0
    xmlParserInputPtr *oldinputTab;
7931
0
    const xmlChar *oldencoding;
7932
0
    xmlChar *content = NULL;
7933
0
    xmlResourceType rtype;
7934
0
    size_t length, i;
7935
0
    int oldinputNr, oldinputMax;
7936
0
    int ret = -1;
7937
0
    int res;
7938
7939
0
    if ((ctxt == NULL) || (entity == NULL) ||
7940
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7941
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7942
0
  (entity->content != NULL)) {
7943
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7944
0
              "xmlLoadEntityContent parameter error");
7945
0
        return(-1);
7946
0
    }
7947
7948
0
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7949
0
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7950
0
    else
7951
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7952
7953
0
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7954
0
                            (char *) entity->ExternalID, rtype);
7955
0
    if (input == NULL)
7956
0
        return(-1);
7957
7958
0
    oldinput = ctxt->input;
7959
0
    oldinputNr = ctxt->inputNr;
7960
0
    oldinputMax = ctxt->inputMax;
7961
0
    oldinputTab = ctxt->inputTab;
7962
0
    oldencoding = ctxt->encoding;
7963
7964
0
    ctxt->input = NULL;
7965
0
    ctxt->inputNr = 0;
7966
0
    ctxt->inputMax = 1;
7967
0
    ctxt->encoding = NULL;
7968
0
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7969
0
    if (ctxt->inputTab == NULL) {
7970
0
        xmlErrMemory(ctxt);
7971
0
        xmlFreeInputStream(input);
7972
0
        goto error;
7973
0
    }
7974
7975
0
    xmlBufResetInput(input->buf->buffer, input);
7976
7977
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
7978
0
        xmlFreeInputStream(input);
7979
0
        goto error;
7980
0
    }
7981
7982
0
    xmlDetectEncoding(ctxt);
7983
7984
    /*
7985
     * Parse a possible text declaration first
7986
     */
7987
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7988
0
  xmlParseTextDecl(ctxt);
7989
        /*
7990
         * An XML-1.0 document can't reference an entity not XML-1.0
7991
         */
7992
0
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7993
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7994
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7995
0
                           "Version mismatch between document and entity\n");
7996
0
        }
7997
0
    }
7998
7999
0
    length = input->cur - input->base;
8000
0
    xmlBufShrink(input->buf->buffer, length);
8001
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
8002
8003
0
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
8004
0
        ;
8005
8006
0
    xmlBufResetInput(input->buf->buffer, input);
8007
8008
0
    if (res < 0) {
8009
0
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
8010
0
        goto error;
8011
0
    }
8012
8013
0
    length = xmlBufUse(input->buf->buffer);
8014
0
    if (length > INT_MAX) {
8015
0
        xmlErrMemory(ctxt);
8016
0
        goto error;
8017
0
    }
8018
8019
0
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
8020
0
    if (content == NULL) {
8021
0
        xmlErrMemory(ctxt);
8022
0
        goto error;
8023
0
    }
8024
8025
0
    for (i = 0; i < length; ) {
8026
0
        int clen = length - i;
8027
0
        int c = xmlGetUTF8Char(content + i, &clen);
8028
8029
0
        if ((c < 0) || (!IS_CHAR(c))) {
8030
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8031
0
                              "xmlLoadEntityContent: invalid char value %d\n",
8032
0
                              content[i]);
8033
0
            goto error;
8034
0
        }
8035
0
        i += clen;
8036
0
    }
8037
8038
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
8039
0
    entity->content = content;
8040
0
    entity->length = length;
8041
0
    content = NULL;
8042
0
    ret = 0;
8043
8044
0
error:
8045
0
    while (ctxt->inputNr > 0)
8046
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
8047
0
    xmlFree(ctxt->inputTab);
8048
0
    xmlFree((xmlChar *) ctxt->encoding);
8049
8050
0
    ctxt->input = oldinput;
8051
0
    ctxt->inputNr = oldinputNr;
8052
0
    ctxt->inputMax = oldinputMax;
8053
0
    ctxt->inputTab = oldinputTab;
8054
0
    ctxt->encoding = oldencoding;
8055
8056
0
    xmlFree(content);
8057
8058
0
    return(ret);
8059
0
}
8060
8061
/**
8062
 * xmlParseStringPEReference:
8063
 * @ctxt:  an XML parser context
8064
 * @str:  a pointer to an index in the string
8065
 *
8066
 * parse PEReference declarations
8067
 *
8068
 * [69] PEReference ::= '%' Name ';'
8069
 *
8070
 * [ WFC: No Recursion ]
8071
 * A parsed entity must not contain a recursive
8072
 * reference to itself, either directly or indirectly.
8073
 *
8074
 * [ WFC: Entity Declared ]
8075
 * In a document without any DTD, a document with only an internal DTD
8076
 * subset which contains no parameter entity references, or a document
8077
 * with "standalone='yes'", ...  ... The declaration of a parameter
8078
 * entity must precede any reference to it...
8079
 *
8080
 * [ VC: Entity Declared ]
8081
 * In a document with an external subset or external parameter entities
8082
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8083
 * must precede any reference to it...
8084
 *
8085
 * [ WFC: In DTD ]
8086
 * Parameter-entity references may only appear in the DTD.
8087
 * NOTE: misleading but this is handled.
8088
 *
8089
 * Returns the entity obtained from SAX, or NULL if none was found.
8090
 *         str is updated to the current value of the index
8091
 */
8092
static xmlEntityPtr
8093
3.91k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8094
3.91k
    const xmlChar *ptr;
8095
3.91k
    xmlChar cur;
8096
3.91k
    xmlChar *name;
8097
3.91k
    xmlEntityPtr entity = NULL;
8098
8099
3.91k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8100
3.91k
    ptr = *str;
8101
3.91k
    cur = *ptr;
8102
3.91k
    if (cur != '%')
8103
0
        return(NULL);
8104
3.91k
    ptr++;
8105
3.91k
    name = xmlParseStringName(ctxt, &ptr);
8106
3.91k
    if (name == NULL) {
8107
1.34k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8108
1.34k
           "xmlParseStringPEReference: no name\n");
8109
1.34k
  *str = ptr;
8110
1.34k
  return(NULL);
8111
1.34k
    }
8112
2.56k
    cur = *ptr;
8113
2.56k
    if (cur != ';') {
8114
1.17k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8115
1.17k
  xmlFree(name);
8116
1.17k
  *str = ptr;
8117
1.17k
  return(NULL);
8118
1.17k
    }
8119
1.39k
    ptr++;
8120
8121
    /* Must be set before xmlHandleUndeclaredEntity */
8122
1.39k
    ctxt->hasPErefs = 1;
8123
8124
    /*
8125
     * Request the entity from SAX
8126
     */
8127
1.39k
    if ((ctxt->sax != NULL) &&
8128
1.39k
  (ctxt->sax->getParameterEntity != NULL))
8129
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8130
8131
1.39k
    if (entity == NULL) {
8132
1.39k
        xmlHandleUndeclaredEntity(ctxt, name);
8133
1.39k
    } else {
8134
  /*
8135
   * Internal checking in case the entity quest barfed
8136
   */
8137
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8138
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8139
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8140
0
        "%%%s; is not a parameter entity\n",
8141
0
        name, NULL);
8142
0
  }
8143
0
    }
8144
8145
1.39k
    xmlFree(name);
8146
1.39k
    *str = ptr;
8147
1.39k
    return(entity);
8148
2.56k
}
8149
8150
/**
8151
 * xmlParseDocTypeDecl:
8152
 * @ctxt:  an XML parser context
8153
 *
8154
 * DEPRECATED: Internal function, don't use.
8155
 *
8156
 * parse a DOCTYPE declaration
8157
 *
8158
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8159
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8160
 *
8161
 * [ VC: Root Element Type ]
8162
 * The Name in the document type declaration must match the element
8163
 * type of the root element.
8164
 */
8165
8166
void
8167
8.74k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8168
8.74k
    const xmlChar *name = NULL;
8169
8.74k
    xmlChar *ExternalID = NULL;
8170
8.74k
    xmlChar *URI = NULL;
8171
8172
    /*
8173
     * We know that '<!DOCTYPE' has been detected.
8174
     */
8175
8.74k
    SKIP(9);
8176
8177
8.74k
    if (SKIP_BLANKS == 0) {
8178
150
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8179
150
                       "Space required after 'DOCTYPE'\n");
8180
150
    }
8181
8182
    /*
8183
     * Parse the DOCTYPE name.
8184
     */
8185
8.74k
    name = xmlParseName(ctxt);
8186
8.74k
    if (name == NULL) {
8187
21
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8188
21
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8189
21
    }
8190
8.74k
    ctxt->intSubName = name;
8191
8192
8.74k
    SKIP_BLANKS;
8193
8194
    /*
8195
     * Check for SystemID and ExternalID
8196
     */
8197
8.74k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8198
8199
8.74k
    if ((URI != NULL) || (ExternalID != NULL)) {
8200
389
        ctxt->hasExternalSubset = 1;
8201
389
    }
8202
8.74k
    ctxt->extSubURI = URI;
8203
8.74k
    ctxt->extSubSystem = ExternalID;
8204
8205
8.74k
    SKIP_BLANKS;
8206
8207
    /*
8208
     * Create and update the internal subset.
8209
     */
8210
8.74k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8211
0
  (!ctxt->disableSAX))
8212
0
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8213
8214
8.74k
    if ((RAW != '[') && (RAW != '>')) {
8215
230
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8216
230
    }
8217
8.74k
}
8218
8219
/**
8220
 * xmlParseInternalSubset:
8221
 * @ctxt:  an XML parser context
8222
 *
8223
 * parse the internal subset declaration
8224
 *
8225
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8226
 */
8227
8228
static void
8229
8.15k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8230
    /*
8231
     * Is there any DTD definition ?
8232
     */
8233
8.15k
    if (RAW == '[') {
8234
8.15k
        int oldInputNr = ctxt->inputNr;
8235
8236
8.15k
        NEXT;
8237
  /*
8238
   * Parse the succession of Markup declarations and
8239
   * PEReferences.
8240
   * Subsequence (markupdecl | PEReference | S)*
8241
   */
8242
8.15k
  SKIP_BLANKS;
8243
158k
  while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) &&
8244
155k
               (PARSER_STOPPED(ctxt) == 0)) {
8245
8246
            /*
8247
             * Conditional sections are allowed from external entities included
8248
             * by PE References in the internal subset.
8249
             */
8250
154k
            if ((PARSER_EXTERNAL(ctxt)) &&
8251
0
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8252
0
                xmlParseConditionalSections(ctxt);
8253
154k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8254
141k
          xmlParseMarkupDecl(ctxt);
8255
141k
            } else if (RAW == '%') {
8256
9.22k
          xmlParsePEReference(ctxt);
8257
9.22k
            } else {
8258
3.76k
    xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8259
3.76k
                break;
8260
3.76k
            }
8261
150k
      SKIP_BLANKS_PE;
8262
150k
            SHRINK;
8263
150k
            GROW;
8264
150k
  }
8265
8266
8.15k
        while (ctxt->inputNr > oldInputNr)
8267
0
            xmlPopPE(ctxt);
8268
8269
8.15k
  if (RAW == ']') {
8270
3.08k
      NEXT;
8271
3.08k
      SKIP_BLANKS;
8272
3.08k
  }
8273
8.15k
    }
8274
8275
    /*
8276
     * We should be at the end of the DOCTYPE declaration.
8277
     */
8278
8.15k
    if ((ctxt->wellFormed) && (RAW != '>')) {
8279
19
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8280
19
  return;
8281
19
    }
8282
8.14k
    NEXT;
8283
8.14k
}
8284
8285
#ifdef LIBXML_SAX1_ENABLED
8286
/**
8287
 * xmlParseAttribute:
8288
 * @ctxt:  an XML parser context
8289
 * @value:  a xmlChar ** used to store the value of the attribute
8290
 *
8291
 * DEPRECATED: Internal function, don't use.
8292
 *
8293
 * parse an attribute
8294
 *
8295
 * [41] Attribute ::= Name Eq AttValue
8296
 *
8297
 * [ WFC: No External Entity References ]
8298
 * Attribute values cannot contain direct or indirect entity references
8299
 * to external entities.
8300
 *
8301
 * [ WFC: No < in Attribute Values ]
8302
 * The replacement text of any entity referred to directly or indirectly in
8303
 * an attribute value (other than "&lt;") must not contain a <.
8304
 *
8305
 * [ VC: Attribute Value Type ]
8306
 * The attribute must have been declared; the value must be of the type
8307
 * declared for it.
8308
 *
8309
 * [25] Eq ::= S? '=' S?
8310
 *
8311
 * With namespace:
8312
 *
8313
 * [NS 11] Attribute ::= QName Eq AttValue
8314
 *
8315
 * Also the case QName == xmlns:??? is handled independently as a namespace
8316
 * definition.
8317
 *
8318
 * Returns the attribute name, and the value in *value.
8319
 */
8320
8321
const xmlChar *
8322
0
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8323
0
    const xmlChar *name;
8324
0
    xmlChar *val;
8325
8326
0
    *value = NULL;
8327
0
    GROW;
8328
0
    name = xmlParseName(ctxt);
8329
0
    if (name == NULL) {
8330
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8331
0
                 "error parsing attribute name\n");
8332
0
        return(NULL);
8333
0
    }
8334
8335
    /*
8336
     * read the value
8337
     */
8338
0
    SKIP_BLANKS;
8339
0
    if (RAW == '=') {
8340
0
        NEXT;
8341
0
  SKIP_BLANKS;
8342
0
  val = xmlParseAttValue(ctxt);
8343
0
    } else {
8344
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8345
0
         "Specification mandates value for attribute %s\n", name);
8346
0
  return(name);
8347
0
    }
8348
8349
    /*
8350
     * Check that xml:lang conforms to the specification
8351
     * No more registered as an error, just generate a warning now
8352
     * since this was deprecated in XML second edition
8353
     */
8354
0
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8355
0
  if (!xmlCheckLanguageID(val)) {
8356
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8357
0
              "Malformed value for xml:lang : %s\n",
8358
0
        val, NULL);
8359
0
  }
8360
0
    }
8361
8362
    /*
8363
     * Check that xml:space conforms to the specification
8364
     */
8365
0
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8366
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8367
0
      *(ctxt->space) = 0;
8368
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8369
0
      *(ctxt->space) = 1;
8370
0
  else {
8371
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8372
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8373
0
                                 val, NULL);
8374
0
  }
8375
0
    }
8376
8377
0
    *value = val;
8378
0
    return(name);
8379
0
}
8380
8381
/**
8382
 * xmlParseStartTag:
8383
 * @ctxt:  an XML parser context
8384
 *
8385
 * DEPRECATED: Internal function, don't use.
8386
 *
8387
 * Parse a start tag. Always consumes '<'.
8388
 *
8389
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8390
 *
8391
 * [ WFC: Unique Att Spec ]
8392
 * No attribute name may appear more than once in the same start-tag or
8393
 * empty-element tag.
8394
 *
8395
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8396
 *
8397
 * [ WFC: Unique Att Spec ]
8398
 * No attribute name may appear more than once in the same start-tag or
8399
 * empty-element tag.
8400
 *
8401
 * With namespace:
8402
 *
8403
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8404
 *
8405
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8406
 *
8407
 * Returns the element name parsed
8408
 */
8409
8410
const xmlChar *
8411
0
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8412
0
    const xmlChar *name;
8413
0
    const xmlChar *attname;
8414
0
    xmlChar *attvalue;
8415
0
    const xmlChar **atts = ctxt->atts;
8416
0
    int nbatts = 0;
8417
0
    int maxatts = ctxt->maxatts;
8418
0
    int i;
8419
8420
0
    if (RAW != '<') return(NULL);
8421
0
    NEXT1;
8422
8423
0
    name = xmlParseName(ctxt);
8424
0
    if (name == NULL) {
8425
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8426
0
       "xmlParseStartTag: invalid element name\n");
8427
0
        return(NULL);
8428
0
    }
8429
8430
    /*
8431
     * Now parse the attributes, it ends up with the ending
8432
     *
8433
     * (S Attribute)* S?
8434
     */
8435
0
    SKIP_BLANKS;
8436
0
    GROW;
8437
8438
0
    while (((RAW != '>') &&
8439
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8440
0
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8441
0
  attname = xmlParseAttribute(ctxt, &attvalue);
8442
0
        if (attname == NULL)
8443
0
      break;
8444
0
        if (attvalue != NULL) {
8445
      /*
8446
       * [ WFC: Unique Att Spec ]
8447
       * No attribute name may appear more than once in the same
8448
       * start-tag or empty-element tag.
8449
       */
8450
0
      for (i = 0; i < nbatts;i += 2) {
8451
0
          if (xmlStrEqual(atts[i], attname)) {
8452
0
        xmlErrAttributeDup(ctxt, NULL, attname);
8453
0
        goto failed;
8454
0
    }
8455
0
      }
8456
      /*
8457
       * Add the pair to atts
8458
       */
8459
0
      if (nbatts + 4 > maxatts) {
8460
0
          const xmlChar **n;
8461
0
                int newSize;
8462
8463
0
                newSize = xmlGrowCapacity(maxatts, sizeof(n[0]) * 2,
8464
0
                                          11, XML_MAX_ATTRS);
8465
0
                if (newSize < 0) {
8466
0
        xmlErrMemory(ctxt);
8467
0
        goto failed;
8468
0
    }
8469
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
8470
0
                if (newSize < 2)
8471
0
                    newSize = 2;
8472
0
#endif
8473
0
          n = xmlRealloc(atts, newSize * sizeof(n[0]) * 2);
8474
0
    if (n == NULL) {
8475
0
        xmlErrMemory(ctxt);
8476
0
        goto failed;
8477
0
    }
8478
0
    atts = n;
8479
0
                maxatts = newSize * 2;
8480
0
    ctxt->atts = atts;
8481
0
    ctxt->maxatts = maxatts;
8482
0
      }
8483
8484
0
      atts[nbatts++] = attname;
8485
0
      atts[nbatts++] = attvalue;
8486
0
      atts[nbatts] = NULL;
8487
0
      atts[nbatts + 1] = NULL;
8488
8489
0
            attvalue = NULL;
8490
0
  }
8491
8492
0
failed:
8493
8494
0
        if (attvalue != NULL)
8495
0
            xmlFree(attvalue);
8496
8497
0
  GROW
8498
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8499
0
      break;
8500
0
  if (SKIP_BLANKS == 0) {
8501
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8502
0
         "attributes construct error\n");
8503
0
  }
8504
0
  SHRINK;
8505
0
        GROW;
8506
0
    }
8507
8508
    /*
8509
     * SAX: Start of Element !
8510
     */
8511
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8512
0
  (!ctxt->disableSAX)) {
8513
0
  if (nbatts > 0)
8514
0
      ctxt->sax->startElement(ctxt->userData, name, atts);
8515
0
  else
8516
0
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8517
0
    }
8518
8519
0
    if (atts != NULL) {
8520
        /* Free only the content strings */
8521
0
        for (i = 1;i < nbatts;i+=2)
8522
0
      if (atts[i] != NULL)
8523
0
         xmlFree((xmlChar *) atts[i]);
8524
0
    }
8525
0
    return(name);
8526
0
}
8527
8528
/**
8529
 * xmlParseEndTag1:
8530
 * @ctxt:  an XML parser context
8531
 * @line:  line of the start tag
8532
 * @nsNr:  number of namespaces on the start tag
8533
 *
8534
 * Parse an end tag. Always consumes '</'.
8535
 *
8536
 * [42] ETag ::= '</' Name S? '>'
8537
 *
8538
 * With namespace
8539
 *
8540
 * [NS 9] ETag ::= '</' QName S? '>'
8541
 */
8542
8543
static void
8544
0
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8545
0
    const xmlChar *name;
8546
8547
0
    GROW;
8548
0
    if ((RAW != '<') || (NXT(1) != '/')) {
8549
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8550
0
           "xmlParseEndTag: '</' not found\n");
8551
0
  return;
8552
0
    }
8553
0
    SKIP(2);
8554
8555
0
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8556
8557
    /*
8558
     * We should definitely be at the ending "S? '>'" part
8559
     */
8560
0
    GROW;
8561
0
    SKIP_BLANKS;
8562
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8563
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8564
0
    } else
8565
0
  NEXT1;
8566
8567
    /*
8568
     * [ WFC: Element Type Match ]
8569
     * The Name in an element's end-tag must match the element type in the
8570
     * start-tag.
8571
     *
8572
     */
8573
0
    if (name != (xmlChar*)1) {
8574
0
        if (name == NULL) name = BAD_CAST "unparsable";
8575
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8576
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
8577
0
                    ctxt->name, line, name);
8578
0
    }
8579
8580
    /*
8581
     * SAX: End of Tag
8582
     */
8583
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8584
0
  (!ctxt->disableSAX))
8585
0
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8586
8587
0
    namePop(ctxt);
8588
0
    spacePop(ctxt);
8589
0
}
8590
8591
/**
8592
 * xmlParseEndTag:
8593
 * @ctxt:  an XML parser context
8594
 *
8595
 * DEPRECATED: Internal function, don't use.
8596
 *
8597
 * parse an end of tag
8598
 *
8599
 * [42] ETag ::= '</' Name S? '>'
8600
 *
8601
 * With namespace
8602
 *
8603
 * [NS 9] ETag ::= '</' QName S? '>'
8604
 */
8605
8606
void
8607
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8608
0
    xmlParseEndTag1(ctxt, 0);
8609
0
}
8610
#endif /* LIBXML_SAX1_ENABLED */
8611
8612
/************************************************************************
8613
 *                  *
8614
 *          SAX 2 specific operations       *
8615
 *                  *
8616
 ************************************************************************/
8617
8618
/**
8619
 * xmlParseQNameHashed:
8620
 * @ctxt:  an XML parser context
8621
 * @prefix:  pointer to store the prefix part
8622
 *
8623
 * parse an XML Namespace QName
8624
 *
8625
 * [6]  QName  ::= (Prefix ':')? LocalPart
8626
 * [7]  Prefix  ::= NCName
8627
 * [8]  LocalPart  ::= NCName
8628
 *
8629
 * Returns the Name parsed or NULL
8630
 */
8631
8632
static xmlHashedString
8633
64.0M
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8634
64.0M
    xmlHashedString l, p;
8635
64.0M
    int start, isNCName = 0;
8636
8637
64.0M
    l.name = NULL;
8638
64.0M
    p.name = NULL;
8639
8640
64.0M
    GROW;
8641
64.0M
    start = CUR_PTR - BASE_PTR;
8642
8643
64.0M
    l = xmlParseNCName(ctxt);
8644
64.0M
    if (l.name != NULL) {
8645
63.7M
        isNCName = 1;
8646
63.7M
        if (CUR == ':') {
8647
23.7M
            NEXT;
8648
23.7M
            p = l;
8649
23.7M
            l = xmlParseNCName(ctxt);
8650
23.7M
        }
8651
63.7M
    }
8652
64.0M
    if ((l.name == NULL) || (CUR == ':')) {
8653
644k
        xmlChar *tmp;
8654
8655
644k
        l.name = NULL;
8656
644k
        p.name = NULL;
8657
644k
        if ((isNCName == 0) && (CUR != ':'))
8658
68.9k
            return(l);
8659
576k
        tmp = xmlParseNmtoken(ctxt);
8660
576k
        if (tmp != NULL)
8661
481k
            xmlFree(tmp);
8662
576k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8663
576k
                                CUR_PTR - (BASE_PTR + start));
8664
576k
        if (l.name == NULL) {
8665
0
            xmlErrMemory(ctxt);
8666
0
            return(l);
8667
0
        }
8668
576k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8669
576k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8670
576k
    }
8671
8672
63.9M
    *prefix = p;
8673
63.9M
    return(l);
8674
64.0M
}
8675
8676
/**
8677
 * xmlParseQName:
8678
 * @ctxt:  an XML parser context
8679
 * @prefix:  pointer to store the prefix part
8680
 *
8681
 * parse an XML Namespace QName
8682
 *
8683
 * [6]  QName  ::= (Prefix ':')? LocalPart
8684
 * [7]  Prefix  ::= NCName
8685
 * [8]  LocalPart  ::= NCName
8686
 *
8687
 * Returns the Name parsed or NULL
8688
 */
8689
8690
static const xmlChar *
8691
12.4k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8692
12.4k
    xmlHashedString n, p;
8693
8694
12.4k
    n = xmlParseQNameHashed(ctxt, &p);
8695
12.4k
    if (n.name == NULL)
8696
459
        return(NULL);
8697
12.0k
    *prefix = p.name;
8698
12.0k
    return(n.name);
8699
12.4k
}
8700
8701
/**
8702
 * xmlParseQNameAndCompare:
8703
 * @ctxt:  an XML parser context
8704
 * @name:  the localname
8705
 * @prefix:  the prefix, if any.
8706
 *
8707
 * parse an XML name and compares for match
8708
 * (specialized for endtag parsing)
8709
 *
8710
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8711
 * and the name for mismatch
8712
 */
8713
8714
static const xmlChar *
8715
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8716
6.23M
                        xmlChar const *prefix) {
8717
6.23M
    const xmlChar *cmp;
8718
6.23M
    const xmlChar *in;
8719
6.23M
    const xmlChar *ret;
8720
6.23M
    const xmlChar *prefix2;
8721
8722
6.23M
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8723
8724
6.23M
    GROW;
8725
6.23M
    in = ctxt->input->cur;
8726
8727
6.23M
    cmp = prefix;
8728
19.6M
    while (*in != 0 && *in == *cmp) {
8729
13.3M
  ++in;
8730
13.3M
  ++cmp;
8731
13.3M
    }
8732
6.23M
    if ((*cmp == 0) && (*in == ':')) {
8733
6.23M
        in++;
8734
6.23M
  cmp = name;
8735
42.7M
  while (*in != 0 && *in == *cmp) {
8736
36.5M
      ++in;
8737
36.5M
      ++cmp;
8738
36.5M
  }
8739
6.23M
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8740
      /* success */
8741
6.22M
            ctxt->input->col += in - ctxt->input->cur;
8742
6.22M
      ctxt->input->cur = in;
8743
6.22M
      return((const xmlChar*) 1);
8744
6.22M
  }
8745
6.23M
    }
8746
    /*
8747
     * all strings coms from the dictionary, equality can be done directly
8748
     */
8749
12.4k
    ret = xmlParseQName (ctxt, &prefix2);
8750
12.4k
    if (ret == NULL)
8751
459
        return(NULL);
8752
12.0k
    if ((ret == name) && (prefix == prefix2))
8753
198
  return((const xmlChar*) 1);
8754
11.8k
    return ret;
8755
12.0k
}
8756
8757
/**
8758
 * xmlParseAttribute2:
8759
 * @ctxt:  an XML parser context
8760
 * @pref:  the element prefix
8761
 * @elem:  the element name
8762
 * @hprefix:  a xmlHashedString * used to store the attribute prefix
8763
 * @value:  a xmlChar ** used to store the value of the attribute
8764
 * @len:  an int * to save the length of the attribute
8765
 * @alloc:  an int * to indicate if the attribute was allocated
8766
 *
8767
 * parse an attribute in the new SAX2 framework.
8768
 *
8769
 * Returns the attribute name, and the value in *value.
8770
 */
8771
8772
static xmlHashedString
8773
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8774
                   const xmlChar * pref, const xmlChar * elem,
8775
                   xmlHashedString * hprefix, xmlChar ** value,
8776
                   int *len, int *alloc)
8777
24.9M
{
8778
24.9M
    xmlHashedString hname;
8779
24.9M
    const xmlChar *prefix, *name;
8780
24.9M
    xmlChar *val = NULL, *internal_val = NULL;
8781
24.9M
    int normalize = 0;
8782
24.9M
    int isNamespace;
8783
8784
24.9M
    *value = NULL;
8785
24.9M
    GROW;
8786
24.9M
    hname = xmlParseQNameHashed(ctxt, hprefix);
8787
24.9M
    if (hname.name == NULL) {
8788
53.7k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8789
53.7k
                       "error parsing attribute name\n");
8790
53.7k
        return(hname);
8791
53.7k
    }
8792
24.8M
    name = hname.name;
8793
24.8M
    prefix = hprefix->name;
8794
8795
    /*
8796
     * get the type if needed
8797
     */
8798
24.8M
    if (ctxt->attsSpecial != NULL) {
8799
27.4k
        int type;
8800
8801
27.4k
        type = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
8802
27.4k
                                              prefix, name));
8803
27.4k
        if (type != 0)
8804
12.1k
            normalize = 1;
8805
27.4k
    }
8806
8807
    /*
8808
     * read the value
8809
     */
8810
24.8M
    SKIP_BLANKS;
8811
24.8M
    if (RAW == '=') {
8812
24.8M
        NEXT;
8813
24.8M
        SKIP_BLANKS;
8814
24.8M
        isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8815
24.4M
                       (prefix == ctxt->str_xmlns));
8816
24.8M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize,
8817
24.8M
                                       isNamespace);
8818
24.8M
        if (val == NULL)
8819
8.92k
            goto error;
8820
24.8M
    } else {
8821
17.0k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8822
17.0k
                          "Specification mandates value for attribute %s\n",
8823
17.0k
                          name);
8824
17.0k
        goto error;
8825
17.0k
    }
8826
8827
24.8M
    if (prefix == ctxt->str_xml) {
8828
        /*
8829
         * Check that xml:lang conforms to the specification
8830
         * No more registered as an error, just generate a warning now
8831
         * since this was deprecated in XML second edition
8832
         */
8833
112k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8834
0
            internal_val = xmlStrndup(val, *len);
8835
0
            if (internal_val == NULL)
8836
0
                goto mem_error;
8837
0
            if (!xmlCheckLanguageID(internal_val)) {
8838
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8839
0
                              "Malformed value for xml:lang : %s\n",
8840
0
                              internal_val, NULL);
8841
0
            }
8842
0
        }
8843
8844
        /*
8845
         * Check that xml:space conforms to the specification
8846
         */
8847
112k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8848
90.5k
            internal_val = xmlStrndup(val, *len);
8849
90.5k
            if (internal_val == NULL)
8850
0
                goto mem_error;
8851
90.5k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8852
123
                *(ctxt->space) = 0;
8853
90.4k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8854
87.4k
                *(ctxt->space) = 1;
8855
3.00k
            else {
8856
3.00k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8857
3.00k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8858
3.00k
                              internal_val, NULL);
8859
3.00k
            }
8860
90.5k
        }
8861
112k
        if (internal_val) {
8862
90.5k
            xmlFree(internal_val);
8863
90.5k
        }
8864
112k
    }
8865
8866
24.8M
    *value = val;
8867
24.8M
    return (hname);
8868
8869
0
mem_error:
8870
0
    xmlErrMemory(ctxt);
8871
26.4k
error:
8872
26.4k
    if ((val != NULL) && (*alloc != 0))
8873
0
        xmlFree(val);
8874
26.4k
    return(hname);
8875
0
}
8876
8877
/**
8878
 * xmlAttrHashInsert:
8879
 * @ctxt: parser context
8880
 * @size: size of the hash table
8881
 * @name: attribute name
8882
 * @uri: namespace uri
8883
 * @hashValue: combined hash value of name and uri
8884
 * @aindex: attribute index (this is a multiple of 5)
8885
 *
8886
 * Inserts a new attribute into the hash table.
8887
 *
8888
 * Returns INT_MAX if no existing attribute was found, the attribute
8889
 * index if an attribute was found, -1 if a memory allocation failed.
8890
 */
8891
static int
8892
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8893
18.2M
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8894
18.2M
    xmlAttrHashBucket *table = ctxt->attrHash;
8895
18.2M
    xmlAttrHashBucket *bucket;
8896
18.2M
    unsigned hindex;
8897
8898
18.2M
    hindex = hashValue & (size - 1);
8899
18.2M
    bucket = &table[hindex];
8900
8901
21.3M
    while (bucket->index >= 0) {
8902
3.20M
        const xmlChar **atts = &ctxt->atts[bucket->index];
8903
8904
3.20M
        if (name == atts[0]) {
8905
186k
            int nsIndex = XML_PTR_TO_INT(atts[2]);
8906
8907
186k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8908
186k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8909
2.50k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8910
70.0k
                return(bucket->index);
8911
186k
        }
8912
8913
3.13M
        hindex++;
8914
3.13M
        bucket++;
8915
3.13M
        if (hindex >= size) {
8916
1.06M
            hindex = 0;
8917
1.06M
            bucket = table;
8918
1.06M
        }
8919
3.13M
    }
8920
8921
18.1M
    bucket->index = aindex;
8922
8923
18.1M
    return(INT_MAX);
8924
18.2M
}
8925
8926
static int
8927
xmlAttrHashInsertQName(xmlParserCtxtPtr ctxt, unsigned size,
8928
                       const xmlChar *name, const xmlChar *prefix,
8929
45
                       unsigned hashValue, int aindex) {
8930
45
    xmlAttrHashBucket *table = ctxt->attrHash;
8931
45
    xmlAttrHashBucket *bucket;
8932
45
    unsigned hindex;
8933
8934
45
    hindex = hashValue & (size - 1);
8935
45
    bucket = &table[hindex];
8936
8937
54
    while (bucket->index >= 0) {
8938
32
        const xmlChar **atts = &ctxt->atts[bucket->index];
8939
8940
32
        if ((name == atts[0]) && (prefix == atts[1]))
8941
23
            return(bucket->index);
8942
8943
9
        hindex++;
8944
9
        bucket++;
8945
9
        if (hindex >= size) {
8946
0
            hindex = 0;
8947
0
            bucket = table;
8948
0
        }
8949
9
    }
8950
8951
22
    bucket->index = aindex;
8952
8953
22
    return(INT_MAX);
8954
45
}
8955
/**
8956
 * xmlParseStartTag2:
8957
 * @ctxt:  an XML parser context
8958
 *
8959
 * Parse a start tag. Always consumes '<'.
8960
 *
8961
 * This routine is called when running SAX2 parsing
8962
 *
8963
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8964
 *
8965
 * [ WFC: Unique Att Spec ]
8966
 * No attribute name may appear more than once in the same start-tag or
8967
 * empty-element tag.
8968
 *
8969
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8970
 *
8971
 * [ WFC: Unique Att Spec ]
8972
 * No attribute name may appear more than once in the same start-tag or
8973
 * empty-element tag.
8974
 *
8975
 * With namespace:
8976
 *
8977
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8978
 *
8979
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8980
 *
8981
 * Returns the element name parsed
8982
 */
8983
8984
static const xmlChar *
8985
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8986
39.0M
                  const xmlChar **URI, int *nbNsPtr) {
8987
39.0M
    xmlHashedString hlocalname;
8988
39.0M
    xmlHashedString hprefix;
8989
39.0M
    xmlHashedString hattname;
8990
39.0M
    xmlHashedString haprefix;
8991
39.0M
    const xmlChar *localname;
8992
39.0M
    const xmlChar *prefix;
8993
39.0M
    const xmlChar *attname;
8994
39.0M
    const xmlChar *aprefix;
8995
39.0M
    const xmlChar *uri;
8996
39.0M
    xmlChar *attvalue = NULL;
8997
39.0M
    const xmlChar **atts = ctxt->atts;
8998
39.0M
    unsigned attrHashSize = 0;
8999
39.0M
    int maxatts = ctxt->maxatts;
9000
39.0M
    int nratts, nbatts, nbdef;
9001
39.0M
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
9002
39.0M
    int alloc = 0;
9003
39.0M
    int numNsErr = 0;
9004
39.0M
    int numDupErr = 0;
9005
9006
39.0M
    if (RAW != '<') return(NULL);
9007
39.0M
    NEXT1;
9008
9009
39.0M
    nbatts = 0;
9010
39.0M
    nratts = 0;
9011
39.0M
    nbdef = 0;
9012
39.0M
    nbNs = 0;
9013
39.0M
    nbTotalDef = 0;
9014
39.0M
    attval = 0;
9015
9016
39.0M
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
9017
0
        xmlErrMemory(ctxt);
9018
0
        return(NULL);
9019
0
    }
9020
9021
39.0M
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
9022
39.0M
    if (hlocalname.name == NULL) {
9023
14.7k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9024
14.7k
           "StartTag: invalid element name\n");
9025
14.7k
        return(NULL);
9026
14.7k
    }
9027
39.0M
    localname = hlocalname.name;
9028
39.0M
    prefix = hprefix.name;
9029
9030
    /*
9031
     * Now parse the attributes, it ends up with the ending
9032
     *
9033
     * (S Attribute)* S?
9034
     */
9035
39.0M
    SKIP_BLANKS;
9036
39.0M
    GROW;
9037
9038
    /*
9039
     * The ctxt->atts array will be ultimately passed to the SAX callback
9040
     * containing five xmlChar pointers for each attribute:
9041
     *
9042
     * [0] attribute name
9043
     * [1] attribute prefix
9044
     * [2] namespace URI
9045
     * [3] attribute value
9046
     * [4] end of attribute value
9047
     *
9048
     * To save memory, we reuse this array temporarily and store integers
9049
     * in these pointer variables.
9050
     *
9051
     * [0] attribute name
9052
     * [1] attribute prefix
9053
     * [2] hash value of attribute prefix, and later namespace index
9054
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
9055
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
9056
     *
9057
     * The ctxt->attallocs array contains an additional unsigned int for
9058
     * each attribute, containing the hash value of the attribute name
9059
     * and the alloc flag in bit 31.
9060
     */
9061
9062
52.4M
    while (((RAW != '>') &&
9063
27.6M
     ((RAW != '/') || (NXT(1) != '>')) &&
9064
24.9M
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
9065
24.9M
  int len = -1;
9066
9067
24.9M
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
9068
24.9M
                                          &haprefix, &attvalue, &len,
9069
24.9M
                                          &alloc);
9070
24.9M
        if (hattname.name == NULL)
9071
53.7k
      break;
9072
24.8M
        if (attvalue == NULL)
9073
26.4k
            goto next_attr;
9074
24.8M
        attname = hattname.name;
9075
24.8M
        aprefix = haprefix.name;
9076
24.8M
  if (len < 0) len = xmlStrlen(attvalue);
9077
9078
24.8M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9079
369k
            xmlHashedString huri;
9080
369k
            xmlURIPtr parsedUri;
9081
9082
369k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9083
369k
            uri = huri.name;
9084
369k
            if (uri == NULL) {
9085
0
                xmlErrMemory(ctxt);
9086
0
                goto next_attr;
9087
0
            }
9088
369k
            if (*uri != 0) {
9089
365k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9090
0
                    xmlErrMemory(ctxt);
9091
0
                    goto next_attr;
9092
0
                }
9093
365k
                if (parsedUri == NULL) {
9094
127k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9095
127k
                             "xmlns: '%s' is not a valid URI\n",
9096
127k
                                       uri, NULL, NULL);
9097
237k
                } else {
9098
237k
                    if (parsedUri->scheme == NULL) {
9099
80.3k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9100
80.3k
                                  "xmlns: URI %s is not absolute\n",
9101
80.3k
                                  uri, NULL, NULL);
9102
80.3k
                    }
9103
237k
                    xmlFreeURI(parsedUri);
9104
237k
                }
9105
365k
                if (uri == ctxt->str_xml_ns) {
9106
126
                    if (attname != ctxt->str_xml) {
9107
126
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9108
126
                     "xml namespace URI cannot be the default namespace\n",
9109
126
                                 NULL, NULL, NULL);
9110
126
                    }
9111
126
                    goto next_attr;
9112
126
                }
9113
364k
                if ((len == 29) &&
9114
8.27k
                    (xmlStrEqual(uri,
9115
8.27k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9116
298
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9117
298
                         "reuse of the xmlns namespace name is forbidden\n",
9118
298
                             NULL, NULL, NULL);
9119
298
                    goto next_attr;
9120
298
                }
9121
364k
            }
9122
9123
369k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9124
244k
                nbNs++;
9125
24.4M
        } else if (aprefix == ctxt->str_xmlns) {
9126
1.01M
            xmlHashedString huri;
9127
1.01M
            xmlURIPtr parsedUri;
9128
9129
1.01M
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9130
1.01M
            uri = huri.name;
9131
1.01M
            if (uri == NULL) {
9132
0
                xmlErrMemory(ctxt);
9133
0
                goto next_attr;
9134
0
            }
9135
9136
1.01M
            if (attname == ctxt->str_xml) {
9137
274
                if (uri != ctxt->str_xml_ns) {
9138
94
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9139
94
                             "xml namespace prefix mapped to wrong URI\n",
9140
94
                             NULL, NULL, NULL);
9141
94
                }
9142
                /*
9143
                 * Do not keep a namespace definition node
9144
                 */
9145
274
                goto next_attr;
9146
274
            }
9147
1.01M
            if (uri == ctxt->str_xml_ns) {
9148
28
                if (attname != ctxt->str_xml) {
9149
28
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9150
28
                             "xml namespace URI mapped to wrong prefix\n",
9151
28
                             NULL, NULL, NULL);
9152
28
                }
9153
28
                goto next_attr;
9154
28
            }
9155
1.01M
            if (attname == ctxt->str_xmlns) {
9156
262
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9157
262
                         "redefinition of the xmlns prefix is forbidden\n",
9158
262
                         NULL, NULL, NULL);
9159
262
                goto next_attr;
9160
262
            }
9161
1.01M
            if ((len == 29) &&
9162
20.6k
                (xmlStrEqual(uri,
9163
20.6k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9164
80
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9165
80
                         "reuse of the xmlns namespace name is forbidden\n",
9166
80
                         NULL, NULL, NULL);
9167
80
                goto next_attr;
9168
80
            }
9169
1.01M
            if ((uri == NULL) || (uri[0] == 0)) {
9170
401
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9171
401
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9172
401
                              attname, NULL, NULL);
9173
401
                goto next_attr;
9174
1.01M
            } else {
9175
1.01M
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9176
0
                    xmlErrMemory(ctxt);
9177
0
                    goto next_attr;
9178
0
                }
9179
1.01M
                if (parsedUri == NULL) {
9180
112k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9181
112k
                         "xmlns:%s: '%s' is not a valid URI\n",
9182
112k
                                       attname, uri, NULL);
9183
903k
                } else {
9184
903k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9185
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9186
0
                                  "xmlns:%s: URI %s is not absolute\n",
9187
0
                                  attname, uri, NULL);
9188
0
                    }
9189
903k
                    xmlFreeURI(parsedUri);
9190
903k
                }
9191
1.01M
            }
9192
9193
1.01M
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9194
952k
                nbNs++;
9195
23.4M
        } else {
9196
            /*
9197
             * Populate attributes array, see above for repurposing
9198
             * of xmlChar pointers.
9199
             */
9200
23.4M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9201
652k
                int res = xmlCtxtGrowAttrs(ctxt);
9202
9203
652k
                maxatts = ctxt->maxatts;
9204
652k
                atts = ctxt->atts;
9205
9206
652k
                if (res < 0)
9207
0
                    goto next_attr;
9208
652k
            }
9209
23.4M
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9210
23.4M
                                        ((unsigned) alloc << 31);
9211
23.4M
            atts[nbatts++] = attname;
9212
23.4M
            atts[nbatts++] = aprefix;
9213
23.4M
            atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9214
23.4M
            if (alloc) {
9215
112k
                atts[nbatts++] = attvalue;
9216
112k
                attvalue += len;
9217
112k
                atts[nbatts++] = attvalue;
9218
23.3M
            } else {
9219
                /*
9220
                 * attvalue points into the input buffer which can be
9221
                 * reallocated. Store differences to input->base instead.
9222
                 * The pointers will be reconstructed later.
9223
                 */
9224
23.3M
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9225
23.3M
                attvalue += len;
9226
23.3M
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9227
23.3M
            }
9228
            /*
9229
             * tag if some deallocation is needed
9230
             */
9231
23.4M
            if (alloc != 0) attval = 1;
9232
23.4M
            attvalue = NULL; /* moved into atts */
9233
23.4M
        }
9234
9235
24.8M
next_attr:
9236
24.8M
        if ((attvalue != NULL) && (alloc != 0)) {
9237
113k
            xmlFree(attvalue);
9238
113k
            attvalue = NULL;
9239
113k
        }
9240
9241
24.8M
  GROW
9242
24.8M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9243
11.4M
      break;
9244
13.4M
  if (SKIP_BLANKS == 0) {
9245
35.0k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9246
35.0k
         "attributes construct error\n");
9247
35.0k
      break;
9248
35.0k
  }
9249
13.4M
        GROW;
9250
13.4M
    }
9251
9252
    /*
9253
     * Namespaces from default attributes
9254
     */
9255
39.0M
    if (ctxt->attsDefault != NULL) {
9256
174k
        xmlDefAttrsPtr defaults;
9257
9258
174k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9259
174k
  if (defaults != NULL) {
9260
1.06M
      for (i = 0; i < defaults->nbAttrs; i++) {
9261
952k
                xmlDefAttr *attr = &defaults->attrs[i];
9262
9263
952k
          attname = attr->name.name;
9264
952k
    aprefix = attr->prefix.name;
9265
9266
952k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9267
36.3k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9268
9269
36.3k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9270
31.8k
                        nbNs++;
9271
916k
    } else if (aprefix == ctxt->str_xmlns) {
9272
398k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9273
9274
398k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9275
398k
                                      NULL, 1) > 0)
9276
398k
                        nbNs++;
9277
517k
    } else {
9278
517k
                    if (nratts + nbTotalDef >= XML_MAX_ATTRS) {
9279
0
                        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
9280
0
                                    "Maximum number of attributes exceeded");
9281
0
                        break;
9282
0
                    }
9283
517k
                    nbTotalDef += 1;
9284
517k
                }
9285
952k
      }
9286
109k
  }
9287
174k
    }
9288
9289
    /*
9290
     * Resolve attribute namespaces
9291
     */
9292
62.5M
    for (i = 0; i < nbatts; i += 5) {
9293
23.4M
        attname = atts[i];
9294
23.4M
        aprefix = atts[i+1];
9295
9296
        /*
9297
  * The default namespace does not apply to attribute names.
9298
  */
9299
23.4M
  if (aprefix == NULL) {
9300
15.0M
            nsIndex = NS_INDEX_EMPTY;
9301
15.0M
        } else if (aprefix == ctxt->str_xml) {
9302
112k
            nsIndex = NS_INDEX_XML;
9303
8.31M
        } else {
9304
8.31M
            haprefix.name = aprefix;
9305
8.31M
            haprefix.hashValue = (size_t) atts[i+2];
9306
8.31M
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9307
9308
8.31M
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9309
436k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9310
436k
        "Namespace prefix %s for %s on %s is not defined\n",
9311
436k
        aprefix, attname, localname);
9312
436k
                nsIndex = NS_INDEX_EMPTY;
9313
436k
            }
9314
8.31M
        }
9315
9316
23.4M
        atts[i+2] = XML_INT_TO_PTR(nsIndex);
9317
23.4M
    }
9318
9319
    /*
9320
     * Maximum number of attributes including default attributes.
9321
     */
9322
39.0M
    maxAtts = nratts + nbTotalDef;
9323
9324
    /*
9325
     * Verify that attribute names are unique.
9326
     */
9327
39.0M
    if (maxAtts > 1) {
9328
6.16M
        attrHashSize = 4;
9329
9.71M
        while (attrHashSize / 2 < (unsigned) maxAtts)
9330
3.54M
            attrHashSize *= 2;
9331
9332
6.16M
        if (attrHashSize > ctxt->attrHashMax) {
9333
265k
            xmlAttrHashBucket *tmp;
9334
9335
265k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9336
265k
            if (tmp == NULL) {
9337
0
                xmlErrMemory(ctxt);
9338
0
                goto done;
9339
0
            }
9340
9341
265k
            ctxt->attrHash = tmp;
9342
265k
            ctxt->attrHashMax = attrHashSize;
9343
265k
        }
9344
9345
6.16M
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9346
9347
24.2M
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9348
18.1M
            const xmlChar *nsuri;
9349
18.1M
            unsigned hashValue, nameHashValue, uriHashValue;
9350
18.1M
            int res;
9351
9352
18.1M
            attname = atts[i];
9353
18.1M
            aprefix = atts[i+1];
9354
18.1M
            nsIndex = XML_PTR_TO_INT(atts[i+2]);
9355
            /* Hash values always have bit 31 set, see dict.c */
9356
18.1M
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9357
9358
18.1M
            if (nsIndex == NS_INDEX_EMPTY) {
9359
                /*
9360
                 * Prefix with empty namespace means an undeclared
9361
                 * prefix which was already reported above.
9362
                 */
9363
12.2M
                if (aprefix != NULL)
9364
350k
                    continue;
9365
11.8M
                nsuri = NULL;
9366
11.8M
                uriHashValue = URI_HASH_EMPTY;
9367
11.8M
            } else if (nsIndex == NS_INDEX_XML) {
9368
2.54k
                nsuri = ctxt->str_xml_ns;
9369
2.54k
                uriHashValue = URI_HASH_XML;
9370
5.91M
            } else {
9371
5.91M
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9372
5.91M
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9373
5.91M
            }
9374
9375
17.7M
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9376
17.7M
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9377
17.7M
                                    hashValue, i);
9378
17.7M
            if (res < 0)
9379
0
                continue;
9380
9381
            /*
9382
             * [ WFC: Unique Att Spec ]
9383
             * No attribute name may appear more than once in the same
9384
             * start-tag or empty-element tag.
9385
             * As extended by the Namespace in XML REC.
9386
             */
9387
17.7M
            if (res < INT_MAX) {
9388
57.3k
                if (aprefix == atts[res+1]) {
9389
57.0k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9390
57.0k
                    numDupErr += 1;
9391
57.0k
                } else {
9392
271
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9393
271
                             "Namespaced Attribute %s in '%s' redefined\n",
9394
271
                             attname, nsuri, NULL);
9395
271
                    numNsErr += 1;
9396
271
                }
9397
57.3k
            }
9398
17.7M
        }
9399
6.16M
    }
9400
9401
    /*
9402
     * Default attributes
9403
     */
9404
39.0M
    if (ctxt->attsDefault != NULL) {
9405
174k
        xmlDefAttrsPtr defaults;
9406
9407
174k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9408
174k
  if (defaults != NULL) {
9409
1.06M
      for (i = 0; i < defaults->nbAttrs; i++) {
9410
952k
                xmlDefAttr *attr = &defaults->attrs[i];
9411
952k
                const xmlChar *nsuri = NULL;
9412
952k
                unsigned hashValue, uriHashValue = 0;
9413
952k
                int res;
9414
9415
952k
          attname = attr->name.name;
9416
952k
    aprefix = attr->prefix.name;
9417
9418
952k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9419
36.3k
                    continue;
9420
916k
    if (aprefix == ctxt->str_xmlns)
9421
398k
                    continue;
9422
9423
517k
                if (aprefix == NULL) {
9424
292k
                    nsIndex = NS_INDEX_EMPTY;
9425
292k
                    nsuri = NULL;
9426
292k
                    uriHashValue = URI_HASH_EMPTY;
9427
292k
                } else if (aprefix == ctxt->str_xml) {
9428
53.1k
                    nsIndex = NS_INDEX_XML;
9429
53.1k
                    nsuri = ctxt->str_xml_ns;
9430
53.1k
                    uriHashValue = URI_HASH_XML;
9431
171k
                } else {
9432
171k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9433
171k
                    if ((nsIndex == INT_MAX) ||
9434
170k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9435
170k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9436
170k
                                 "Namespace prefix %s for %s on %s is not "
9437
170k
                                 "defined\n",
9438
170k
                                 aprefix, attname, localname);
9439
170k
                        nsIndex = NS_INDEX_EMPTY;
9440
170k
                        nsuri = NULL;
9441
170k
                        uriHashValue = URI_HASH_EMPTY;
9442
170k
                    } else {
9443
1.33k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9444
1.33k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9445
1.33k
                    }
9446
171k
                }
9447
9448
                /*
9449
                 * Check whether the attribute exists
9450
                 */
9451
517k
                if (maxAtts > 1) {
9452
495k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9453
495k
                                                   uriHashValue);
9454
495k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9455
495k
                                            hashValue, nbatts);
9456
495k
                    if (res < 0)
9457
0
                        continue;
9458
495k
                    if (res < INT_MAX) {
9459
12.6k
                        if (aprefix == atts[res+1])
9460
1.00k
                            continue;
9461
11.6k
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9462
11.6k
                                 "Namespaced Attribute %s in '%s' redefined\n",
9463
11.6k
                                 attname, nsuri, NULL);
9464
11.6k
                    }
9465
495k
                }
9466
9467
516k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9468
9469
516k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9470
8.38k
                    res = xmlCtxtGrowAttrs(ctxt);
9471
9472
8.38k
                    maxatts = ctxt->maxatts;
9473
8.38k
                    atts = ctxt->atts;
9474
9475
8.38k
                    if (res < 0) {
9476
0
                        localname = NULL;
9477
0
                        goto done;
9478
0
                    }
9479
8.38k
                }
9480
9481
516k
                atts[nbatts++] = attname;
9482
516k
                atts[nbatts++] = aprefix;
9483
516k
                atts[nbatts++] = XML_INT_TO_PTR(nsIndex);
9484
516k
                atts[nbatts++] = attr->value.name;
9485
516k
                atts[nbatts++] = attr->valueEnd;
9486
516k
                if ((ctxt->standalone == 1) && (attr->external != 0)) {
9487
0
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9488
0
                            "standalone: attribute %s on %s defaulted "
9489
0
                            "from external subset\n",
9490
0
                            attname, localname);
9491
0
                }
9492
516k
                nbdef++;
9493
516k
      }
9494
109k
  }
9495
174k
    }
9496
9497
    /*
9498
     * Using a single hash table for nsUri/localName pairs cannot
9499
     * detect duplicate QNames reliably. The following example will
9500
     * only result in two namespace errors.
9501
     *
9502
     * <doc xmlns:a="a" xmlns:b="a">
9503
     *   <elem a:a="" b:a="" b:a=""/>
9504
     * </doc>
9505
     *
9506
     * If we saw more than one namespace error but no duplicate QNames
9507
     * were found, we have to scan for duplicate QNames.
9508
     */
9509
39.0M
    if ((numDupErr == 0) && (numNsErr > 1)) {
9510
10
        memset(ctxt->attrHash, -1,
9511
10
               attrHashSize * sizeof(ctxt->attrHash[0]));
9512
9513
66
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9514
56
            unsigned hashValue, nameHashValue, prefixHashValue;
9515
56
            int res;
9516
9517
56
            aprefix = atts[i+1];
9518
56
            if (aprefix == NULL)
9519
11
                continue;
9520
9521
45
            attname = atts[i];
9522
            /* Hash values always have bit 31 set, see dict.c */
9523
45
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9524
45
            prefixHashValue = xmlDictComputeHash(ctxt->dict, aprefix);
9525
9526
45
            hashValue = xmlDictCombineHash(nameHashValue, prefixHashValue);
9527
45
            res = xmlAttrHashInsertQName(ctxt, attrHashSize, attname,
9528
45
                                         aprefix, hashValue, i);
9529
45
            if (res < INT_MAX)
9530
23
                xmlErrAttributeDup(ctxt, aprefix, attname);
9531
45
        }
9532
10
    }
9533
9534
    /*
9535
     * Reconstruct attribute pointers
9536
     */
9537
63.0M
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9538
        /* namespace URI */
9539
23.9M
        nsIndex = XML_PTR_TO_INT(atts[i+2]);
9540
23.9M
        if (nsIndex == INT_MAX)
9541
15.9M
            atts[i+2] = NULL;
9542
8.04M
        else if (nsIndex == INT_MAX - 1)
9543
165k
            atts[i+2] = ctxt->str_xml_ns;
9544
7.87M
        else
9545
7.87M
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9546
9547
23.9M
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9548
23.3M
            atts[i+3] = BASE_PTR + XML_PTR_TO_INT(atts[i+3]);  /* value */
9549
23.3M
            atts[i+4] = BASE_PTR + XML_PTR_TO_INT(atts[i+4]);  /* valuend */
9550
23.3M
        }
9551
23.9M
    }
9552
9553
39.0M
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9554
39.0M
    if ((prefix != NULL) && (uri == NULL)) {
9555
651k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9556
651k
           "Namespace prefix %s on %s is not defined\n",
9557
651k
     prefix, localname, NULL);
9558
651k
    }
9559
39.0M
    *pref = prefix;
9560
39.0M
    *URI = uri;
9561
9562
    /*
9563
     * SAX callback
9564
     */
9565
39.0M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9566
39.0M
  (!ctxt->disableSAX)) {
9567
38.9M
  if (nbNs > 0)
9568
529k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9569
529k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9570
529k
        nbatts / 5, nbdef, atts);
9571
38.4M
  else
9572
38.4M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9573
38.4M
                          0, NULL, nbatts / 5, nbdef, atts);
9574
38.9M
    }
9575
9576
39.0M
done:
9577
    /*
9578
     * Free allocated attribute values
9579
     */
9580
39.0M
    if (attval != 0) {
9581
433k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9582
331k
      if (ctxt->attallocs[j] & 0x80000000)
9583
112k
          xmlFree((xmlChar *) atts[i+3]);
9584
101k
    }
9585
9586
39.0M
    *nbNsPtr = nbNs;
9587
39.0M
    return(localname);
9588
39.0M
}
9589
9590
/**
9591
 * xmlParseEndTag2:
9592
 * @ctxt:  an XML parser context
9593
 * @line:  line of the start tag
9594
 * @nsNr:  number of namespaces on the start tag
9595
 *
9596
 * Parse an end tag. Always consumes '</'.
9597
 *
9598
 * [42] ETag ::= '</' Name S? '>'
9599
 *
9600
 * With namespace
9601
 *
9602
 * [NS 9] ETag ::= '</' QName S? '>'
9603
 */
9604
9605
static void
9606
9.20M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9607
9.20M
    const xmlChar *name;
9608
9609
9.20M
    GROW;
9610
9.20M
    if ((RAW != '<') || (NXT(1) != '/')) {
9611
4
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9612
4
  return;
9613
4
    }
9614
9.20M
    SKIP(2);
9615
9616
9.20M
    if (tag->prefix == NULL)
9617
2.97M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9618
6.23M
    else
9619
6.23M
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9620
9621
    /*
9622
     * We should definitely be at the ending "S? '>'" part
9623
     */
9624
9.20M
    GROW;
9625
9.20M
    SKIP_BLANKS;
9626
9.20M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9627
8.38k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9628
8.38k
    } else
9629
9.20M
  NEXT1;
9630
9631
    /*
9632
     * [ WFC: Element Type Match ]
9633
     * The Name in an element's end-tag must match the element type in the
9634
     * start-tag.
9635
     *
9636
     */
9637
9.20M
    if (name != (xmlChar*)1) {
9638
17.6k
        if (name == NULL) name = BAD_CAST "unparsable";
9639
17.6k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9640
17.6k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9641
17.6k
                    ctxt->name, tag->line, name);
9642
17.6k
    }
9643
9644
    /*
9645
     * SAX: End of Tag
9646
     */
9647
9.20M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9648
9.20M
  (!ctxt->disableSAX))
9649
9.18M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9650
9.18M
                                tag->URI);
9651
9652
9.20M
    spacePop(ctxt);
9653
9.20M
    if (tag->nsNr != 0)
9654
195k
  xmlParserNsPop(ctxt, tag->nsNr);
9655
9.20M
}
9656
9657
/**
9658
 * xmlParseCDSect:
9659
 * @ctxt:  an XML parser context
9660
 *
9661
 * DEPRECATED: Internal function, don't use.
9662
 *
9663
 * Parse escaped pure raw content. Always consumes '<!['.
9664
 *
9665
 * [18] CDSect ::= CDStart CData CDEnd
9666
 *
9667
 * [19] CDStart ::= '<![CDATA['
9668
 *
9669
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9670
 *
9671
 * [21] CDEnd ::= ']]>'
9672
 */
9673
void
9674
20.1k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9675
20.1k
    xmlChar *buf = NULL;
9676
20.1k
    int len = 0;
9677
20.1k
    int size = XML_PARSER_BUFFER_SIZE;
9678
20.1k
    int r, rl;
9679
20.1k
    int s, sl;
9680
20.1k
    int cur, l;
9681
20.1k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9682
20.1k
                    XML_MAX_HUGE_LENGTH :
9683
20.1k
                    XML_MAX_TEXT_LENGTH;
9684
9685
20.1k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9686
0
        return;
9687
20.1k
    SKIP(3);
9688
9689
20.1k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9690
0
        return;
9691
20.1k
    SKIP(6);
9692
9693
20.1k
    r = xmlCurrentCharRecover(ctxt, &rl);
9694
20.1k
    if (!IS_CHAR(r)) {
9695
19
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9696
19
        goto out;
9697
19
    }
9698
20.1k
    NEXTL(rl);
9699
20.1k
    s = xmlCurrentCharRecover(ctxt, &sl);
9700
20.1k
    if (!IS_CHAR(s)) {
9701
23
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9702
23
        goto out;
9703
23
    }
9704
20.1k
    NEXTL(sl);
9705
20.1k
    cur = xmlCurrentCharRecover(ctxt, &l);
9706
20.1k
    buf = xmlMalloc(size);
9707
20.1k
    if (buf == NULL) {
9708
0
  xmlErrMemory(ctxt);
9709
0
        goto out;
9710
0
    }
9711
64.0M
    while (IS_CHAR(cur) &&
9712
64.0M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9713
64.0M
  if (len + 5 >= size) {
9714
82.4k
      xmlChar *tmp;
9715
82.4k
            int newSize;
9716
9717
82.4k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9718
82.4k
            if (newSize < 0) {
9719
0
                xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9720
0
                               "CData section too big found\n");
9721
0
                goto out;
9722
0
            }
9723
82.4k
      tmp = xmlRealloc(buf, newSize);
9724
82.4k
      if (tmp == NULL) {
9725
0
    xmlErrMemory(ctxt);
9726
0
                goto out;
9727
0
      }
9728
82.4k
      buf = tmp;
9729
82.4k
      size = newSize;
9730
82.4k
  }
9731
64.0M
  COPY_BUF(buf, len, r);
9732
64.0M
  r = s;
9733
64.0M
  rl = sl;
9734
64.0M
  s = cur;
9735
64.0M
  sl = l;
9736
64.0M
  NEXTL(l);
9737
64.0M
  cur = xmlCurrentCharRecover(ctxt, &l);
9738
64.0M
    }
9739
20.1k
    buf[len] = 0;
9740
20.1k
    if (cur != '>') {
9741
401
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9742
401
                       "CData section not finished\n%.50s\n", buf);
9743
401
        goto out;
9744
401
    }
9745
19.7k
    NEXTL(l);
9746
9747
    /*
9748
     * OK the buffer is to be consumed as cdata.
9749
     */
9750
19.7k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9751
19.6k
        if ((ctxt->sax->cdataBlock != NULL) &&
9752
0
            ((ctxt->options & XML_PARSE_NOCDATA) == 0)) {
9753
0
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9754
19.6k
        } else if (ctxt->sax->characters != NULL) {
9755
19.6k
            ctxt->sax->characters(ctxt->userData, buf, len);
9756
19.6k
        }
9757
19.6k
    }
9758
9759
20.1k
out:
9760
20.1k
    xmlFree(buf);
9761
20.1k
}
9762
9763
/**
9764
 * xmlParseContentInternal:
9765
 * @ctxt:  an XML parser context
9766
 *
9767
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9768
 * unexpected EOF to the caller.
9769
 */
9770
9771
static void
9772
6.20k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9773
6.20k
    int oldNameNr = ctxt->nameNr;
9774
6.20k
    int oldSpaceNr = ctxt->spaceNr;
9775
6.20k
    int oldNodeNr = ctxt->nodeNr;
9776
9777
6.20k
    GROW;
9778
1.94M
    while ((ctxt->input->cur < ctxt->input->end) &&
9779
1.94M
     (PARSER_STOPPED(ctxt) == 0)) {
9780
1.94M
  const xmlChar *cur = ctxt->input->cur;
9781
9782
  /*
9783
   * First case : a Processing Instruction.
9784
   */
9785
1.94M
  if ((*cur == '<') && (cur[1] == '?')) {
9786
136
      xmlParsePI(ctxt);
9787
136
  }
9788
9789
  /*
9790
   * Second case : a CDSection
9791
   */
9792
  /* 2.6.0 test was *cur not RAW */
9793
1.94M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9794
0
      xmlParseCDSect(ctxt);
9795
0
  }
9796
9797
  /*
9798
   * Third case :  a comment
9799
   */
9800
1.94M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9801
0
     (NXT(2) == '-') && (NXT(3) == '-')) {
9802
0
      xmlParseComment(ctxt);
9803
0
  }
9804
9805
  /*
9806
   * Fourth case :  a sub-element.
9807
   */
9808
1.94M
  else if (*cur == '<') {
9809
1.55M
            if (NXT(1) == '/') {
9810
449k
                if (ctxt->nameNr <= oldNameNr)
9811
5.99k
                    break;
9812
443k
          xmlParseElementEnd(ctxt);
9813
1.10M
            } else {
9814
1.10M
          xmlParseElementStart(ctxt);
9815
1.10M
            }
9816
1.55M
  }
9817
9818
  /*
9819
   * Fifth case : a reference. If if has not been resolved,
9820
   *    parsing returns it's Name, create the node
9821
   */
9822
9823
384k
  else if (*cur == '&') {
9824
14
      xmlParseReference(ctxt);
9825
14
  }
9826
9827
  /*
9828
   * Last case, text. Note that References are handled directly.
9829
   */
9830
384k
  else {
9831
384k
      xmlParseCharDataInternal(ctxt, 0);
9832
384k
  }
9833
9834
1.93M
  SHRINK;
9835
1.93M
  GROW;
9836
1.93M
    }
9837
9838
6.20k
    if ((ctxt->nameNr > oldNameNr) &&
9839
207
        (ctxt->input->cur >= ctxt->input->end) &&
9840
203
        (ctxt->wellFormed)) {
9841
0
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9842
0
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9843
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9844
0
                "Premature end of data in tag %s line %d\n",
9845
0
                name, line, NULL);
9846
0
    }
9847
9848
    /*
9849
     * Clean up in error case
9850
     */
9851
9852
6.28k
    while (ctxt->nodeNr > oldNodeNr)
9853
76
        nodePop(ctxt);
9854
9855
19.8k
    while (ctxt->nameNr > oldNameNr) {
9856
13.6k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9857
9858
13.6k
        if (tag->nsNr != 0)
9859
24
            xmlParserNsPop(ctxt, tag->nsNr);
9860
9861
13.6k
        namePop(ctxt);
9862
13.6k
    }
9863
9864
19.8k
    while (ctxt->spaceNr > oldSpaceNr)
9865
13.6k
        spacePop(ctxt);
9866
6.20k
}
9867
9868
/**
9869
 * xmlParseContent:
9870
 * @ctxt:  an XML parser context
9871
 *
9872
 * Parse XML element content. This is useful if you're only interested
9873
 * in custom SAX callbacks. If you want a node list, use
9874
 * xmlCtxtParseContent.
9875
 */
9876
void
9877
0
xmlParseContent(xmlParserCtxtPtr ctxt) {
9878
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9879
0
        return;
9880
9881
0
    xmlCtxtInitializeLate(ctxt);
9882
9883
0
    xmlParseContentInternal(ctxt);
9884
9885
0
    xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
9886
0
}
9887
9888
/**
9889
 * xmlParseElement:
9890
 * @ctxt:  an XML parser context
9891
 *
9892
 * DEPRECATED: Internal function, don't use.
9893
 *
9894
 * parse an XML element
9895
 *
9896
 * [39] element ::= EmptyElemTag | STag content ETag
9897
 *
9898
 * [ WFC: Element Type Match ]
9899
 * The Name in an element's end-tag must match the element type in the
9900
 * start-tag.
9901
 *
9902
 */
9903
9904
void
9905
6.41k
xmlParseElement(xmlParserCtxtPtr ctxt) {
9906
6.41k
    if (xmlParseElementStart(ctxt) != 0)
9907
210
        return;
9908
9909
6.20k
    xmlParseContentInternal(ctxt);
9910
9911
6.20k
    if (ctxt->input->cur >= ctxt->input->end) {
9912
209
        if (ctxt->wellFormed) {
9913
0
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9914
0
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9915
0
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9916
0
                    "Premature end of data in tag %s line %d\n",
9917
0
                    name, line, NULL);
9918
0
        }
9919
209
        return;
9920
209
    }
9921
9922
6.00k
    xmlParseElementEnd(ctxt);
9923
6.00k
}
9924
9925
/**
9926
 * xmlParseElementStart:
9927
 * @ctxt:  an XML parser context
9928
 *
9929
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9930
 * opening tag was parsed, 1 if an empty element was parsed.
9931
 *
9932
 * Always consumes '<'.
9933
 */
9934
static int
9935
1.11M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9936
1.11M
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9937
1.11M
    const xmlChar *name;
9938
1.11M
    const xmlChar *prefix = NULL;
9939
1.11M
    const xmlChar *URI = NULL;
9940
1.11M
    xmlParserNodeInfo node_info;
9941
1.11M
    int line;
9942
1.11M
    xmlNodePtr cur;
9943
1.11M
    int nbNs = 0;
9944
9945
1.11M
    if (ctxt->nameNr > maxDepth) {
9946
4
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9947
4
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9948
4
                ctxt->nameNr);
9949
4
  xmlHaltParser(ctxt);
9950
4
  return(-1);
9951
4
    }
9952
9953
    /* Capture start position */
9954
1.11M
    if (ctxt->record_info) {
9955
0
        node_info.begin_pos = ctxt->input->consumed +
9956
0
                          (CUR_PTR - ctxt->input->base);
9957
0
  node_info.begin_line = ctxt->input->line;
9958
0
    }
9959
9960
1.11M
    if (ctxt->spaceNr == 0)
9961
6.41k
  spacePush(ctxt, -1);
9962
1.10M
    else if (*ctxt->space == -2)
9963
0
  spacePush(ctxt, -1);
9964
1.10M
    else
9965
1.10M
  spacePush(ctxt, *ctxt->space);
9966
9967
1.11M
    line = ctxt->input->line;
9968
1.11M
#ifdef LIBXML_SAX1_ENABLED
9969
1.11M
    if (ctxt->sax2)
9970
1.11M
#endif /* LIBXML_SAX1_ENABLED */
9971
1.11M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9972
0
#ifdef LIBXML_SAX1_ENABLED
9973
0
    else
9974
0
  name = xmlParseStartTag(ctxt);
9975
1.11M
#endif /* LIBXML_SAX1_ENABLED */
9976
1.11M
    if (name == NULL) {
9977
9.61k
  spacePop(ctxt);
9978
9.61k
        return(-1);
9979
9.61k
    }
9980
1.10M
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9981
1.10M
    cur = ctxt->node;
9982
9983
1.10M
#ifdef LIBXML_VALID_ENABLED
9984
    /*
9985
     * [ VC: Root Element Type ]
9986
     * The Name in the document type declaration must match the element
9987
     * type of the root element.
9988
     */
9989
1.10M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9990
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9991
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9992
1.10M
#endif /* LIBXML_VALID_ENABLED */
9993
9994
    /*
9995
     * Check for an Empty Element.
9996
     */
9997
1.10M
    if ((RAW == '/') && (NXT(1) == '>')) {
9998
590k
        SKIP(2);
9999
590k
  if (ctxt->sax2) {
10000
590k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10001
590k
    (!ctxt->disableSAX))
10002
575k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10003
590k
#ifdef LIBXML_SAX1_ENABLED
10004
590k
  } else {
10005
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10006
0
    (!ctxt->disableSAX))
10007
0
    ctxt->sax->endElement(ctxt->userData, name);
10008
0
#endif /* LIBXML_SAX1_ENABLED */
10009
0
  }
10010
590k
  namePop(ctxt);
10011
590k
  spacePop(ctxt);
10012
590k
  if (nbNs > 0)
10013
1.18k
      xmlParserNsPop(ctxt, nbNs);
10014
590k
  if (cur != NULL && ctxt->record_info) {
10015
0
            node_info.node = cur;
10016
0
            node_info.end_pos = ctxt->input->consumed +
10017
0
                                (CUR_PTR - ctxt->input->base);
10018
0
            node_info.end_line = ctxt->input->line;
10019
0
            xmlParserAddNodeInfo(ctxt, &node_info);
10020
0
  }
10021
590k
  return(1);
10022
590k
    }
10023
514k
    if (RAW == '>') {
10024
463k
        NEXT1;
10025
463k
        if (cur != NULL && ctxt->record_info) {
10026
0
            node_info.node = cur;
10027
0
            node_info.end_pos = 0;
10028
0
            node_info.end_line = 0;
10029
0
            xmlParserAddNodeInfo(ctxt, &node_info);
10030
0
        }
10031
463k
    } else {
10032
50.6k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10033
50.6k
         "Couldn't find end of Start Tag %s line %d\n",
10034
50.6k
                    name, line, NULL);
10035
10036
  /*
10037
   * end of parsing of this node.
10038
   */
10039
50.6k
  nodePop(ctxt);
10040
50.6k
  namePop(ctxt);
10041
50.6k
  spacePop(ctxt);
10042
50.6k
  if (nbNs > 0)
10043
168
      xmlParserNsPop(ctxt, nbNs);
10044
50.6k
  return(-1);
10045
50.6k
    }
10046
10047
463k
    return(0);
10048
514k
}
10049
10050
/**
10051
 * xmlParseElementEnd:
10052
 * @ctxt:  an XML parser context
10053
 *
10054
 * Parse the end of an XML element. Always consumes '</'.
10055
 */
10056
static void
10057
449k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10058
449k
    xmlNodePtr cur = ctxt->node;
10059
10060
449k
    if (ctxt->nameNr <= 0) {
10061
0
        if ((RAW == '<') && (NXT(1) == '/'))
10062
0
            SKIP(2);
10063
0
        return;
10064
0
    }
10065
10066
    /*
10067
     * parse the end of tag: '</' should be here.
10068
     */
10069
449k
    if (ctxt->sax2) {
10070
449k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10071
449k
  namePop(ctxt);
10072
449k
    }
10073
0
#ifdef LIBXML_SAX1_ENABLED
10074
0
    else
10075
0
  xmlParseEndTag1(ctxt, 0);
10076
449k
#endif /* LIBXML_SAX1_ENABLED */
10077
10078
    /*
10079
     * Capture end position
10080
     */
10081
449k
    if (cur != NULL && ctxt->record_info) {
10082
0
        xmlParserNodeInfoPtr node_info;
10083
10084
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10085
0
        if (node_info != NULL) {
10086
0
            node_info->end_pos = ctxt->input->consumed +
10087
0
                                 (CUR_PTR - ctxt->input->base);
10088
0
            node_info->end_line = ctxt->input->line;
10089
0
        }
10090
0
    }
10091
449k
}
10092
10093
/**
10094
 * xmlParseVersionNum:
10095
 * @ctxt:  an XML parser context
10096
 *
10097
 * DEPRECATED: Internal function, don't use.
10098
 *
10099
 * parse the XML version value.
10100
 *
10101
 * [26] VersionNum ::= '1.' [0-9]+
10102
 *
10103
 * In practice allow [0-9].[0-9]+ at that level
10104
 *
10105
 * Returns the string giving the XML version number, or NULL
10106
 */
10107
xmlChar *
10108
187k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10109
187k
    xmlChar *buf = NULL;
10110
187k
    int len = 0;
10111
187k
    int size = 10;
10112
187k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10113
181k
                    XML_MAX_TEXT_LENGTH :
10114
187k
                    XML_MAX_NAME_LENGTH;
10115
187k
    xmlChar cur;
10116
10117
187k
    buf = xmlMalloc(size);
10118
187k
    if (buf == NULL) {
10119
0
  xmlErrMemory(ctxt);
10120
0
  return(NULL);
10121
0
    }
10122
187k
    cur = CUR;
10123
187k
    if (!((cur >= '0') && (cur <= '9'))) {
10124
125
  xmlFree(buf);
10125
125
  return(NULL);
10126
125
    }
10127
187k
    buf[len++] = cur;
10128
187k
    NEXT;
10129
187k
    cur=CUR;
10130
187k
    if (cur != '.') {
10131
42
  xmlFree(buf);
10132
42
  return(NULL);
10133
42
    }
10134
187k
    buf[len++] = cur;
10135
187k
    NEXT;
10136
187k
    cur=CUR;
10137
383k
    while ((cur >= '0') && (cur <= '9')) {
10138
196k
  if (len + 1 >= size) {
10139
434
      xmlChar *tmp;
10140
434
            int newSize;
10141
10142
434
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10143
434
            if (newSize < 0) {
10144
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "VersionNum");
10145
0
                xmlFree(buf);
10146
0
                return(NULL);
10147
0
            }
10148
434
      tmp = xmlRealloc(buf, newSize);
10149
434
      if (tmp == NULL) {
10150
0
    xmlErrMemory(ctxt);
10151
0
          xmlFree(buf);
10152
0
    return(NULL);
10153
0
      }
10154
434
      buf = tmp;
10155
434
            size = newSize;
10156
434
  }
10157
196k
  buf[len++] = cur;
10158
196k
  NEXT;
10159
196k
  cur=CUR;
10160
196k
    }
10161
187k
    buf[len] = 0;
10162
187k
    return(buf);
10163
187k
}
10164
10165
/**
10166
 * xmlParseVersionInfo:
10167
 * @ctxt:  an XML parser context
10168
 *
10169
 * DEPRECATED: Internal function, don't use.
10170
 *
10171
 * parse the XML version.
10172
 *
10173
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10174
 *
10175
 * [25] Eq ::= S? '=' S?
10176
 *
10177
 * Returns the version string, e.g. "1.0"
10178
 */
10179
10180
xmlChar *
10181
188k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10182
188k
    xmlChar *version = NULL;
10183
10184
188k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10185
187k
  SKIP(7);
10186
187k
  SKIP_BLANKS;
10187
187k
  if (RAW != '=') {
10188
34
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10189
34
      return(NULL);
10190
34
        }
10191
187k
  NEXT;
10192
187k
  SKIP_BLANKS;
10193
187k
  if (RAW == '"') {
10194
187k
      NEXT;
10195
187k
      version = xmlParseVersionNum(ctxt);
10196
187k
      if (RAW != '"') {
10197
175
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10198
175
      } else
10199
187k
          NEXT;
10200
187k
  } else if (RAW == '\''){
10201
468
      NEXT;
10202
468
      version = xmlParseVersionNum(ctxt);
10203
468
      if (RAW != '\'') {
10204
13
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10205
13
      } else
10206
455
          NEXT;
10207
468
  } else {
10208
50
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10209
50
  }
10210
187k
    }
10211
188k
    return(version);
10212
188k
}
10213
10214
/**
10215
 * xmlParseEncName:
10216
 * @ctxt:  an XML parser context
10217
 *
10218
 * DEPRECATED: Internal function, don't use.
10219
 *
10220
 * parse the XML encoding name
10221
 *
10222
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10223
 *
10224
 * Returns the encoding name value or NULL
10225
 */
10226
xmlChar *
10227
157k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10228
157k
    xmlChar *buf = NULL;
10229
157k
    int len = 0;
10230
157k
    int size = 10;
10231
157k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10232
150k
                    XML_MAX_TEXT_LENGTH :
10233
157k
                    XML_MAX_NAME_LENGTH;
10234
157k
    xmlChar cur;
10235
10236
157k
    cur = CUR;
10237
157k
    if (((cur >= 'a') && (cur <= 'z')) ||
10238
156k
        ((cur >= 'A') && (cur <= 'Z'))) {
10239
156k
  buf = xmlMalloc(size);
10240
156k
  if (buf == NULL) {
10241
0
      xmlErrMemory(ctxt);
10242
0
      return(NULL);
10243
0
  }
10244
10245
156k
  buf[len++] = cur;
10246
156k
  NEXT;
10247
156k
  cur = CUR;
10248
802k
  while (((cur >= 'a') && (cur <= 'z')) ||
10249
789k
         ((cur >= 'A') && (cur <= 'Z')) ||
10250
478k
         ((cur >= '0') && (cur <= '9')) ||
10251
317k
         (cur == '.') || (cur == '_') ||
10252
645k
         (cur == '-')) {
10253
645k
      if (len + 1 >= size) {
10254
1.36k
          xmlChar *tmp;
10255
1.36k
                int newSize;
10256
10257
1.36k
                newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10258
1.36k
                if (newSize < 0) {
10259
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10260
0
                    xmlFree(buf);
10261
0
                    return(NULL);
10262
0
                }
10263
1.36k
    tmp = xmlRealloc(buf, newSize);
10264
1.36k
    if (tmp == NULL) {
10265
0
        xmlErrMemory(ctxt);
10266
0
        xmlFree(buf);
10267
0
        return(NULL);
10268
0
    }
10269
1.36k
    buf = tmp;
10270
1.36k
                size = newSize;
10271
1.36k
      }
10272
645k
      buf[len++] = cur;
10273
645k
      NEXT;
10274
645k
      cur = CUR;
10275
645k
        }
10276
156k
  buf[len] = 0;
10277
156k
    } else {
10278
38
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10279
38
    }
10280
157k
    return(buf);
10281
157k
}
10282
10283
/**
10284
 * xmlParseEncodingDecl:
10285
 * @ctxt:  an XML parser context
10286
 *
10287
 * DEPRECATED: Internal function, don't use.
10288
 *
10289
 * parse the XML encoding declaration
10290
 *
10291
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10292
 *
10293
 * this setups the conversion filters.
10294
 *
10295
 * Returns the encoding value or NULL
10296
 */
10297
10298
const xmlChar *
10299
159k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10300
159k
    xmlChar *encoding = NULL;
10301
10302
159k
    SKIP_BLANKS;
10303
159k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10304
2.58k
        return(NULL);
10305
10306
157k
    SKIP(8);
10307
157k
    SKIP_BLANKS;
10308
157k
    if (RAW != '=') {
10309
39
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10310
39
        return(NULL);
10311
39
    }
10312
157k
    NEXT;
10313
157k
    SKIP_BLANKS;
10314
157k
    if (RAW == '"') {
10315
156k
        NEXT;
10316
156k
        encoding = xmlParseEncName(ctxt);
10317
156k
        if (RAW != '"') {
10318
166
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10319
166
            xmlFree((xmlChar *) encoding);
10320
166
            return(NULL);
10321
166
        } else
10322
156k
            NEXT;
10323
156k
    } else if (RAW == '\''){
10324
37
        NEXT;
10325
37
        encoding = xmlParseEncName(ctxt);
10326
37
        if (RAW != '\'') {
10327
14
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10328
14
            xmlFree((xmlChar *) encoding);
10329
14
            return(NULL);
10330
14
        } else
10331
23
            NEXT;
10332
37
    } else {
10333
16
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10334
16
    }
10335
10336
156k
    if (encoding == NULL)
10337
26
        return(NULL);
10338
10339
156k
    xmlSetDeclaredEncoding(ctxt, encoding);
10340
10341
156k
    return(ctxt->encoding);
10342
156k
}
10343
10344
/**
10345
 * xmlParseSDDecl:
10346
 * @ctxt:  an XML parser context
10347
 *
10348
 * DEPRECATED: Internal function, don't use.
10349
 *
10350
 * parse the XML standalone declaration
10351
 *
10352
 * [32] SDDecl ::= S 'standalone' Eq
10353
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10354
 *
10355
 * [ VC: Standalone Document Declaration ]
10356
 * TODO The standalone document declaration must have the value "no"
10357
 * if any external markup declarations contain declarations of:
10358
 *  - attributes with default values, if elements to which these
10359
 *    attributes apply appear in the document without specifications
10360
 *    of values for these attributes, or
10361
 *  - entities (other than amp, lt, gt, apos, quot), if references
10362
 *    to those entities appear in the document, or
10363
 *  - attributes with values subject to normalization, where the
10364
 *    attribute appears in the document with a value which will change
10365
 *    as a result of normalization, or
10366
 *  - element types with element content, if white space occurs directly
10367
 *    within any instance of those types.
10368
 *
10369
 * Returns:
10370
 *   1 if standalone="yes"
10371
 *   0 if standalone="no"
10372
 *  -2 if standalone attribute is missing or invalid
10373
 *    (A standalone value of -2 means that the XML declaration was found,
10374
 *     but no value was specified for the standalone attribute).
10375
 */
10376
10377
int
10378
111k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10379
111k
    int standalone = -2;
10380
10381
111k
    SKIP_BLANKS;
10382
111k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10383
108k
  SKIP(10);
10384
108k
        SKIP_BLANKS;
10385
108k
  if (RAW != '=') {
10386
13
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10387
13
      return(standalone);
10388
13
        }
10389
108k
  NEXT;
10390
108k
  SKIP_BLANKS;
10391
108k
        if (RAW == '\''){
10392
39
      NEXT;
10393
39
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10394
21
          standalone = 0;
10395
21
                SKIP(2);
10396
21
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10397
8
                 (NXT(2) == 's')) {
10398
4
          standalone = 1;
10399
4
    SKIP(3);
10400
14
            } else {
10401
14
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10402
14
      }
10403
39
      if (RAW != '\'') {
10404
19
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10405
19
      } else
10406
20
          NEXT;
10407
108k
  } else if (RAW == '"'){
10408
108k
      NEXT;
10409
108k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10410
491
          standalone = 0;
10411
491
    SKIP(2);
10412
108k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10413
108k
                 (NXT(2) == 's')) {
10414
108k
          standalone = 1;
10415
108k
                SKIP(3);
10416
108k
            } else {
10417
51
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10418
51
      }
10419
108k
      if (RAW != '"') {
10420
67
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10421
67
      } else
10422
108k
          NEXT;
10423
108k
  } else {
10424
10
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10425
10
        }
10426
108k
    }
10427
111k
    return(standalone);
10428
111k
}
10429
10430
/**
10431
 * xmlParseXMLDecl:
10432
 * @ctxt:  an XML parser context
10433
 *
10434
 * DEPRECATED: Internal function, don't use.
10435
 *
10436
 * parse an XML declaration header
10437
 *
10438
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10439
 */
10440
10441
void
10442
188k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10443
188k
    xmlChar *version;
10444
10445
    /*
10446
     * This value for standalone indicates that the document has an
10447
     * XML declaration but it does not have a standalone attribute.
10448
     * It will be overwritten later if a standalone attribute is found.
10449
     */
10450
10451
188k
    ctxt->standalone = -2;
10452
10453
    /*
10454
     * We know that '<?xml' is here.
10455
     */
10456
188k
    SKIP(5);
10457
10458
188k
    if (!IS_BLANK_CH(RAW)) {
10459
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10460
0
                 "Blank needed after '<?xml'\n");
10461
0
    }
10462
188k
    SKIP_BLANKS;
10463
10464
    /*
10465
     * We must have the VersionInfo here.
10466
     */
10467
188k
    version = xmlParseVersionInfo(ctxt);
10468
188k
    if (version == NULL) {
10469
1.12k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10470
187k
    } else {
10471
187k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10472
      /*
10473
       * Changed here for XML-1.0 5th edition
10474
       */
10475
6.51k
      if (ctxt->options & XML_PARSE_OLD10) {
10476
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10477
0
                "Unsupported version '%s'\n",
10478
0
                version);
10479
6.51k
      } else {
10480
6.51k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10481
6.33k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10482
6.33k
                      "Unsupported version '%s'\n",
10483
6.33k
          version, NULL);
10484
6.33k
    } else {
10485
183
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10486
183
              "Unsupported version '%s'\n",
10487
183
              version);
10488
183
    }
10489
6.51k
      }
10490
6.51k
  }
10491
187k
  if (ctxt->version != NULL)
10492
0
      xmlFree((void *) ctxt->version);
10493
187k
  ctxt->version = version;
10494
187k
    }
10495
10496
    /*
10497
     * We may have the encoding declaration
10498
     */
10499
188k
    if (!IS_BLANK_CH(RAW)) {
10500
30.3k
        if ((RAW == '?') && (NXT(1) == '>')) {
10501
29.1k
      SKIP(2);
10502
29.1k
      return;
10503
29.1k
  }
10504
1.19k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10505
1.19k
    }
10506
159k
    xmlParseEncodingDecl(ctxt);
10507
10508
    /*
10509
     * We may have the standalone status.
10510
     */
10511
159k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10512
48.6k
        if ((RAW == '?') && (NXT(1) == '>')) {
10513
48.5k
      SKIP(2);
10514
48.5k
      return;
10515
48.5k
  }
10516
39
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10517
39
    }
10518
10519
    /*
10520
     * We can grow the input buffer freely at that point
10521
     */
10522
111k
    GROW;
10523
10524
111k
    SKIP_BLANKS;
10525
111k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10526
10527
111k
    SKIP_BLANKS;
10528
111k
    if ((RAW == '?') && (NXT(1) == '>')) {
10529
109k
        SKIP(2);
10530
109k
    } else if (RAW == '>') {
10531
        /* Deprecated old WD ... */
10532
55
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10533
55
  NEXT;
10534
1.70k
    } else {
10535
1.70k
        int c;
10536
10537
1.70k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10538
967k
        while ((PARSER_STOPPED(ctxt) == 0) &&
10539
967k
               ((c = CUR) != 0)) {
10540
967k
            NEXT;
10541
967k
            if (c == '>')
10542
1.17k
                break;
10543
967k
        }
10544
1.70k
    }
10545
111k
}
10546
10547
/**
10548
 * xmlCtxtGetVersion:
10549
 * @ctxt:  parser context
10550
 *
10551
 * Available since 2.14.0.
10552
 *
10553
 * Returns the version from the XML declaration.
10554
 */
10555
const xmlChar *
10556
0
xmlCtxtGetVersion(xmlParserCtxtPtr ctxt) {
10557
0
    if (ctxt == NULL)
10558
0
        return(NULL);
10559
10560
0
    return(ctxt->version);
10561
0
}
10562
10563
/**
10564
 * xmlCtxtGetStandalone:
10565
 * @ctxt:  parser context
10566
 *
10567
 * Available since 2.14.0.
10568
 *
10569
 * Returns the value from the standalone document declaration.
10570
 */
10571
int
10572
0
xmlCtxtGetStandalone(xmlParserCtxtPtr ctxt) {
10573
0
    if (ctxt == NULL)
10574
0
        return(0);
10575
10576
0
    return(ctxt->standalone);
10577
0
}
10578
10579
/**
10580
 * xmlParseMisc:
10581
 * @ctxt:  an XML parser context
10582
 *
10583
 * DEPRECATED: Internal function, don't use.
10584
 *
10585
 * parse an XML Misc* optional field.
10586
 *
10587
 * [27] Misc ::= Comment | PI |  S
10588
 */
10589
10590
void
10591
12.8k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10592
12.9k
    while (PARSER_STOPPED(ctxt) == 0) {
10593
12.9k
        SKIP_BLANKS;
10594
12.9k
        GROW;
10595
12.9k
        if ((RAW == '<') && (NXT(1) == '?')) {
10596
65
      xmlParsePI(ctxt);
10597
12.8k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10598
0
      xmlParseComment(ctxt);
10599
12.8k
        } else {
10600
12.8k
            break;
10601
12.8k
        }
10602
12.9k
    }
10603
12.8k
}
10604
10605
static void
10606
166k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10607
166k
    xmlDocPtr doc;
10608
10609
    /*
10610
     * SAX: end of the document processing.
10611
     */
10612
166k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10613
6.42k
        ctxt->sax->endDocument(ctxt->userData);
10614
10615
166k
    doc = ctxt->myDoc;
10616
166k
    if (doc != NULL) {
10617
6.42k
        if (ctxt->wellFormed) {
10618
6.09k
            doc->properties |= XML_DOC_WELLFORMED;
10619
6.09k
            if (ctxt->valid)
10620
6.09k
                doc->properties |= XML_DOC_DTDVALID;
10621
6.09k
            if (ctxt->nsWellFormed)
10622
6.09k
                doc->properties |= XML_DOC_NSVALID;
10623
6.09k
        }
10624
10625
6.42k
        if (ctxt->options & XML_PARSE_OLD10)
10626
0
            doc->properties |= XML_DOC_OLD10;
10627
10628
        /*
10629
         * Remove locally kept entity definitions if the tree was not built
10630
         */
10631
6.42k
  if (xmlStrEqual(doc->version, SAX_COMPAT_MODE)) {
10632
10
            xmlFreeDoc(doc);
10633
10
            ctxt->myDoc = NULL;
10634
10
        }
10635
6.42k
    }
10636
166k
}
10637
10638
/**
10639
 * xmlParseDocument:
10640
 * @ctxt:  an XML parser context
10641
 *
10642
 * Parse an XML document and invoke the SAX handlers. This is useful
10643
 * if you're only interested in custom SAX callbacks. If you want a
10644
 * document tree, use xmlCtxtParseDocument.
10645
 *
10646
 * Returns 0, -1 in case of error.
10647
 */
10648
10649
int
10650
6.46k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10651
6.46k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10652
0
        return(-1);
10653
10654
6.46k
    GROW;
10655
10656
    /*
10657
     * SAX: detecting the level.
10658
     */
10659
6.46k
    xmlCtxtInitializeLate(ctxt);
10660
10661
6.46k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10662
6.46k
        ctxt->sax->setDocumentLocator(ctxt->userData,
10663
6.46k
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10664
6.46k
    }
10665
10666
6.46k
    xmlDetectEncoding(ctxt);
10667
10668
6.46k
    if (CUR == 0) {
10669
42
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10670
42
  return(-1);
10671
42
    }
10672
10673
6.42k
    GROW;
10674
6.42k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10675
10676
  /*
10677
   * Note that we will switch encoding on the fly.
10678
   */
10679
6.12k
  xmlParseXMLDecl(ctxt);
10680
6.12k
  SKIP_BLANKS;
10681
6.12k
    } else {
10682
294
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10683
294
        if (ctxt->version == NULL) {
10684
0
            xmlErrMemory(ctxt);
10685
0
            return(-1);
10686
0
        }
10687
294
    }
10688
6.42k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10689
6.41k
        ctxt->sax->startDocument(ctxt->userData);
10690
6.42k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10691
6.41k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10692
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10693
0
    }
10694
10695
    /*
10696
     * The Misc part of the Prolog
10697
     */
10698
6.42k
    xmlParseMisc(ctxt);
10699
10700
    /*
10701
     * Then possibly doc type declaration(s) and more Misc
10702
     * (doctypedecl Misc*)?
10703
     */
10704
6.42k
    GROW;
10705
6.42k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10706
10707
0
  ctxt->inSubset = 1;
10708
0
  xmlParseDocTypeDecl(ctxt);
10709
0
  if (RAW == '[') {
10710
0
      xmlParseInternalSubset(ctxt);
10711
0
  } else if (RAW == '>') {
10712
0
            NEXT;
10713
0
        }
10714
10715
  /*
10716
   * Create and update the external subset.
10717
   */
10718
0
  ctxt->inSubset = 2;
10719
0
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10720
0
      (!ctxt->disableSAX))
10721
0
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10722
0
                                ctxt->extSubSystem, ctxt->extSubURI);
10723
0
  ctxt->inSubset = 0;
10724
10725
0
        xmlCleanSpecialAttr(ctxt);
10726
10727
0
  xmlParseMisc(ctxt);
10728
0
    }
10729
10730
    /*
10731
     * Time to start parsing the tree itself
10732
     */
10733
6.42k
    GROW;
10734
6.42k
    if (RAW != '<') {
10735
2
        if (ctxt->wellFormed)
10736
1
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10737
1
                           "Start tag expected, '<' not found\n");
10738
6.41k
    } else {
10739
6.41k
  xmlParseElement(ctxt);
10740
10741
  /*
10742
   * The Misc part at the end
10743
   */
10744
6.41k
  xmlParseMisc(ctxt);
10745
10746
6.41k
        xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
10747
6.41k
    }
10748
10749
6.42k
    ctxt->instate = XML_PARSER_EOF;
10750
6.42k
    xmlFinishDocument(ctxt);
10751
10752
6.42k
    if (! ctxt->wellFormed) {
10753
324
  ctxt->valid = 0;
10754
324
  return(-1);
10755
324
    }
10756
10757
6.09k
    return(0);
10758
6.42k
}
10759
10760
/**
10761
 * xmlParseExtParsedEnt:
10762
 * @ctxt:  an XML parser context
10763
 *
10764
 * DEPRECATED: Internal function, don't use.
10765
 *
10766
 * parse a general parsed entity
10767
 * An external general parsed entity is well-formed if it matches the
10768
 * production labeled extParsedEnt.
10769
 *
10770
 * [78] extParsedEnt ::= TextDecl? content
10771
 *
10772
 * Returns 0, -1 in case of error. the parser context is augmented
10773
 *                as a result of the parsing.
10774
 */
10775
10776
int
10777
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10778
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10779
0
        return(-1);
10780
10781
0
    xmlCtxtInitializeLate(ctxt);
10782
10783
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10784
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10785
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10786
0
    }
10787
10788
0
    xmlDetectEncoding(ctxt);
10789
10790
0
    if (CUR == 0) {
10791
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10792
0
    }
10793
10794
    /*
10795
     * Check for the XMLDecl in the Prolog.
10796
     */
10797
0
    GROW;
10798
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10799
10800
  /*
10801
   * Note that we will switch encoding on the fly.
10802
   */
10803
0
  xmlParseXMLDecl(ctxt);
10804
0
  SKIP_BLANKS;
10805
0
    } else {
10806
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10807
0
    }
10808
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10809
0
        ctxt->sax->startDocument(ctxt->userData);
10810
10811
    /*
10812
     * Doing validity checking on chunk doesn't make sense
10813
     */
10814
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10815
0
    ctxt->validate = 0;
10816
0
    ctxt->depth = 0;
10817
10818
0
    xmlParseContentInternal(ctxt);
10819
10820
0
    if (ctxt->input->cur < ctxt->input->end)
10821
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10822
10823
    /*
10824
     * SAX: end of the document processing.
10825
     */
10826
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10827
0
        ctxt->sax->endDocument(ctxt->userData);
10828
10829
0
    if (! ctxt->wellFormed) return(-1);
10830
0
    return(0);
10831
0
}
10832
10833
#ifdef LIBXML_PUSH_ENABLED
10834
/************************************************************************
10835
 *                  *
10836
 *    Progressive parsing interfaces        *
10837
 *                  *
10838
 ************************************************************************/
10839
10840
/**
10841
 * xmlParseLookupChar:
10842
 * @ctxt:  an XML parser context
10843
 * @c:  character
10844
 *
10845
 * Check whether the input buffer contains a character.
10846
 */
10847
static int
10848
4.50M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10849
4.50M
    const xmlChar *cur;
10850
10851
4.50M
    if (ctxt->checkIndex == 0) {
10852
4.50M
        cur = ctxt->input->cur + 1;
10853
4.50M
    } else {
10854
638
        cur = ctxt->input->cur + ctxt->checkIndex;
10855
638
    }
10856
10857
4.50M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10858
837
        size_t index = ctxt->input->end - ctxt->input->cur;
10859
10860
837
        if (index > LONG_MAX) {
10861
0
            ctxt->checkIndex = 0;
10862
0
            return(1);
10863
0
        }
10864
837
        ctxt->checkIndex = index;
10865
837
        return(0);
10866
4.50M
    } else {
10867
4.50M
        ctxt->checkIndex = 0;
10868
4.50M
        return(1);
10869
4.50M
    }
10870
4.50M
}
10871
10872
/**
10873
 * xmlParseLookupString:
10874
 * @ctxt:  an XML parser context
10875
 * @startDelta: delta to apply at the start
10876
 * @str:  string
10877
 * @strLen:  length of string
10878
 *
10879
 * Check whether the input buffer contains a string.
10880
 */
10881
static const xmlChar *
10882
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10883
105k
                     const char *str, size_t strLen) {
10884
105k
    const xmlChar *cur, *term;
10885
10886
105k
    if (ctxt->checkIndex == 0) {
10887
104k
        cur = ctxt->input->cur + startDelta;
10888
104k
    } else {
10889
1.11k
        cur = ctxt->input->cur + ctxt->checkIndex;
10890
1.11k
    }
10891
10892
105k
    term = BAD_CAST strstr((const char *) cur, str);
10893
105k
    if (term == NULL) {
10894
1.52k
        const xmlChar *end = ctxt->input->end;
10895
1.52k
        size_t index;
10896
10897
        /* Rescan (strLen - 1) characters. */
10898
1.52k
        if ((size_t) (end - cur) < strLen)
10899
28
            end = cur;
10900
1.49k
        else
10901
1.49k
            end -= strLen - 1;
10902
1.52k
        index = end - ctxt->input->cur;
10903
1.52k
        if (index > LONG_MAX) {
10904
0
            ctxt->checkIndex = 0;
10905
0
            return(ctxt->input->end - strLen);
10906
0
        }
10907
1.52k
        ctxt->checkIndex = index;
10908
104k
    } else {
10909
104k
        ctxt->checkIndex = 0;
10910
104k
    }
10911
10912
105k
    return(term);
10913
105k
}
10914
10915
/**
10916
 * xmlParseLookupCharData:
10917
 * @ctxt:  an XML parser context
10918
 *
10919
 * Check whether the input buffer contains terminated char data.
10920
 */
10921
static int
10922
88.2k
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10923
88.2k
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10924
88.2k
    const xmlChar *end = ctxt->input->end;
10925
88.2k
    size_t index;
10926
10927
1.09M
    while (cur < end) {
10928
1.09M
        if ((*cur == '<') || (*cur == '&')) {
10929
85.6k
            ctxt->checkIndex = 0;
10930
85.6k
            return(1);
10931
85.6k
        }
10932
1.01M
        cur++;
10933
1.01M
    }
10934
10935
2.56k
    index = cur - ctxt->input->cur;
10936
2.56k
    if (index > LONG_MAX) {
10937
0
        ctxt->checkIndex = 0;
10938
0
        return(1);
10939
0
    }
10940
2.56k
    ctxt->checkIndex = index;
10941
2.56k
    return(0);
10942
2.56k
}
10943
10944
/**
10945
 * xmlParseLookupGt:
10946
 * @ctxt:  an XML parser context
10947
 *
10948
 * Check whether there's enough data in the input buffer to finish parsing
10949
 * a start tag. This has to take quotes into account.
10950
 */
10951
static int
10952
22.4M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10953
22.4M
    const xmlChar *cur;
10954
22.4M
    const xmlChar *end = ctxt->input->end;
10955
22.4M
    int state = ctxt->endCheckState;
10956
22.4M
    size_t index;
10957
10958
22.4M
    if (ctxt->checkIndex == 0)
10959
22.4M
        cur = ctxt->input->cur + 1;
10960
8.65k
    else
10961
8.65k
        cur = ctxt->input->cur + ctxt->checkIndex;
10962
10963
553M
    while (cur < end) {
10964
553M
        if (state) {
10965
149M
            if (*cur == state)
10966
12.3M
                state = 0;
10967
403M
        } else if (*cur == '\'' || *cur == '"') {
10968
12.3M
            state = *cur;
10969
391M
        } else if (*cur == '>') {
10970
22.4M
            ctxt->checkIndex = 0;
10971
22.4M
            ctxt->endCheckState = 0;
10972
22.4M
            return(1);
10973
22.4M
        }
10974
530M
        cur++;
10975
530M
    }
10976
10977
12.4k
    index = cur - ctxt->input->cur;
10978
12.4k
    if (index > LONG_MAX) {
10979
0
        ctxt->checkIndex = 0;
10980
0
        ctxt->endCheckState = 0;
10981
0
        return(1);
10982
0
    }
10983
12.4k
    ctxt->checkIndex = index;
10984
12.4k
    ctxt->endCheckState = state;
10985
12.4k
    return(0);
10986
12.4k
}
10987
10988
/**
10989
 * xmlParseLookupInternalSubset:
10990
 * @ctxt:  an XML parser context
10991
 *
10992
 * Check whether there's enough data in the input buffer to finish parsing
10993
 * the internal subset.
10994
 */
10995
static int
10996
951
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10997
    /*
10998
     * Sorry, but progressive parsing of the internal subset is not
10999
     * supported. We first check that the full content of the internal
11000
     * subset is available and parsing is launched only at that point.
11001
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11002
     * not in a ']]>' sequence which are conditional sections.
11003
     */
11004
951
    const xmlChar *cur, *start;
11005
951
    const xmlChar *end = ctxt->input->end;
11006
951
    int state = ctxt->endCheckState;
11007
951
    size_t index;
11008
11009
951
    if (ctxt->checkIndex == 0) {
11010
716
        cur = ctxt->input->cur + 1;
11011
716
    } else {
11012
235
        cur = ctxt->input->cur + ctxt->checkIndex;
11013
235
    }
11014
951
    start = cur;
11015
11016
17.5M
    while (cur < end) {
11017
17.5M
        if (state == '-') {
11018
2.58M
            if ((*cur == '-') &&
11019
162k
                (cur[1] == '-') &&
11020
82.6k
                (cur[2] == '>')) {
11021
35.4k
                state = 0;
11022
35.4k
                cur += 3;
11023
35.4k
                start = cur;
11024
35.4k
                continue;
11025
35.4k
            }
11026
2.58M
        }
11027
14.9M
        else if (state == ']') {
11028
13.7k
            if (*cur == '>') {
11029
155
                ctxt->checkIndex = 0;
11030
155
                ctxt->endCheckState = 0;
11031
155
                return(1);
11032
155
            }
11033
13.6k
            if (IS_BLANK_CH(*cur)) {
11034
3.73k
                state = ' ';
11035
9.87k
            } else if (*cur != ']') {
11036
3.62k
                state = 0;
11037
3.62k
                start = cur;
11038
3.62k
                continue;
11039
3.62k
            }
11040
13.6k
        }
11041
14.9M
        else if (state == ' ') {
11042
13.4k
            if (*cur == '>') {
11043
5
                ctxt->checkIndex = 0;
11044
5
                ctxt->endCheckState = 0;
11045
5
                return(1);
11046
5
            }
11047
13.4k
            if (!IS_BLANK_CH(*cur)) {
11048
3.72k
                state = 0;
11049
3.72k
                start = cur;
11050
3.72k
                continue;
11051
3.72k
            }
11052
13.4k
        }
11053
14.9M
        else if (state != 0) {
11054
10.6M
            if (*cur == state) {
11055
84.4k
                state = 0;
11056
84.4k
                start = cur + 1;
11057
84.4k
            }
11058
10.6M
        }
11059
4.31M
        else if (*cur == '<') {
11060
170k
            if ((cur[1] == '!') &&
11061
78.3k
                (cur[2] == '-') &&
11062
35.7k
                (cur[3] == '-')) {
11063
35.5k
                state = '-';
11064
35.5k
                cur += 4;
11065
                /* Don't treat <!--> as comment */
11066
35.5k
                start = cur;
11067
35.5k
                continue;
11068
35.5k
            }
11069
170k
        }
11070
4.14M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11071
92.2k
            state = *cur;
11072
92.2k
        }
11073
11074
17.4M
        cur++;
11075
17.4M
    }
11076
11077
    /*
11078
     * Rescan the three last characters to detect "<!--" and "-->"
11079
     * split across chunks.
11080
     */
11081
791
    if ((state == 0) || (state == '-')) {
11082
357
        if (cur - start < 3)
11083
17
            cur = start;
11084
340
        else
11085
340
            cur -= 3;
11086
357
    }
11087
791
    index = cur - ctxt->input->cur;
11088
791
    if (index > LONG_MAX) {
11089
0
        ctxt->checkIndex = 0;
11090
0
        ctxt->endCheckState = 0;
11091
0
        return(1);
11092
0
    }
11093
791
    ctxt->checkIndex = index;
11094
791
    ctxt->endCheckState = state;
11095
791
    return(0);
11096
791
}
11097
11098
/**
11099
 * xmlParseTryOrFinish:
11100
 * @ctxt:  an XML parser context
11101
 * @terminate:  last chunk indicator
11102
 *
11103
 * Try to progress on parsing
11104
 *
11105
 * Returns zero if no parsing was possible
11106
 */
11107
static int
11108
277k
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11109
277k
    int ret = 0;
11110
277k
    size_t avail;
11111
277k
    xmlChar cur, next;
11112
11113
277k
    if (ctxt->input == NULL)
11114
0
        return(0);
11115
11116
277k
    if ((ctxt->input != NULL) &&
11117
277k
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11118
23.3k
        xmlParserShrink(ctxt);
11119
23.3k
    }
11120
11121
107M
    while (ctxt->disableSAX == 0) {
11122
107M
        avail = ctxt->input->end - ctxt->input->cur;
11123
107M
        if (avail < 1)
11124
150k
      goto done;
11125
107M
        switch (ctxt->instate) {
11126
1.75k
            case XML_PARSER_EOF:
11127
          /*
11128
     * Document parsing is done !
11129
     */
11130
1.75k
          goto done;
11131
249k
            case XML_PARSER_START:
11132
                /*
11133
                 * Very first chars read from the document flow.
11134
                 */
11135
249k
                if ((!terminate) && (avail < 4))
11136
0
                    goto done;
11137
11138
                /*
11139
                 * We need more bytes to detect EBCDIC code pages.
11140
                 * See xmlDetectEBCDIC.
11141
                 */
11142
249k
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
11143
6
                    (!terminate) && (avail < 200))
11144
0
                    goto done;
11145
11146
249k
                xmlDetectEncoding(ctxt);
11147
249k
                ctxt->instate = XML_PARSER_XML_DECL;
11148
249k
    break;
11149
11150
249k
            case XML_PARSER_XML_DECL:
11151
249k
    if ((!terminate) && (avail < 2))
11152
0
        goto done;
11153
249k
    cur = ctxt->input->cur[0];
11154
249k
    next = ctxt->input->cur[1];
11155
249k
          if ((cur == '<') && (next == '?')) {
11156
        /* PI or XML decl */
11157
203k
        if ((!terminate) &&
11158
23.6k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11159
172
      goto done;
11160
203k
        if ((ctxt->input->cur[2] == 'x') &&
11161
199k
      (ctxt->input->cur[3] == 'm') &&
11162
198k
      (ctxt->input->cur[4] == 'l') &&
11163
196k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11164
182k
      ret += 5;
11165
182k
      xmlParseXMLDecl(ctxt);
11166
182k
        } else {
11167
20.8k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11168
20.8k
                        if (ctxt->version == NULL) {
11169
0
                            xmlErrMemory(ctxt);
11170
0
                            break;
11171
0
                        }
11172
20.8k
        }
11173
203k
    } else {
11174
46.0k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11175
46.0k
        if (ctxt->version == NULL) {
11176
0
            xmlErrMemory(ctxt);
11177
0
      break;
11178
0
        }
11179
46.0k
    }
11180
249k
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
11181
0
                    ctxt->sax->setDocumentLocator(ctxt->userData,
11182
0
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
11183
0
                }
11184
249k
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11185
0
                    (!ctxt->disableSAX))
11186
0
                    ctxt->sax->startDocument(ctxt->userData);
11187
249k
                ctxt->instate = XML_PARSER_MISC;
11188
249k
    break;
11189
37.9M
            case XML_PARSER_START_TAG: {
11190
37.9M
          const xmlChar *name;
11191
37.9M
    const xmlChar *prefix = NULL;
11192
37.9M
    const xmlChar *URI = NULL;
11193
37.9M
                int line = ctxt->input->line;
11194
37.9M
    int nbNs = 0;
11195
11196
37.9M
    if ((!terminate) && (avail < 2))
11197
3
        goto done;
11198
37.9M
    cur = ctxt->input->cur[0];
11199
37.9M
          if (cur != '<') {
11200
452
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11201
452
                                   "Start tag expected, '<' not found");
11202
452
                    ctxt->instate = XML_PARSER_EOF;
11203
452
                    xmlFinishDocument(ctxt);
11204
452
        goto done;
11205
452
    }
11206
37.9M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11207
12.6k
                    goto done;
11208
37.9M
    if (ctxt->spaceNr == 0)
11209
0
        spacePush(ctxt, -1);
11210
37.9M
    else if (*ctxt->space == -2)
11211
9.88M
        spacePush(ctxt, -1);
11212
28.0M
    else
11213
28.0M
        spacePush(ctxt, *ctxt->space);
11214
37.9M
#ifdef LIBXML_SAX1_ENABLED
11215
37.9M
    if (ctxt->sax2)
11216
37.9M
#endif /* LIBXML_SAX1_ENABLED */
11217
37.9M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
11218
337
#ifdef LIBXML_SAX1_ENABLED
11219
337
    else
11220
337
        name = xmlParseStartTag(ctxt);
11221
37.9M
#endif /* LIBXML_SAX1_ENABLED */
11222
37.9M
    if (name == NULL) {
11223
5.11k
        spacePop(ctxt);
11224
5.11k
                    ctxt->instate = XML_PARSER_EOF;
11225
5.11k
                    xmlFinishDocument(ctxt);
11226
5.11k
        goto done;
11227
5.11k
    }
11228
37.9M
#ifdef LIBXML_VALID_ENABLED
11229
    /*
11230
     * [ VC: Root Element Type ]
11231
     * The Name in the document type declaration must match
11232
     * the element type of the root element.
11233
     */
11234
37.9M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11235
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11236
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11237
37.9M
#endif /* LIBXML_VALID_ENABLED */
11238
11239
    /*
11240
     * Check for an Empty Element.
11241
     */
11242
37.9M
    if ((RAW == '/') && (NXT(1) == '>')) {
11243
9.33M
        SKIP(2);
11244
11245
9.33M
        if (ctxt->sax2) {
11246
9.33M
      if ((ctxt->sax != NULL) &&
11247
9.33M
          (ctxt->sax->endElementNs != NULL) &&
11248
9.33M
          (!ctxt->disableSAX))
11249
9.33M
          ctxt->sax->endElementNs(ctxt->userData, name,
11250
9.33M
                                  prefix, URI);
11251
9.33M
      if (nbNs > 0)
11252
176k
          xmlParserNsPop(ctxt, nbNs);
11253
9.33M
#ifdef LIBXML_SAX1_ENABLED
11254
18.4E
        } else {
11255
18.4E
      if ((ctxt->sax != NULL) &&
11256
0
          (ctxt->sax->endElement != NULL) &&
11257
0
          (!ctxt->disableSAX))
11258
0
          ctxt->sax->endElement(ctxt->userData, name);
11259
18.4E
#endif /* LIBXML_SAX1_ENABLED */
11260
18.4E
        }
11261
9.33M
        spacePop(ctxt);
11262
28.6M
    } else if (RAW == '>') {
11263
28.5M
        NEXT;
11264
28.5M
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11265
28.5M
    } else {
11266
59.9k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11267
59.9k
           "Couldn't find end of Start Tag %s\n",
11268
59.9k
           name);
11269
59.9k
        nodePop(ctxt);
11270
59.9k
        spacePop(ctxt);
11271
59.9k
                    if (nbNs > 0)
11272
6.03k
                        xmlParserNsPop(ctxt, nbNs);
11273
59.9k
    }
11274
11275
37.9M
                if (ctxt->nameNr == 0)
11276
11.0k
                    ctxt->instate = XML_PARSER_EPILOG;
11277
37.9M
                else
11278
37.9M
                    ctxt->instate = XML_PARSER_CONTENT;
11279
37.9M
                break;
11280
37.9M
      }
11281
60.0M
            case XML_PARSER_CONTENT: {
11282
60.0M
    cur = ctxt->input->cur[0];
11283
11284
60.0M
    if (cur == '<') {
11285
46.5M
                    if ((!terminate) && (avail < 2))
11286
648
                        goto done;
11287
46.5M
        next = ctxt->input->cur[1];
11288
11289
46.5M
                    if (next == '/') {
11290
8.76M
                        ctxt->instate = XML_PARSER_END_TAG;
11291
8.76M
                        break;
11292
37.8M
                    } else if (next == '?') {
11293
14.4k
                        if ((!terminate) &&
11294
8.81k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11295
109
                            goto done;
11296
14.3k
                        xmlParsePI(ctxt);
11297
14.3k
                        ctxt->instate = XML_PARSER_CONTENT;
11298
14.3k
                        break;
11299
37.8M
                    } else if (next == '!') {
11300
77.0k
                        if ((!terminate) && (avail < 3))
11301
10
                            goto done;
11302
77.0k
                        next = ctxt->input->cur[2];
11303
11304
77.0k
                        if (next == '-') {
11305
55.6k
                            if ((!terminate) && (avail < 4))
11306
8
                                goto done;
11307
55.6k
                            if (ctxt->input->cur[3] == '-') {
11308
55.6k
                                if ((!terminate) &&
11309
37.3k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11310
112
                                    goto done;
11311
55.5k
                                xmlParseComment(ctxt);
11312
55.5k
                                ctxt->instate = XML_PARSER_CONTENT;
11313
55.5k
                                break;
11314
55.6k
                            }
11315
55.6k
                        } else if (next == '[') {
11316
21.2k
                            if ((!terminate) && (avail < 9))
11317
8
                                goto done;
11318
21.2k
                            if ((ctxt->input->cur[2] == '[') &&
11319
21.2k
                                (ctxt->input->cur[3] == 'C') &&
11320
21.1k
                                (ctxt->input->cur[4] == 'D') &&
11321
21.1k
                                (ctxt->input->cur[5] == 'A') &&
11322
21.1k
                                (ctxt->input->cur[6] == 'T') &&
11323
21.1k
                                (ctxt->input->cur[7] == 'A') &&
11324
21.1k
                                (ctxt->input->cur[8] == '[')) {
11325
21.1k
                                if ((!terminate) &&
11326
11.3k
                                    (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11327
972
                                    goto done;
11328
20.1k
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11329
20.1k
                                xmlParseCDSect(ctxt);
11330
20.1k
                                ctxt->instate = XML_PARSER_CONTENT;
11331
20.1k
                                break;
11332
21.1k
                            }
11333
21.2k
                        }
11334
77.0k
                    }
11335
46.5M
    } else if (cur == '&') {
11336
407k
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11337
79
      goto done;
11338
407k
        xmlParseReference(ctxt);
11339
407k
                    break;
11340
13.0M
    } else {
11341
        /* TODO Avoid the extra copy, handle directly !!! */
11342
        /*
11343
         * Goal of the following test is:
11344
         *  - minimize calls to the SAX 'character' callback
11345
         *    when they are mergeable
11346
         *  - handle an problem for isBlank when we only parse
11347
         *    a sequence of blank chars and the next one is
11348
         *    not available to check against '<' presence.
11349
         *  - tries to homogenize the differences in SAX
11350
         *    callbacks between the push and pull versions
11351
         *    of the parser.
11352
         */
11353
13.0M
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11354
494k
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11355
2.56k
          goto done;
11356
494k
                    }
11357
13.0M
                    ctxt->checkIndex = 0;
11358
13.0M
        xmlParseCharDataInternal(ctxt, !terminate);
11359
13.0M
                    break;
11360
13.0M
    }
11361
11362
37.7M
                ctxt->instate = XML_PARSER_START_TAG;
11363
37.7M
    break;
11364
60.0M
      }
11365
8.76M
            case XML_PARSER_END_TAG:
11366
8.76M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11367
758
        goto done;
11368
8.76M
    if (ctxt->sax2) {
11369
8.76M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11370
8.76M
        nameNsPop(ctxt);
11371
8.76M
    }
11372
59
#ifdef LIBXML_SAX1_ENABLED
11373
59
      else
11374
59
        xmlParseEndTag1(ctxt, 0);
11375
8.76M
#endif /* LIBXML_SAX1_ENABLED */
11376
8.76M
    if (ctxt->nameNr == 0) {
11377
143k
        ctxt->instate = XML_PARSER_EPILOG;
11378
8.61M
    } else {
11379
8.61M
        ctxt->instate = XML_PARSER_CONTENT;
11380
8.61M
    }
11381
8.76M
    break;
11382
296k
            case XML_PARSER_MISC:
11383
300k
            case XML_PARSER_PROLOG:
11384
316k
            case XML_PARSER_EPILOG:
11385
316k
    SKIP_BLANKS;
11386
316k
                avail = ctxt->input->end - ctxt->input->cur;
11387
316k
    if (avail < 1)
11388
11.4k
        goto done;
11389
305k
    if (ctxt->input->cur[0] == '<') {
11390
303k
                    if ((!terminate) && (avail < 2))
11391
11
                        goto done;
11392
303k
                    next = ctxt->input->cur[1];
11393
303k
                    if (next == '?') {
11394
50.7k
                        if ((!terminate) &&
11395
22.0k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11396
94
                            goto done;
11397
50.6k
                        xmlParsePI(ctxt);
11398
50.6k
                        break;
11399
252k
                    } else if (next == '!') {
11400
13.5k
                        if ((!terminate) && (avail < 3))
11401
6
                            goto done;
11402
11403
13.5k
                        if (ctxt->input->cur[2] == '-') {
11404
4.64k
                            if ((!terminate) && (avail < 4))
11405
1
                                goto done;
11406
4.64k
                            if (ctxt->input->cur[3] == '-') {
11407
4.62k
                                if ((!terminate) &&
11408
2.49k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11409
63
                                    goto done;
11410
4.55k
                                xmlParseComment(ctxt);
11411
4.55k
                                break;
11412
4.62k
                            }
11413
8.92k
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11414
8.90k
                            if ((!terminate) && (avail < 9))
11415
1
                                goto done;
11416
8.90k
                            if ((ctxt->input->cur[2] == 'D') &&
11417
8.88k
                                (ctxt->input->cur[3] == 'O') &&
11418
8.87k
                                (ctxt->input->cur[4] == 'C') &&
11419
8.87k
                                (ctxt->input->cur[5] == 'T') &&
11420
8.85k
                                (ctxt->input->cur[6] == 'Y') &&
11421
8.85k
                                (ctxt->input->cur[7] == 'P') &&
11422
8.84k
                                (ctxt->input->cur[8] == 'E')) {
11423
8.84k
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11424
100
                                    goto done;
11425
8.74k
                                ctxt->inSubset = 1;
11426
8.74k
                                xmlParseDocTypeDecl(ctxt);
11427
8.74k
                                if (RAW == '[') {
11428
8.23k
                                    ctxt->instate = XML_PARSER_DTD;
11429
8.23k
                                } else {
11430
503
                                    if (RAW == '>')
11431
273
                                        NEXT;
11432
                                    /*
11433
                                     * Create and update the external subset.
11434
                                     */
11435
503
                                    ctxt->inSubset = 2;
11436
503
                                    if ((ctxt->sax != NULL) &&
11437
503
                                        (!ctxt->disableSAX) &&
11438
265
                                        (ctxt->sax->externalSubset != NULL))
11439
0
                                        ctxt->sax->externalSubset(
11440
0
                                                ctxt->userData,
11441
0
                                                ctxt->intSubName,
11442
0
                                                ctxt->extSubSystem,
11443
0
                                                ctxt->extSubURI);
11444
503
                                    ctxt->inSubset = 0;
11445
503
                                    xmlCleanSpecialAttr(ctxt);
11446
503
                                    ctxt->instate = XML_PARSER_PROLOG;
11447
503
                                }
11448
8.74k
                                break;
11449
8.84k
                            }
11450
8.90k
                        }
11451
13.5k
                    }
11452
303k
                }
11453
11454
241k
                if (ctxt->instate == XML_PARSER_EPILOG) {
11455
1.62k
                    if (ctxt->errNo == XML_ERR_OK)
11456
183
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11457
1.62k
        ctxt->instate = XML_PARSER_EOF;
11458
1.62k
                    xmlFinishDocument(ctxt);
11459
239k
                } else {
11460
239k
        ctxt->instate = XML_PARSER_START_TAG;
11461
239k
    }
11462
241k
    break;
11463
8.95k
            case XML_PARSER_DTD: {
11464
8.95k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11465
791
                    goto done;
11466
8.15k
    xmlParseInternalSubset(ctxt);
11467
8.15k
    ctxt->inSubset = 2;
11468
8.15k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11469
2.96k
        (ctxt->sax->externalSubset != NULL))
11470
0
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11471
0
          ctxt->extSubSystem, ctxt->extSubURI);
11472
8.15k
    ctxt->inSubset = 0;
11473
8.15k
    xmlCleanSpecialAttr(ctxt);
11474
8.15k
    ctxt->instate = XML_PARSER_PROLOG;
11475
8.15k
                break;
11476
8.95k
      }
11477
0
            default:
11478
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11479
0
      "PP: internal error\n");
11480
0
    ctxt->instate = XML_PARSER_EOF;
11481
0
    break;
11482
107M
  }
11483
107M
    }
11484
277k
done:
11485
277k
    return(ret);
11486
277k
}
11487
11488
/**
11489
 * xmlParseChunk:
11490
 * @ctxt:  an XML parser context
11491
 * @chunk:  chunk of memory
11492
 * @size:  size of chunk in bytes
11493
 * @terminate:  last chunk indicator
11494
 *
11495
 * Parse a chunk of memory in push parser mode.
11496
 *
11497
 * Assumes that the parser context was initialized with
11498
 * xmlCreatePushParserCtxt.
11499
 *
11500
 * The last chunk, which will often be empty, must be marked with
11501
 * the @terminate flag. With the default SAX callbacks, the resulting
11502
 * document will be available in ctxt->myDoc. This pointer will not
11503
 * be freed when calling xmlFreeParserCtxt and must be freed by the
11504
 * caller. If the document isn't well-formed, it will still be returned
11505
 * in ctxt->myDoc.
11506
 *
11507
 * As an exception, xmlCtxtResetPush will free the document in
11508
 * ctxt->myDoc. So ctxt->myDoc should be set to NULL after extracting
11509
 * the document.
11510
 *
11511
 * Returns an xmlParserErrors code (0 on success).
11512
 */
11513
int
11514
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11515
277k
              int terminate) {
11516
277k
    size_t curBase;
11517
277k
    size_t maxLength;
11518
277k
    size_t pos;
11519
277k
    int end_in_lf = 0;
11520
277k
    int res;
11521
11522
277k
    if ((ctxt == NULL) || (size < 0))
11523
0
        return(XML_ERR_ARGUMENT);
11524
277k
    if ((chunk == NULL) && (size > 0))
11525
0
        return(XML_ERR_ARGUMENT);
11526
277k
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11527
0
        return(XML_ERR_ARGUMENT);
11528
277k
    if (ctxt->disableSAX != 0)
11529
0
        return(ctxt->errNo);
11530
11531
277k
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11532
277k
    if (ctxt->instate == XML_PARSER_START)
11533
249k
        xmlCtxtInitializeLate(ctxt);
11534
277k
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11535
40.8k
        (chunk[size - 1] == '\r')) {
11536
153
  end_in_lf = 1;
11537
153
  size--;
11538
153
    }
11539
11540
    /*
11541
     * Also push an empty chunk to make sure that the raw buffer
11542
     * will be flushed if there is an encoder.
11543
     */
11544
277k
    pos = ctxt->input->cur - ctxt->input->base;
11545
277k
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11546
277k
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11547
277k
    if (res < 0) {
11548
6
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11549
6
        xmlHaltParser(ctxt);
11550
6
        return(ctxt->errNo);
11551
6
    }
11552
11553
277k
    xmlParseTryOrFinish(ctxt, terminate);
11554
11555
277k
    curBase = ctxt->input->cur - ctxt->input->base;
11556
277k
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11557
277k
                XML_MAX_HUGE_LENGTH :
11558
277k
                XML_MAX_LOOKUP_LIMIT;
11559
277k
    if (curBase > maxLength) {
11560
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11561
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11562
0
        xmlHaltParser(ctxt);
11563
0
    }
11564
11565
277k
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11566
94.3k
        return(ctxt->errNo);
11567
11568
182k
    if (end_in_lf == 1) {
11569
131
  pos = ctxt->input->cur - ctxt->input->base;
11570
131
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11571
131
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11572
131
        if (res < 0) {
11573
0
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11574
0
            xmlHaltParser(ctxt);
11575
0
            return(ctxt->errNo);
11576
0
        }
11577
131
    }
11578
182k
    if (terminate) {
11579
  /*
11580
   * Check for termination
11581
   */
11582
154k
        if ((ctxt->instate != XML_PARSER_EOF) &&
11583
153k
            (ctxt->instate != XML_PARSER_EPILOG)) {
11584
10.2k
            if (ctxt->nameNr > 0) {
11585
10.0k
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11586
10.0k
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11587
10.0k
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11588
10.0k
                        "Premature end of data in tag %s line %d\n",
11589
10.0k
                        name, line, NULL);
11590
10.0k
            } else if (ctxt->instate == XML_PARSER_START) {
11591
0
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11592
165
            } else {
11593
165
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11594
165
                               "Start tag expected, '<' not found\n");
11595
165
            }
11596
144k
        } else {
11597
144k
            xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
11598
144k
        }
11599
154k
  if (ctxt->instate != XML_PARSER_EOF) {
11600
153k
            ctxt->instate = XML_PARSER_EOF;
11601
153k
            xmlFinishDocument(ctxt);
11602
153k
  }
11603
154k
    }
11604
182k
    if (ctxt->wellFormed == 0)
11605
10.2k
  return((xmlParserErrors) ctxt->errNo);
11606
172k
    else
11607
172k
        return(0);
11608
182k
}
11609
11610
/************************************************************************
11611
 *                  *
11612
 *    I/O front end functions to the parser     *
11613
 *                  *
11614
 ************************************************************************/
11615
11616
/**
11617
 * xmlCreatePushParserCtxt:
11618
 * @sax:  a SAX handler (optional)
11619
 * @user_data:  user data for SAX callbacks (optional)
11620
 * @chunk:  initial chunk (optional, deprecated)
11621
 * @size:  size of initial chunk in bytes
11622
 * @filename:  file name or URI (optional)
11623
 *
11624
 * Create a parser context for using the XML parser in push mode.
11625
 * See xmlParseChunk.
11626
 *
11627
 * Passing an initial chunk is useless and deprecated.
11628
 *
11629
 * The push parser doesn't support recovery mode or the
11630
 * XML_PARSE_NOBLANKS option.
11631
 *
11632
 * @filename is used as base URI to fetch external entities and for
11633
 * error reports.
11634
 *
11635
 * Returns the new parser context or NULL if a memory allocation
11636
 * failed.
11637
 */
11638
11639
xmlParserCtxtPtr
11640
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11641
249k
                        const char *chunk, int size, const char *filename) {
11642
249k
    xmlParserCtxtPtr ctxt;
11643
249k
    xmlParserInputPtr input;
11644
11645
249k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11646
249k
    if (ctxt == NULL)
11647
0
  return(NULL);
11648
11649
249k
    ctxt->options &= ~XML_PARSE_NODICT;
11650
249k
    ctxt->dictNames = 1;
11651
11652
249k
    input = xmlNewPushInput(filename, chunk, size);
11653
249k
    if (input == NULL) {
11654
0
  xmlFreeParserCtxt(ctxt);
11655
0
  return(NULL);
11656
0
    }
11657
249k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11658
0
        xmlFreeInputStream(input);
11659
0
        xmlFreeParserCtxt(ctxt);
11660
0
        return(NULL);
11661
0
    }
11662
11663
249k
    return(ctxt);
11664
249k
}
11665
#endif /* LIBXML_PUSH_ENABLED */
11666
11667
/**
11668
 * xmlStopParser:
11669
 * @ctxt:  an XML parser context
11670
 *
11671
 * Blocks further parser processing
11672
 */
11673
void
11674
0
xmlStopParser(xmlParserCtxtPtr ctxt) {
11675
0
    if (ctxt == NULL)
11676
0
        return;
11677
0
    xmlHaltParser(ctxt);
11678
0
    if (ctxt->errNo != XML_ERR_NO_MEMORY)
11679
0
        ctxt->errNo = XML_ERR_USER_STOP;
11680
0
}
11681
11682
/**
11683
 * xmlCreateIOParserCtxt:
11684
 * @sax:  a SAX handler (optional)
11685
 * @user_data:  user data for SAX callbacks (optional)
11686
 * @ioread:  an I/O read function
11687
 * @ioclose:  an I/O close function (optional)
11688
 * @ioctx:  an I/O handler
11689
 * @enc:  the charset encoding if known (deprecated)
11690
 *
11691
 * Create a parser context for using the XML parser with an existing
11692
 * I/O stream
11693
 *
11694
 * Returns the new parser context or NULL
11695
 */
11696
xmlParserCtxtPtr
11697
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11698
                      xmlInputReadCallback ioread,
11699
                      xmlInputCloseCallback ioclose,
11700
0
                      void *ioctx, xmlCharEncoding enc) {
11701
0
    xmlParserCtxtPtr ctxt;
11702
0
    xmlParserInputPtr input;
11703
0
    const char *encoding;
11704
11705
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11706
0
    if (ctxt == NULL)
11707
0
  return(NULL);
11708
11709
0
    encoding = xmlGetCharEncodingName(enc);
11710
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11711
0
                                  encoding, 0);
11712
0
    if (input == NULL) {
11713
0
  xmlFreeParserCtxt(ctxt);
11714
0
        return (NULL);
11715
0
    }
11716
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11717
0
        xmlFreeInputStream(input);
11718
0
        xmlFreeParserCtxt(ctxt);
11719
0
        return(NULL);
11720
0
    }
11721
11722
0
    return(ctxt);
11723
0
}
11724
11725
#ifdef LIBXML_VALID_ENABLED
11726
/************************************************************************
11727
 *                  *
11728
 *    Front ends when parsing a DTD       *
11729
 *                  *
11730
 ************************************************************************/
11731
11732
/**
11733
 * xmlCtxtParseDtd:
11734
 * @ctxt:  a parser context
11735
 * @input:  a parser input
11736
 * @publicId:  public ID of the DTD (optional)
11737
 * @systemId:  system ID of the DTD (optional)
11738
 *
11739
 * Parse a DTD.
11740
 *
11741
 * Option XML_PARSE_DTDLOAD should be enabled in the parser context
11742
 * to make external entities work.
11743
 *
11744
 * Availabe since 2.14.0.
11745
 *
11746
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11747
 * @input will be freed by the function in any case.
11748
 */
11749
xmlDtdPtr
11750
xmlCtxtParseDtd(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11751
0
                const xmlChar *publicId, const xmlChar *systemId) {
11752
0
    xmlDtdPtr ret = NULL;
11753
11754
0
    if ((ctxt == NULL) || (input == NULL)) {
11755
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
11756
0
        xmlFreeInputStream(input);
11757
0
        return(NULL);
11758
0
    }
11759
11760
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11761
0
        xmlFreeInputStream(input);
11762
0
        return(NULL);
11763
0
    }
11764
11765
0
    if (publicId == NULL)
11766
0
        publicId = BAD_CAST "none";
11767
0
    if (systemId == NULL)
11768
0
        systemId = BAD_CAST "none";
11769
11770
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11771
0
    if (ctxt->myDoc == NULL) {
11772
0
        xmlErrMemory(ctxt);
11773
0
        goto error;
11774
0
    }
11775
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11776
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11777
0
                                       publicId, systemId);
11778
0
    if (ctxt->myDoc->extSubset == NULL) {
11779
0
        xmlErrMemory(ctxt);
11780
0
        xmlFreeDoc(ctxt->myDoc);
11781
0
        goto error;
11782
0
    }
11783
11784
0
    xmlParseExternalSubset(ctxt, publicId, systemId);
11785
11786
0
    if (ctxt->wellFormed) {
11787
0
        ret = ctxt->myDoc->extSubset;
11788
0
        ctxt->myDoc->extSubset = NULL;
11789
0
        if (ret != NULL) {
11790
0
            xmlNodePtr tmp;
11791
11792
0
            ret->doc = NULL;
11793
0
            tmp = ret->children;
11794
0
            while (tmp != NULL) {
11795
0
                tmp->doc = NULL;
11796
0
                tmp = tmp->next;
11797
0
            }
11798
0
        }
11799
0
    } else {
11800
0
        ret = NULL;
11801
0
    }
11802
0
    xmlFreeDoc(ctxt->myDoc);
11803
0
    ctxt->myDoc = NULL;
11804
11805
0
error:
11806
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
11807
11808
0
    return(ret);
11809
0
}
11810
11811
/**
11812
 * xmlIOParseDTD:
11813
 * @sax:  the SAX handler block or NULL
11814
 * @input:  an Input Buffer
11815
 * @enc:  the charset encoding if known
11816
 *
11817
 * DEPRECATED: Use xmlCtxtParseDtd.
11818
 *
11819
 * Load and parse a DTD
11820
 *
11821
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11822
 * @input will be freed by the function in any case.
11823
 */
11824
11825
xmlDtdPtr
11826
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11827
0
        xmlCharEncoding enc) {
11828
0
    xmlDtdPtr ret = NULL;
11829
0
    xmlParserCtxtPtr ctxt;
11830
0
    xmlParserInputPtr pinput = NULL;
11831
11832
0
    if (input == NULL)
11833
0
  return(NULL);
11834
11835
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11836
0
    if (ctxt == NULL) {
11837
0
        xmlFreeParserInputBuffer(input);
11838
0
  return(NULL);
11839
0
    }
11840
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11841
11842
    /*
11843
     * generate a parser input from the I/O handler
11844
     */
11845
11846
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11847
0
    if (pinput == NULL) {
11848
0
        xmlFreeParserInputBuffer(input);
11849
0
  xmlFreeParserCtxt(ctxt);
11850
0
  return(NULL);
11851
0
    }
11852
11853
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11854
0
        xmlSwitchEncoding(ctxt, enc);
11855
0
    }
11856
11857
0
    ret = xmlCtxtParseDtd(ctxt, pinput, NULL, NULL);
11858
11859
0
    xmlFreeParserCtxt(ctxt);
11860
0
    return(ret);
11861
0
}
11862
11863
/**
11864
 * xmlSAXParseDTD:
11865
 * @sax:  the SAX handler block
11866
 * @ExternalID:  a NAME* containing the External ID of the DTD
11867
 * @SystemID:  a NAME* containing the URL to the DTD
11868
 *
11869
 * DEPRECATED: Use xmlCtxtParseDtd.
11870
 *
11871
 * Load and parse an external subset.
11872
 *
11873
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11874
 */
11875
11876
xmlDtdPtr
11877
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11878
0
                          const xmlChar *SystemID) {
11879
0
    xmlDtdPtr ret = NULL;
11880
0
    xmlParserCtxtPtr ctxt;
11881
0
    xmlParserInputPtr input = NULL;
11882
0
    xmlChar* systemIdCanonic;
11883
11884
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11885
11886
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11887
0
    if (ctxt == NULL) {
11888
0
  return(NULL);
11889
0
    }
11890
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11891
11892
    /*
11893
     * Canonicalise the system ID
11894
     */
11895
0
    systemIdCanonic = xmlCanonicPath(SystemID);
11896
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11897
0
  xmlFreeParserCtxt(ctxt);
11898
0
  return(NULL);
11899
0
    }
11900
11901
    /*
11902
     * Ask the Entity resolver to load the damn thing
11903
     */
11904
11905
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11906
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11907
0
                                   systemIdCanonic);
11908
0
    if (input == NULL) {
11909
0
  xmlFreeParserCtxt(ctxt);
11910
0
  if (systemIdCanonic != NULL)
11911
0
      xmlFree(systemIdCanonic);
11912
0
  return(NULL);
11913
0
    }
11914
11915
0
    if (input->filename == NULL)
11916
0
  input->filename = (char *) systemIdCanonic;
11917
0
    else
11918
0
  xmlFree(systemIdCanonic);
11919
11920
0
    ret = xmlCtxtParseDtd(ctxt, input, ExternalID, SystemID);
11921
11922
0
    xmlFreeParserCtxt(ctxt);
11923
0
    return(ret);
11924
0
}
11925
11926
11927
/**
11928
 * xmlParseDTD:
11929
 * @ExternalID:  a NAME* containing the External ID of the DTD
11930
 * @SystemID:  a NAME* containing the URL to the DTD
11931
 *
11932
 * Load and parse an external subset.
11933
 *
11934
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11935
 */
11936
11937
xmlDtdPtr
11938
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11939
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11940
0
}
11941
#endif /* LIBXML_VALID_ENABLED */
11942
11943
/************************************************************************
11944
 *                  *
11945
 *    Front ends when parsing an Entity     *
11946
 *                  *
11947
 ************************************************************************/
11948
11949
static xmlNodePtr
11950
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11951
0
                            int hasTextDecl, int buildTree) {
11952
0
    xmlNodePtr root = NULL;
11953
0
    xmlNodePtr list = NULL;
11954
0
    xmlChar *rootName = BAD_CAST "#root";
11955
0
    int result;
11956
11957
0
    if (buildTree) {
11958
0
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11959
0
        if (root == NULL) {
11960
0
            xmlErrMemory(ctxt);
11961
0
            goto error;
11962
0
        }
11963
0
    }
11964
11965
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
11966
0
        goto error;
11967
11968
0
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11969
0
    spacePush(ctxt, -1);
11970
11971
0
    if (buildTree)
11972
0
        nodePush(ctxt, root);
11973
11974
0
    if (hasTextDecl) {
11975
0
        xmlDetectEncoding(ctxt);
11976
11977
        /*
11978
         * Parse a possible text declaration first
11979
         */
11980
0
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11981
0
            (IS_BLANK_CH(NXT(5)))) {
11982
0
            xmlParseTextDecl(ctxt);
11983
            /*
11984
             * An XML-1.0 document can't reference an entity not XML-1.0
11985
             */
11986
0
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11987
0
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11988
0
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11989
0
                               "Version mismatch between document and "
11990
0
                               "entity\n");
11991
0
            }
11992
0
        }
11993
0
    }
11994
11995
0
    xmlParseContentInternal(ctxt);
11996
11997
0
    if (ctxt->input->cur < ctxt->input->end)
11998
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11999
12000
0
    if ((ctxt->wellFormed) ||
12001
0
        ((ctxt->recovery) && (!xmlCtxtIsCatastrophicError(ctxt)))) {
12002
0
        if (root != NULL) {
12003
0
            xmlNodePtr cur;
12004
12005
            /*
12006
             * Unlink newly created node list.
12007
             */
12008
0
            list = root->children;
12009
0
            root->children = NULL;
12010
0
            root->last = NULL;
12011
0
            for (cur = list; cur != NULL; cur = cur->next)
12012
0
                cur->parent = NULL;
12013
0
        }
12014
0
    }
12015
12016
    /*
12017
     * Read the rest of the stream in case of errors. We want
12018
     * to account for the whole entity size.
12019
     */
12020
0
    do {
12021
0
        ctxt->input->cur = ctxt->input->end;
12022
0
        xmlParserShrink(ctxt);
12023
0
        result = xmlParserGrow(ctxt);
12024
0
    } while (result > 0);
12025
12026
0
    if (buildTree)
12027
0
        nodePop(ctxt);
12028
12029
0
    namePop(ctxt);
12030
0
    spacePop(ctxt);
12031
12032
0
    xmlCtxtPopInput(ctxt);
12033
12034
0
error:
12035
0
    xmlFreeNode(root);
12036
12037
0
    return(list);
12038
0
}
12039
12040
static void
12041
0
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
12042
0
    xmlParserInputPtr input;
12043
0
    xmlNodePtr list;
12044
0
    unsigned long consumed;
12045
0
    int isExternal;
12046
0
    int buildTree;
12047
0
    int oldMinNsIndex;
12048
0
    int oldNodelen, oldNodemem;
12049
12050
0
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
12051
0
    buildTree = (ctxt->node != NULL);
12052
12053
    /*
12054
     * Recursion check
12055
     */
12056
0
    if (ent->flags & XML_ENT_EXPANDING) {
12057
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
12058
0
        xmlHaltParser(ctxt);
12059
0
        goto error;
12060
0
    }
12061
12062
    /*
12063
     * Load entity
12064
     */
12065
0
    input = xmlNewEntityInputStream(ctxt, ent);
12066
0
    if (input == NULL)
12067
0
        goto error;
12068
12069
    /*
12070
     * When building a tree, we need to limit the scope of namespace
12071
     * declarations, so that entities don't reference xmlNs structs
12072
     * from the parent of a reference.
12073
     */
12074
0
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
12075
0
    if (buildTree)
12076
0
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
12077
12078
0
    oldNodelen = ctxt->nodelen;
12079
0
    oldNodemem = ctxt->nodemem;
12080
0
    ctxt->nodelen = 0;
12081
0
    ctxt->nodemem = 0;
12082
12083
    /*
12084
     * Parse content
12085
     *
12086
     * This initiates a recursive call chain:
12087
     *
12088
     * - xmlCtxtParseContentInternal
12089
     * - xmlParseContentInternal
12090
     * - xmlParseReference
12091
     * - xmlCtxtParseEntity
12092
     *
12093
     * The nesting depth is limited by the maximum number of inputs,
12094
     * see xmlCtxtPushInput.
12095
     *
12096
     * It's possible to make this non-recursive (minNsIndex must be
12097
     * stored in the input struct) at the expense of code readability.
12098
     */
12099
12100
0
    ent->flags |= XML_ENT_EXPANDING;
12101
12102
0
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
12103
12104
0
    ent->flags &= ~XML_ENT_EXPANDING;
12105
12106
0
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
12107
0
    ctxt->nodelen = oldNodelen;
12108
0
    ctxt->nodemem = oldNodemem;
12109
12110
    /*
12111
     * Entity size accounting
12112
     */
12113
0
    consumed = input->consumed;
12114
0
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
12115
12116
0
    if ((ent->flags & XML_ENT_CHECKED) == 0)
12117
0
        xmlSaturatedAdd(&ent->expandedSize, consumed);
12118
12119
0
    if ((ent->flags & XML_ENT_PARSED) == 0) {
12120
0
        if (isExternal)
12121
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
12122
12123
0
        ent->children = list;
12124
12125
0
        while (list != NULL) {
12126
0
            list->parent = (xmlNodePtr) ent;
12127
12128
            /*
12129
             * Downstream code like the nginx xslt module can set
12130
             * ctxt->myDoc->extSubset to a separate DTD, so the entity
12131
             * might have a different or a NULL document.
12132
             */
12133
0
            if (list->doc != ent->doc)
12134
0
                xmlSetTreeDoc(list, ent->doc);
12135
12136
0
            if (list->next == NULL)
12137
0
                ent->last = list;
12138
0
            list = list->next;
12139
0
        }
12140
0
    } else {
12141
0
        xmlFreeNodeList(list);
12142
0
    }
12143
12144
0
    xmlFreeInputStream(input);
12145
12146
0
error:
12147
0
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
12148
0
}
12149
12150
/**
12151
 * xmlParseCtxtExternalEntity:
12152
 * @ctxt:  the existing parsing context
12153
 * @URL:  the URL for the entity to load
12154
 * @ID:  the System ID for the entity to load
12155
 * @listOut:  the return value for the set of parsed nodes
12156
 *
12157
 * Parse an external general entity within an existing parsing context
12158
 * An external general parsed entity is well-formed if it matches the
12159
 * production labeled extParsedEnt.
12160
 *
12161
 * [78] extParsedEnt ::= TextDecl? content
12162
 *
12163
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12164
 *    the parser error code otherwise
12165
 */
12166
12167
int
12168
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt, const xmlChar *URL,
12169
0
                           const xmlChar *ID, xmlNodePtr *listOut) {
12170
0
    xmlParserInputPtr input;
12171
0
    xmlNodePtr list;
12172
12173
0
    if (listOut != NULL)
12174
0
        *listOut = NULL;
12175
12176
0
    if (ctxt == NULL)
12177
0
        return(XML_ERR_ARGUMENT);
12178
12179
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12180
0
                            XML_RESOURCE_GENERAL_ENTITY);
12181
0
    if (input == NULL)
12182
0
        return(ctxt->errNo);
12183
12184
0
    xmlCtxtInitializeLate(ctxt);
12185
12186
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
12187
0
    if (listOut != NULL)
12188
0
        *listOut = list;
12189
0
    else
12190
0
        xmlFreeNodeList(list);
12191
12192
0
    xmlFreeInputStream(input);
12193
0
    return(ctxt->errNo);
12194
0
}
12195
12196
#ifdef LIBXML_SAX1_ENABLED
12197
/**
12198
 * xmlParseExternalEntity:
12199
 * @doc:  the document the chunk pertains to
12200
 * @sax:  the SAX handler block (possibly NULL)
12201
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12202
 * @depth:  Used for loop detection, use 0
12203
 * @URL:  the URL for the entity to load
12204
 * @ID:  the System ID for the entity to load
12205
 * @list:  the return value for the set of parsed nodes
12206
 *
12207
 * DEPRECATED: Use xmlParseCtxtExternalEntity.
12208
 *
12209
 * Parse an external general entity
12210
 * An external general parsed entity is well-formed if it matches the
12211
 * production labeled extParsedEnt.
12212
 *
12213
 * [78] extParsedEnt ::= TextDecl? content
12214
 *
12215
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12216
 *    the parser error code otherwise
12217
 */
12218
12219
int
12220
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12221
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
12222
0
    xmlParserCtxtPtr ctxt;
12223
0
    int ret;
12224
12225
0
    if (list != NULL)
12226
0
        *list = NULL;
12227
12228
0
    if (doc == NULL)
12229
0
        return(XML_ERR_ARGUMENT);
12230
12231
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12232
0
    if (ctxt == NULL)
12233
0
        return(XML_ERR_NO_MEMORY);
12234
12235
0
    ctxt->depth = depth;
12236
0
    ctxt->myDoc = doc;
12237
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
12238
12239
0
    xmlFreeParserCtxt(ctxt);
12240
0
    return(ret);
12241
0
}
12242
12243
/**
12244
 * xmlParseBalancedChunkMemory:
12245
 * @doc:  the document the chunk pertains to (must not be NULL)
12246
 * @sax:  the SAX handler block (possibly NULL)
12247
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12248
 * @depth:  Used for loop detection, use 0
12249
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12250
 * @lst:  the return value for the set of parsed nodes
12251
 *
12252
 * Parse a well-balanced chunk of an XML document
12253
 * called by the parser
12254
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12255
 * the content production in the XML grammar:
12256
 *
12257
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12258
 *
12259
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12260
 *    the parser error code otherwise
12261
 */
12262
12263
int
12264
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12265
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12266
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12267
0
                                                depth, string, lst, 0 );
12268
0
}
12269
#endif /* LIBXML_SAX1_ENABLED */
12270
12271
/**
12272
 * xmlCtxtParseContent:
12273
 * @ctxt:  parser context
12274
 * @input:  parser input
12275
 * @node:  target node or document
12276
 * @hasTextDecl:  whether to parse text declaration
12277
 *
12278
 * Parse a well-balanced chunk of XML matching the 'content' production.
12279
 *
12280
 * Namespaces in scope of @node and entities of @node's document are
12281
 * recognized. When validating, the DTD of @node's document is used.
12282
 *
12283
 * Always consumes @input even in error case.
12284
 *
12285
 * Available since 2.14.0.
12286
 *
12287
 * Returns a node list or NULL in case of error.
12288
 */
12289
xmlNodePtr
12290
xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
12291
0
                    xmlNodePtr node, int hasTextDecl) {
12292
0
    xmlDocPtr doc;
12293
0
    xmlNodePtr cur, list = NULL;
12294
0
    int nsnr = 0;
12295
0
    xmlDictPtr oldDict;
12296
0
    int oldOptions, oldDictNames, oldLoadSubset;
12297
12298
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12299
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12300
0
        goto exit;
12301
0
    }
12302
12303
0
    doc = node->doc;
12304
0
    if (doc == NULL) {
12305
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12306
0
        goto exit;
12307
0
    }
12308
12309
0
    switch (node->type) {
12310
0
        case XML_ELEMENT_NODE:
12311
0
        case XML_DOCUMENT_NODE:
12312
0
        case XML_HTML_DOCUMENT_NODE:
12313
0
            break;
12314
12315
0
        case XML_ATTRIBUTE_NODE:
12316
0
        case XML_TEXT_NODE:
12317
0
        case XML_CDATA_SECTION_NODE:
12318
0
        case XML_ENTITY_REF_NODE:
12319
0
        case XML_PI_NODE:
12320
0
        case XML_COMMENT_NODE:
12321
0
            for (cur = node->parent; cur != NULL; cur = node->parent) {
12322
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12323
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12324
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12325
0
                    node = cur;
12326
0
                    break;
12327
0
                }
12328
0
            }
12329
0
            break;
12330
12331
0
        default:
12332
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12333
0
            goto exit;
12334
0
    }
12335
12336
0
#ifdef LIBXML_HTML_ENABLED
12337
0
    if (ctxt->html)
12338
0
        htmlCtxtReset(ctxt);
12339
0
    else
12340
0
#endif
12341
0
        xmlCtxtReset(ctxt);
12342
12343
0
    oldDict = ctxt->dict;
12344
0
    oldOptions = ctxt->options;
12345
0
    oldDictNames = ctxt->dictNames;
12346
0
    oldLoadSubset = ctxt->loadsubset;
12347
12348
    /*
12349
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12350
     */
12351
0
    if (doc->dict != NULL) {
12352
0
        ctxt->dict = doc->dict;
12353
0
    } else {
12354
0
        ctxt->options |= XML_PARSE_NODICT;
12355
0
        ctxt->dictNames = 0;
12356
0
    }
12357
12358
    /*
12359
     * Disable IDs
12360
     */
12361
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12362
12363
0
    ctxt->myDoc = doc;
12364
12365
0
#ifdef LIBXML_HTML_ENABLED
12366
0
    if (ctxt->html) {
12367
        /*
12368
         * When parsing in context, it makes no sense to add implied
12369
         * elements like html/body/etc...
12370
         */
12371
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12372
12373
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12374
0
    } else
12375
0
#endif
12376
0
    {
12377
0
        xmlCtxtInitializeLate(ctxt);
12378
12379
        /*
12380
         * initialize the SAX2 namespaces stack
12381
         */
12382
0
        cur = node;
12383
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12384
0
            xmlNsPtr ns = cur->nsDef;
12385
0
            xmlHashedString hprefix, huri;
12386
12387
0
            while (ns != NULL) {
12388
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12389
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12390
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12391
0
                    nsnr++;
12392
0
                ns = ns->next;
12393
0
            }
12394
0
            cur = cur->parent;
12395
0
        }
12396
12397
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12398
12399
0
        if (nsnr > 0)
12400
0
            xmlParserNsPop(ctxt, nsnr);
12401
0
    }
12402
12403
0
    ctxt->dict = oldDict;
12404
0
    ctxt->options = oldOptions;
12405
0
    ctxt->dictNames = oldDictNames;
12406
0
    ctxt->loadsubset = oldLoadSubset;
12407
0
    ctxt->myDoc = NULL;
12408
0
    ctxt->node = NULL;
12409
12410
0
exit:
12411
0
    xmlFreeInputStream(input);
12412
0
    return(list);
12413
0
}
12414
12415
/**
12416
 * xmlParseInNodeContext:
12417
 * @node:  the context node
12418
 * @data:  the input string
12419
 * @datalen:  the input string length in bytes
12420
 * @options:  a combination of xmlParserOption
12421
 * @listOut:  the return value for the set of parsed nodes
12422
 *
12423
 * Parse a well-balanced chunk of an XML document
12424
 * within the context (DTD, namespaces, etc ...) of the given node.
12425
 *
12426
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12427
 * the content production in the XML grammar:
12428
 *
12429
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12430
 *
12431
 * This function assumes the encoding of @node's document which is
12432
 * typically not what you want. A better alternative is
12433
 * xmlCtxtParseContent.
12434
 *
12435
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12436
 * error code otherwise
12437
 */
12438
xmlParserErrors
12439
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12440
0
                      int options, xmlNodePtr *listOut) {
12441
0
    xmlParserCtxtPtr ctxt;
12442
0
    xmlParserInputPtr input;
12443
0
    xmlDocPtr doc;
12444
0
    xmlNodePtr list;
12445
0
    xmlParserErrors ret;
12446
12447
0
    if (listOut == NULL)
12448
0
        return(XML_ERR_INTERNAL_ERROR);
12449
0
    *listOut = NULL;
12450
12451
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12452
0
        return(XML_ERR_INTERNAL_ERROR);
12453
12454
0
    doc = node->doc;
12455
0
    if (doc == NULL)
12456
0
        return(XML_ERR_INTERNAL_ERROR);
12457
12458
0
#ifdef LIBXML_HTML_ENABLED
12459
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12460
0
        ctxt = htmlNewParserCtxt();
12461
0
    }
12462
0
    else
12463
0
#endif
12464
0
        ctxt = xmlNewParserCtxt();
12465
12466
0
    if (ctxt == NULL)
12467
0
        return(XML_ERR_NO_MEMORY);
12468
12469
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12470
0
                                      (const char *) doc->encoding,
12471
0
                                      XML_INPUT_BUF_STATIC);
12472
0
    if (input == NULL) {
12473
0
        xmlFreeParserCtxt(ctxt);
12474
0
        return(XML_ERR_NO_MEMORY);
12475
0
    }
12476
12477
0
    xmlCtxtUseOptions(ctxt, options);
12478
12479
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12480
12481
0
    if (list == NULL) {
12482
0
        ret = ctxt->errNo;
12483
0
        if (ret == XML_ERR_ARGUMENT)
12484
0
            ret = XML_ERR_INTERNAL_ERROR;
12485
0
    } else {
12486
0
        ret = XML_ERR_OK;
12487
0
        *listOut = list;
12488
0
    }
12489
12490
0
    xmlFreeParserCtxt(ctxt);
12491
12492
0
    return(ret);
12493
0
}
12494
12495
#ifdef LIBXML_SAX1_ENABLED
12496
/**
12497
 * xmlParseBalancedChunkMemoryRecover:
12498
 * @doc:  the document the chunk pertains to (must not be NULL)
12499
 * @sax:  the SAX handler block (possibly NULL)
12500
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12501
 * @depth:  Used for loop detection, use 0
12502
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12503
 * @listOut:  the return value for the set of parsed nodes
12504
 * @recover: return nodes even if the data is broken (use 0)
12505
 *
12506
 * Parse a well-balanced chunk of an XML document
12507
 *
12508
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12509
 * the content production in the XML grammar:
12510
 *
12511
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12512
 *
12513
 * Returns 0 if the chunk is well balanced, or thehe parser error code
12514
 * otherwise.
12515
 *
12516
 * In case recover is set to 1, the nodelist will not be empty even if
12517
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12518
 * some extent.
12519
 */
12520
int
12521
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12522
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *listOut,
12523
0
     int recover) {
12524
0
    xmlParserCtxtPtr ctxt;
12525
0
    xmlParserInputPtr input;
12526
0
    xmlNodePtr list;
12527
0
    int ret;
12528
12529
0
    if (listOut != NULL)
12530
0
        *listOut = NULL;
12531
12532
0
    if (string == NULL)
12533
0
        return(XML_ERR_ARGUMENT);
12534
12535
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12536
0
    if (ctxt == NULL)
12537
0
        return(XML_ERR_NO_MEMORY);
12538
12539
0
    xmlCtxtInitializeLate(ctxt);
12540
12541
0
    ctxt->depth = depth;
12542
0
    ctxt->myDoc = doc;
12543
0
    if (recover) {
12544
0
        ctxt->options |= XML_PARSE_RECOVER;
12545
0
        ctxt->recovery = 1;
12546
0
    }
12547
12548
0
    input = xmlNewStringInputStream(ctxt, string);
12549
0
    if (input == NULL) {
12550
0
        ret = ctxt->errNo;
12551
0
        goto error;
12552
0
    }
12553
12554
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12555
0
    if (listOut != NULL)
12556
0
        *listOut = list;
12557
0
    else
12558
0
        xmlFreeNodeList(list);
12559
12560
0
    if (!ctxt->wellFormed)
12561
0
        ret = ctxt->errNo;
12562
0
    else
12563
0
        ret = XML_ERR_OK;
12564
12565
0
error:
12566
0
    xmlFreeInputStream(input);
12567
0
    xmlFreeParserCtxt(ctxt);
12568
0
    return(ret);
12569
0
}
12570
12571
/**
12572
 * xmlSAXParseEntity:
12573
 * @sax:  the SAX handler block
12574
 * @filename:  the filename
12575
 *
12576
 * DEPRECATED: Don't use.
12577
 *
12578
 * parse an XML external entity out of context and build a tree.
12579
 * It use the given SAX function block to handle the parsing callback.
12580
 * If sax is NULL, fallback to the default DOM tree building routines.
12581
 *
12582
 * [78] extParsedEnt ::= TextDecl? content
12583
 *
12584
 * This correspond to a "Well Balanced" chunk
12585
 *
12586
 * Returns the resulting document tree
12587
 */
12588
12589
xmlDocPtr
12590
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12591
0
    xmlDocPtr ret;
12592
0
    xmlParserCtxtPtr ctxt;
12593
12594
0
    ctxt = xmlCreateFileParserCtxt(filename);
12595
0
    if (ctxt == NULL) {
12596
0
  return(NULL);
12597
0
    }
12598
0
    if (sax != NULL) {
12599
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12600
0
            *ctxt->sax = *sax;
12601
0
        } else {
12602
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12603
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12604
0
        }
12605
0
        ctxt->userData = NULL;
12606
0
    }
12607
12608
0
    xmlParseExtParsedEnt(ctxt);
12609
12610
0
    if (ctxt->wellFormed) {
12611
0
  ret = ctxt->myDoc;
12612
0
    } else {
12613
0
        ret = NULL;
12614
0
        xmlFreeDoc(ctxt->myDoc);
12615
0
    }
12616
12617
0
    xmlFreeParserCtxt(ctxt);
12618
12619
0
    return(ret);
12620
0
}
12621
12622
/**
12623
 * xmlParseEntity:
12624
 * @filename:  the filename
12625
 *
12626
 * parse an XML external entity out of context and build a tree.
12627
 *
12628
 * [78] extParsedEnt ::= TextDecl? content
12629
 *
12630
 * This correspond to a "Well Balanced" chunk
12631
 *
12632
 * Returns the resulting document tree
12633
 */
12634
12635
xmlDocPtr
12636
0
xmlParseEntity(const char *filename) {
12637
0
    return(xmlSAXParseEntity(NULL, filename));
12638
0
}
12639
#endif /* LIBXML_SAX1_ENABLED */
12640
12641
/**
12642
 * xmlCreateEntityParserCtxt:
12643
 * @URL:  the entity URL
12644
 * @ID:  the entity PUBLIC ID
12645
 * @base:  a possible base for the target URI
12646
 *
12647
 * DEPRECATED: Don't use.
12648
 *
12649
 * Create a parser context for an external entity
12650
 * Automatic support for ZLIB/Compress compressed document is provided
12651
 * by default if found at compile-time.
12652
 *
12653
 * Returns the new parser context or NULL
12654
 */
12655
xmlParserCtxtPtr
12656
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12657
0
                    const xmlChar *base) {
12658
0
    xmlParserCtxtPtr ctxt;
12659
0
    xmlParserInputPtr input;
12660
0
    xmlChar *uri = NULL;
12661
12662
0
    ctxt = xmlNewParserCtxt();
12663
0
    if (ctxt == NULL)
12664
0
  return(NULL);
12665
12666
0
    if (base != NULL) {
12667
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12668
0
            goto error;
12669
0
        if (uri != NULL)
12670
0
            URL = uri;
12671
0
    }
12672
12673
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12674
0
                            XML_RESOURCE_UNKNOWN);
12675
0
    if (input == NULL)
12676
0
        goto error;
12677
12678
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12679
0
        xmlFreeInputStream(input);
12680
0
        goto error;
12681
0
    }
12682
12683
0
    xmlFree(uri);
12684
0
    return(ctxt);
12685
12686
0
error:
12687
0
    xmlFree(uri);
12688
0
    xmlFreeParserCtxt(ctxt);
12689
0
    return(NULL);
12690
0
}
12691
12692
/************************************************************************
12693
 *                  *
12694
 *    Front ends when parsing from a file     *
12695
 *                  *
12696
 ************************************************************************/
12697
12698
/**
12699
 * xmlCreateURLParserCtxt:
12700
 * @filename:  the filename or URL
12701
 * @options:  a combination of xmlParserOption
12702
 *
12703
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12704
 *
12705
 * Create a parser context for a file or URL content.
12706
 * Automatic support for ZLIB/Compress compressed document is provided
12707
 * by default if found at compile-time and for file accesses
12708
 *
12709
 * Returns the new parser context or NULL
12710
 */
12711
xmlParserCtxtPtr
12712
xmlCreateURLParserCtxt(const char *filename, int options)
12713
0
{
12714
0
    xmlParserCtxtPtr ctxt;
12715
0
    xmlParserInputPtr input;
12716
12717
0
    ctxt = xmlNewParserCtxt();
12718
0
    if (ctxt == NULL)
12719
0
  return(NULL);
12720
12721
0
    options |= XML_PARSE_UNZIP;
12722
12723
0
    xmlCtxtUseOptions(ctxt, options);
12724
0
    ctxt->linenumbers = 1;
12725
12726
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12727
0
    if (input == NULL) {
12728
0
  xmlFreeParserCtxt(ctxt);
12729
0
  return(NULL);
12730
0
    }
12731
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12732
0
        xmlFreeInputStream(input);
12733
0
        xmlFreeParserCtxt(ctxt);
12734
0
        return(NULL);
12735
0
    }
12736
12737
0
    return(ctxt);
12738
0
}
12739
12740
/**
12741
 * xmlCreateFileParserCtxt:
12742
 * @filename:  the filename
12743
 *
12744
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12745
 *
12746
 * Create a parser context for a file content.
12747
 * Automatic support for ZLIB/Compress compressed document is provided
12748
 * by default if found at compile-time.
12749
 *
12750
 * Returns the new parser context or NULL
12751
 */
12752
xmlParserCtxtPtr
12753
xmlCreateFileParserCtxt(const char *filename)
12754
0
{
12755
0
    return(xmlCreateURLParserCtxt(filename, 0));
12756
0
}
12757
12758
#ifdef LIBXML_SAX1_ENABLED
12759
/**
12760
 * xmlSAXParseFileWithData:
12761
 * @sax:  the SAX handler block
12762
 * @filename:  the filename
12763
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12764
 *             documents
12765
 * @data:  the userdata
12766
 *
12767
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12768
 *
12769
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12770
 * compressed document is provided by default if found at compile-time.
12771
 * It use the given SAX function block to handle the parsing callback.
12772
 * If sax is NULL, fallback to the default DOM tree building routines.
12773
 *
12774
 * User data (void *) is stored within the parser context in the
12775
 * context's _private member, so it is available nearly everywhere in libxml
12776
 *
12777
 * Returns the resulting document tree
12778
 */
12779
12780
xmlDocPtr
12781
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12782
0
                        int recovery, void *data) {
12783
0
    xmlDocPtr ret = NULL;
12784
0
    xmlParserCtxtPtr ctxt;
12785
0
    xmlParserInputPtr input;
12786
12787
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12788
0
    if (ctxt == NULL)
12789
0
  return(NULL);
12790
12791
0
    if (data != NULL)
12792
0
  ctxt->_private = data;
12793
12794
0
    if (recovery) {
12795
0
        ctxt->options |= XML_PARSE_RECOVER;
12796
0
        ctxt->recovery = 1;
12797
0
    }
12798
12799
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12800
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12801
0
    else
12802
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12803
12804
0
    if (input != NULL)
12805
0
        ret = xmlCtxtParseDocument(ctxt, input);
12806
12807
0
    xmlFreeParserCtxt(ctxt);
12808
0
    return(ret);
12809
0
}
12810
12811
/**
12812
 * xmlSAXParseFile:
12813
 * @sax:  the SAX handler block
12814
 * @filename:  the filename
12815
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12816
 *             documents
12817
 *
12818
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12819
 *
12820
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12821
 * compressed document is provided by default if found at compile-time.
12822
 * It use the given SAX function block to handle the parsing callback.
12823
 * If sax is NULL, fallback to the default DOM tree building routines.
12824
 *
12825
 * Returns the resulting document tree
12826
 */
12827
12828
xmlDocPtr
12829
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12830
0
                          int recovery) {
12831
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12832
0
}
12833
12834
/**
12835
 * xmlRecoverDoc:
12836
 * @cur:  a pointer to an array of xmlChar
12837
 *
12838
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
12839
 *
12840
 * parse an XML in-memory document and build a tree.
12841
 * In the case the document is not Well Formed, a attempt to build a
12842
 * tree is tried anyway
12843
 *
12844
 * Returns the resulting document tree or NULL in case of failure
12845
 */
12846
12847
xmlDocPtr
12848
0
xmlRecoverDoc(const xmlChar *cur) {
12849
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12850
0
}
12851
12852
/**
12853
 * xmlParseFile:
12854
 * @filename:  the filename
12855
 *
12856
 * DEPRECATED: Use xmlReadFile.
12857
 *
12858
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12859
 * compressed document is provided by default if found at compile-time.
12860
 *
12861
 * Returns the resulting document tree if the file was wellformed,
12862
 * NULL otherwise.
12863
 */
12864
12865
xmlDocPtr
12866
0
xmlParseFile(const char *filename) {
12867
0
    return(xmlSAXParseFile(NULL, filename, 0));
12868
0
}
12869
12870
/**
12871
 * xmlRecoverFile:
12872
 * @filename:  the filename
12873
 *
12874
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
12875
 *
12876
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12877
 * compressed document is provided by default if found at compile-time.
12878
 * In the case the document is not Well Formed, it attempts to build
12879
 * a tree anyway
12880
 *
12881
 * Returns the resulting document tree or NULL in case of failure
12882
 */
12883
12884
xmlDocPtr
12885
0
xmlRecoverFile(const char *filename) {
12886
0
    return(xmlSAXParseFile(NULL, filename, 1));
12887
0
}
12888
12889
12890
/**
12891
 * xmlSetupParserForBuffer:
12892
 * @ctxt:  an XML parser context
12893
 * @buffer:  a xmlChar * buffer
12894
 * @filename:  a file name
12895
 *
12896
 * DEPRECATED: Don't use.
12897
 *
12898
 * Setup the parser context to parse a new buffer; Clears any prior
12899
 * contents from the parser context. The buffer parameter must not be
12900
 * NULL, but the filename parameter can be
12901
 */
12902
void
12903
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12904
                             const char* filename)
12905
0
{
12906
0
    xmlParserInputPtr input;
12907
12908
0
    if ((ctxt == NULL) || (buffer == NULL))
12909
0
        return;
12910
12911
0
    xmlClearParserCtxt(ctxt);
12912
12913
0
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12914
0
                                      NULL, 0);
12915
0
    if (input == NULL)
12916
0
        return;
12917
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
12918
0
        xmlFreeInputStream(input);
12919
0
}
12920
12921
/**
12922
 * xmlSAXUserParseFile:
12923
 * @sax:  a SAX handler
12924
 * @user_data:  The user data returned on SAX callbacks
12925
 * @filename:  a file name
12926
 *
12927
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12928
 *
12929
 * parse an XML file and call the given SAX handler routines.
12930
 * Automatic support for ZLIB/Compress compressed document is provided
12931
 *
12932
 * Returns 0 in case of success or a error number otherwise
12933
 */
12934
int
12935
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12936
0
                    const char *filename) {
12937
0
    int ret = 0;
12938
0
    xmlParserCtxtPtr ctxt;
12939
12940
0
    ctxt = xmlCreateFileParserCtxt(filename);
12941
0
    if (ctxt == NULL) return -1;
12942
0
    if (sax != NULL) {
12943
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12944
0
            *ctxt->sax = *sax;
12945
0
        } else {
12946
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12947
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12948
0
        }
12949
0
  ctxt->userData = user_data;
12950
0
    }
12951
12952
0
    xmlParseDocument(ctxt);
12953
12954
0
    if (ctxt->wellFormed)
12955
0
  ret = 0;
12956
0
    else {
12957
0
        if (ctxt->errNo != 0)
12958
0
      ret = ctxt->errNo;
12959
0
  else
12960
0
      ret = -1;
12961
0
    }
12962
0
    if (ctxt->myDoc != NULL) {
12963
0
        xmlFreeDoc(ctxt->myDoc);
12964
0
  ctxt->myDoc = NULL;
12965
0
    }
12966
0
    xmlFreeParserCtxt(ctxt);
12967
12968
0
    return ret;
12969
0
}
12970
#endif /* LIBXML_SAX1_ENABLED */
12971
12972
/************************************************************************
12973
 *                  *
12974
 *    Front ends when parsing from memory     *
12975
 *                  *
12976
 ************************************************************************/
12977
12978
/**
12979
 * xmlCreateMemoryParserCtxt:
12980
 * @buffer:  a pointer to a char array
12981
 * @size:  the size of the array
12982
 *
12983
 * Create a parser context for an XML in-memory document. The input buffer
12984
 * must not contain a terminating null byte.
12985
 *
12986
 * Returns the new parser context or NULL
12987
 */
12988
xmlParserCtxtPtr
12989
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12990
0
    xmlParserCtxtPtr ctxt;
12991
0
    xmlParserInputPtr input;
12992
12993
0
    if (size < 0)
12994
0
  return(NULL);
12995
12996
0
    ctxt = xmlNewParserCtxt();
12997
0
    if (ctxt == NULL)
12998
0
  return(NULL);
12999
13000
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
13001
0
    if (input == NULL) {
13002
0
  xmlFreeParserCtxt(ctxt);
13003
0
  return(NULL);
13004
0
    }
13005
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13006
0
        xmlFreeInputStream(input);
13007
0
        xmlFreeParserCtxt(ctxt);
13008
0
        return(NULL);
13009
0
    }
13010
13011
0
    return(ctxt);
13012
0
}
13013
13014
#ifdef LIBXML_SAX1_ENABLED
13015
/**
13016
 * xmlSAXParseMemoryWithData:
13017
 * @sax:  the SAX handler block
13018
 * @buffer:  an pointer to a char array
13019
 * @size:  the size of the array
13020
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13021
 *             documents
13022
 * @data:  the userdata
13023
 *
13024
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13025
 *
13026
 * parse an XML in-memory block and use the given SAX function block
13027
 * to handle the parsing callback. If sax is NULL, fallback to the default
13028
 * DOM tree building routines.
13029
 *
13030
 * User data (void *) is stored within the parser context in the
13031
 * context's _private member, so it is available nearly everywhere in libxml
13032
 *
13033
 * Returns the resulting document tree
13034
 */
13035
13036
xmlDocPtr
13037
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13038
0
                          int size, int recovery, void *data) {
13039
0
    xmlDocPtr ret = NULL;
13040
0
    xmlParserCtxtPtr ctxt;
13041
0
    xmlParserInputPtr input;
13042
13043
0
    if (size < 0)
13044
0
        return(NULL);
13045
13046
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
13047
0
    if (ctxt == NULL)
13048
0
        return(NULL);
13049
13050
0
    if (data != NULL)
13051
0
  ctxt->_private=data;
13052
13053
0
    if (recovery) {
13054
0
        ctxt->options |= XML_PARSE_RECOVER;
13055
0
        ctxt->recovery = 1;
13056
0
    }
13057
13058
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
13059
0
                                      XML_INPUT_BUF_STATIC);
13060
13061
0
    if (input != NULL)
13062
0
        ret = xmlCtxtParseDocument(ctxt, input);
13063
13064
0
    xmlFreeParserCtxt(ctxt);
13065
0
    return(ret);
13066
0
}
13067
13068
/**
13069
 * xmlSAXParseMemory:
13070
 * @sax:  the SAX handler block
13071
 * @buffer:  an pointer to a char array
13072
 * @size:  the size of the array
13073
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13074
 *             documents
13075
 *
13076
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13077
 *
13078
 * parse an XML in-memory block and use the given SAX function block
13079
 * to handle the parsing callback. If sax is NULL, fallback to the default
13080
 * DOM tree building routines.
13081
 *
13082
 * Returns the resulting document tree
13083
 */
13084
xmlDocPtr
13085
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13086
0
            int size, int recovery) {
13087
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13088
0
}
13089
13090
/**
13091
 * xmlParseMemory:
13092
 * @buffer:  an pointer to a char array
13093
 * @size:  the size of the array
13094
 *
13095
 * DEPRECATED: Use xmlReadMemory.
13096
 *
13097
 * parse an XML in-memory block and build a tree.
13098
 *
13099
 * Returns the resulting document tree
13100
 */
13101
13102
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13103
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
13104
0
}
13105
13106
/**
13107
 * xmlRecoverMemory:
13108
 * @buffer:  an pointer to a char array
13109
 * @size:  the size of the array
13110
 *
13111
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13112
 *
13113
 * parse an XML in-memory block and build a tree.
13114
 * In the case the document is not Well Formed, an attempt to
13115
 * build a tree is tried anyway
13116
 *
13117
 * Returns the resulting document tree or NULL in case of error
13118
 */
13119
13120
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13121
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
13122
0
}
13123
13124
/**
13125
 * xmlSAXUserParseMemory:
13126
 * @sax:  a SAX handler
13127
 * @user_data:  The user data returned on SAX callbacks
13128
 * @buffer:  an in-memory XML document input
13129
 * @size:  the length of the XML document in bytes
13130
 *
13131
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13132
 *
13133
 * parse an XML in-memory buffer and call the given SAX handler routines.
13134
 *
13135
 * Returns 0 in case of success or a error number otherwise
13136
 */
13137
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13138
0
        const char *buffer, int size) {
13139
0
    int ret = 0;
13140
0
    xmlParserCtxtPtr ctxt;
13141
13142
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13143
0
    if (ctxt == NULL) return -1;
13144
0
    if (sax != NULL) {
13145
0
        if (sax->initialized == XML_SAX2_MAGIC) {
13146
0
            *ctxt->sax = *sax;
13147
0
        } else {
13148
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
13149
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
13150
0
        }
13151
0
  ctxt->userData = user_data;
13152
0
    }
13153
13154
0
    xmlParseDocument(ctxt);
13155
13156
0
    if (ctxt->wellFormed)
13157
0
  ret = 0;
13158
0
    else {
13159
0
        if (ctxt->errNo != 0)
13160
0
      ret = ctxt->errNo;
13161
0
  else
13162
0
      ret = -1;
13163
0
    }
13164
0
    if (ctxt->myDoc != NULL) {
13165
0
        xmlFreeDoc(ctxt->myDoc);
13166
0
  ctxt->myDoc = NULL;
13167
0
    }
13168
0
    xmlFreeParserCtxt(ctxt);
13169
13170
0
    return ret;
13171
0
}
13172
#endif /* LIBXML_SAX1_ENABLED */
13173
13174
/**
13175
 * xmlCreateDocParserCtxt:
13176
 * @str:  a pointer to an array of xmlChar
13177
 *
13178
 * Creates a parser context for an XML in-memory document.
13179
 *
13180
 * Returns the new parser context or NULL
13181
 */
13182
xmlParserCtxtPtr
13183
0
xmlCreateDocParserCtxt(const xmlChar *str) {
13184
0
    xmlParserCtxtPtr ctxt;
13185
0
    xmlParserInputPtr input;
13186
13187
0
    ctxt = xmlNewParserCtxt();
13188
0
    if (ctxt == NULL)
13189
0
  return(NULL);
13190
13191
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
13192
0
    if (input == NULL) {
13193
0
  xmlFreeParserCtxt(ctxt);
13194
0
  return(NULL);
13195
0
    }
13196
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13197
0
        xmlFreeInputStream(input);
13198
0
        xmlFreeParserCtxt(ctxt);
13199
0
        return(NULL);
13200
0
    }
13201
13202
0
    return(ctxt);
13203
0
}
13204
13205
#ifdef LIBXML_SAX1_ENABLED
13206
/**
13207
 * xmlSAXParseDoc:
13208
 * @sax:  the SAX handler block
13209
 * @cur:  a pointer to an array of xmlChar
13210
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13211
 *             documents
13212
 *
13213
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13214
 *
13215
 * parse an XML in-memory document and build a tree.
13216
 * It use the given SAX function block to handle the parsing callback.
13217
 * If sax is NULL, fallback to the default DOM tree building routines.
13218
 *
13219
 * Returns the resulting document tree
13220
 */
13221
13222
xmlDocPtr
13223
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13224
0
    xmlDocPtr ret;
13225
0
    xmlParserCtxtPtr ctxt;
13226
0
    xmlSAXHandlerPtr oldsax = NULL;
13227
13228
0
    if (cur == NULL) return(NULL);
13229
13230
13231
0
    ctxt = xmlCreateDocParserCtxt(cur);
13232
0
    if (ctxt == NULL) return(NULL);
13233
0
    if (sax != NULL) {
13234
0
        oldsax = ctxt->sax;
13235
0
        ctxt->sax = sax;
13236
0
        ctxt->userData = NULL;
13237
0
    }
13238
13239
0
    xmlParseDocument(ctxt);
13240
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13241
0
    else {
13242
0
       ret = NULL;
13243
0
       xmlFreeDoc(ctxt->myDoc);
13244
0
       ctxt->myDoc = NULL;
13245
0
    }
13246
0
    if (sax != NULL)
13247
0
  ctxt->sax = oldsax;
13248
0
    xmlFreeParserCtxt(ctxt);
13249
13250
0
    return(ret);
13251
0
}
13252
13253
/**
13254
 * xmlParseDoc:
13255
 * @cur:  a pointer to an array of xmlChar
13256
 *
13257
 * DEPRECATED: Use xmlReadDoc.
13258
 *
13259
 * parse an XML in-memory document and build a tree.
13260
 *
13261
 * Returns the resulting document tree
13262
 */
13263
13264
xmlDocPtr
13265
0
xmlParseDoc(const xmlChar *cur) {
13266
0
    return(xmlSAXParseDoc(NULL, cur, 0));
13267
0
}
13268
#endif /* LIBXML_SAX1_ENABLED */
13269
13270
/************************************************************************
13271
 *                  *
13272
 *  New set (2.6.0) of simpler and more flexible APIs   *
13273
 *                  *
13274
 ************************************************************************/
13275
13276
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope.
 *
 * The expansion is wrapped in do { } while (0) so that the macro acts
 * as a single statement: "DICT_FREE(x);" is safe as the body of an
 * unbraced if/else and leaves no stray empty statement behind.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
13287
13288
/**
13289
 * xmlCtxtReset:
13290
 * @ctxt: an XML parser context
13291
 *
13292
 * Reset a parser context
13293
 */
13294
void
13295
xmlCtxtReset(xmlParserCtxtPtr ctxt)
13296
6.46k
{
13297
6.46k
    xmlParserInputPtr input;
13298
6.46k
    xmlDictPtr dict;
13299
13300
6.46k
    if (ctxt == NULL)
13301
0
        return;
13302
13303
6.46k
    dict = ctxt->dict;
13304
13305
6.46k
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
13306
0
        xmlFreeInputStream(input);
13307
0
    }
13308
6.46k
    ctxt->inputNr = 0;
13309
6.46k
    ctxt->input = NULL;
13310
13311
6.46k
    ctxt->spaceNr = 0;
13312
6.46k
    if (ctxt->spaceTab != NULL) {
13313
6.46k
  ctxt->spaceTab[0] = -1;
13314
6.46k
  ctxt->space = &ctxt->spaceTab[0];
13315
6.46k
    } else {
13316
0
        ctxt->space = NULL;
13317
0
    }
13318
13319
13320
6.46k
    ctxt->nodeNr = 0;
13321
6.46k
    ctxt->node = NULL;
13322
13323
6.46k
    ctxt->nameNr = 0;
13324
6.46k
    ctxt->name = NULL;
13325
13326
6.46k
    ctxt->nsNr = 0;
13327
6.46k
    xmlParserNsReset(ctxt->nsdb);
13328
13329
6.46k
    DICT_FREE(ctxt->version);
13330
6.46k
    ctxt->version = NULL;
13331
6.46k
    DICT_FREE(ctxt->encoding);
13332
6.46k
    ctxt->encoding = NULL;
13333
6.46k
    DICT_FREE(ctxt->extSubURI);
13334
6.46k
    ctxt->extSubURI = NULL;
13335
6.46k
    DICT_FREE(ctxt->extSubSystem);
13336
6.46k
    ctxt->extSubSystem = NULL;
13337
13338
6.46k
    if (ctxt->directory != NULL) {
13339
0
        xmlFree(ctxt->directory);
13340
0
        ctxt->directory = NULL;
13341
0
    }
13342
13343
6.46k
    if (ctxt->myDoc != NULL)
13344
0
        xmlFreeDoc(ctxt->myDoc);
13345
6.46k
    ctxt->myDoc = NULL;
13346
13347
6.46k
    ctxt->standalone = -1;
13348
6.46k
    ctxt->hasExternalSubset = 0;
13349
6.46k
    ctxt->hasPErefs = 0;
13350
6.46k
    ctxt->html = 0;
13351
6.46k
    ctxt->instate = XML_PARSER_START;
13352
13353
6.46k
    ctxt->wellFormed = 1;
13354
6.46k
    ctxt->nsWellFormed = 1;
13355
6.46k
    ctxt->disableSAX = 0;
13356
6.46k
    ctxt->valid = 1;
13357
6.46k
    ctxt->record_info = 0;
13358
6.46k
    ctxt->checkIndex = 0;
13359
6.46k
    ctxt->endCheckState = 0;
13360
6.46k
    ctxt->inSubset = 0;
13361
6.46k
    ctxt->errNo = XML_ERR_OK;
13362
6.46k
    ctxt->depth = 0;
13363
6.46k
    ctxt->catalogs = NULL;
13364
6.46k
    ctxt->sizeentities = 0;
13365
6.46k
    ctxt->sizeentcopy = 0;
13366
6.46k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13367
13368
6.46k
    if (ctxt->attsDefault != NULL) {
13369
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13370
0
        ctxt->attsDefault = NULL;
13371
0
    }
13372
6.46k
    if (ctxt->attsSpecial != NULL) {
13373
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13374
0
        ctxt->attsSpecial = NULL;
13375
0
    }
13376
13377
6.46k
#ifdef LIBXML_CATALOG_ENABLED
13378
6.46k
    if (ctxt->catalogs != NULL)
13379
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13380
6.46k
#endif
13381
6.46k
    ctxt->nbErrors = 0;
13382
6.46k
    ctxt->nbWarnings = 0;
13383
6.46k
    if (ctxt->lastError.code != XML_ERR_OK)
13384
0
        xmlResetError(&ctxt->lastError);
13385
6.46k
}
13386
13387
/**
13388
 * xmlCtxtResetPush:
13389
 * @ctxt: an XML parser context
13390
 * @chunk:  a pointer to an array of chars
13391
 * @size:  number of chars in the array
13392
 * @filename:  an optional file name or URI
13393
 * @encoding:  the document encoding, or NULL
13394
 *
13395
 * Reset a push parser context
13396
 *
13397
 * Returns 0 in case of success and 1 in case of error
13398
 */
13399
int
13400
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13401
                 int size, const char *filename, const char *encoding)
13402
0
{
13403
0
    xmlParserInputPtr input;
13404
13405
0
    if (ctxt == NULL)
13406
0
        return(1);
13407
13408
0
    xmlCtxtReset(ctxt);
13409
13410
0
    input = xmlNewPushInput(filename, chunk, size);
13411
0
    if (input == NULL)
13412
0
        return(1);
13413
13414
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13415
0
        xmlFreeInputStream(input);
13416
0
        return(1);
13417
0
    }
13418
13419
0
    if (encoding != NULL)
13420
0
        xmlSwitchEncodingName(ctxt, encoding);
13421
13422
0
    return(0);
13423
0
}
13424
13425
static int
13426
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13427
256k
{
13428
256k
    int allMask;
13429
13430
256k
    if (ctxt == NULL)
13431
0
        return(-1);
13432
13433
    /*
13434
     * XInclude options aren't handled by the parser.
13435
     *
13436
     * XML_PARSE_XINCLUDE
13437
     * XML_PARSE_NOXINCNODE
13438
     * XML_PARSE_NOBASEFIX
13439
     */
13440
256k
    allMask = XML_PARSE_RECOVER |
13441
256k
              XML_PARSE_NOENT |
13442
256k
              XML_PARSE_DTDLOAD |
13443
256k
              XML_PARSE_DTDATTR |
13444
256k
              XML_PARSE_DTDVALID |
13445
256k
              XML_PARSE_NOERROR |
13446
256k
              XML_PARSE_NOWARNING |
13447
256k
              XML_PARSE_PEDANTIC |
13448
256k
              XML_PARSE_NOBLANKS |
13449
256k
#ifdef LIBXML_SAX1_ENABLED
13450
256k
              XML_PARSE_SAX1 |
13451
256k
#endif
13452
256k
              XML_PARSE_NONET |
13453
256k
              XML_PARSE_NODICT |
13454
256k
              XML_PARSE_NSCLEAN |
13455
256k
              XML_PARSE_NOCDATA |
13456
256k
              XML_PARSE_COMPACT |
13457
256k
              XML_PARSE_OLD10 |
13458
256k
              XML_PARSE_HUGE |
13459
256k
              XML_PARSE_OLDSAX |
13460
256k
              XML_PARSE_IGNORE_ENC |
13461
256k
              XML_PARSE_BIG_LINES |
13462
256k
              XML_PARSE_NO_XXE |
13463
256k
              XML_PARSE_UNZIP |
13464
256k
              XML_PARSE_NO_SYS_CATALOG |
13465
256k
              XML_PARSE_CATALOG_PI;
13466
13467
256k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13468
13469
    /*
13470
     * For some options, struct members are historically the source
13471
     * of truth. The values are initalized from global variables and
13472
     * old code could also modify them directly. Several older API
13473
     * functions that don't take an options argument rely on these
13474
     * deprecated mechanisms.
13475
     *
13476
     * Once public access to struct members and the globals are
13477
     * disabled, we can use the options bitmask as source of
13478
     * truth, making all these struct members obsolete.
13479
     *
13480
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13481
     * loading of the external subset.
13482
     */
13483
256k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13484
256k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13485
256k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13486
256k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13487
256k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13488
256k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13489
256k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13490
256k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13491
13492
256k
    if (options & XML_PARSE_HUGE) {
13493
249k
        if (ctxt->dict != NULL)
13494
249k
            xmlDictSetLimit(ctxt->dict, 0);
13495
249k
    }
13496
13497
256k
    ctxt->linenumbers = 1;
13498
13499
256k
    return(options & ~allMask);
13500
256k
}
13501
13502
/**
13503
 * xmlCtxtSetOptions:
13504
 * @ctxt: an XML parser context
13505
 * @options:  a bitmask of xmlParserOption values
13506
 *
13507
 * Applies the options to the parser context. Unset options are
13508
 * cleared.
13509
 *
13510
 * Available since 2.13.0. With older versions, you can use
13511
 * xmlCtxtUseOptions.
13512
 *
13513
 * XML_PARSE_RECOVER
13514
 *
13515
 * Enable "recovery" mode which allows non-wellformed documents.
13516
 * How this mode behaves exactly is unspecified and may change
13517
 * without further notice. Use of this feature is DISCOURAGED.
13518
 *
13519
 * Not supported by the push parser.
13520
 *
13521
 * XML_PARSE_NOENT
13522
 *
13523
 * Despite the confusing name, this option enables substitution
13524
 * of entities. The resulting tree won't contain any entity
13525
 * reference nodes.
13526
 *
13527
 * This option also enables loading of external entities (both
13528
 * general and parameter entities) which is dangerous. If you
13529
 * process untrusted data, it's recommended to set the
13530
 * XML_PARSE_NO_XXE option to disable loading of external
13531
 * entities.
13532
 *
13533
 * XML_PARSE_DTDLOAD
13534
 *
13535
 * Enables loading of an external DTD and the loading and
13536
 * substitution of external parameter entities. Has no effect
13537
 * if XML_PARSE_NO_XXE is set.
13538
 *
13539
 * XML_PARSE_DTDATTR
13540
 *
13541
 * Adds default attributes from the DTD to the result document.
13542
 *
13543
 * Implies XML_PARSE_DTDLOAD, but loading of external content
13544
 * can be disabled with XML_PARSE_NO_XXE.
13545
 *
13546
 * XML_PARSE_DTDVALID
13547
 *
13548
 * This option enables DTD validation which requires to load
13549
 * external DTDs and external entities (both general and
13550
 * parameter entities) unless XML_PARSE_NO_XXE was set.
13551
 *
13552
 * XML_PARSE_NO_XXE
13553
 *
13554
 * Disables loading of external DTDs or entities.
13555
 *
13556
 * Available since 2.13.0.
13557
 *
13558
 * XML_PARSE_NOERROR
13559
 *
13560
 * Disable error and warning reports to the error handlers.
13561
 * Errors are still accessible with xmlCtxtGetLastError.
13562
 *
13563
 * XML_PARSE_NOWARNING
13564
 *
13565
 * Disable warning reports.
13566
 *
13567
 * XML_PARSE_PEDANTIC
13568
 *
13569
 * Enable some pedantic warnings.
13570
 *
13571
 * XML_PARSE_NOBLANKS
13572
 *
13573
 * Remove some whitespace from the result document. Where to
13574
 * remove whitespace depends on DTD element declarations or a
13575
 * broken heuristic with unfixable bugs. Use of this option is
13576
 * DISCOURAGED.
13577
 *
13578
 * Not supported by the push parser.
13579
 *
13580
 * XML_PARSE_SAX1
13581
 *
13582
 * Always invoke the deprecated SAX1 startElement and endElement
13583
 * handlers. This option is DEPRECATED.
13584
 *
13585
 * XML_PARSE_NONET
13586
 *
13587
 * Disable network access with the builtin HTTP client.
13588
 *
13589
 * XML_PARSE_NODICT
13590
 *
13591
 * Create a document without interned strings, making all
13592
 * strings separate memory allocations.
13593
 *
13594
 * XML_PARSE_NSCLEAN
13595
 *
13596
 * Remove redundant namespace declarations from the result
13597
 * document.
13598
 *
13599
 * XML_PARSE_NOCDATA
13600
 *
13601
 * Output normal text nodes instead of CDATA nodes.
13602
 *
13603
 * XML_PARSE_COMPACT
13604
 *
13605
 * Store small strings directly in the node struct to save
13606
 * memory.
13607
 *
13608
 * XML_PARSE_OLD10
13609
 *
13610
 * Use old Name productions from before XML 1.0 Fifth Edition.
13611
 * This option is DEPRECATED.
13612
 *
13613
 * XML_PARSE_HUGE
13614
 *
13615
 * Relax some internal limits.
13616
 *
13617
 * Maximum size of text nodes, tags, comments, processing instructions,
13618
 * CDATA sections, entity values
13619
 *
13620
 * normal: 10M
13621
 * huge:    1B
13622
 *
13623
 * Maximum size of names, system literals, pubid literals
13624
 *
13625
 * normal: 50K
13626
 * huge:   10M
13627
 *
13628
 * Maximum nesting depth of elements
13629
 *
13630
 * normal:  256
13631
 * huge:   2048
13632
 *
13633
 * Maximum nesting depth of entities
13634
 *
13635
 * normal: 20
13636
 * huge:   40
13637
 *
13638
 * XML_PARSE_OLDSAX
13639
 *
13640
 * Enable an unspecified legacy mode for SAX parsers. This
13641
 * option is DEPRECATED.
13642
 *
13643
 * XML_PARSE_IGNORE_ENC
13644
 *
13645
 * Ignore the encoding in the XML declaration. This option is
13646
 * mostly unneeded these days. The only effect is to enforce
13647
 * UTF-8 decoding of ASCII-like data.
13648
 *
13649
 * XML_PARSE_BIG_LINES
13650
 *
13651
 * Enable reporting of line numbers larger than 65535.
13652
 *
13653
 * XML_PARSE_UNZIP
13654
 *
13655
 * Enable input decompression. Setting this option is discouraged
13656
 * to avoid zip bombs.
13657
 *
13658
 * Available since 2.14.0.
13659
 *
13660
 * XML_PARSE_NO_SYS_CATALOG
13661
 *
13662
 * Disables the global system XML catalog.
13663
 *
13664
 * Available since 2.14.0.
13665
 *
13666
 * XML_PARSE_CATALOG_PI
13667
 *
13668
 * Enable XML catalog processing instructions.
13669
 *
13670
 * Available since 2.14.0.
13671
 *
13672
 * Returns 0 in case of success, the set of unknown or unimplemented options
13673
 *         in case of error.
13674
 */
13675
int
13676
xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
13677
0
{
13678
0
#ifdef LIBXML_HTML_ENABLED
13679
0
    if ((ctxt != NULL) && (ctxt->html))
13680
0
        return(htmlCtxtSetOptions(ctxt, options));
13681
0
#endif
13682
13683
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13684
0
}
13685
13686
/**
13687
 * xmlCtxtGetOptions:
13688
 * @ctxt: an XML parser context
13689
 *
13690
 * Get the current options of the parser context.
13691
 *
13692
 * Available since 2.14.0.
13693
 *
13694
 * Returns the current options set in the parser context, or -1 if ctxt is NULL.
13695
 */
13696
int
13697
xmlCtxtGetOptions(xmlParserCtxtPtr ctxt)
13698
0
{
13699
0
    if (ctxt == NULL)
13700
0
        return(-1);
13701
13702
0
    return(ctxt->options);
13703
0
}
13704
13705
/**
13706
 * xmlCtxtUseOptions:
13707
 * @ctxt: an XML parser context
13708
 * @options:  a combination of xmlParserOption
13709
 *
13710
 * DEPRECATED: Use xmlCtxtSetOptions.
13711
 *
13712
 * Applies the options to the parser context. The following options
13713
 * are never cleared and can only be enabled:
13714
 *
13715
 * XML_PARSE_NOERROR
13716
 * XML_PARSE_NOWARNING
13717
 * XML_PARSE_NONET
13718
 * XML_PARSE_NSCLEAN
13719
 * XML_PARSE_NOCDATA
13720
 * XML_PARSE_COMPACT
13721
 * XML_PARSE_OLD10
13722
 * XML_PARSE_HUGE
13723
 * XML_PARSE_OLDSAX
13724
 * XML_PARSE_IGNORE_ENC
13725
 * XML_PARSE_BIG_LINES
13726
 *
13727
 * Returns 0 in case of success, the set of unknown or unimplemented options
13728
 *         in case of error.
13729
 */
13730
int
13731
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13732
256k
{
13733
256k
    int keepMask;
13734
13735
256k
#ifdef LIBXML_HTML_ENABLED
13736
256k
    if ((ctxt != NULL) && (ctxt->html))
13737
0
        return(htmlCtxtUseOptions(ctxt, options));
13738
256k
#endif
13739
13740
    /*
13741
     * For historic reasons, some options can only be enabled.
13742
     */
13743
256k
    keepMask = XML_PARSE_NOERROR |
13744
256k
               XML_PARSE_NOWARNING |
13745
256k
               XML_PARSE_NONET |
13746
256k
               XML_PARSE_NSCLEAN |
13747
256k
               XML_PARSE_NOCDATA |
13748
256k
               XML_PARSE_COMPACT |
13749
256k
               XML_PARSE_OLD10 |
13750
256k
               XML_PARSE_HUGE |
13751
256k
               XML_PARSE_OLDSAX |
13752
256k
               XML_PARSE_IGNORE_ENC |
13753
256k
               XML_PARSE_BIG_LINES;
13754
13755
256k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13756
256k
}
13757
13758
/**
13759
 * xmlCtxtSetMaxAmplification:
13760
 * @ctxt: an XML parser context
13761
 * @maxAmpl:  maximum amplification factor
13762
 *
13763
 * To protect against exponential entity expansion ("billion laughs"), the
13764
 * size of serialized output is (roughly) limited to the input size
13765
 * multiplied by this factor. The default value is 5.
13766
 *
13767
 * When working with documents making heavy use of entity expansion, it can
13768
 * be necessary to increase the value. For security reasons, this should only
13769
 * be considered when processing trusted input.
13770
 */
13771
void
13772
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
13773
0
{
13774
0
    ctxt->maxAmpl = maxAmpl;
13775
0
}
13776
13777
/**
13778
 * xmlCtxtParseDocument:
13779
 * @ctxt:  an XML parser context
13780
 * @input:  parser input
13781
 *
13782
 * Parse an XML document and return the resulting document tree.
13783
 * Takes ownership of the input object.
13784
 *
13785
 * Available since 2.13.0.
13786
 *
13787
 * Returns the resulting document tree or NULL
13788
 */
13789
xmlDocPtr
13790
xmlCtxtParseDocument(xmlParserCtxtPtr ctxt, xmlParserInputPtr input)
13791
6.46k
{
13792
6.46k
    xmlDocPtr ret = NULL;
13793
13794
6.46k
    if ((ctxt == NULL) || (input == NULL)) {
13795
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
13796
0
        xmlFreeInputStream(input);
13797
0
        return(NULL);
13798
0
    }
13799
13800
    /* assert(ctxt->inputNr == 0); */
13801
6.46k
    while (ctxt->inputNr > 0)
13802
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13803
13804
6.46k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13805
0
        xmlFreeInputStream(input);
13806
0
        return(NULL);
13807
0
    }
13808
13809
6.46k
    xmlParseDocument(ctxt);
13810
13811
6.46k
    ret = xmlCtxtGetDocument(ctxt);
13812
13813
    /* assert(ctxt->inputNr == 1); */
13814
12.9k
    while (ctxt->inputNr > 0)
13815
6.46k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13816
13817
6.46k
    return(ret);
13818
6.46k
}
13819
13820
/**
13821
 * xmlReadDoc:
13822
 * @cur:  a pointer to a zero terminated string
13823
 * @URL:  base URL (optional)
13824
 * @encoding:  the document encoding (optional)
13825
 * @options:  a combination of xmlParserOption
13826
 *
13827
 * Convenience function to parse an XML document from a
13828
 * zero-terminated string.
13829
 *
13830
 * See xmlCtxtReadDoc for details.
13831
 *
13832
 * Returns the resulting document tree
13833
 */
13834
xmlDocPtr
13835
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13836
           int options)
13837
0
{
13838
0
    xmlParserCtxtPtr ctxt;
13839
0
    xmlParserInputPtr input;
13840
0
    xmlDocPtr doc = NULL;
13841
13842
0
    ctxt = xmlNewParserCtxt();
13843
0
    if (ctxt == NULL)
13844
0
        return(NULL);
13845
13846
0
    xmlCtxtUseOptions(ctxt, options);
13847
13848
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13849
0
                                      XML_INPUT_BUF_STATIC);
13850
13851
0
    if (input != NULL)
13852
0
        doc = xmlCtxtParseDocument(ctxt, input);
13853
13854
0
    xmlFreeParserCtxt(ctxt);
13855
0
    return(doc);
13856
0
}
13857
13858
/**
13859
 * xmlReadFile:
13860
 * @filename:  a file or URL
13861
 * @encoding:  the document encoding (optional)
13862
 * @options:  a combination of xmlParserOption
13863
 *
13864
 * Convenience function to parse an XML file from the filesystem,
13865
 * the network or a global user-define resource loader.
13866
 *
13867
 * This function always enables the XML_PARSE_UNZIP option for
13868
 * backward compatibility. If a "-" filename is passed, it will
13869
 * read from stdin. Both of these features are potentially
13870
 * insecure and might be removed from later versions.
13871
 *
13872
 * See xmlCtxtReadFile for details.
13873
 *
13874
 * Returns the resulting document tree
13875
 */
13876
xmlDocPtr
13877
xmlReadFile(const char *filename, const char *encoding, int options)
13878
0
{
13879
0
    xmlParserCtxtPtr ctxt;
13880
0
    xmlParserInputPtr input;
13881
0
    xmlDocPtr doc = NULL;
13882
13883
0
    ctxt = xmlNewParserCtxt();
13884
0
    if (ctxt == NULL)
13885
0
        return(NULL);
13886
13887
0
    options |= XML_PARSE_UNZIP;
13888
13889
0
    xmlCtxtUseOptions(ctxt, options);
13890
13891
    /*
13892
     * Backward compatibility for users of command line utilities like
13893
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13894
     * should be removed at some point.
13895
     */
13896
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13897
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13898
0
                                      encoding, 0);
13899
0
    else
13900
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13901
13902
0
    if (input != NULL)
13903
0
        doc = xmlCtxtParseDocument(ctxt, input);
13904
13905
0
    xmlFreeParserCtxt(ctxt);
13906
0
    return(doc);
13907
0
}
13908
13909
/**
13910
 * xmlReadMemory:
13911
 * @buffer:  a pointer to a char array
13912
 * @size:  the size of the array
13913
 * @url:  base URL (optional)
13914
 * @encoding:  the document encoding (optional)
13915
 * @options:  a combination of xmlParserOption
13916
 *
13917
 * Parse an XML in-memory document and build a tree. The input buffer must
13918
 * not contain a terminating null byte.
13919
 *
13920
 * See xmlCtxtReadMemory for details.
13921
 *
13922
 * Returns the resulting document tree
13923
 */
13924
xmlDocPtr
13925
xmlReadMemory(const char *buffer, int size, const char *url,
13926
              const char *encoding, int options)
13927
0
{
13928
0
    xmlParserCtxtPtr ctxt;
13929
0
    xmlParserInputPtr input;
13930
0
    xmlDocPtr doc = NULL;
13931
13932
0
    if (size < 0)
13933
0
  return(NULL);
13934
13935
0
    ctxt = xmlNewParserCtxt();
13936
0
    if (ctxt == NULL)
13937
0
        return(NULL);
13938
13939
0
    xmlCtxtUseOptions(ctxt, options);
13940
13941
0
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13942
0
                                      XML_INPUT_BUF_STATIC);
13943
13944
0
    if (input != NULL)
13945
0
        doc = xmlCtxtParseDocument(ctxt, input);
13946
13947
0
    xmlFreeParserCtxt(ctxt);
13948
0
    return(doc);
13949
0
}
13950
13951
/**
13952
 * xmlReadFd:
13953
 * @fd:  an open file descriptor
13954
 * @URL:  base URL (optional)
13955
 * @encoding:  the document encoding (optional)
13956
 * @options:  a combination of xmlParserOption
13957
 *
13958
 * Parse an XML from a file descriptor and build a tree.
13959
 *
13960
 * See xmlCtxtReadFd for details.
13961
 *
13962
 * NOTE that the file descriptor will not be closed when the
13963
 * context is freed or reset.
13964
 *
13965
 * Returns the resulting document tree
13966
 */
13967
xmlDocPtr
13968
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13969
0
{
13970
0
    xmlParserCtxtPtr ctxt;
13971
0
    xmlParserInputPtr input;
13972
0
    xmlDocPtr doc = NULL;
13973
13974
0
    ctxt = xmlNewParserCtxt();
13975
0
    if (ctxt == NULL)
13976
0
        return(NULL);
13977
13978
0
    xmlCtxtUseOptions(ctxt, options);
13979
13980
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13981
13982
0
    if (input != NULL)
13983
0
        doc = xmlCtxtParseDocument(ctxt, input);
13984
13985
0
    xmlFreeParserCtxt(ctxt);
13986
0
    return(doc);
13987
0
}
13988
13989
/**
13990
 * xmlReadIO:
13991
 * @ioread:  an I/O read function
13992
 * @ioclose:  an I/O close function (optional)
13993
 * @ioctx:  an I/O handler
13994
 * @URL:  base URL (optional)
13995
 * @encoding:  the document encoding (optional)
13996
 * @options:  a combination of xmlParserOption
13997
 *
13998
 * Parse an XML document from I/O functions and context and build a tree.
13999
 *
14000
 * See xmlCtxtReadIO for details.
14001
 *
14002
 * Returns the resulting document tree
14003
 */
14004
xmlDocPtr
14005
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14006
          void *ioctx, const char *URL, const char *encoding, int options)
14007
0
{
14008
0
    xmlParserCtxtPtr ctxt;
14009
0
    xmlParserInputPtr input;
14010
0
    xmlDocPtr doc = NULL;
14011
14012
0
    ctxt = xmlNewParserCtxt();
14013
0
    if (ctxt == NULL)
14014
0
        return(NULL);
14015
14016
0
    xmlCtxtUseOptions(ctxt, options);
14017
14018
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
14019
0
                                  encoding, 0);
14020
14021
0
    if (input != NULL)
14022
0
        doc = xmlCtxtParseDocument(ctxt, input);
14023
14024
0
    xmlFreeParserCtxt(ctxt);
14025
0
    return(doc);
14026
0
}
14027
14028
/**
14029
 * xmlCtxtReadDoc:
14030
 * @ctxt:  an XML parser context
14031
 * @str:  a pointer to a zero terminated string
14032
 * @URL:  base URL (optional)
14033
 * @encoding:  the document encoding (optional)
14034
 * @options:  a combination of xmlParserOption
14035
 *
14036
 * Parse an XML in-memory document and build a tree.
14037
 *
14038
 * @URL is used as base to resolve external entities and for error
14039
 * reporting.
14040
 *
14041
 * See xmlCtxtUseOptions for details.
14042
 *
14043
 * Returns the resulting document tree
14044
 */
14045
xmlDocPtr
14046
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
14047
               const char *URL, const char *encoding, int options)
14048
0
{
14049
0
    xmlParserInputPtr input;
14050
14051
0
    if (ctxt == NULL)
14052
0
        return(NULL);
14053
14054
0
    xmlCtxtReset(ctxt);
14055
0
    xmlCtxtUseOptions(ctxt, options);
14056
14057
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
14058
0
                                      XML_INPUT_BUF_STATIC);
14059
0
    if (input == NULL)
14060
0
        return(NULL);
14061
14062
0
    return(xmlCtxtParseDocument(ctxt, input));
14063
0
}
14064
14065
/**
14066
 * xmlCtxtReadFile:
14067
 * @ctxt:  an XML parser context
14068
 * @filename:  a file or URL
14069
 * @encoding:  the document encoding (optional)
14070
 * @options:  a combination of xmlParserOption
14071
 *
14072
 * Parse an XML file from the filesystem, the network or a user-defined
14073
 * resource loader.
14074
 *
14075
 * This function always enables the XML_PARSE_UNZIP option for
14076
 * backward compatibility. This feature is potentially insecure
14077
 * and might be removed from later versions.
14078
 *
14079
 * Returns the resulting document tree
14080
 */
14081
xmlDocPtr
14082
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14083
                const char *encoding, int options)
14084
0
{
14085
0
    xmlParserInputPtr input;
14086
14087
0
    if (ctxt == NULL)
14088
0
        return(NULL);
14089
14090
0
    options |= XML_PARSE_UNZIP;
14091
14092
0
    xmlCtxtReset(ctxt);
14093
0
    xmlCtxtUseOptions(ctxt, options);
14094
14095
0
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
14096
0
    if (input == NULL)
14097
0
        return(NULL);
14098
14099
0
    return(xmlCtxtParseDocument(ctxt, input));
14100
0
}
14101
14102
/**
14103
 * xmlCtxtReadMemory:
14104
 * @ctxt:  an XML parser context
14105
 * @buffer:  a pointer to a char array
14106
 * @size:  the size of the array
14107
 * @URL:  base URL (optional)
14108
 * @encoding:  the document encoding (optional)
14109
 * @options:  a combination of xmlParserOption
14110
 *
14111
 * Parse an XML in-memory document and build a tree. The input buffer must
14112
 * not contain a terminating null byte.
14113
 *
14114
 * @URL is used as base to resolve external entities and for error
14115
 * reporting.
14116
 *
14117
 * See xmlCtxtUseOptions for details.
14118
 *
14119
 * Returns the resulting document tree
14120
 */
14121
xmlDocPtr
14122
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14123
                  const char *URL, const char *encoding, int options)
14124
0
{
14125
0
    xmlParserInputPtr input;
14126
14127
0
    if ((ctxt == NULL) || (size < 0))
14128
0
        return(NULL);
14129
14130
0
    xmlCtxtReset(ctxt);
14131
0
    xmlCtxtUseOptions(ctxt, options);
14132
14133
0
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
14134
0
                                      XML_INPUT_BUF_STATIC);
14135
0
    if (input == NULL)
14136
0
        return(NULL);
14137
14138
0
    return(xmlCtxtParseDocument(ctxt, input));
14139
0
}
14140
14141
/**
14142
 * xmlCtxtReadFd:
14143
 * @ctxt:  an XML parser context
14144
 * @fd:  an open file descriptor
14145
 * @URL:  base URL (optional)
14146
 * @encoding:  the document encoding (optional)
14147
 * @options:  a combination of xmlParserOption
14148
 *
14149
 * Parse an XML document from a file descriptor and build a tree.
14150
 *
14151
 * NOTE that the file descriptor will not be closed when the
14152
 * context is freed or reset.
14153
 *
14154
 * @URL is used as base to resolve external entities and for error
14155
 * reporting.
14156
 *
14157
 * See xmlCtxtUseOptions for details.
14158
 *
14159
 * Returns the resulting document tree
14160
 */
14161
xmlDocPtr
14162
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14163
              const char *URL, const char *encoding, int options)
14164
0
{
14165
0
    xmlParserInputPtr input;
14166
14167
0
    if (ctxt == NULL)
14168
0
        return(NULL);
14169
14170
0
    xmlCtxtReset(ctxt);
14171
0
    xmlCtxtUseOptions(ctxt, options);
14172
14173
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
14174
0
    if (input == NULL)
14175
0
        return(NULL);
14176
14177
0
    return(xmlCtxtParseDocument(ctxt, input));
14178
0
}
14179
14180
/**
14181
 * xmlCtxtReadIO:
14182
 * @ctxt:  an XML parser context
14183
 * @ioread:  an I/O read function
14184
 * @ioclose:  an I/O close function
14185
 * @ioctx:  an I/O handler
14186
 * @URL:  the base URL to use for the document
14187
 * @encoding:  the document encoding, or NULL
14188
 * @options:  a combination of xmlParserOption
14189
 *
14190
 * parse an XML document from I/O functions and source and build a tree.
14191
 * This reuses the existing @ctxt parser context
14192
 *
14193
 * @URL is used as base to resolve external entities and for error
14194
 * reporting.
14195
 *
14196
 * See xmlCtxtUseOptions for details.
14197
 *
14198
 * Returns the resulting document tree
14199
 */
14200
xmlDocPtr
14201
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14202
              xmlInputCloseCallback ioclose, void *ioctx,
14203
        const char *URL,
14204
              const char *encoding, int options)
14205
6.46k
{
14206
6.46k
    xmlParserInputPtr input;
14207
14208
6.46k
    if (ctxt == NULL)
14209
0
        return(NULL);
14210
14211
6.46k
    xmlCtxtReset(ctxt);
14212
6.46k
    xmlCtxtUseOptions(ctxt, options);
14213
14214
6.46k
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
14215
6.46k
                                  encoding, 0);
14216
6.46k
    if (input == NULL)
14217
0
        return(NULL);
14218
14219
6.46k
    return(xmlCtxtParseDocument(ctxt, input));
14220
6.46k
}
14221