Coverage Report

Created: 2026-03-31 11:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/workdir/UnpackedTarball/libxml2/parser.c
Line
Count
Source
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#include <libxml/HTMLparser.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
70
#include "private/buf.h"
71
#include "private/dict.h"
72
#include "private/entities.h"
73
#include "private/error.h"
74
#include "private/html.h"
75
#include "private/io.h"
76
#include "private/memory.h"
77
#include "private/parser.h"
78
79
30.3M
#define NS_INDEX_EMPTY  INT_MAX
80
5.17M
#define NS_INDEX_XML    (INT_MAX - 1)
81
11.0M
#define URI_HASH_EMPTY  0xD943A04E
82
47.2k
#define URI_HASH_XML    0xF0451F02
83
84
#ifndef STDIN_FILENO
85
0
  #define STDIN_FILENO 0
86
#endif
87
88
#ifndef SIZE_MAX
89
  #define SIZE_MAX ((size_t) -1)
90
#endif
91
92
964k
#define XML_MAX_ATTRS 100000000 /* 100 million */
93
94
struct _xmlStartTag {
95
    const xmlChar *prefix;
96
    const xmlChar *URI;
97
    int line;
98
    int nsNr;
99
};
100
101
typedef struct {
102
    void *saxData;
103
    unsigned prefixHashValue;
104
    unsigned uriHashValue;
105
    unsigned elementId;
106
    int oldIndex;
107
} xmlParserNsExtra;
108
109
typedef struct {
110
    unsigned hashValue;
111
    int index;
112
} xmlParserNsBucket;
113
114
struct _xmlParserNsData {
115
    xmlParserNsExtra *extra;
116
117
    unsigned hashSize;
118
    unsigned hashElems;
119
    xmlParserNsBucket *hash;
120
121
    unsigned elementId;
122
    int defaultNsIndex;
123
    int minNsIndex;
124
};
125
126
static int
127
xmlParseElementStart(xmlParserCtxtPtr ctxt);
128
129
static void
130
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
131
132
static xmlEntityPtr
133
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
134
135
static const xmlChar *
136
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
137
138
/************************************************************************
139
 *                  *
140
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
141
 *                  *
142
 ************************************************************************/
143
144
#define XML_PARSER_BIG_ENTITY 1000
145
#define XML_PARSER_LOT_ENTITY 5000
146
147
/*
148
 * Constants for protection against abusive entity expansion
149
 * ("billion laughs").
150
 */
151
152
/*
153
 * A certain amount of entity expansion which is always allowed.
154
 */
155
741k
#define XML_PARSER_ALLOWED_EXPANSION 1000000
156
157
/*
158
 * Fixed cost for each entity reference. This crudely models processing time
159
 * as well to protect, for example, against exponential expansion of empty
160
 * or very short entities.
161
 */
162
741k
#define XML_ENT_FIXED_COST 20
163
164
75.1M
#define XML_PARSER_BIG_BUFFER_SIZE 300
165
225k
#define XML_PARSER_BUFFER_SIZE 100
166
43.9k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
167
168
/**
169
 * XML_PARSER_CHUNK_SIZE
170
 *
171
 * When calling GROW, this is the minimal amount of data
172
 * the parser expects to have received. It is not a hard
173
 * limit but an optimization when reading strings like Names.
174
 * It is not strictly needed as long as the input's available characters
175
 * are followed by 0, which should be provided by the I/O level.
176
 */
177
#define XML_PARSER_CHUNK_SIZE 100
178
179
/**
180
 * xmlParserVersion:
181
 *
182
 * Constant string describing the internal version of the library
183
 */
184
const char *const
185
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
186
187
/*
188
 * List of XML prefixed PI allowed by W3C specs
189
 */
190
191
static const char* const xmlW3CPIs[] = {
192
    "xml-stylesheet",
193
    "xml-model",
194
    NULL
195
};
196
197
198
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
199
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
200
                                              const xmlChar **str);
201
202
static void
203
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
204
205
static int
206
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
207
208
/************************************************************************
209
 *                  *
210
 *    Some factorized error routines        *
211
 *                  *
212
 ************************************************************************/
213
214
static void
215
0
xmlErrMemory(xmlParserCtxtPtr ctxt) {
216
0
    xmlCtxtErrMemory(ctxt);
217
0
}
218
219
/**
220
 * xmlErrAttributeDup:
221
 * @ctxt:  an XML parser context
222
 * @prefix:  the attribute prefix
223
 * @localname:  the attribute localname
224
 *
225
 * Handle an attribute redefinition error
226
 */
227
static void
228
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
229
                   const xmlChar * localname)
230
233k
{
231
233k
    if (prefix == NULL)
232
181k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
233
181k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
234
181k
                   "Attribute %s redefined\n", localname);
235
52.5k
    else
236
52.5k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
237
52.5k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
238
52.5k
                   "Attribute %s:%s redefined\n", prefix, localname);
239
233k
}
240
241
/**
242
 * xmlFatalErrMsg:
243
 * @ctxt:  an XML parser context
244
 * @error:  the error number
245
 * @msg:  the error message
246
 *
247
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
248
 */
249
static void LIBXML_ATTR_FORMAT(3,0)
250
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
251
               const char *msg)
252
1.80M
{
253
1.80M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
254
1.80M
               NULL, NULL, NULL, 0, "%s", msg);
255
1.80M
}
256
257
/**
258
 * xmlWarningMsg:
259
 * @ctxt:  an XML parser context
260
 * @error:  the error number
261
 * @msg:  the error message
262
 * @str1:  extra data
263
 * @str2:  extra data
264
 *
265
 * Handle a warning.
266
 */
267
void LIBXML_ATTR_FORMAT(3,0)
268
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
269
              const char *msg, const xmlChar *str1, const xmlChar *str2)
270
27.4k
{
271
27.4k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
272
27.4k
               str1, str2, NULL, 0, msg, str1, str2);
273
27.4k
}
274
275
/**
276
 * xmlValidityError:
277
 * @ctxt:  an XML parser context
278
 * @error:  the error number
279
 * @msg:  the error message
280
 * @str1:  extra data
281
 *
282
 * Handle a validity error.
283
 */
284
static void LIBXML_ATTR_FORMAT(3,0)
285
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
286
              const char *msg, const xmlChar *str1, const xmlChar *str2)
287
0
{
288
0
    ctxt->valid = 0;
289
290
0
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
291
0
               str1, str2, NULL, 0, msg, str1, str2);
292
0
}
293
294
/**
295
 * xmlFatalErrMsgInt:
296
 * @ctxt:  an XML parser context
297
 * @error:  the error number
298
 * @msg:  the error message
299
 * @val:  an integer value
300
 *
301
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
302
 */
303
static void LIBXML_ATTR_FORMAT(3,0)
304
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
305
                  const char *msg, int val)
306
14.4k
{
307
14.4k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
308
14.4k
               NULL, NULL, NULL, val, msg, val);
309
14.4k
}
310
311
/**
312
 * xmlFatalErrMsgStrIntStr:
313
 * @ctxt:  an XML parser context
314
 * @error:  the error number
315
 * @msg:  the error message
316
 * @str1:  a string info
317
 * @val:  an integer value
318
 * @str2:  a string info
319
 *
320
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
321
 */
322
static void LIBXML_ATTR_FORMAT(3,0)
323
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
324
                  const char *msg, const xmlChar *str1, int val,
325
      const xmlChar *str2)
326
74.6k
{
327
74.6k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
328
74.6k
               str1, str2, NULL, val, msg, str1, val, str2);
329
74.6k
}
330
331
/**
332
 * xmlFatalErrMsgStr:
333
 * @ctxt:  an XML parser context
334
 * @error:  the error number
335
 * @msg:  the error message
336
 * @val:  a string value
337
 *
338
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
339
 */
340
static void LIBXML_ATTR_FORMAT(3,0)
341
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
342
                  const char *msg, const xmlChar * val)
343
127k
{
344
127k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
345
127k
               val, NULL, NULL, 0, msg, val);
346
127k
}
347
348
/**
349
 * xmlErrMsgStr:
350
 * @ctxt:  an XML parser context
351
 * @error:  the error number
352
 * @msg:  the error message
353
 * @val:  a string value
354
 *
355
 * Handle a non fatal parser error
356
 */
357
static void LIBXML_ATTR_FORMAT(3,0)
358
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
359
                  const char *msg, const xmlChar * val)
360
9.52k
{
361
9.52k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
362
9.52k
               val, NULL, NULL, 0, msg, val);
363
9.52k
}
364
365
/**
366
 * xmlNsErr:
367
 * @ctxt:  an XML parser context
368
 * @error:  the error number
369
 * @msg:  the message
370
 * @info1:  extra information string
371
 * @info2:  extra information string
372
 *
373
 * Handle a namespace error (XML_ERR_ERROR level) and clear ctxt->nsWellFormed
374
 */
375
static void LIBXML_ATTR_FORMAT(3,0)
376
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
377
         const char *msg,
378
         const xmlChar * info1, const xmlChar * info2,
379
         const xmlChar * info3)
380
1.80M
{
381
1.80M
    ctxt->nsWellFormed = 0;
382
383
1.80M
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
384
1.80M
               info1, info2, info3, 0, msg, info1, info2, info3);
385
1.80M
}
386
387
/**
388
 * xmlNsWarn:
389
 * @ctxt:  an XML parser context
390
 * @error:  the error number
391
 * @msg:  the message
392
 * @info1:  extra information string
393
 * @info2:  extra information string
394
 *
395
 * Handle a namespace warning
396
 */
397
static void LIBXML_ATTR_FORMAT(3,0)
398
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
399
         const char *msg,
400
         const xmlChar * info1, const xmlChar * info2,
401
         const xmlChar * info3)
402
67.7k
{
403
67.7k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
404
67.7k
               info1, info2, info3, 0, msg, info1, info2, info3);
405
67.7k
}
406
407
static void
408
2.22M
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
409
2.22M
    if (val > ULONG_MAX - *dst)
410
0
        *dst = ULONG_MAX;
411
2.22M
    else
412
2.22M
        *dst += val;
413
2.22M
}
414
415
static void
416
741k
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
417
741k
    if (val > ULONG_MAX - *dst)
418
0
        *dst = ULONG_MAX;
419
741k
    else
420
741k
        *dst += val;
421
741k
}
422
423
/**
424
 * xmlParserEntityCheck:
425
 * @ctxt:  parser context
426
 * @extra:  sum of unexpanded entity sizes
427
 *
428
 * Check for non-linear entity expansion behaviour.
429
 *
430
 * In some cases like xmlExpandEntityInAttValue, this function is called
431
 * for each, possibly nested entity and its unexpanded content length.
432
 *
433
 * In other cases like xmlParseReference, it's only called for each
434
 * top-level entity with its unexpanded content length plus the sum of
435
 * the unexpanded content lengths (plus fixed cost) of all nested
436
 * entities.
437
 *
438
 * Summing the unexpanded lengths also adds the length of the reference.
439
 * This is by design. Taking the length of the entity name into account
440
 * discourages attacks that try to waste CPU time with abusively long
441
 * entity names. See test/recurse/lol6.xml for example. Each call also
442
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
443
 * short entities.
444
 *
445
 * Returns 1 on error, 0 on success.
446
 */
447
static int
448
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
449
741k
{
450
741k
    unsigned long consumed;
451
741k
    unsigned long *expandedSize;
452
741k
    xmlParserInputPtr input = ctxt->input;
453
741k
    xmlEntityPtr entity = input->entity;
454
455
741k
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
456
0
        return(0);
457
458
    /*
459
     * Compute total consumed bytes so far, including input streams of
460
     * external entities.
461
     */
462
741k
    consumed = input->consumed;
463
741k
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
464
741k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
465
466
741k
    if (entity)
467
0
        expandedSize = &entity->expandedSize;
468
741k
    else
469
741k
        expandedSize = &ctxt->sizeentcopy;
470
471
    /*
472
     * Add extra cost and some fixed cost.
473
     */
474
741k
    xmlSaturatedAdd(expandedSize, extra);
475
741k
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
476
477
    /*
478
     * It's important to always use saturation arithmetic when tracking
479
     * entity sizes to make the size checks reliable. If "sizeentcopy"
480
     * overflows, we have to abort.
481
     */
482
741k
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
483
148
        ((*expandedSize >= ULONG_MAX) ||
484
148
         (*expandedSize / ctxt->maxAmpl > consumed))) {
485
148
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
486
148
                       "Maximum entity amplification factor exceeded, see "
487
148
                       "xmlCtxtSetMaxAmplification.\n");
488
148
        xmlHaltParser(ctxt);
489
148
        return(1);
490
148
    }
491
492
741k
    return(0);
493
741k
}
494
495
/************************************************************************
496
 *                  *
497
 *    Library wide options          *
498
 *                  *
499
 ************************************************************************/
500
501
/**
502
  * xmlHasFeature:
503
  * @feature: the feature to be examined
504
  *
505
  * Examines if the library has been compiled with a given feature.
506
  *
507
  * Returns a non-zero value if the feature exists. Returns zero (0)
508
  * if the feature does not exist or an unknown feature is
509
  * requested.
510
  */
511
int
512
xmlHasFeature(xmlFeature feature)
513
0
{
514
0
    switch (feature) {
515
0
  case XML_WITH_THREAD:
516
0
#ifdef LIBXML_THREAD_ENABLED
517
0
      return(1);
518
#else
519
      return(0);
520
#endif
521
0
        case XML_WITH_TREE:
522
0
            return(1);
523
0
        case XML_WITH_OUTPUT:
524
0
#ifdef LIBXML_OUTPUT_ENABLED
525
0
            return(1);
526
#else
527
            return(0);
528
#endif
529
0
        case XML_WITH_PUSH:
530
0
#ifdef LIBXML_PUSH_ENABLED
531
0
            return(1);
532
#else
533
            return(0);
534
#endif
535
0
        case XML_WITH_READER:
536
0
#ifdef LIBXML_READER_ENABLED
537
0
            return(1);
538
#else
539
            return(0);
540
#endif
541
0
        case XML_WITH_PATTERN:
542
0
#ifdef LIBXML_PATTERN_ENABLED
543
0
            return(1);
544
#else
545
            return(0);
546
#endif
547
0
        case XML_WITH_WRITER:
548
0
#ifdef LIBXML_WRITER_ENABLED
549
0
            return(1);
550
#else
551
            return(0);
552
#endif
553
0
        case XML_WITH_SAX1:
554
0
#ifdef LIBXML_SAX1_ENABLED
555
0
            return(1);
556
#else
557
            return(0);
558
#endif
559
0
        case XML_WITH_HTTP:
560
#ifdef LIBXML_HTTP_ENABLED
561
            return(1);
562
#else
563
0
            return(0);
564
0
#endif
565
0
        case XML_WITH_VALID:
566
0
#ifdef LIBXML_VALID_ENABLED
567
0
            return(1);
568
#else
569
            return(0);
570
#endif
571
0
        case XML_WITH_HTML:
572
0
#ifdef LIBXML_HTML_ENABLED
573
0
            return(1);
574
#else
575
            return(0);
576
#endif
577
0
        case XML_WITH_LEGACY:
578
0
            return(0);
579
0
        case XML_WITH_C14N:
580
0
#ifdef LIBXML_C14N_ENABLED
581
0
            return(1);
582
#else
583
            return(0);
584
#endif
585
0
        case XML_WITH_CATALOG:
586
0
#ifdef LIBXML_CATALOG_ENABLED
587
0
            return(1);
588
#else
589
            return(0);
590
#endif
591
0
        case XML_WITH_XPATH:
592
0
#ifdef LIBXML_XPATH_ENABLED
593
0
            return(1);
594
#else
595
            return(0);
596
#endif
597
0
        case XML_WITH_XPTR:
598
0
#ifdef LIBXML_XPTR_ENABLED
599
0
            return(1);
600
#else
601
            return(0);
602
#endif
603
0
        case XML_WITH_XINCLUDE:
604
0
#ifdef LIBXML_XINCLUDE_ENABLED
605
0
            return(1);
606
#else
607
            return(0);
608
#endif
609
0
        case XML_WITH_ICONV:
610
#ifdef LIBXML_ICONV_ENABLED
611
            return(1);
612
#else
613
0
            return(0);
614
0
#endif
615
0
        case XML_WITH_ISO8859X:
616
0
#ifdef LIBXML_ISO8859X_ENABLED
617
0
            return(1);
618
#else
619
            return(0);
620
#endif
621
0
        case XML_WITH_UNICODE:
622
0
            return(0);
623
0
        case XML_WITH_REGEXP:
624
0
#ifdef LIBXML_REGEXP_ENABLED
625
0
            return(1);
626
#else
627
            return(0);
628
#endif
629
0
        case XML_WITH_AUTOMATA:
630
0
#ifdef LIBXML_REGEXP_ENABLED
631
0
            return(1);
632
#else
633
            return(0);
634
#endif
635
0
        case XML_WITH_EXPR:
636
#ifdef LIBXML_EXPR_ENABLED
637
            return(1);
638
#else
639
0
            return(0);
640
0
#endif
641
0
        case XML_WITH_RELAXNG:
642
0
#ifdef LIBXML_RELAXNG_ENABLED
643
0
            return(1);
644
#else
645
            return(0);
646
#endif
647
0
        case XML_WITH_SCHEMAS:
648
0
#ifdef LIBXML_SCHEMAS_ENABLED
649
0
            return(1);
650
#else
651
            return(0);
652
#endif
653
0
        case XML_WITH_SCHEMATRON:
654
0
#ifdef LIBXML_SCHEMATRON_ENABLED
655
0
            return(1);
656
#else
657
            return(0);
658
#endif
659
0
        case XML_WITH_MODULES:
660
0
#ifdef LIBXML_MODULES_ENABLED
661
0
            return(1);
662
#else
663
            return(0);
664
#endif
665
0
        case XML_WITH_DEBUG:
666
0
#ifdef LIBXML_DEBUG_ENABLED
667
0
            return(1);
668
#else
669
            return(0);
670
#endif
671
0
        case XML_WITH_DEBUG_MEM:
672
0
            return(0);
673
0
        case XML_WITH_ZLIB:
674
#ifdef LIBXML_ZLIB_ENABLED
675
            return(1);
676
#else
677
0
            return(0);
678
0
#endif
679
0
        case XML_WITH_LZMA:
680
#ifdef LIBXML_LZMA_ENABLED
681
            return(1);
682
#else
683
0
            return(0);
684
0
#endif
685
0
        case XML_WITH_ICU:
686
#ifdef LIBXML_ICU_ENABLED
687
            return(1);
688
#else
689
0
            return(0);
690
0
#endif
691
0
        default:
692
0
      break;
693
0
     }
694
0
     return(0);
695
0
}
696
697
/************************************************************************
698
 *                  *
699
 *      Simple string buffer        *
700
 *                  *
701
 ************************************************************************/
702
703
typedef struct {
704
    xmlChar *mem;
705
    unsigned size;
706
    unsigned cap; /* size < cap */
707
    unsigned max; /* size <= max */
708
    xmlParserErrors code;
709
} xmlSBuf;
710
711
static void
712
21.7M
xmlSBufInit(xmlSBuf *buf, unsigned max) {
713
21.7M
    buf->mem = NULL;
714
21.7M
    buf->size = 0;
715
21.7M
    buf->cap = 0;
716
21.7M
    buf->max = max;
717
21.7M
    buf->code = XML_ERR_OK;
718
21.7M
}
719
720
static int
721
283k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
722
283k
    xmlChar *mem;
723
283k
    unsigned cap;
724
725
283k
    if (len >= UINT_MAX / 2 - buf->size) {
726
0
        if (buf->code == XML_ERR_OK)
727
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
728
0
        return(-1);
729
0
    }
730
731
283k
    cap = (buf->size + len) * 2;
732
283k
    if (cap < 240)
733
237k
        cap = 240;
734
735
283k
    mem = xmlRealloc(buf->mem, cap);
736
283k
    if (mem == NULL) {
737
0
        buf->code = XML_ERR_NO_MEMORY;
738
0
        return(-1);
739
0
    }
740
741
283k
    buf->mem = mem;
742
283k
    buf->cap = cap;
743
744
283k
    return(0);
745
283k
}
746
747
static void
748
5.26M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
749
5.26M
    if (buf->max - buf->size < len) {
750
0
        if (buf->code == XML_ERR_OK)
751
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
752
0
        return;
753
0
    }
754
755
5.26M
    if (buf->cap - buf->size <= len) {
756
280k
        if (xmlSBufGrow(buf, len) < 0)
757
0
            return;
758
280k
    }
759
760
5.26M
    if (len > 0)
761
5.26M
        memcpy(buf->mem + buf->size, str, len);
762
5.26M
    buf->size += len;
763
5.26M
}
764
765
static void
766
4.12M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
767
4.12M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
768
4.12M
}
769
770
static void
771
73.3k
xmlSBufAddChar(xmlSBuf *buf, int c) {
772
73.3k
    xmlChar *end;
773
774
73.3k
    if (buf->max - buf->size < 4) {
775
0
        if (buf->code == XML_ERR_OK)
776
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
777
0
        return;
778
0
    }
779
780
73.3k
    if (buf->cap - buf->size <= 4) {
781
3.28k
        if (xmlSBufGrow(buf, 4) < 0)
782
0
            return;
783
3.28k
    }
784
785
73.3k
    end = buf->mem + buf->size;
786
787
73.3k
    if (c < 0x80) {
788
18.6k
        *end = (xmlChar) c;
789
18.6k
        buf->size += 1;
790
54.6k
    } else {
791
54.6k
        buf->size += xmlCopyCharMultiByte(end, c);
792
54.6k
    }
793
73.3k
}
794
795
static void
796
3.43M
xmlSBufAddReplChar(xmlSBuf *buf) {
797
3.43M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
798
3.43M
}
799
800
static void
801
0
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
802
0
    if (buf->code == XML_ERR_NO_MEMORY)
803
0
        xmlCtxtErrMemory(ctxt);
804
0
    else
805
0
        xmlFatalErr(ctxt, buf->code, errMsg);
806
0
}
807
808
static xmlChar *
809
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
810
273k
              const char *errMsg) {
811
273k
    if (buf->mem == NULL) {
812
27.7k
        buf->mem = xmlMalloc(1);
813
27.7k
        if (buf->mem == NULL) {
814
0
            buf->code = XML_ERR_NO_MEMORY;
815
27.7k
        } else {
816
27.7k
            buf->mem[0] = 0;
817
27.7k
        }
818
246k
    } else {
819
246k
        buf->mem[buf->size] = 0;
820
246k
    }
821
822
273k
    if (buf->code == XML_ERR_OK) {
823
273k
        if (sizeOut != NULL)
824
194k
            *sizeOut = buf->size;
825
273k
        return(buf->mem);
826
273k
    }
827
828
0
    xmlSBufReportError(buf, ctxt, errMsg);
829
830
0
    xmlFree(buf->mem);
831
832
0
    if (sizeOut != NULL)
833
0
        *sizeOut = 0;
834
0
    return(NULL);
835
273k
}
836
837
static void
838
21.5M
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
839
21.5M
    if (buf->code != XML_ERR_OK)
840
0
        xmlSBufReportError(buf, ctxt, errMsg);
841
842
21.5M
    xmlFree(buf->mem);
843
21.5M
}
844
845
static int
846
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
847
6.13M
                    const char *errMsg) {
848
6.13M
    int c = str[0];
849
6.13M
    int c1 = str[1];
850
851
6.13M
    if ((c1 & 0xC0) != 0x80)
852
1.55M
        goto encoding_error;
853
854
4.58M
    if (c < 0xE0) {
855
        /* 2-byte sequence */
856
1.29M
        if (c < 0xC2)
857
419k
            goto encoding_error;
858
859
871k
        return(2);
860
3.29M
    } else {
861
3.29M
        int c2 = str[2];
862
863
3.29M
        if ((c2 & 0xC0) != 0x80)
864
20.2k
            goto encoding_error;
865
866
3.27M
        if (c < 0xF0) {
867
            /* 3-byte sequence */
868
3.16M
            if (c == 0xE0) {
869
                /* overlong */
870
202k
                if (c1 < 0xA0)
871
1.43k
                    goto encoding_error;
872
2.96M
            } else if (c == 0xED) {
873
                /* surrogate */
874
12.5k
                if (c1 >= 0xA0)
875
6.96k
                    goto encoding_error;
876
2.95M
            } else if (c == 0xEF) {
877
                /* U+FFFE and U+FFFF are invalid Chars */
878
1.46M
                if ((c1 == 0xBF) && (c2 >= 0xBE))
879
31.8k
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
880
1.46M
            }
881
882
3.16M
            return(3);
883
3.16M
        } else {
884
            /* 4-byte sequence */
885
105k
            if ((str[3] & 0xC0) != 0x80)
886
4.98k
                goto encoding_error;
887
100k
            if (c == 0xF0) {
888
                /* overlong */
889
3.08k
                if (c1 < 0x90)
890
1.08k
                    goto encoding_error;
891
97.3k
            } else if (c >= 0xF4) {
892
                /* greater than 0x10FFFF */
893
6.13k
                if ((c > 0xF4) || (c1 >= 0x90))
894
2.83k
                    goto encoding_error;
895
6.13k
            }
896
897
96.4k
            return(4);
898
100k
        }
899
3.27M
    }
900
901
2.00M
encoding_error:
902
    /* Only report the first error */
903
2.00M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
904
5.04k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
905
5.04k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
906
5.04k
    }
907
908
2.00M
    return(0);
909
4.58M
}
910
911
/************************************************************************
912
 *                  *
913
 *    SAX2 defaulted attributes handling      *
914
 *                  *
915
 ************************************************************************/
916
917
/**
918
 * xmlCtxtInitializeLate:
919
 * @ctxt:  an XML parser context
920
 *
921
 * Final initialization of the parser context before starting to parse.
922
 *
923
 * This accounts for users modifying struct members of parser context
924
 * directly.
925
 */
926
static void
927
217k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
928
217k
    xmlSAXHandlerPtr sax;
929
930
    /* Avoid unused variable warning if features are disabled. */
931
217k
    (void) sax;
932
933
    /*
934
     * Changing the SAX struct directly is still widespread practice
935
     * in internal and external code.
936
     */
937
217k
    if (ctxt == NULL) return;
938
217k
    sax = ctxt->sax;
939
217k
#ifdef LIBXML_SAX1_ENABLED
940
    /*
941
     * Only enable SAX2 if there are SAX2 element handlers, except when there
942
     * are no element handlers at all.
943
     */
944
217k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
945
217k
        (sax) &&
946
217k
        (sax->initialized == XML_SAX2_MAGIC) &&
947
217k
        ((sax->startElementNs != NULL) ||
948
0
         (sax->endElementNs != NULL) ||
949
0
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
950
217k
        ctxt->sax2 = 1;
951
#else
952
    ctxt->sax2 = 1;
953
#endif /* LIBXML_SAX1_ENABLED */
954
955
    /*
956
     * Some users replace the dictionary directly in the context struct.
957
     * We really need an API function to do that cleanly.
958
     */
959
217k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
960
217k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
961
217k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
962
217k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
963
217k
    (ctxt->str_xml_ns == NULL)) {
964
0
        xmlErrMemory(ctxt);
965
0
    }
966
967
217k
    xmlDictSetLimit(ctxt->dict,
968
217k
                    (ctxt->options & XML_PARSE_HUGE) ?
969
212k
                        0 :
970
217k
                        XML_MAX_DICTIONARY_LIMIT);
971
217k
}
972
973
typedef struct {
974
    xmlHashedString prefix;
975
    xmlHashedString name;
976
    xmlHashedString value;
977
    const xmlChar *valueEnd;
978
    int external;
979
    int expandedSize;
980
} xmlDefAttr;
981
982
typedef struct _xmlDefAttrs xmlDefAttrs;
983
typedef xmlDefAttrs *xmlDefAttrsPtr;
984
struct _xmlDefAttrs {
985
    int nbAttrs;  /* number of defaulted attributes on that element */
986
    int maxAttrs;       /* the size of the array */
987
#if __STDC_VERSION__ >= 199901L
988
    /* Using a C99 flexible array member avoids UBSan errors. */
989
    xmlDefAttr attrs[] ATTRIBUTE_COUNTED_BY(maxAttrs);
990
#else
991
    xmlDefAttr attrs[1];
992
#endif
993
};
994
995
/**
996
 * xmlAttrNormalizeSpace:
997
 * @src: the source string
998
 * @dst: the target string
999
 *
1000
 * Normalize the space in non CDATA attribute values:
1001
 * If the attribute type is not CDATA, then the XML processor MUST further
1002
 * process the normalized attribute value by discarding any leading and
1003
 * trailing space (#x20) characters, and by replacing sequences of space
1004
 * (#x20) characters by a single space (#x20) character.
1005
 * Note that dst needs to be at least as large as src, and if one doesn't need
1006
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1007
 * passing src as dst is just fine.
1008
 *
1009
 * Returns NULL if an argument is NULL or if dst ends up equal to src (in-place
1010
 *         call that removed nothing), else dst advanced to the terminating 0.
1011
 */
1012
static xmlChar *
1013
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1014
32.2k
{
1015
32.2k
    if ((src == NULL) || (dst == NULL))
1016
0
        return(NULL);
1017
1018
46.9k
    while (*src == 0x20) src++;
1019
778k
    while (*src != 0) {
1020
746k
  if (*src == 0x20) {
1021
122k
      while (*src == 0x20) src++;
1022
32.3k
      if (*src != 0)
1023
27.4k
    *dst++ = 0x20;
1024
713k
  } else {
1025
713k
      *dst++ = *src++;
1026
713k
  }
1027
746k
    }
1028
32.2k
    *dst = 0;
1029
32.2k
    if (dst == src)
1030
20.4k
       return(NULL);
1031
11.8k
    return(dst);
1032
32.2k
}
1033
1034
/**
1035
 * xmlAddDefAttrs:
1036
 * @ctxt:  an XML parser context
1037
 * @fullname:  the element fullname
1038
 * @fullattr:  the attribute fullname
1039
 * @value:  the attribute value
1040
 *
1041
 * Add a defaulted attribute for an element
1042
 */
1043
static void
1044
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1045
               const xmlChar *fullname,
1046
               const xmlChar *fullattr,
1047
35.2k
               const xmlChar *value) {
1048
35.2k
    xmlDefAttrsPtr defaults;
1049
35.2k
    xmlDefAttr *attr;
1050
35.2k
    int len, expandedSize;
1051
35.2k
    xmlHashedString name;
1052
35.2k
    xmlHashedString prefix;
1053
35.2k
    xmlHashedString hvalue;
1054
35.2k
    const xmlChar *localname;
1055
1056
    /*
1057
     * Allows to detect attribute redefinitions
1058
     */
1059
35.2k
    if (ctxt->attsSpecial != NULL) {
1060
32.5k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1061
9.28k
      return;
1062
32.5k
    }
1063
1064
25.9k
    if (ctxt->attsDefault == NULL) {
1065
2.79k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1066
2.79k
  if (ctxt->attsDefault == NULL)
1067
0
      goto mem_error;
1068
2.79k
    }
1069
1070
    /*
1071
     * split the element name into prefix:localname , the string found
1072
     * are within the DTD and then not associated to namespace names.
1073
     */
1074
25.9k
    localname = xmlSplitQName3(fullname, &len);
1075
25.9k
    if (localname == NULL) {
1076
23.3k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1077
23.3k
  prefix.name = NULL;
1078
23.3k
    } else {
1079
2.65k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1080
2.65k
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1081
2.65k
        if (prefix.name == NULL)
1082
0
            goto mem_error;
1083
2.65k
    }
1084
25.9k
    if (name.name == NULL)
1085
0
        goto mem_error;
1086
1087
    /*
1088
     * make sure there is some storage
1089
     */
1090
25.9k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1091
25.9k
    if ((defaults == NULL) ||
1092
19.7k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1093
9.19k
        xmlDefAttrsPtr temp;
1094
9.19k
        int newSize;
1095
1096
9.19k
        if (defaults == NULL) {
1097
6.21k
            newSize = 4;
1098
6.21k
        } else {
1099
2.97k
            if ((defaults->maxAttrs >= XML_MAX_ATTRS) ||
1100
2.97k
                ((size_t) defaults->maxAttrs >
1101
2.97k
                     SIZE_MAX / 2 / sizeof(temp[0]) - sizeof(*defaults)))
1102
0
                goto mem_error;
1103
1104
2.97k
            if (defaults->maxAttrs > XML_MAX_ATTRS / 2)
1105
0
                newSize = XML_MAX_ATTRS;
1106
2.97k
            else
1107
2.97k
                newSize = defaults->maxAttrs * 2;
1108
2.97k
        }
1109
9.19k
        temp = xmlRealloc(defaults,
1110
9.19k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1111
9.19k
  if (temp == NULL)
1112
0
      goto mem_error;
1113
9.19k
        if (defaults == NULL)
1114
6.21k
            temp->nbAttrs = 0;
1115
9.19k
  temp->maxAttrs = newSize;
1116
9.19k
        defaults = temp;
1117
9.19k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1118
9.19k
                          defaults, NULL) < 0) {
1119
0
      xmlFree(defaults);
1120
0
      goto mem_error;
1121
0
  }
1122
9.19k
    }
1123
1124
    /*
1125
     * Split the attribute name into prefix:localname , the string found
1126
     * are within the DTD and hen not associated to namespace names.
1127
     */
1128
25.9k
    localname = xmlSplitQName3(fullattr, &len);
1129
25.9k
    if (localname == NULL) {
1130
12.4k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1131
12.4k
  prefix.name = NULL;
1132
13.5k
    } else {
1133
13.5k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1134
13.5k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1135
13.5k
        if (prefix.name == NULL)
1136
0
            goto mem_error;
1137
13.5k
    }
1138
25.9k
    if (name.name == NULL)
1139
0
        goto mem_error;
1140
1141
    /* intern the string and precompute the end */
1142
25.9k
    len = strlen((const char *) value);
1143
25.9k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1144
25.9k
    if (hvalue.name == NULL)
1145
0
        goto mem_error;
1146
1147
25.9k
    expandedSize = strlen((const char *) name.name);
1148
25.9k
    if (prefix.name != NULL)
1149
13.5k
        expandedSize += strlen((const char *) prefix.name);
1150
25.9k
    expandedSize += len;
1151
1152
25.9k
    attr = &defaults->attrs[defaults->nbAttrs++];
1153
25.9k
    attr->name = name;
1154
25.9k
    attr->prefix = prefix;
1155
25.9k
    attr->value = hvalue;
1156
25.9k
    attr->valueEnd = hvalue.name + len;
1157
25.9k
    attr->external = PARSER_EXTERNAL(ctxt);
1158
25.9k
    attr->expandedSize = expandedSize;
1159
1160
25.9k
    return;
1161
1162
0
mem_error:
1163
0
    xmlErrMemory(ctxt);
1164
0
}
1165
1166
/**
1167
 * xmlAddSpecialAttr:
1168
 * @ctxt:  an XML parser context
1169
 * @fullname:  the element fullname
1170
 * @fullattr:  the attribute fullname
1171
 * @type:  the attribute type
1172
 *
1173
 * Register this attribute type
1174
 */
1175
static void
1176
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1177
      const xmlChar *fullname,
1178
      const xmlChar *fullattr,
1179
      int type)
1180
41.1k
{
1181
41.1k
    if (ctxt->attsSpecial == NULL) {
1182
3.12k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1183
3.12k
  if (ctxt->attsSpecial == NULL)
1184
0
      goto mem_error;
1185
3.12k
    }
1186
1187
41.1k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1188
41.1k
                    XML_INT_TO_PTR(type)) < 0)
1189
0
        goto mem_error;
1190
41.1k
    return;
1191
1192
41.1k
mem_error:
1193
0
    xmlErrMemory(ctxt);
1194
0
}
1195
1196
/**
1197
 * xmlCleanSpecialAttrCallback:
1198
 *
1199
 * Removes CDATA attributes from the special attribute table
1200
 */
1201
static void
1202
xmlCleanSpecialAttrCallback(void *payload, void *data,
1203
                            const xmlChar *fullname, const xmlChar *fullattr,
1204
29.0k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1205
29.0k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1206
1207
29.0k
    if (XML_PTR_TO_INT(payload) == XML_ATTRIBUTE_CDATA) {
1208
2.35k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1209
2.35k
    }
1210
29.0k
}
1211
1212
/**
1213
 * xmlCleanSpecialAttr:
1214
 * @ctxt:  an XML parser context
1215
 *
1216
 * Trim the list of attributes defined to remove all those of type
1217
 * CDATA as they are not special. This call should be done when finishing
1218
 * to parse the DTD and before starting to parse the document root.
1219
 */
1220
static void
1221
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1222
7.59k
{
1223
7.59k
    if (ctxt->attsSpecial == NULL)
1224
4.47k
        return;
1225
1226
3.12k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1227
1228
3.12k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1229
218
        xmlHashFree(ctxt->attsSpecial, NULL);
1230
218
        ctxt->attsSpecial = NULL;
1231
218
    }
1232
3.12k
}
1233
1234
/**
1235
 * xmlCheckLanguageID:
1236
 * @lang:  pointer to the string value
1237
 *
1238
 * DEPRECATED: Internal function, do not use.
1239
 *
1240
 * Checks that the value conforms to the LanguageID production:
1241
 *
1242
 * NOTE: this is somewhat deprecated, those productions were removed from
1243
 *       the XML Second edition.
1244
 *
1245
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1246
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1247
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1248
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1249
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1250
 * [38] Subcode ::= ([a-z] | [A-Z])+
1251
 *
1252
 * The current REC reference the successors of RFC 1766, currently 5646
1253
 *
1254
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1255
 * langtag       = language
1256
 *                 ["-" script]
1257
 *                 ["-" region]
1258
 *                 *("-" variant)
1259
 *                 *("-" extension)
1260
 *                 ["-" privateuse]
1261
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1262
 *                 ["-" extlang]       ; sometimes followed by
1263
 *                                     ; extended language subtags
1264
 *               / 4ALPHA              ; or reserved for future use
1265
 *               / 5*8ALPHA            ; or registered language subtag
1266
 *
1267
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1268
 *                 *2("-" 3ALPHA)      ; permanently reserved
1269
 *
1270
 * script        = 4ALPHA              ; ISO 15924 code
1271
 *
1272
 * region        = 2ALPHA              ; ISO 3166-1 code
1273
 *               / 3DIGIT              ; UN M.49 code
1274
 *
1275
 * variant       = 5*8alphanum         ; registered variants
1276
 *               / (DIGIT 3alphanum)
1277
 *
1278
 * extension     = singleton 1*("-" (2*8alphanum))
1279
 *
1280
 *                                     ; Single alphanumerics
1281
 *                                     ; "x" reserved for private use
1282
 * singleton     = DIGIT               ; 0 - 9
1283
 *               / %x41-57             ; A - W
1284
 *               / %x59-5A             ; Y - Z
1285
 *               / %x61-77             ; a - w
1286
 *               / %x79-7A             ; y - z
1287
 *
1288
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1289
 * The parser below doesn't try to cope with extension or privateuse
1290
 * that could be added but that's not interoperable anyway
1291
 *
1292
 * Returns 1 if correct 0 otherwise
1293
 **/
1294
int
1295
xmlCheckLanguageID(const xmlChar * lang)
1296
0
{
1297
0
    const xmlChar *cur = lang, *nxt;
1298
1299
0
    if (cur == NULL)
1300
0
        return (0);
1301
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1302
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1303
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1304
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1305
        /*
1306
         * Still allow IANA code and user code which were coming
1307
         * from the previous version of the XML-1.0 specification
1308
         * it's deprecated but we should not fail
1309
         */
1310
0
        cur += 2;
1311
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1312
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1313
0
            cur++;
1314
0
        return(cur[0] == 0);
1315
0
    }
1316
0
    nxt = cur;
1317
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1318
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1319
0
           nxt++;
1320
0
    if (nxt - cur >= 4) {
1321
        /*
1322
         * Reserved
1323
         */
1324
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1325
0
            return(0);
1326
0
        return(1);
1327
0
    }
1328
0
    if (nxt - cur < 2)
1329
0
        return(0);
1330
    /* we got an ISO 639 code */
1331
0
    if (nxt[0] == 0)
1332
0
        return(1);
1333
0
    if (nxt[0] != '-')
1334
0
        return(0);
1335
1336
0
    nxt++;
1337
0
    cur = nxt;
1338
    /* now we can have extlang or script or region or variant */
1339
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1340
0
        goto region_m49;
1341
1342
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1343
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1344
0
           nxt++;
1345
0
    if (nxt - cur == 4)
1346
0
        goto script;
1347
0
    if (nxt - cur == 2)
1348
0
        goto region;
1349
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1350
0
        goto variant;
1351
0
    if (nxt - cur != 3)
1352
0
        return(0);
1353
    /* we parsed an extlang */
1354
0
    if (nxt[0] == 0)
1355
0
        return(1);
1356
0
    if (nxt[0] != '-')
1357
0
        return(0);
1358
1359
0
    nxt++;
1360
0
    cur = nxt;
1361
    /* now we can have script or region or variant */
1362
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1363
0
        goto region_m49;
1364
1365
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1366
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1367
0
           nxt++;
1368
0
    if (nxt - cur == 2)
1369
0
        goto region;
1370
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1371
0
        goto variant;
1372
0
    if (nxt - cur != 4)
1373
0
        return(0);
1374
    /* we parsed a script */
1375
0
script:
1376
0
    if (nxt[0] == 0)
1377
0
        return(1);
1378
0
    if (nxt[0] != '-')
1379
0
        return(0);
1380
1381
0
    nxt++;
1382
0
    cur = nxt;
1383
    /* now we can have region or variant */
1384
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1385
0
        goto region_m49;
1386
1387
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1388
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1389
0
           nxt++;
1390
1391
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1392
0
        goto variant;
1393
0
    if (nxt - cur != 2)
1394
0
        return(0);
1395
    /* we parsed a region */
1396
0
region:
1397
0
    if (nxt[0] == 0)
1398
0
        return(1);
1399
0
    if (nxt[0] != '-')
1400
0
        return(0);
1401
1402
0
    nxt++;
1403
0
    cur = nxt;
1404
    /* now we can just have a variant */
1405
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1406
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1407
0
           nxt++;
1408
1409
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1410
0
        return(0);
1411
1412
    /* we parsed a variant */
1413
0
variant:
1414
0
    if (nxt[0] == 0)
1415
0
        return(1);
1416
0
    if (nxt[0] != '-')
1417
0
        return(0);
1418
    /* extensions and private use subtags not checked */
1419
0
    return (1);
1420
1421
0
region_m49:
1422
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1423
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1424
0
        nxt += 3;
1425
0
        goto region;
1426
0
    }
1427
0
    return(0);
1428
0
}
1429
1430
/************************************************************************
1431
 *                  *
1432
 *    Parser stacks related functions and macros    *
1433
 *                  *
1434
 ************************************************************************/
1435
1436
static xmlChar *
1437
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1438
1439
/**
1440
 * xmlParserNsCreate:
1441
 *
1442
 * Create a new namespace database.
1443
 *
1444
 * Returns the new obejct.
1445
 */
1446
xmlParserNsData *
1447
217k
xmlParserNsCreate(void) {
1448
217k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1449
1450
217k
    if (nsdb == NULL)
1451
0
        return(NULL);
1452
217k
    memset(nsdb, 0, sizeof(*nsdb));
1453
217k
    nsdb->defaultNsIndex = INT_MAX;
1454
1455
217k
    return(nsdb);
1456
217k
}
1457
1458
/**
1459
 * xmlParserNsFree:
1460
 * @nsdb: namespace database
1461
 *
1462
 * Free a namespace database.
1463
 */
1464
void
1465
217k
xmlParserNsFree(xmlParserNsData *nsdb) {
1466
217k
    if (nsdb == NULL)
1467
0
        return;
1468
1469
217k
    xmlFree(nsdb->extra);
1470
217k
    xmlFree(nsdb->hash);
1471
217k
    xmlFree(nsdb);
1472
217k
}
1473
1474
/**
1475
 * xmlParserNsReset:
1476
 * @nsdb: namespace database
1477
 *
1478
 * Reset a namespace database.
1479
 */
1480
static void
1481
4.89k
xmlParserNsReset(xmlParserNsData *nsdb) {
1482
4.89k
    if (nsdb == NULL)
1483
0
        return;
1484
1485
4.89k
    nsdb->hashElems = 0;
1486
4.89k
    nsdb->elementId = 0;
1487
4.89k
    nsdb->defaultNsIndex = INT_MAX;
1488
1489
4.89k
    if (nsdb->hash)
1490
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1491
4.89k
}
1492
1493
/**
1494
 * xmlParserStartElement:
1495
 * @nsdb: namespace database
1496
 *
1497
 * Signal that a new element has started.
1498
 *
1499
 * Returns 0 on success, -1 if the element counter overflowed.
1500
 */
1501
static int
1502
33.2M
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1503
33.2M
    if (nsdb->elementId == UINT_MAX)
1504
0
        return(-1);
1505
33.2M
    nsdb->elementId++;
1506
1507
33.2M
    return(0);
1508
33.2M
}
1509
1510
/**
1511
 * xmlParserNsLookup:
1512
 * @ctxt: parser context
1513
 * @prefix: namespace prefix
1514
 * @bucketPtr: optional bucket (return value)
1515
 *
1516
 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1517
 * be set to the matching bucket, or the first empty bucket if no match
1518
 * was found.
1519
 *
1520
 * Returns the namespace index on success, INT_MAX if no namespace was
1521
 * found.
1522
 */
1523
static int
1524
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1525
42.9M
                  xmlParserNsBucket **bucketPtr) {
1526
42.9M
    xmlParserNsBucket *bucket, *tombstone;
1527
42.9M
    unsigned index, hashValue;
1528
1529
42.9M
    if (prefix->name == NULL)
1530
21.5M
        return(ctxt->nsdb->defaultNsIndex);
1531
1532
21.4M
    if (ctxt->nsdb->hashSize == 0)
1533
467k
        return(INT_MAX);
1534
1535
20.9M
    hashValue = prefix->hashValue;
1536
20.9M
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1537
20.9M
    bucket = &ctxt->nsdb->hash[index];
1538
20.9M
    tombstone = NULL;
1539
1540
22.2M
    while (bucket->hashValue) {
1541
20.6M
        if (bucket->index == INT_MAX) {
1542
254k
            if (tombstone == NULL)
1543
247k
                tombstone = bucket;
1544
20.4M
        } else if (bucket->hashValue == hashValue) {
1545
19.3M
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1546
19.3M
                if (bucketPtr != NULL)
1547
912k
                    *bucketPtr = bucket;
1548
19.3M
                return(bucket->index);
1549
19.3M
            }
1550
19.3M
        }
1551
1552
1.32M
        index++;
1553
1.32M
        bucket++;
1554
1.32M
        if (index == ctxt->nsdb->hashSize) {
1555
15.8k
            index = 0;
1556
15.8k
            bucket = ctxt->nsdb->hash;
1557
15.8k
        }
1558
1.32M
    }
1559
1560
1.56M
    if (bucketPtr != NULL)
1561
836k
        *bucketPtr = tombstone ? tombstone : bucket;
1562
1.56M
    return(INT_MAX);
1563
20.9M
}
1564
1565
/**
1566
 * xmlParserNsLookupUri:
1567
 * @ctxt: parser context
1568
 * @prefix: namespace prefix
1569
 *
1570
 * Lookup namespace URI with given prefix.
1571
 *
1572
 * Returns the namespace URI on success, NULL if no namespace was found.
1573
 */
1574
static const xmlChar *
1575
33.2M
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1576
33.2M
    const xmlChar *ret;
1577
33.2M
    int nsIndex;
1578
1579
33.2M
    if (prefix->name == ctxt->str_xml)
1580
38.2k
        return(ctxt->str_xml_ns);
1581
1582
    /*
1583
     * minNsIndex is used when building an entity tree. We must
1584
     * ignore namespaces declared outside the entity.
1585
     */
1586
33.2M
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1587
33.2M
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1588
17.2M
        return(NULL);
1589
1590
15.9M
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1591
15.9M
    if (ret[0] == 0)
1592
28.7k
        ret = NULL;
1593
15.9M
    return(ret);
1594
33.2M
}
1595
1596
/**
1597
 * xmlParserNsLookupSax:
1598
 * @ctxt: parser context
1599
 * @prefix: namespace prefix
1600
 *
1601
 * Lookup extra data for the given prefix. This returns data stored
1602
 * with xmlParserNsUdpateSax.
1603
 *
1604
 * Returns the data on success, NULL if no namespace was found.
1605
 */
1606
void *
1607
755k
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1608
755k
    xmlHashedString hprefix;
1609
755k
    int nsIndex;
1610
1611
755k
    if (prefix == ctxt->str_xml)
1612
0
        return(NULL);
1613
1614
755k
    hprefix.name = prefix;
1615
755k
    if (prefix != NULL)
1616
754k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1617
1.10k
    else
1618
1.10k
        hprefix.hashValue = 0;
1619
755k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1620
755k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1621
0
        return(NULL);
1622
1623
755k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1624
755k
}
1625
1626
/**
1627
 * xmlParserNsUpdateSax:
1628
 * @ctxt: parser context
1629
 * @prefix: namespace prefix
1630
 * @saxData: extra data for SAX handler
1631
 *
1632
 * Sets or updates extra data for the given prefix. This value will be
1633
 * returned by xmlParserNsLookupSax as long as the namespace with the
1634
 * given prefix is in scope.
1635
 *
1636
 * Returns the data on success, NULL if no namespace was found.
1637
 */
1638
int
1639
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1640
10.1k
                     void *saxData) {
1641
10.1k
    xmlHashedString hprefix;
1642
10.1k
    int nsIndex;
1643
1644
10.1k
    if (prefix == ctxt->str_xml)
1645
0
        return(-1);
1646
1647
10.1k
    hprefix.name = prefix;
1648
10.1k
    if (prefix != NULL)
1649
9.51k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1650
687
    else
1651
687
        hprefix.hashValue = 0;
1652
10.1k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1653
10.1k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1654
0
        return(-1);
1655
1656
10.1k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1657
10.1k
    return(0);
1658
10.1k
}
1659
1660
/**
1661
 * xmlParserNsGrow:
1662
 * @ctxt: parser context
1663
 *
1664
 * Grows the namespace tables.
1665
 *
1666
 * Returns 0 on success, -1 if a memory allocation failed.
1667
 */
1668
static int
1669
519k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1670
519k
    const xmlChar **table;
1671
519k
    xmlParserNsExtra *extra;
1672
519k
    int newSize;
1673
1674
519k
    newSize = xmlGrowCapacity(ctxt->nsMax,
1675
519k
                              sizeof(table[0]) + sizeof(extra[0]),
1676
519k
                              16, XML_MAX_ITEMS);
1677
519k
    if (newSize < 0)
1678
0
        goto error;
1679
1680
519k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1681
519k
    if (table == NULL)
1682
0
        goto error;
1683
519k
    ctxt->nsTab = table;
1684
1685
519k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1686
519k
    if (extra == NULL)
1687
0
        goto error;
1688
519k
    ctxt->nsdb->extra = extra;
1689
1690
519k
    ctxt->nsMax = newSize;
1691
519k
    return(0);
1692
1693
0
error:
1694
0
    xmlErrMemory(ctxt);
1695
0
    return(-1);
1696
519k
}
1697
1698
/**
1699
 * xmlParserNsPush:
1700
 * @ctxt: parser context
1701
 * @prefix: prefix with hash value
1702
 * @uri: uri with hash value
1703
 * @saxData: extra data for SAX handler
1704
 * @defAttr: whether the namespace comes from a default attribute
1705
 *
1706
 * Push a new namespace on the table.
1707
 *
1708
 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1709
 * -1 if a memory allocation failed.
1710
 */
1711
static int
1712
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1713
1.52M
                const xmlHashedString *uri, void *saxData, int defAttr) {
1714
1.52M
    xmlParserNsBucket *bucket = NULL;
1715
1.52M
    xmlParserNsExtra *extra;
1716
1.52M
    const xmlChar **ns;
1717
1.52M
    unsigned hashValue, nsIndex, oldIndex;
1718
1719
1.52M
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1720
45
        return(0);
1721
1722
1.52M
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1723
0
        xmlErrMemory(ctxt);
1724
0
        return(-1);
1725
0
    }
1726
1727
    /*
1728
     * Default namespace and 'xml' namespace
1729
     */
1730
1.52M
    if ((prefix == NULL) || (prefix->name == NULL)) {
1731
349k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1732
1733
349k
        if (oldIndex != INT_MAX) {
1734
208k
            extra = &ctxt->nsdb->extra[oldIndex];
1735
1736
208k
            if (extra->elementId == ctxt->nsdb->elementId) {
1737
123k
                if (defAttr == 0)
1738
119k
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1739
123k
                return(0);
1740
123k
            }
1741
1742
85.7k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1743
0
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1744
0
                return(0);
1745
85.7k
        }
1746
1747
226k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1748
226k
        goto populate_entry;
1749
349k
    }
1750
1751
    /*
1752
     * Hash table lookup
1753
     */
1754
1.17M
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1755
1.17M
    if (oldIndex != INT_MAX) {
1756
226k
        extra = &ctxt->nsdb->extra[oldIndex];
1757
1758
        /*
1759
         * Check for duplicate definitions on the same element.
1760
         */
1761
226k
        if (extra->elementId == ctxt->nsdb->elementId) {
1762
49.7k
            if (defAttr == 0)
1763
49.7k
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1764
49.7k
            return(0);
1765
49.7k
        }
1766
1767
176k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1768
0
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1769
0
            return(0);
1770
1771
176k
        bucket->index = ctxt->nsNr;
1772
176k
        goto populate_entry;
1773
176k
    }
1774
1775
    /*
1776
     * Insert new bucket
1777
     */
1778
1779
952k
    hashValue = prefix->hashValue;
1780
1781
    /*
1782
     * Grow hash table, 50% fill factor
1783
     */
1784
952k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1785
153k
        xmlParserNsBucket *newHash;
1786
153k
        unsigned newSize, i, index;
1787
1788
153k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1789
0
            xmlErrMemory(ctxt);
1790
0
            return(-1);
1791
0
        }
1792
153k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1793
153k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1794
153k
        if (newHash == NULL) {
1795
0
            xmlErrMemory(ctxt);
1796
0
            return(-1);
1797
0
        }
1798
153k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1799
1800
1.67M
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1801
1.52M
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1802
1.52M
            unsigned newIndex;
1803
1804
1.52M
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1805
1.10M
                continue;
1806
417k
            newIndex = hv & (newSize - 1);
1807
1808
431k
            while (newHash[newIndex].hashValue != 0) {
1809
13.7k
                newIndex++;
1810
13.7k
                if (newIndex == newSize)
1811
95
                    newIndex = 0;
1812
13.7k
            }
1813
1814
417k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1815
417k
        }
1816
1817
153k
        xmlFree(ctxt->nsdb->hash);
1818
153k
        ctxt->nsdb->hash = newHash;
1819
153k
        ctxt->nsdb->hashSize = newSize;
1820
1821
        /*
1822
         * Relookup
1823
         */
1824
153k
        index = hashValue & (newSize - 1);
1825
1826
156k
        while (newHash[index].hashValue != 0) {
1827
2.59k
            index++;
1828
2.59k
            if (index == newSize)
1829
47
                index = 0;
1830
2.59k
        }
1831
1832
153k
        bucket = &newHash[index];
1833
153k
    }
1834
1835
952k
    bucket->hashValue = hashValue;
1836
952k
    bucket->index = ctxt->nsNr;
1837
952k
    ctxt->nsdb->hashElems++;
1838
952k
    oldIndex = INT_MAX;
1839
1840
1.35M
populate_entry:
1841
1.35M
    nsIndex = ctxt->nsNr;
1842
1843
1.35M
    ns = &ctxt->nsTab[nsIndex * 2];
1844
1.35M
    ns[0] = prefix ? prefix->name : NULL;
1845
1.35M
    ns[1] = uri->name;
1846
1847
1.35M
    extra = &ctxt->nsdb->extra[nsIndex];
1848
1.35M
    extra->saxData = saxData;
1849
1.35M
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1850
1.35M
    extra->uriHashValue = uri->hashValue;
1851
1.35M
    extra->elementId = ctxt->nsdb->elementId;
1852
1.35M
    extra->oldIndex = oldIndex;
1853
1854
1.35M
    ctxt->nsNr++;
1855
1856
1.35M
    return(1);
1857
952k
}
1858
1859
/**
1860
 * xmlParserNsPop:
1861
 * @ctxt: an XML parser context
1862
 * @nr:  the number to pop
1863
 *
1864
 * Pops the top @nr namespaces and restores the hash table.
1865
 *
1866
 * Returns the number of namespaces popped.
1867
 */
1868
static int
1869
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1870
307k
{
1871
307k
    int i;
1872
1873
    /* assert(nr <= ctxt->nsNr); */
1874
1875
1.14M
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1876
839k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1877
839k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1878
1879
839k
        if (prefix == NULL) {
1880
153k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1881
686k
        } else {
1882
686k
            xmlHashedString hprefix;
1883
686k
            xmlParserNsBucket *bucket = NULL;
1884
1885
686k
            hprefix.name = prefix;
1886
686k
            hprefix.hashValue = extra->prefixHashValue;
1887
686k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1888
            /* assert(bucket && bucket->hashValue); */
1889
686k
            bucket->index = extra->oldIndex;
1890
686k
        }
1891
839k
    }
1892
1893
307k
    ctxt->nsNr -= nr;
1894
307k
    return(nr);
1895
307k
}
1896
1897
static int
1898
566k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt) {
1899
566k
    const xmlChar **atts;
1900
566k
    unsigned *attallocs;
1901
566k
    int newSize;
1902
1903
566k
    newSize = xmlGrowCapacity(ctxt->maxatts / 5,
1904
566k
                              sizeof(atts[0]) * 5 + sizeof(attallocs[0]),
1905
566k
                              10, XML_MAX_ATTRS);
1906
566k
    if (newSize < 0) {
1907
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
1908
0
                    "Maximum number of attributes exceeded");
1909
0
        return(-1);
1910
0
    }
1911
1912
566k
    atts = xmlRealloc(ctxt->atts, newSize * sizeof(atts[0]) * 5);
1913
566k
    if (atts == NULL)
1914
0
        goto mem_error;
1915
566k
    ctxt->atts = atts;
1916
1917
566k
    attallocs = xmlRealloc(ctxt->attallocs,
1918
566k
                           newSize * sizeof(attallocs[0]));
1919
566k
    if (attallocs == NULL)
1920
0
        goto mem_error;
1921
566k
    ctxt->attallocs = attallocs;
1922
1923
566k
    ctxt->maxatts = newSize * 5;
1924
1925
566k
    return(0);
1926
1927
0
mem_error:
1928
0
    xmlErrMemory(ctxt);
1929
0
    return(-1);
1930
566k
}
1931
1932
/**
1933
 * xmlCtxtPushInput:
1934
 * @ctxt:  an XML parser context
1935
 * @value:  the parser input
1936
 *
1937
 * Pushes a new parser input on top of the input stack
1938
 *
1939
 * Returns -1 in case of error, the index in the stack otherwise
1940
 */
1941
int
1942
xmlCtxtPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1943
217k
{
1944
217k
    char *directory = NULL;
1945
217k
    int maxDepth;
1946
1947
217k
    if ((ctxt == NULL) || (value == NULL))
1948
0
        return(-1);
1949
1950
217k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
1951
1952
217k
    if (ctxt->inputNr >= ctxt->inputMax) {
1953
0
        xmlParserInputPtr *tmp;
1954
0
        int newSize;
1955
1956
0
        newSize = xmlGrowCapacity(ctxt->inputMax, sizeof(tmp[0]),
1957
0
                                  5, maxDepth);
1958
0
        if (newSize < 0) {
1959
0
            xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
1960
0
                           "Maximum entity nesting depth exceeded");
1961
0
            xmlHaltParser(ctxt);
1962
0
            return(-1);
1963
0
        }
1964
0
        tmp = xmlRealloc(ctxt->inputTab, newSize * sizeof(tmp[0]));
1965
0
        if (tmp == NULL) {
1966
0
            xmlErrMemory(ctxt);
1967
0
            return(-1);
1968
0
        }
1969
0
        ctxt->inputTab = tmp;
1970
0
        ctxt->inputMax = newSize;
1971
0
    }
1972
1973
217k
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1974
0
        directory = xmlParserGetDirectory(value->filename);
1975
0
        if (directory == NULL) {
1976
0
            xmlErrMemory(ctxt);
1977
0
            return(-1);
1978
0
        }
1979
0
    }
1980
1981
217k
    if (ctxt->input_id >= INT_MAX) {
1982
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1983
0
        return(-1);
1984
0
    }
1985
1986
217k
    ctxt->inputTab[ctxt->inputNr] = value;
1987
217k
    ctxt->input = value;
1988
1989
217k
    if (ctxt->inputNr == 0) {
1990
217k
        xmlFree(ctxt->directory);
1991
217k
        ctxt->directory = directory;
1992
217k
    }
1993
1994
    /*
1995
     * Internally, the input ID is only used to detect parameter entity
1996
     * boundaries. But there are entity loaders in downstream code that
1997
     * detect the main document by checking for "input_id == 1".
1998
     */
1999
217k
    value->id = ctxt->input_id++;
2000
2001
217k
    return(ctxt->inputNr++);
2002
217k
}
2003
2004
/**
2005
 * xmlCtxtPopInput:
2006
 * @ctxt: an XML parser context
2007
 *
2008
 * Pops the top parser input from the input stack
2009
 *
2010
 * Returns the input just removed
2011
 */
2012
xmlParserInputPtr
2013
xmlCtxtPopInput(xmlParserCtxtPtr ctxt)
2014
656k
{
2015
656k
    xmlParserInputPtr ret;
2016
2017
656k
    if (ctxt == NULL)
2018
0
        return(NULL);
2019
656k
    if (ctxt->inputNr <= 0)
2020
439k
        return (NULL);
2021
217k
    ctxt->inputNr--;
2022
217k
    if (ctxt->inputNr > 0)
2023
0
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
2024
217k
    else
2025
217k
        ctxt->input = NULL;
2026
217k
    ret = ctxt->inputTab[ctxt->inputNr];
2027
217k
    ctxt->inputTab[ctxt->inputNr] = NULL;
2028
217k
    return (ret);
2029
656k
}
2030
2031
/**
2032
 * nodePush:
2033
 * @ctxt:  an XML parser context
2034
 * @value:  the element node
2035
 *
2036
 * DEPRECATED: Internal function, do not use.
2037
 *
2038
 * Pushes a new element node on top of the node stack
2039
 *
2040
 * Returns -1 in case of error, the index in the stack otherwise
2041
 */
2042
int
2043
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
2044
754k
{
2045
754k
    if (ctxt == NULL)
2046
0
        return(0);
2047
2048
754k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
2049
19.0k
        int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2050
19.0k
        xmlNodePtr *tmp;
2051
19.0k
        int newSize;
2052
2053
19.0k
        newSize = xmlGrowCapacity(ctxt->nodeMax, sizeof(tmp[0]),
2054
19.0k
                                  10, maxDepth);
2055
19.0k
        if (newSize < 0) {
2056
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2057
0
                    "Excessive depth in document: %d,"
2058
0
                    " use XML_PARSE_HUGE option\n",
2059
0
                    ctxt->nodeNr);
2060
0
            xmlHaltParser(ctxt);
2061
0
            return(-1);
2062
0
        }
2063
2064
19.0k
  tmp = xmlRealloc(ctxt->nodeTab, newSize * sizeof(tmp[0]));
2065
19.0k
        if (tmp == NULL) {
2066
0
            xmlErrMemory(ctxt);
2067
0
            return (-1);
2068
0
        }
2069
19.0k
        ctxt->nodeTab = tmp;
2070
19.0k
  ctxt->nodeMax = newSize;
2071
19.0k
    }
2072
2073
754k
    ctxt->nodeTab[ctxt->nodeNr] = value;
2074
754k
    ctxt->node = value;
2075
754k
    return (ctxt->nodeNr++);
2076
754k
}
2077
2078
/**
2079
 * nodePop:
2080
 * @ctxt: an XML parser context
2081
 *
2082
 * DEPRECATED: Internal function, do not use.
2083
 *
2084
 * Pops the top element node from the node stack
2085
 *
2086
 * Returns the node just removed
2087
 */
2088
xmlNodePtr
2089
nodePop(xmlParserCtxtPtr ctxt)
2090
853k
{
2091
853k
    xmlNodePtr ret;
2092
2093
853k
    if (ctxt == NULL) return(NULL);
2094
853k
    if (ctxt->nodeNr <= 0)
2095
99.2k
        return (NULL);
2096
754k
    ctxt->nodeNr--;
2097
754k
    if (ctxt->nodeNr > 0)
2098
749k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2099
4.79k
    else
2100
4.79k
        ctxt->node = NULL;
2101
754k
    ret = ctxt->nodeTab[ctxt->nodeNr];
2102
754k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2103
754k
    return (ret);
2104
853k
}
2105
2106
/**
2107
 * nameNsPush:
2108
 * @ctxt:  an XML parser context
2109
 * @value:  the element name
2110
 * @prefix:  the element prefix
2111
 * @URI:  the element namespace name
2112
 * @line:  the current line number for error messages
2113
 * @nsNr:  the number of namespaces pushed on the namespace table
2114
 *
2115
 * Pushes a new element name/prefix/URL on top of the name stack
2116
 *
2117
 * Returns -1 in case of error, the index in the stack otherwise
2118
 */
2119
static int
2120
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2121
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2122
25.1M
{
2123
25.1M
    xmlStartTag *tag;
2124
2125
25.1M
    if (ctxt->nameNr >= ctxt->nameMax) {
2126
765k
        const xmlChar **tmp;
2127
765k
        xmlStartTag *tmp2;
2128
765k
        int newSize;
2129
2130
765k
        newSize = xmlGrowCapacity(ctxt->nameMax,
2131
765k
                                  sizeof(tmp[0]) + sizeof(tmp2[0]),
2132
765k
                                  10, XML_MAX_ITEMS);
2133
765k
        if (newSize < 0)
2134
0
            goto mem_error;
2135
2136
765k
        tmp = xmlRealloc(ctxt->nameTab, newSize * sizeof(tmp[0]));
2137
765k
        if (tmp == NULL)
2138
0
      goto mem_error;
2139
765k
  ctxt->nameTab = tmp;
2140
2141
765k
        tmp2 = xmlRealloc(ctxt->pushTab, newSize * sizeof(tmp2[0]));
2142
765k
        if (tmp2 == NULL)
2143
0
      goto mem_error;
2144
765k
  ctxt->pushTab = tmp2;
2145
2146
765k
        ctxt->nameMax = newSize;
2147
24.3M
    } else if (ctxt->pushTab == NULL) {
2148
197k
        ctxt->pushTab = xmlMalloc(ctxt->nameMax * sizeof(ctxt->pushTab[0]));
2149
197k
        if (ctxt->pushTab == NULL)
2150
0
            goto mem_error;
2151
197k
    }
2152
25.1M
    ctxt->nameTab[ctxt->nameNr] = value;
2153
25.1M
    ctxt->name = value;
2154
25.1M
    tag = &ctxt->pushTab[ctxt->nameNr];
2155
25.1M
    tag->prefix = prefix;
2156
25.1M
    tag->URI = URI;
2157
25.1M
    tag->line = line;
2158
25.1M
    tag->nsNr = nsNr;
2159
25.1M
    return (ctxt->nameNr++);
2160
0
mem_error:
2161
0
    xmlErrMemory(ctxt);
2162
0
    return (-1);
2163
25.1M
}
2164
#ifdef LIBXML_PUSH_ENABLED
2165
/**
2166
 * nameNsPop:
2167
 * @ctxt: an XML parser context
2168
 *
2169
 * Pops the top element/prefix/URI name from the name stack
2170
 *
2171
 * Returns the name just removed
2172
 */
2173
static const xmlChar *
2174
nameNsPop(xmlParserCtxtPtr ctxt)
2175
7.61M
{
2176
7.61M
    const xmlChar *ret;
2177
2178
7.61M
    if (ctxt->nameNr <= 0)
2179
0
        return (NULL);
2180
7.61M
    ctxt->nameNr--;
2181
7.61M
    if (ctxt->nameNr > 0)
2182
7.49M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2183
121k
    else
2184
121k
        ctxt->name = NULL;
2185
7.61M
    ret = ctxt->nameTab[ctxt->nameNr];
2186
7.61M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2187
7.61M
    return (ret);
2188
7.61M
}
2189
#endif /* LIBXML_PUSH_ENABLED */
2190
2191
/**
2192
 * namePop:
2193
 * @ctxt: an XML parser context
2194
 *
2195
 * DEPRECATED: Internal function, do not use.
2196
 *
2197
 * Pops the top element name from the name stack
2198
 *
2199
 * Returns the name just removed
2200
 */
2201
static const xmlChar *
2202
namePop(xmlParserCtxtPtr ctxt)
2203
843k
{
2204
843k
    const xmlChar *ret;
2205
2206
843k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2207
0
        return (NULL);
2208
843k
    ctxt->nameNr--;
2209
843k
    if (ctxt->nameNr > 0)
2210
839k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2211
4.64k
    else
2212
4.64k
        ctxt->name = NULL;
2213
843k
    ret = ctxt->nameTab[ctxt->nameNr];
2214
843k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2215
843k
    return (ret);
2216
843k
}
2217
2218
33.2M
/*
 * Pushes @val on the stack of xml:space values and points ctxt->space
 * at the new top entry. The stack grows on demand, bounded by
 * XML_MAX_ITEMS.
 *
 * Returns the index of the new entry, or -1 if growing the stack
 * failed (a memory error is raised on the context).
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown = NULL;
        int capacity;

        capacity = xmlGrowCapacity(ctxt->spaceMax, sizeof(grown[0]),
                                   10, XML_MAX_ITEMS);
        if (capacity >= 0)
            grown = xmlRealloc(ctxt->spaceTab, capacity * sizeof(grown[0]));

        /* Limit exceeded and allocation failure are reported alike. */
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        ctxt->spaceTab = grown;
        ctxt->spaceMax = capacity;
    }

    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    *ctxt->space = val;
    return(ctxt->spaceNr++);
}
2243
2244
16.5M
/*
 * Pops the top entry of the xml:space stack. ctxt->space is moved to
 * the entry below, or to slot 0 when the stack becomes empty, and the
 * vacated slot is overwritten with -1.
 *
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;

    return(popped);
}
2256
2257
/*
 * Macros for accessing the content. Those should be used only by the
 * parser, and not exported.
 *
 * All of them expect a variable named "ctxt" holding the parser context
 * to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context
 * to use them:
 *
 *   CUR_PTR  the current pointer to the xmlChar to be parsed.
 *            To be used with extreme caution since operations consuming
 *            characters may move the input buffer to a different
 *            location!
 *   BASE_PTR the base pointer of the current input buffer.
 *   CUR      the current xmlChar value, i.e. a single byte. This should
 *            only be used to compare against ASCII values, otherwise it
 *            would break with multi-byte UTF-8 sequences.
 *   RAW      expands to exactly the same expression as CUR.
 *   NXT(n)   the n'th next xmlChar. Same as CUR, it should only be used
 *            to compare against ASCII based substrings.
 *   CMPn(s, c1, ..., cn)
 *            true if the first n bytes at s equal c1 ... cn.
 *   SKIP(n)  skip n xmlChars. Must only be used to skip ASCII strings
 *            without newlines, since only the column is updated.
 *   SKIPL(n) skip n xmlChars while keeping track of newlines.
 *   NEXT1    skip one xmlChar. Must only be used to skip a single
 *            non-newline ASCII char.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8
 * encoding:
 *
 *   NEXT       skip to the next character via xmlNextChar().
 *   NEXTL(l)   skip the current character which is l xmlChars long.
 *   CUR_SCHAR(s, l)
 *              decode the current character of string s via
 *              xmlStringCurrentChar(), storing its length in l.
 *   COPY_BUF(b, i, v)
 *              append the character v to buffer b at index i and
 *              advance the index.
 *   SKIP_BLANKS, SKIP_BLANKS_PE
 *              skip whitespace, the latter also handling parameter
 *              entities.
 *   GROW, SHRINK
 *              handling of input buffers. Both expand to a bare "if"
 *              statement including its terminating semicolon, so they
 *              must not be used as the unbraced body of an if/else.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/* The string argument is parenthesized so that any expression works. */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SHRINK \
    if (!PARSER_PROGRESSIVE(ctxt)) \
        xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
        xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
        ctxt->input->col++;						\
        ctxt->input->cur++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserGrow(ctxt);					\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/* Wrapped in do/while so the if/else cannot capture a caller's else. */
#define COPY_BUF(b, i, v) do {						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v));			\
  } while (0)
2366
2367
static int
2368
118M
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2369
118M
    int c = xmlCurrentChar(ctxt, len);
2370
2371
118M
    if (c == XML_INVALID_CHAR)
2372
535k
        c = 0xFFFD; /* replacement character */
2373
2374
118M
    return(c);
2375
118M
}
2376
2377
/**
2378
 * xmlSkipBlankChars:
2379
 * @ctxt:  the XML parser context
2380
 *
2381
 * DEPRECATED: Internal function, do not use.
2382
 *
2383
 * Skip whitespace in the input stream.
2384
 *
2385
 * Returns the number of space chars skipped
2386
 */
2387
int
2388
99.2M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2389
99.2M
    const xmlChar *cur;
2390
99.2M
    int res = 0;
2391
2392
99.2M
    cur = ctxt->input->cur;
2393
99.2M
    while (IS_BLANK_CH(*cur)) {
2394
30.0M
        if (*cur == '\n') {
2395
825k
            ctxt->input->line++; ctxt->input->col = 1;
2396
29.2M
        } else {
2397
29.2M
            ctxt->input->col++;
2398
29.2M
        }
2399
30.0M
        cur++;
2400
30.0M
        if (res < INT_MAX)
2401
30.0M
            res++;
2402
30.0M
        if (*cur == 0) {
2403
10.2k
            ctxt->input->cur = cur;
2404
10.2k
            xmlParserGrow(ctxt);
2405
10.2k
            cur = ctxt->input->cur;
2406
10.2k
        }
2407
30.0M
    }
2408
99.2M
    ctxt->input->cur = cur;
2409
2410
99.2M
    if (res > 4)
2411
398k
        GROW;
2412
2413
99.2M
    return(res);
2414
99.2M
}
2415
2416
static void
2417
0
xmlPopPE(xmlParserCtxtPtr ctxt) {
2418
0
    unsigned long consumed;
2419
0
    xmlEntityPtr ent;
2420
2421
0
    ent = ctxt->input->entity;
2422
2423
0
    ent->flags &= ~XML_ENT_EXPANDING;
2424
2425
0
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2426
0
        int result;
2427
2428
        /*
2429
         * Read the rest of the stream in case of errors. We want
2430
         * to account for the whole entity size.
2431
         */
2432
0
        do {
2433
0
            ctxt->input->cur = ctxt->input->end;
2434
0
            xmlParserShrink(ctxt);
2435
0
            result = xmlParserGrow(ctxt);
2436
0
        } while (result > 0);
2437
2438
0
        consumed = ctxt->input->consumed;
2439
0
        xmlSaturatedAddSizeT(&consumed,
2440
0
                             ctxt->input->end - ctxt->input->base);
2441
2442
0
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2443
2444
        /*
2445
         * Add to sizeentities when parsing an external entity
2446
         * for the first time.
2447
         */
2448
0
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2449
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2450
0
        }
2451
2452
0
        ent->flags |= XML_ENT_CHECKED;
2453
0
    }
2454
2455
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
2456
2457
0
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2458
2459
0
    GROW;
2460
0
}
2461
2462
/**
2463
 * xmlSkipBlankCharsPE:
2464
 * @ctxt:  the XML parser context
2465
 *
2466
 * Skip whitespace in the input stream, also handling parameter
2467
 * entities.
2468
 *
2469
 * Returns the number of space chars skipped
2470
 */
2471
static int
2472
1.02M
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2473
1.02M
    int res = 0;
2474
1.02M
    int inParam;
2475
1.02M
    int expandParam;
2476
2477
1.02M
    inParam = PARSER_IN_PE(ctxt);
2478
1.02M
    expandParam = PARSER_EXTERNAL(ctxt);
2479
2480
1.02M
    if (!inParam && !expandParam)
2481
1.02M
        return(xmlSkipBlankChars(ctxt));
2482
2483
    /*
2484
     * It's Okay to use CUR/NEXT here since all the blanks are on
2485
     * the ASCII range.
2486
     */
2487
0
    while (PARSER_STOPPED(ctxt) == 0) {
2488
0
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2489
0
            NEXT;
2490
0
        } else if (CUR == '%') {
2491
0
            if ((expandParam == 0) ||
2492
0
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2493
0
                break;
2494
2495
            /*
2496
             * Expand parameter entity. We continue to consume
2497
             * whitespace at the start of the entity and possible
2498
             * even consume the whole entity and pop it. We might
2499
             * even pop multiple PEs in this loop.
2500
             */
2501
0
            xmlParsePEReference(ctxt);
2502
2503
0
            inParam = PARSER_IN_PE(ctxt);
2504
0
            expandParam = PARSER_EXTERNAL(ctxt);
2505
0
        } else if (CUR == 0) {
2506
0
            if (inParam == 0)
2507
0
                break;
2508
2509
0
            xmlPopPE(ctxt);
2510
2511
0
            inParam = PARSER_IN_PE(ctxt);
2512
0
            expandParam = PARSER_EXTERNAL(ctxt);
2513
0
        } else {
2514
0
            break;
2515
0
        }
2516
2517
        /*
2518
         * Also increase the counter when entering or exiting a PERef.
2519
         * The spec says: "When a parameter-entity reference is recognized
2520
         * in the DTD and included, its replacement text MUST be enlarged
2521
         * by the attachment of one leading and one following space (#x20)
2522
         * character."
2523
         */
2524
0
        if (res < INT_MAX)
2525
0
            res++;
2526
0
    }
2527
2528
0
    return(res);
2529
1.02M
}
2530
2531
/************************************************************************
2532
 *                  *
2533
 *    Commodity functions to handle entities      *
2534
 *                  *
2535
 ************************************************************************/
2536
2537
/**
2538
 * xmlPopInput:
2539
 * @ctxt:  an XML parser context
2540
 *
2541
 * DEPRECATED: Internal function, don't use.
2542
 *
2543
 * Returns the current xmlChar in the parser context
2544
 */
2545
xmlChar
2546
0
xmlPopInput(xmlParserCtxtPtr ctxt) {
2547
0
    xmlParserInputPtr input;
2548
2549
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2550
0
    input = xmlCtxtPopInput(ctxt);
2551
0
    xmlFreeInputStream(input);
2552
0
    if (*ctxt->input->cur == 0)
2553
0
        xmlParserGrow(ctxt);
2554
0
    return(CUR);
2555
0
}
2556
2557
/**
2558
 * xmlPushInput:
2559
 * @ctxt:  an XML parser context
2560
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2561
 *
2562
 * DEPRECATED: Internal function, don't use.
2563
 *
2564
 * Push an input stream onto the stack.
2565
 *
2566
 * Returns -1 in case of error or the index in the input stack
2567
 */
2568
int
2569
0
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2570
0
    int ret;
2571
2572
0
    if ((ctxt == NULL) || (input == NULL))
2573
0
        return(-1);
2574
2575
0
    ret = xmlCtxtPushInput(ctxt, input);
2576
0
    if (ret >= 0)
2577
0
        GROW;
2578
0
    return(ret);
2579
0
}
2580
2581
/**
2582
 * xmlParseCharRef:
2583
 * @ctxt:  an XML parser context
2584
 *
2585
 * DEPRECATED: Internal function, don't use.
2586
 *
2587
 * Parse a numeric character reference. Always consumes '&'.
2588
 *
2589
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2590
 *                  '&#x' [0-9a-fA-F]+ ';'
2591
 *
2592
 * [ WFC: Legal Character ]
2593
 * Characters referred to using character references must match the
2594
 * production for Char.
2595
 *
2596
 * Returns the value parsed (as an int), 0 in case of error
2597
 */
2598
int
2599
146k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2600
146k
    int val = 0;
2601
146k
    int count = 0;
2602
2603
    /*
2604
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2605
     */
2606
146k
    if ((RAW == '&') && (NXT(1) == '#') &&
2607
146k
        (NXT(2) == 'x')) {
2608
87.0k
  SKIP(3);
2609
87.0k
  GROW;
2610
416k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2611
329k
      if (count++ > 20) {
2612
300
    count = 0;
2613
300
    GROW;
2614
300
      }
2615
329k
      if ((RAW >= '0') && (RAW <= '9'))
2616
145k
          val = val * 16 + (CUR - '0');
2617
184k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2618
147k
          val = val * 16 + (CUR - 'a') + 10;
2619
36.6k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2620
36.2k
          val = val * 16 + (CUR - 'A') + 10;
2621
321
      else {
2622
321
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2623
321
    val = 0;
2624
321
    break;
2625
321
      }
2626
329k
      if (val > 0x110000)
2627
3.80k
          val = 0x110000;
2628
2629
329k
      NEXT;
2630
329k
      count++;
2631
329k
  }
2632
87.0k
  if (RAW == ';') {
2633
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2634
86.7k
      ctxt->input->col++;
2635
86.7k
      ctxt->input->cur++;
2636
86.7k
  }
2637
87.0k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2638
59.8k
  SKIP(2);
2639
59.8k
  GROW;
2640
199k
  while (RAW != ';') { /* loop blocked by count */
2641
140k
      if (count++ > 20) {
2642
344
    count = 0;
2643
344
    GROW;
2644
344
      }
2645
140k
      if ((RAW >= '0') && (RAW <= '9'))
2646
139k
          val = val * 10 + (CUR - '0');
2647
491
      else {
2648
491
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2649
491
    val = 0;
2650
491
    break;
2651
491
      }
2652
139k
      if (val > 0x110000)
2653
3.97k
          val = 0x110000;
2654
2655
139k
      NEXT;
2656
139k
      count++;
2657
139k
  }
2658
59.8k
  if (RAW == ';') {
2659
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2660
59.3k
      ctxt->input->col++;
2661
59.3k
      ctxt->input->cur++;
2662
59.3k
  }
2663
59.8k
    } else {
2664
0
        if (RAW == '&')
2665
0
            SKIP(1);
2666
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2667
0
    }
2668
2669
    /*
2670
     * [ WFC: Legal Character ]
2671
     * Characters referred to using character references must match the
2672
     * production for Char.
2673
     */
2674
146k
    if (val >= 0x110000) {
2675
344
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2676
344
                "xmlParseCharRef: character reference out of bounds\n",
2677
344
          val);
2678
344
        val = 0xFFFD;
2679
146k
    } else if (!IS_CHAR(val)) {
2680
2.83k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2681
2.83k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2682
2.83k
                    val);
2683
2.83k
    }
2684
146k
    return(val);
2685
146k
}
2686
2687
/**
2688
 * xmlParseStringCharRef:
2689
 * @ctxt:  an XML parser context
2690
 * @str:  a pointer to an index in the string
2691
 *
2692
 * parse Reference declarations, variant parsing from a string rather
2693
 * than an an input flow.
2694
 *
2695
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2696
 *                  '&#x' [0-9a-fA-F]+ ';'
2697
 *
2698
 * [ WFC: Legal Character ]
2699
 * Characters referred to using character references must match the
2700
 * production for Char.
2701
 *
2702
 * Returns the value parsed (as an int), 0 in case of error, str will be
2703
 *         updated to the current value of the index
2704
 */
2705
static int
2706
51.2k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2707
51.2k
    const xmlChar *ptr;
2708
51.2k
    xmlChar cur;
2709
51.2k
    int val = 0;
2710
2711
51.2k
    if ((str == NULL) || (*str == NULL)) return(0);
2712
51.2k
    ptr = *str;
2713
51.2k
    cur = *ptr;
2714
51.2k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2715
20.8k
  ptr += 3;
2716
20.8k
  cur = *ptr;
2717
82.8k
  while (cur != ';') { /* Non input consuming loop */
2718
66.8k
      if ((cur >= '0') && (cur <= '9'))
2719
33.1k
          val = val * 16 + (cur - '0');
2720
33.6k
      else if ((cur >= 'a') && (cur <= 'f'))
2721
18.6k
          val = val * 16 + (cur - 'a') + 10;
2722
15.0k
      else if ((cur >= 'A') && (cur <= 'F'))
2723
10.1k
          val = val * 16 + (cur - 'A') + 10;
2724
4.84k
      else {
2725
4.84k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2726
4.84k
    val = 0;
2727
4.84k
    break;
2728
4.84k
      }
2729
61.9k
      if (val > 0x110000)
2730
3.82k
          val = 0x110000;
2731
2732
61.9k
      ptr++;
2733
61.9k
      cur = *ptr;
2734
61.9k
  }
2735
20.8k
  if (cur == ';')
2736
16.0k
      ptr++;
2737
30.3k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2738
30.3k
  ptr += 2;
2739
30.3k
  cur = *ptr;
2740
176k
  while (cur != ';') { /* Non input consuming loops */
2741
148k
      if ((cur >= '0') && (cur <= '9'))
2742
145k
          val = val * 10 + (cur - '0');
2743
2.34k
      else {
2744
2.34k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2745
2.34k
    val = 0;
2746
2.34k
    break;
2747
2.34k
      }
2748
145k
      if (val > 0x110000)
2749
1.99k
          val = 0x110000;
2750
2751
145k
      ptr++;
2752
145k
      cur = *ptr;
2753
145k
  }
2754
30.3k
  if (cur == ';')
2755
28.0k
      ptr++;
2756
30.3k
    } else {
2757
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2758
0
  return(0);
2759
0
    }
2760
51.2k
    *str = ptr;
2761
2762
    /*
2763
     * [ WFC: Legal Character ]
2764
     * Characters referred to using character references must match the
2765
     * production for Char.
2766
     */
2767
51.2k
    if (val >= 0x110000) {
2768
230
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2769
230
                "xmlParseStringCharRef: character reference out of bounds\n",
2770
230
                val);
2771
50.9k
    } else if (IS_CHAR(val)) {
2772
42.7k
        return(val);
2773
42.7k
    } else {
2774
8.28k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2775
8.28k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2776
8.28k
        val);
2777
8.28k
    }
2778
8.51k
    return(0);
2779
51.2k
}
2780
2781
/**
2782
 * xmlParserHandlePEReference:
2783
 * @ctxt:  the parser context
2784
 *
2785
 * DEPRECATED: Internal function, do not use.
2786
 *
2787
 * [69] PEReference ::= '%' Name ';'
2788
 *
2789
 * [ WFC: No Recursion ]
2790
 * A parsed entity must not contain a recursive
2791
 * reference to itself, either directly or indirectly.
2792
 *
2793
 * [ WFC: Entity Declared ]
2794
 * In a document without any DTD, a document with only an internal DTD
2795
 * subset which contains no parameter entity references, or a document
2796
 * with "standalone='yes'", ...  ... The declaration of a parameter
2797
 * entity must precede any reference to it...
2798
 *
2799
 * [ VC: Entity Declared ]
2800
 * In a document with an external subset or external parameter entities
2801
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2802
 * must precede any reference to it...
2803
 *
2804
 * [ WFC: In DTD ]
2805
 * Parameter-entity references may only appear in the DTD.
2806
 * NOTE: misleading but this is handled.
2807
 *
2808
 * A PEReference may have been detected in the current input stream
2809
 * the handling is done accordingly to
2810
 *      http://www.w3.org/TR/REC-xml#entproc
2811
 * i.e.
2812
 *   - Included in literal in entity values
2813
 *   - Included as Parameter Entity reference within DTDs
2814
 */
2815
void
2816
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2817
0
    xmlParsePEReference(ctxt);
2818
0
}
2819
2820
/**
2821
 * xmlStringLenDecodeEntities:
2822
 * @ctxt:  the parser context
2823
 * @str:  the input string
2824
 * @len: the string length
2825
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2826
 * @end:  an end marker xmlChar, 0 if none
2827
 * @end2:  an end marker xmlChar, 0 if none
2828
 * @end3:  an end marker xmlChar, 0 if none
2829
 *
2830
 * DEPRECATED: Internal function, don't use.
2831
 *
2832
 * Returns A newly allocated string with the substitution done. The caller
2833
 *      must deallocate it !
2834
 */
2835
xmlChar *
2836
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2837
                           int what ATTRIBUTE_UNUSED,
2838
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2839
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2840
0
        return(NULL);
2841
2842
0
    if ((str[len] != 0) ||
2843
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2844
0
        return(NULL);
2845
2846
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2847
0
}
2848
2849
/**
2850
 * xmlStringDecodeEntities:
2851
 * @ctxt:  the parser context
2852
 * @str:  the input string
2853
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2854
 * @end:  an end marker xmlChar, 0 if none
2855
 * @end2:  an end marker xmlChar, 0 if none
2856
 * @end3:  an end marker xmlChar, 0 if none
2857
 *
2858
 * DEPRECATED: Internal function, don't use.
2859
 *
2860
 * Returns A newly allocated string with the substitution done. The caller
2861
 *      must deallocate it !
2862
 */
2863
xmlChar *
2864
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2865
                        int what ATTRIBUTE_UNUSED,
2866
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2867
0
    if ((ctxt == NULL) || (str == NULL))
2868
0
        return(NULL);
2869
2870
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2871
0
        return(NULL);
2872
2873
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2874
0
}
2875
2876
/************************************************************************
2877
 *                  *
2878
 *    Commodity functions, cleanup needed ?     *
2879
 *                  *
2880
 ************************************************************************/
2881
2882
/**
2883
 * areBlanks:
2884
 * @ctxt:  an XML parser context
2885
 * @str:  a xmlChar *
2886
 * @len:  the size of @str
2887
 * @blank_chars: we know the chars are blanks
2888
 *
2889
 * Is this a sequence of blank chars that one can ignore ?
2890
 *
2891
 * Returns 1 if ignorable 0 otherwise.
2892
 */
2893
2894
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2895
11.6M
                     int blank_chars) {
2896
11.6M
    int i;
2897
11.6M
    xmlNodePtr lastChild;
2898
2899
    /*
2900
     * Check for xml:space value.
2901
     */
2902
11.6M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2903
11.5M
        (*(ctxt->space) == -2))
2904
4.68M
  return(0);
2905
2906
    /*
2907
     * Check that the string is made of blanks
2908
     */
2909
7.00M
    if (blank_chars == 0) {
2910
7.32M
  for (i = 0;i < len;i++)
2911
7.20M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2912
6.15M
    }
2913
2914
    /*
2915
     * Look if the element is mixed content in the DTD if available
2916
     */
2917
972k
    if (ctxt->node == NULL) return(0);
2918
18.4E
    if (ctxt->myDoc != NULL) {
2919
0
        xmlElementPtr elemDecl = NULL;
2920
0
        xmlDocPtr doc = ctxt->myDoc;
2921
0
        const xmlChar *prefix = NULL;
2922
2923
0
        if (ctxt->node->ns)
2924
0
            prefix = ctxt->node->ns->prefix;
2925
0
        if (doc->intSubset != NULL)
2926
0
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2927
0
                                      prefix);
2928
0
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2929
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2930
0
                                      prefix);
2931
0
        if (elemDecl != NULL) {
2932
0
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2933
0
                return(1);
2934
0
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2935
0
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2936
0
                return(0);
2937
0
        }
2938
0
    }
2939
2940
    /*
2941
     * Otherwise, heuristic :-\
2942
     *
2943
     * When push parsing, we could be at the end of a chunk.
2944
     * This makes the look-ahead and consequently the NOBLANKS
2945
     * option unreliable.
2946
     */
2947
18.4E
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2948
18.4E
    if ((ctxt->node->children == NULL) &&
2949
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2950
2951
18.4E
    lastChild = xmlGetLastChild(ctxt->node);
2952
18.4E
    if (lastChild == NULL) {
2953
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2954
0
            (ctxt->node->content != NULL)) return(0);
2955
18.4E
    } else if (xmlNodeIsText(lastChild))
2956
0
        return(0);
2957
18.4E
    else if ((ctxt->node->children != NULL) &&
2958
0
             (xmlNodeIsText(ctxt->node->children)))
2959
0
        return(0);
2960
18.4E
    return(1);
2961
18.4E
}
2962
2963
/************************************************************************
2964
 *                  *
2965
 *    Extra stuff for namespace support     *
2966
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2967
 *                  *
2968
 ************************************************************************/
2969
2970
/**
2971
 * xmlSplitQName:
2972
 * @ctxt:  an XML parser context
2973
 * @name:  an XML parser context
2974
 * @prefixOut:  a xmlChar **
2975
 *
2976
 * DEPRECATED: Don't use.
2977
 *
2978
 * parse an UTF8 encoded XML qualified name string
2979
 *
2980
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2981
 *
2982
 * [NS 6] Prefix ::= NCName
2983
 *
2984
 * [NS 7] LocalPart ::= NCName
2985
 *
2986
 * Returns the local part, and prefix is updated
2987
 *   to get the Prefix if any.
2988
 */
2989
2990
xmlChar *
2991
0
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
2992
0
    xmlChar buf[XML_MAX_NAMELEN + 5];
2993
0
    xmlChar *buffer = NULL;
2994
0
    int len = 0;
2995
0
    int max = XML_MAX_NAMELEN;
2996
0
    xmlChar *ret = NULL;
2997
0
    xmlChar *prefix;
2998
0
    const xmlChar *cur = name;
2999
0
    int c;
3000
3001
0
    if (prefixOut == NULL) return(NULL);
3002
0
    *prefixOut = NULL;
3003
3004
0
    if (cur == NULL) return(NULL);
3005
3006
    /* nasty but well=formed */
3007
0
    if (cur[0] == ':')
3008
0
  return(xmlStrdup(name));
3009
3010
0
    c = *cur++;
3011
0
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3012
0
  buf[len++] = c;
3013
0
  c = *cur++;
3014
0
    }
3015
0
    if (len >= max) {
3016
  /*
3017
   * Okay someone managed to make a huge name, so he's ready to pay
3018
   * for the processing speed.
3019
   */
3020
0
  max = len * 2;
3021
3022
0
  buffer = xmlMalloc(max);
3023
0
  if (buffer == NULL) {
3024
0
      xmlErrMemory(ctxt);
3025
0
      return(NULL);
3026
0
  }
3027
0
  memcpy(buffer, buf, len);
3028
0
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3029
0
      if (len + 10 > max) {
3030
0
          xmlChar *tmp;
3031
0
                int newSize;
3032
3033
0
                newSize = xmlGrowCapacity(max, 1, 1, XML_MAX_ITEMS);
3034
0
                if (newSize < 0) {
3035
0
        xmlErrMemory(ctxt);
3036
0
        xmlFree(buffer);
3037
0
        return(NULL);
3038
0
                }
3039
0
    tmp = xmlRealloc(buffer, newSize);
3040
0
    if (tmp == NULL) {
3041
0
        xmlErrMemory(ctxt);
3042
0
        xmlFree(buffer);
3043
0
        return(NULL);
3044
0
    }
3045
0
    buffer = tmp;
3046
0
    max = newSize;
3047
0
      }
3048
0
      buffer[len++] = c;
3049
0
      c = *cur++;
3050
0
  }
3051
0
  buffer[len] = 0;
3052
0
    }
3053
3054
0
    if ((c == ':') && (*cur == 0)) {
3055
0
        if (buffer != NULL)
3056
0
      xmlFree(buffer);
3057
0
  return(xmlStrdup(name));
3058
0
    }
3059
3060
0
    if (buffer == NULL) {
3061
0
  ret = xmlStrndup(buf, len);
3062
0
        if (ret == NULL) {
3063
0
      xmlErrMemory(ctxt);
3064
0
      return(NULL);
3065
0
        }
3066
0
    } else {
3067
0
  ret = buffer;
3068
0
  buffer = NULL;
3069
0
  max = XML_MAX_NAMELEN;
3070
0
    }
3071
3072
3073
0
    if (c == ':') {
3074
0
  c = *cur;
3075
0
        prefix = ret;
3076
0
  if (c == 0) {
3077
0
      ret = xmlStrndup(BAD_CAST "", 0);
3078
0
            if (ret == NULL) {
3079
0
                xmlFree(prefix);
3080
0
                return(NULL);
3081
0
            }
3082
0
            *prefixOut = prefix;
3083
0
            return(ret);
3084
0
  }
3085
0
  len = 0;
3086
3087
  /*
3088
   * Check that the first character is proper to start
3089
   * a new name
3090
   */
3091
0
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3092
0
        ((c >= 0x41) && (c <= 0x5A)) ||
3093
0
        (c == '_') || (c == ':'))) {
3094
0
      int l;
3095
0
      int first = CUR_SCHAR(cur, l);
3096
3097
0
      if (!IS_LETTER(first) && (first != '_')) {
3098
0
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3099
0
          "Name %s is not XML Namespace compliant\n",
3100
0
          name);
3101
0
      }
3102
0
  }
3103
0
  cur++;
3104
3105
0
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3106
0
      buf[len++] = c;
3107
0
      c = *cur++;
3108
0
  }
3109
0
  if (len >= max) {
3110
      /*
3111
       * Okay someone managed to make a huge name, so he's ready to pay
3112
       * for the processing speed.
3113
       */
3114
0
      max = len * 2;
3115
3116
0
      buffer = xmlMalloc(max);
3117
0
      if (buffer == NULL) {
3118
0
          xmlErrMemory(ctxt);
3119
0
                xmlFree(prefix);
3120
0
    return(NULL);
3121
0
      }
3122
0
      memcpy(buffer, buf, len);
3123
0
      while (c != 0) { /* tested bigname2.xml */
3124
0
    if (len + 10 > max) {
3125
0
        xmlChar *tmp;
3126
0
                    int newSize;
3127
3128
0
                    newSize = xmlGrowCapacity(max, 1, 1, XML_MAX_ITEMS);
3129
0
                    if (newSize < 0) {
3130
0
                        xmlErrMemory(ctxt);
3131
0
                        xmlFree(buffer);
3132
0
                        return(NULL);
3133
0
                    }
3134
0
        tmp = xmlRealloc(buffer, newSize);
3135
0
        if (tmp == NULL) {
3136
0
      xmlErrMemory(ctxt);
3137
0
                        xmlFree(prefix);
3138
0
      xmlFree(buffer);
3139
0
      return(NULL);
3140
0
        }
3141
0
        buffer = tmp;
3142
0
                    max = newSize;
3143
0
    }
3144
0
    buffer[len++] = c;
3145
0
    c = *cur++;
3146
0
      }
3147
0
      buffer[len] = 0;
3148
0
  }
3149
3150
0
  if (buffer == NULL) {
3151
0
      ret = xmlStrndup(buf, len);
3152
0
            if (ret == NULL) {
3153
0
                xmlFree(prefix);
3154
0
                return(NULL);
3155
0
            }
3156
0
  } else {
3157
0
      ret = buffer;
3158
0
  }
3159
3160
0
        *prefixOut = prefix;
3161
0
    }
3162
3163
0
    return(ret);
3164
0
}
3165
3166
/************************************************************************
3167
 *                  *
3168
 *      The parser itself       *
3169
 *  Relates to http://www.w3.org/TR/REC-xml       *
3170
 *                  *
3171
 ************************************************************************/
3172
3173
/************************************************************************
3174
 *                  *
3175
 *  Routines to parse Name, NCName and NmToken      *
3176
 *                  *
3177
 ************************************************************************/
3178
3179
/*
3180
 * The two following functions are related to the change of accepted
3181
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3182
 * They correspond to the modified production [4] and the new production [4a]
3183
 * changes in that revision. Also note that the macros used for the
3184
 * productions Letter, Digit, CombiningChar and Extender are not needed
3185
 * anymore.
3186
 * We still keep compatibility to pre-revision5 parsing semantic if the
3187
 * new XML_PARSE_OLD10 option is given to the parser.
3188
 */
3189
static int
3190
1.57M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3191
1.57M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3192
        /*
3193
   * Use the new checks of production [4] [4a] amd [5] of the
3194
   * Update 5 of XML-1.0
3195
   */
3196
1.57M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3197
1.56M
      (((c >= 'a') && (c <= 'z')) ||
3198
730k
       ((c >= 'A') && (c <= 'Z')) ||
3199
448k
       (c == '_') || (c == ':') ||
3200
251k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3201
248k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3202
247k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3203
245k
       ((c >= 0x370) && (c <= 0x37D)) ||
3204
244k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3205
243k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3206
242k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3207
242k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3208
242k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3209
233k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3210
233k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3211
121k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3212
1.45M
      return(1);
3213
1.57M
    } else {
3214
0
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3215
0
      return(1);
3216
0
    }
3217
121k
    return(0);
3218
1.57M
}
3219
3220
static int
3221
34.9M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3222
34.9M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3223
        /*
3224
   * Use the new checks of production [4] [4a] amd [5] of the
3225
   * Update 5 of XML-1.0
3226
   */
3227
34.9M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3228
34.5M
      (((c >= 'a') && (c <= 'z')) ||
3229
19.6M
       ((c >= 'A') && (c <= 'Z')) ||
3230
15.0M
       ((c >= '0') && (c <= '9')) || /* !start */
3231
13.3M
       (c == '_') || (c == ':') ||
3232
12.5M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3233
12.0M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3234
11.9M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3235
11.9M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3236
11.6M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3237
11.6M
       ((c >= 0x370) && (c <= 0x37D)) ||
3238
11.6M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3239
11.5M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3240
11.5M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3241
11.5M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3242
11.5M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3243
11.5M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3244
11.4M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3245
11.4M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3246
206k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3247
34.3M
       return(1);
3248
34.9M
    } else {
3249
0
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3250
0
            (c == '.') || (c == '-') ||
3251
0
      (c == '_') || (c == ':') ||
3252
0
      (IS_COMBINING(c)) ||
3253
0
      (IS_EXTENDER(c)))
3254
0
      return(1);
3255
0
    }
3256
591k
    return(0);
3257
34.9M
}
3258
3259
static const xmlChar *
3260
311k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3261
311k
    const xmlChar *ret;
3262
311k
    int len = 0, l;
3263
311k
    int c;
3264
311k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3265
310k
                    XML_MAX_TEXT_LENGTH :
3266
311k
                    XML_MAX_NAME_LENGTH;
3267
3268
    /*
3269
     * Handler for more complex cases
3270
     */
3271
311k
    c = xmlCurrentChar(ctxt, &l);
3272
311k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3273
        /*
3274
   * Use the new checks of production [4] [4a] amd [5] of the
3275
   * Update 5 of XML-1.0
3276
   */
3277
311k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3278
303k
      (!(((c >= 'a') && (c <= 'z')) ||
3279
262k
         ((c >= 'A') && (c <= 'Z')) ||
3280
255k
         (c == '_') || (c == ':') ||
3281
251k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3282
249k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3283
235k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3284
225k
         ((c >= 0x370) && (c <= 0x37D)) ||
3285
223k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3286
219k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3287
217k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3288
202k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3289
199k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3290
190k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3291
188k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3292
181k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3293
181k
      return(NULL);
3294
181k
  }
3295
129k
  len += l;
3296
129k
  NEXTL(l);
3297
129k
  c = xmlCurrentChar(ctxt, &l);
3298
842k
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3299
832k
         (((c >= 'a') && (c <= 'z')) ||
3300
611k
          ((c >= 'A') && (c <= 'Z')) ||
3301
580k
          ((c >= '0') && (c <= '9')) || /* !start */
3302
540k
          (c == '_') || (c == ':') ||
3303
499k
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3304
481k
          ((c >= 0xC0) && (c <= 0xD6)) ||
3305
478k
          ((c >= 0xD8) && (c <= 0xF6)) ||
3306
464k
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3307
440k
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3308
439k
          ((c >= 0x370) && (c <= 0x37D)) ||
3309
439k
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3310
423k
          ((c >= 0x200C) && (c <= 0x200D)) ||
3311
421k
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3312
421k
          ((c >= 0x2070) && (c <= 0x218F)) ||
3313
402k
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3314
384k
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3315
252k
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3316
252k
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3317
122k
          ((c >= 0x10000) && (c <= 0xEFFFF))
3318
832k
    )) {
3319
712k
            if (len <= INT_MAX - l)
3320
712k
          len += l;
3321
712k
      NEXTL(l);
3322
712k
      c = xmlCurrentChar(ctxt, &l);
3323
712k
  }
3324
129k
    } else {
3325
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3326
0
      (!IS_LETTER(c) && (c != '_') &&
3327
0
       (c != ':'))) {
3328
0
      return(NULL);
3329
0
  }
3330
0
  len += l;
3331
0
  NEXTL(l);
3332
0
  c = xmlCurrentChar(ctxt, &l);
3333
3334
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3335
0
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3336
0
    (c == '.') || (c == '-') ||
3337
0
    (c == '_') || (c == ':') ||
3338
0
    (IS_COMBINING(c)) ||
3339
0
    (IS_EXTENDER(c)))) {
3340
0
            if (len <= INT_MAX - l)
3341
0
          len += l;
3342
0
      NEXTL(l);
3343
0
      c = xmlCurrentChar(ctxt, &l);
3344
0
  }
3345
0
    }
3346
129k
    if (len > maxLength) {
3347
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3348
0
        return(NULL);
3349
0
    }
3350
129k
    if (ctxt->input->cur - ctxt->input->base < len) {
3351
        /*
3352
         * There were a couple of bugs where PERefs lead to to a change
3353
         * of the buffer. Check the buffer size to avoid passing an invalid
3354
         * pointer to xmlDictLookup.
3355
         */
3356
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3357
0
                    "unexpected change of input buffer");
3358
0
        return (NULL);
3359
0
    }
3360
129k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3361
767
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3362
128k
    else
3363
128k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3364
129k
    if (ret == NULL)
3365
0
        xmlErrMemory(ctxt);
3366
129k
    return(ret);
3367
129k
}
3368
3369
/**
3370
 * xmlParseName:
3371
 * @ctxt:  an XML parser context
3372
 *
3373
 * DEPRECATED: Internal function, don't use.
3374
 *
3375
 * parse an XML name.
3376
 *
3377
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3378
 *                  CombiningChar | Extender
3379
 *
3380
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3381
 *
3382
 * [6] Names ::= Name (#x20 Name)*
3383
 *
3384
 * Returns the Name parsed or NULL
3385
 */
3386
3387
const xmlChar *
3388
1.07M
xmlParseName(xmlParserCtxtPtr ctxt) {
3389
1.07M
    const xmlChar *in;
3390
1.07M
    const xmlChar *ret;
3391
1.07M
    size_t count = 0;
3392
1.07M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3393
1.07M
                       XML_MAX_TEXT_LENGTH :
3394
1.07M
                       XML_MAX_NAME_LENGTH;
3395
3396
1.07M
    GROW;
3397
3398
    /*
3399
     * Accelerator for simple ASCII names
3400
     */
3401
1.07M
    in = ctxt->input->cur;
3402
1.07M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3403
342k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3404
816k
  (*in == '_') || (*in == ':')) {
3405
816k
  in++;
3406
3.79M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3407
1.51M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3408
1.23M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3409
1.01M
         (*in == '_') || (*in == '-') ||
3410
886k
         (*in == ':') || (*in == '.'))
3411
2.97M
      in++;
3412
816k
  if ((*in > 0) && (*in < 0x80)) {
3413
763k
      count = in - ctxt->input->cur;
3414
763k
            if (count > maxLength) {
3415
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3416
0
                return(NULL);
3417
0
            }
3418
763k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3419
763k
      ctxt->input->cur = in;
3420
763k
      ctxt->input->col += count;
3421
763k
      if (ret == NULL)
3422
0
          xmlErrMemory(ctxt);
3423
763k
      return(ret);
3424
763k
  }
3425
816k
    }
3426
    /* accelerator for special cases */
3427
311k
    return(xmlParseNameComplex(ctxt));
3428
1.07M
}
3429
3430
static xmlHashedString
3431
1.59M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3432
1.59M
    xmlHashedString ret;
3433
1.59M
    int len = 0, l;
3434
1.59M
    int c;
3435
1.59M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3436
1.55M
                    XML_MAX_TEXT_LENGTH :
3437
1.59M
                    XML_MAX_NAME_LENGTH;
3438
1.59M
    size_t startPosition = 0;
3439
3440
1.59M
    ret.name = NULL;
3441
1.59M
    ret.hashValue = 0;
3442
3443
    /*
3444
     * Handler for more complex cases
3445
     */
3446
1.59M
    startPosition = CUR_PTR - BASE_PTR;
3447
1.59M
    c = xmlCurrentChar(ctxt, &l);
3448
1.59M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3449
1.51M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3450
386k
  return(ret);
3451
386k
    }
3452
3453
30.6M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3454
29.5M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3455
29.4M
        if (len <= INT_MAX - l)
3456
29.4M
      len += l;
3457
29.4M
  NEXTL(l);
3458
29.4M
  c = xmlCurrentChar(ctxt, &l);
3459
29.4M
    }
3460
1.20M
    if (len > maxLength) {
3461
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3462
0
        return(ret);
3463
0
    }
3464
1.20M
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3465
1.20M
    if (ret.name == NULL)
3466
0
        xmlErrMemory(ctxt);
3467
1.20M
    return(ret);
3468
1.20M
}
3469
3470
/**
3471
 * xmlParseNCName:
3472
 * @ctxt:  an XML parser context
3473
 * @len:  length of the string parsed
3474
 *
3475
 * parse an XML name.
3476
 *
3477
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3478
 *                      CombiningChar | Extender
3479
 *
3480
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3481
 *
3482
 * Returns the Name parsed or NULL
3483
 */
3484
3485
static xmlHashedString
3486
75.0M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3487
75.0M
    const xmlChar *in, *e;
3488
75.0M
    xmlHashedString ret;
3489
75.0M
    size_t count = 0;
3490
75.0M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3491
72.2M
                       XML_MAX_TEXT_LENGTH :
3492
75.0M
                       XML_MAX_NAME_LENGTH;
3493
3494
75.0M
    ret.name = NULL;
3495
3496
    /*
3497
     * Accelerator for simple ASCII names
3498
     */
3499
75.0M
    in = ctxt->input->cur;
3500
75.0M
    e = ctxt->input->end;
3501
75.0M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3502
5.68M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3503
74.5M
   (*in == '_')) && (in < e)) {
3504
74.5M
  in++;
3505
424M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3506
138M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3507
94.3M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3508
82.2M
          (*in == '_') || (*in == '-') ||
3509
350M
          (*in == '.')) && (in < e))
3510
350M
      in++;
3511
74.5M
  if (in >= e)
3512
17.8k
      goto complex;
3513
74.4M
  if ((*in > 0) && (*in < 0x80)) {
3514
73.4M
      count = in - ctxt->input->cur;
3515
73.4M
            if (count > maxLength) {
3516
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3517
0
                return(ret);
3518
0
            }
3519
73.4M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3520
73.4M
      ctxt->input->cur = in;
3521
73.4M
      ctxt->input->col += count;
3522
73.4M
      if (ret.name == NULL) {
3523
0
          xmlErrMemory(ctxt);
3524
0
      }
3525
73.4M
      return(ret);
3526
73.4M
  }
3527
74.4M
    }
3528
1.59M
complex:
3529
1.59M
    return(xmlParseNCNameComplex(ctxt));
3530
75.0M
}
3531
3532
/**
3533
 * xmlParseNameAndCompare:
3534
 * @ctxt:  an XML parser context
3535
 *
3536
 * parse an XML name and compares for match
3537
 * (specialized for endtag parsing)
3538
 *
3539
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3540
 * and the name for mismatch
3541
 */
3542
3543
static const xmlChar *
3544
2.72M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3545
2.72M
    register const xmlChar *cmp = other;
3546
2.72M
    register const xmlChar *in;
3547
2.72M
    const xmlChar *ret;
3548
3549
2.72M
    GROW;
3550
3551
2.72M
    in = ctxt->input->cur;
3552
8.80M
    while (*in != 0 && *in == *cmp) {
3553
6.08M
  ++in;
3554
6.08M
  ++cmp;
3555
6.08M
    }
3556
2.72M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3557
  /* success */
3558
2.71M
  ctxt->input->col += in - ctxt->input->cur;
3559
2.71M
  ctxt->input->cur = in;
3560
2.71M
  return (const xmlChar*) 1;
3561
2.71M
    }
3562
    /* failure (or end of input buffer), check with full function */
3563
5.55k
    ret = xmlParseName (ctxt);
3564
    /* strings coming from the dictionary direct compare possible */
3565
5.55k
    if (ret == other) {
3566
56
  return (const xmlChar*) 1;
3567
56
    }
3568
5.49k
    return ret;
3569
5.55k
}
3570
3571
/**
3572
 * xmlParseStringName:
3573
 * @ctxt:  an XML parser context
3574
 * @str:  a pointer to the string pointer (IN/OUT)
3575
 *
3576
 * parse an XML name.
3577
 *
3578
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3579
 *                  CombiningChar | Extender
3580
 *
3581
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3582
 *
3583
 * [6] Names ::= Name (#x20 Name)*
3584
 *
3585
 * Returns the Name parsed or NULL. The @str pointer
3586
 * is updated to the current location in the string.
3587
 */
3588
3589
static xmlChar *
3590
58.9k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3591
58.9k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3592
58.9k
    xmlChar *ret;
3593
58.9k
    const xmlChar *cur = *str;
3594
58.9k
    int len = 0, l;
3595
58.9k
    int c;
3596
58.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3597
58.9k
                    XML_MAX_TEXT_LENGTH :
3598
58.9k
                    XML_MAX_NAME_LENGTH;
3599
3600
58.9k
    c = CUR_SCHAR(cur, l);
3601
58.9k
    if (!xmlIsNameStartChar(ctxt, c)) {
3602
11.9k
  return(NULL);
3603
11.9k
    }
3604
3605
46.9k
    COPY_BUF(buf, len, c);
3606
46.9k
    cur += l;
3607
46.9k
    c = CUR_SCHAR(cur, l);
3608
357k
    while (xmlIsNameChar(ctxt, c)) {
3609
314k
  COPY_BUF(buf, len, c);
3610
314k
  cur += l;
3611
314k
  c = CUR_SCHAR(cur, l);
3612
314k
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3613
      /*
3614
       * Okay someone managed to make a huge name, so he's ready to pay
3615
       * for the processing speed.
3616
       */
3617
3.71k
      xmlChar *buffer;
3618
3.71k
      int max = len * 2;
3619
3620
3.71k
      buffer = xmlMalloc(max);
3621
3.71k
      if (buffer == NULL) {
3622
0
          xmlErrMemory(ctxt);
3623
0
    return(NULL);
3624
0
      }
3625
3.71k
      memcpy(buffer, buf, len);
3626
149k
      while (xmlIsNameChar(ctxt, c)) {
3627
145k
    if (len + 10 > max) {
3628
1.48k
        xmlChar *tmp;
3629
1.48k
                    int newSize;
3630
3631
1.48k
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3632
1.48k
                    if (newSize < 0) {
3633
0
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3634
0
                        xmlFree(buffer);
3635
0
                        return(NULL);
3636
0
                    }
3637
1.48k
        tmp = xmlRealloc(buffer, newSize);
3638
1.48k
        if (tmp == NULL) {
3639
0
      xmlErrMemory(ctxt);
3640
0
      xmlFree(buffer);
3641
0
      return(NULL);
3642
0
        }
3643
1.48k
        buffer = tmp;
3644
1.48k
                    max = newSize;
3645
1.48k
    }
3646
145k
    COPY_BUF(buffer, len, c);
3647
145k
    cur += l;
3648
145k
    c = CUR_SCHAR(cur, l);
3649
145k
      }
3650
3.71k
      buffer[len] = 0;
3651
3.71k
      *str = cur;
3652
3.71k
      return(buffer);
3653
3.71k
  }
3654
314k
    }
3655
43.2k
    if (len > maxLength) {
3656
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3657
0
        return(NULL);
3658
0
    }
3659
43.2k
    *str = cur;
3660
43.2k
    ret = xmlStrndup(buf, len);
3661
43.2k
    if (ret == NULL)
3662
0
        xmlErrMemory(ctxt);
3663
43.2k
    return(ret);
3664
43.2k
}
3665
3666
/**
3667
 * xmlParseNmtoken:
3668
 * @ctxt:  an XML parser context
3669
 *
3670
 * DEPRECATED: Internal function, don't use.
3671
 *
3672
 * parse an XML Nmtoken.
3673
 *
3674
 * [7] Nmtoken ::= (NameChar)+
3675
 *
3676
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3677
 *
3678
 * Returns the Nmtoken parsed or NULL
3679
 */
3680
3681
xmlChar *
3682
498k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3683
498k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3684
498k
    xmlChar *ret;
3685
498k
    int len = 0, l;
3686
498k
    int c;
3687
498k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3688
492k
                    XML_MAX_TEXT_LENGTH :
3689
498k
                    XML_MAX_NAME_LENGTH;
3690
3691
498k
    c = xmlCurrentChar(ctxt, &l);
3692
3693
4.46M
    while (xmlIsNameChar(ctxt, c)) {
3694
3.97M
  COPY_BUF(buf, len, c);
3695
3.97M
  NEXTL(l);
3696
3.97M
  c = xmlCurrentChar(ctxt, &l);
3697
3.97M
  if (len >= XML_MAX_NAMELEN) {
3698
      /*
3699
       * Okay someone managed to make a huge token, so he's ready to pay
3700
       * for the processing speed.
3701
       */
3702
3.90k
      xmlChar *buffer;
3703
3.90k
      int max = len * 2;
3704
3705
3.90k
      buffer = xmlMalloc(max);
3706
3.90k
      if (buffer == NULL) {
3707
0
          xmlErrMemory(ctxt);
3708
0
    return(NULL);
3709
0
      }
3710
3.90k
      memcpy(buffer, buf, len);
3711
446k
      while (xmlIsNameChar(ctxt, c)) {
3712
442k
    if (len + 10 > max) {
3713
1.48k
        xmlChar *tmp;
3714
1.48k
                    int newSize;
3715
3716
1.48k
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3717
1.48k
                    if (newSize < 0) {
3718
0
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3719
0
                        xmlFree(buffer);
3720
0
                        return(NULL);
3721
0
                    }
3722
1.48k
        tmp = xmlRealloc(buffer, newSize);
3723
1.48k
        if (tmp == NULL) {
3724
0
      xmlErrMemory(ctxt);
3725
0
      xmlFree(buffer);
3726
0
      return(NULL);
3727
0
        }
3728
1.48k
        buffer = tmp;
3729
1.48k
                    max = newSize;
3730
1.48k
    }
3731
442k
    COPY_BUF(buffer, len, c);
3732
442k
    NEXTL(l);
3733
442k
    c = xmlCurrentChar(ctxt, &l);
3734
442k
      }
3735
3.90k
      buffer[len] = 0;
3736
3.90k
      return(buffer);
3737
3.90k
  }
3738
3.97M
    }
3739
494k
    if (len == 0)
3740
77.2k
        return(NULL);
3741
417k
    if (len > maxLength) {
3742
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3743
0
        return(NULL);
3744
0
    }
3745
417k
    ret = xmlStrndup(buf, len);
3746
417k
    if (ret == NULL)
3747
0
        xmlErrMemory(ctxt);
3748
417k
    return(ret);
3749
417k
}
3750
3751
/**
3752
 * xmlExpandPEsInEntityValue:
3753
 * @ctxt:  parser context
3754
 * @buf:  string buffer
3755
 * @str:  entity value
3756
 * @length:  size of entity value
3757
 * @depth:  nesting depth
3758
 *
3759
 * Validate an entity value and expand parameter entities.
3760
 */
3761
static void
3762
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3763
44.2k
                          const xmlChar *str, int length, int depth) {
3764
44.2k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3765
44.2k
    const xmlChar *end, *chunk;
3766
44.2k
    int c, l;
3767
3768
44.2k
    if (str == NULL)
3769
0
        return;
3770
3771
44.2k
    depth += 1;
3772
44.2k
    if (depth > maxDepth) {
3773
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3774
0
                       "Maximum entity nesting depth exceeded");
3775
0
  return;
3776
0
    }
3777
3778
44.2k
    end = str + length;
3779
44.2k
    chunk = str;
3780
3781
2.52M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3782
2.51M
        c = *str;
3783
3784
2.51M
        if (c >= 0x80) {
3785
428k
            l = xmlUTF8MultibyteLen(ctxt, str,
3786
428k
                    "invalid character in entity value\n");
3787
428k
            if (l == 0) {
3788
202k
                if (chunk < str)
3789
25.0k
                    xmlSBufAddString(buf, chunk, str - chunk);
3790
202k
                xmlSBufAddReplChar(buf);
3791
202k
                str += 1;
3792
202k
                chunk = str;
3793
226k
            } else {
3794
226k
                str += l;
3795
226k
            }
3796
2.08M
        } else if (c == '&') {
3797
106k
            if (str[1] == '#') {
3798
51.2k
                if (chunk < str)
3799
32.6k
                    xmlSBufAddString(buf, chunk, str - chunk);
3800
3801
51.2k
                c = xmlParseStringCharRef(ctxt, &str);
3802
51.2k
                if (c == 0)
3803
8.51k
                    return;
3804
3805
42.7k
                xmlSBufAddChar(buf, c);
3806
3807
42.7k
                chunk = str;
3808
55.4k
            } else {
3809
55.4k
                xmlChar *name;
3810
3811
                /*
3812
                 * General entity references are checked for
3813
                 * syntactic validity.
3814
                 */
3815
55.4k
                str++;
3816
55.4k
                name = xmlParseStringName(ctxt, &str);
3817
3818
55.4k
                if ((name == NULL) || (*str++ != ';')) {
3819
19.9k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3820
19.9k
                            "EntityValue: '&' forbidden except for entities "
3821
19.9k
                            "references\n");
3822
19.9k
                    xmlFree(name);
3823
19.9k
                    return;
3824
19.9k
                }
3825
3826
35.5k
                xmlFree(name);
3827
35.5k
            }
3828
1.98M
        } else if (c == '%') {
3829
3.40k
            xmlEntityPtr ent;
3830
3831
3.40k
            if (chunk < str)
3832
3.03k
                xmlSBufAddString(buf, chunk, str - chunk);
3833
3834
3.40k
            ent = xmlParseStringPEReference(ctxt, &str);
3835
3.40k
            if (ent == NULL)
3836
3.40k
                return;
3837
3838
0
            if (!PARSER_EXTERNAL(ctxt)) {
3839
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3840
0
                return;
3841
0
            }
3842
3843
0
            if (ent->content == NULL) {
3844
                /*
3845
                 * Note: external parsed entities will not be loaded,
3846
                 * it is not required for a non-validating parser to
3847
                 * complete external PEReferences coming from the
3848
                 * internal subset
3849
                 */
3850
0
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3851
0
                    ((ctxt->replaceEntities) ||
3852
0
                     (ctxt->validate))) {
3853
0
                    xmlLoadEntityContent(ctxt, ent);
3854
0
                } else {
3855
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3856
0
                                  "not validating will not read content for "
3857
0
                                  "PE entity %s\n", ent->name, NULL);
3858
0
                }
3859
0
            }
3860
3861
            /*
3862
             * TODO: Skip if ent->content is still NULL.
3863
             */
3864
3865
0
            if (xmlParserEntityCheck(ctxt, ent->length))
3866
0
                return;
3867
3868
0
            if (ent->flags & XML_ENT_EXPANDING) {
3869
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3870
0
                xmlHaltParser(ctxt);
3871
0
                return;
3872
0
            }
3873
3874
0
            ent->flags |= XML_ENT_EXPANDING;
3875
0
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3876
0
                                      depth);
3877
0
            ent->flags &= ~XML_ENT_EXPANDING;
3878
3879
0
            chunk = str;
3880
1.97M
        } else {
3881
            /* Normal ASCII char */
3882
1.97M
            if (!IS_BYTE_CHAR(c)) {
3883
214k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3884
214k
                        "invalid character in entity value\n");
3885
214k
                if (chunk < str)
3886
10.4k
                    xmlSBufAddString(buf, chunk, str - chunk);
3887
214k
                xmlSBufAddReplChar(buf);
3888
214k
                str += 1;
3889
214k
                chunk = str;
3890
1.76M
            } else {
3891
1.76M
                str += 1;
3892
1.76M
            }
3893
1.97M
        }
3894
2.51M
    }
3895
3896
12.4k
    if (chunk < str)
3897
8.37k
        xmlSBufAddString(buf, chunk, str - chunk);
3898
12.4k
}
3899
3900
/**
3901
 * xmlParseEntityValue:
3902
 * @ctxt:  an XML parser context
3903
 * @orig:  if non-NULL store a copy of the original entity value
3904
 *
3905
 * DEPRECATED: Internal function, don't use.
3906
 *
3907
 * parse a value for ENTITY declarations
3908
 *
3909
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3910
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3911
 *
3912
 * Returns the EntityValue parsed with character and PE references substituted or NULL
3913
 */
3914
xmlChar *
3915
44.7k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3916
44.7k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3917
44.7k
                         XML_MAX_HUGE_LENGTH :
3918
44.7k
                         XML_MAX_TEXT_LENGTH;
3919
44.7k
    xmlSBuf buf;
3920
44.7k
    const xmlChar *start;
3921
44.7k
    int quote, length;
3922
3923
44.7k
    xmlSBufInit(&buf, maxLength);
3924
3925
44.7k
    GROW;
3926
3927
44.7k
    quote = CUR;
3928
44.7k
    if ((quote != '"') && (quote != '\'')) {
3929
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3930
0
  return(NULL);
3931
0
    }
3932
44.7k
    CUR_PTR++;
3933
3934
44.7k
    length = 0;
3935
3936
    /*
3937
     * Copy raw content of the entity into a buffer
3938
     */
3939
7.34M
    while (1) {
3940
7.34M
        int c;
3941
3942
7.34M
        if (PARSER_STOPPED(ctxt))
3943
0
            goto error;
3944
3945
7.34M
        if (CUR_PTR >= ctxt->input->end) {
3946
385
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3947
385
            goto error;
3948
385
        }
3949
3950
7.34M
        c = CUR;
3951
3952
7.34M
        if (c == 0) {
3953
83
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3954
83
                    "invalid character in entity value\n");
3955
83
            goto error;
3956
83
        }
3957
7.34M
        if (c == quote)
3958
44.2k
            break;
3959
7.29M
        NEXTL(1);
3960
7.29M
        length += 1;
3961
3962
        /*
3963
         * TODO: Check growth threshold
3964
         */
3965
7.29M
        if (ctxt->input->end - CUR_PTR < 10)
3966
4.45k
            GROW;
3967
7.29M
    }
3968
3969
44.2k
    start = CUR_PTR - length;
3970
3971
44.2k
    if (orig != NULL) {
3972
44.2k
        *orig = xmlStrndup(start, length);
3973
44.2k
        if (*orig == NULL)
3974
0
            xmlErrMemory(ctxt);
3975
44.2k
    }
3976
3977
44.2k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3978
3979
44.2k
    NEXTL(1);
3980
3981
44.2k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3982
3983
468
error:
3984
468
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3985
468
    return(NULL);
3986
44.7k
}
3987
3988
/**
3989
 * xmlCheckEntityInAttValue:
3990
 * @ctxt:  parser context
3991
 * @pent:  entity
3992
 * @depth:  nesting depth
3993
 *
3994
 * Check an entity reference in an attribute value for validity
3995
 * without expanding it.
3996
 */
3997
static void
3998
0
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3999
0
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4000
0
    const xmlChar *str;
4001
0
    unsigned long expandedSize = pent->length;
4002
0
    int c, flags;
4003
4004
0
    depth += 1;
4005
0
    if (depth > maxDepth) {
4006
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4007
0
                       "Maximum entity nesting depth exceeded");
4008
0
  return;
4009
0
    }
4010
4011
0
    if (pent->flags & XML_ENT_EXPANDING) {
4012
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4013
0
        xmlHaltParser(ctxt);
4014
0
        return;
4015
0
    }
4016
4017
    /*
4018
     * If we're parsing a default attribute value in DTD content,
4019
     * the entity might reference other entities which weren't
4020
     * defined yet, so the check isn't reliable.
4021
     */
4022
0
    if (ctxt->inSubset == 0)
4023
0
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4024
0
    else
4025
0
        flags = XML_ENT_VALIDATED;
4026
4027
0
    str = pent->content;
4028
0
    if (str == NULL)
4029
0
        goto done;
4030
4031
    /*
4032
     * Note that entity values are already validated. We only check
4033
     * for illegal less-than signs and compute the expanded size
4034
     * of the entity. No special handling for multi-byte characters
4035
     * is needed.
4036
     */
4037
0
    while (!PARSER_STOPPED(ctxt)) {
4038
0
        c = *str;
4039
4040
0
  if (c != '&') {
4041
0
            if (c == 0)
4042
0
                break;
4043
4044
0
            if (c == '<')
4045
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4046
0
                        "'<' in entity '%s' is not allowed in attributes "
4047
0
                        "values\n", pent->name);
4048
4049
0
            str += 1;
4050
0
        } else if (str[1] == '#') {
4051
0
            int val;
4052
4053
0
      val = xmlParseStringCharRef(ctxt, &str);
4054
0
      if (val == 0) {
4055
0
                pent->content[0] = 0;
4056
0
                break;
4057
0
            }
4058
0
  } else {
4059
0
            xmlChar *name;
4060
0
            xmlEntityPtr ent;
4061
4062
0
      name = xmlParseStringEntityRef(ctxt, &str);
4063
0
      if (name == NULL) {
4064
0
                pent->content[0] = 0;
4065
0
                break;
4066
0
            }
4067
4068
0
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4069
0
            xmlFree(name);
4070
4071
0
            if ((ent != NULL) &&
4072
0
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4073
0
                if ((ent->flags & flags) != flags) {
4074
0
                    pent->flags |= XML_ENT_EXPANDING;
4075
0
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
4076
0
                    pent->flags &= ~XML_ENT_EXPANDING;
4077
0
                }
4078
4079
0
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4080
0
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4081
0
            }
4082
0
        }
4083
0
    }
4084
4085
0
done:
4086
0
    if (ctxt->inSubset == 0)
4087
0
        pent->expandedSize = expandedSize;
4088
4089
0
    pent->flags |= flags;
4090
0
}
4091
4092
/**
4093
 * xmlExpandEntityInAttValue:
4094
 * @ctxt:  parser context
4095
 * @buf:  string buffer
4096
 * @str:  entity or attribute value
4097
 * @pent:  entity for entity value, NULL for attribute values
4098
 * @normalize:  whether to collapse whitespace
4099
 * @inSpace:  whitespace state
4100
 * @depth:  nesting depth
4101
 * @check:  whether to check for amplification
4102
 *
4103
 * Expand general entity references in an entity or attribute value.
4104
 * Perform attribute value normalization.
4105
 */
4106
static void
4107
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4108
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
4109
0
                          int *inSpace, int depth, int check) {
4110
0
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4111
0
    int c, chunkSize;
4112
4113
0
    if (str == NULL)
4114
0
        return;
4115
4116
0
    depth += 1;
4117
0
    if (depth > maxDepth) {
4118
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4119
0
                       "Maximum entity nesting depth exceeded");
4120
0
  return;
4121
0
    }
4122
4123
0
    if (pent != NULL) {
4124
0
        if (pent->flags & XML_ENT_EXPANDING) {
4125
0
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4126
0
            xmlHaltParser(ctxt);
4127
0
            return;
4128
0
        }
4129
4130
0
        if (check) {
4131
0
            if (xmlParserEntityCheck(ctxt, pent->length))
4132
0
                return;
4133
0
        }
4134
0
    }
4135
4136
0
    chunkSize = 0;
4137
4138
    /*
4139
     * Note that entity values are already validated. No special
4140
     * handling for multi-byte characters is needed.
4141
     */
4142
0
    while (!PARSER_STOPPED(ctxt)) {
4143
0
        c = *str;
4144
4145
0
  if (c != '&') {
4146
0
            if (c == 0)
4147
0
                break;
4148
4149
            /*
4150
             * If this function is called without an entity, it is used to
4151
             * expand entities in an attribute content where less-than was
4152
             * already unescaped and is allowed.
4153
             */
4154
0
            if ((pent != NULL) && (c == '<')) {
4155
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4156
0
                        "'<' in entity '%s' is not allowed in attributes "
4157
0
                        "values\n", pent->name);
4158
0
                break;
4159
0
            }
4160
4161
0
            if (c <= 0x20) {
4162
0
                if ((normalize) && (*inSpace)) {
4163
                    /* Skip char */
4164
0
                    if (chunkSize > 0) {
4165
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4166
0
                        chunkSize = 0;
4167
0
                    }
4168
0
                } else if (c < 0x20) {
4169
0
                    if (chunkSize > 0) {
4170
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4171
0
                        chunkSize = 0;
4172
0
                    }
4173
4174
0
                    xmlSBufAddCString(buf, " ", 1);
4175
0
                } else {
4176
0
                    chunkSize += 1;
4177
0
                }
4178
4179
0
                *inSpace = 1;
4180
0
            } else {
4181
0
                chunkSize += 1;
4182
0
                *inSpace = 0;
4183
0
            }
4184
4185
0
            str += 1;
4186
0
        } else if (str[1] == '#') {
4187
0
            int val;
4188
4189
0
            if (chunkSize > 0) {
4190
0
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4191
0
                chunkSize = 0;
4192
0
            }
4193
4194
0
      val = xmlParseStringCharRef(ctxt, &str);
4195
0
      if (val == 0) {
4196
0
                if (pent != NULL)
4197
0
                    pent->content[0] = 0;
4198
0
                break;
4199
0
            }
4200
4201
0
            if (val == ' ') {
4202
0
                if ((!normalize) || (!*inSpace))
4203
0
                    xmlSBufAddCString(buf, " ", 1);
4204
0
                *inSpace = 1;
4205
0
            } else {
4206
0
                xmlSBufAddChar(buf, val);
4207
0
                *inSpace = 0;
4208
0
            }
4209
0
  } else {
4210
0
            xmlChar *name;
4211
0
            xmlEntityPtr ent;
4212
4213
0
            if (chunkSize > 0) {
4214
0
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4215
0
                chunkSize = 0;
4216
0
            }
4217
4218
0
      name = xmlParseStringEntityRef(ctxt, &str);
4219
0
            if (name == NULL) {
4220
0
                if (pent != NULL)
4221
0
                    pent->content[0] = 0;
4222
0
                break;
4223
0
            }
4224
4225
0
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4226
0
            xmlFree(name);
4227
4228
0
      if ((ent != NULL) &&
4229
0
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4230
0
    if (ent->content == NULL) {
4231
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4232
0
          "predefined entity has no content\n");
4233
0
                    break;
4234
0
                }
4235
4236
0
                xmlSBufAddString(buf, ent->content, ent->length);
4237
4238
0
                *inSpace = 0;
4239
0
      } else if ((ent != NULL) && (ent->content != NULL)) {
4240
0
                if (pent != NULL)
4241
0
                    pent->flags |= XML_ENT_EXPANDING;
4242
0
    xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4243
0
                                          normalize, inSpace, depth, check);
4244
0
                if (pent != NULL)
4245
0
                    pent->flags &= ~XML_ENT_EXPANDING;
4246
0
      }
4247
0
        }
4248
0
    }
4249
4250
0
    if (chunkSize > 0)
4251
0
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4252
0
}
4253
4254
/**
4255
 * xmlExpandEntitiesInAttValue:
4256
 * @ctxt:  parser context
4257
 * @str:  entity or attribute value
4258
 * @normalize:  whether to collapse whitespace
4259
 *
4260
 * Expand general entity references in an entity or attribute value.
4261
 * Perform attribute value normalization.
4262
 *
4263
 * Returns the expanded attribute value.
4264
 */
4265
xmlChar *
4266
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4267
0
                            int normalize) {
4268
0
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4269
0
                         XML_MAX_HUGE_LENGTH :
4270
0
                         XML_MAX_TEXT_LENGTH;
4271
0
    xmlSBuf buf;
4272
0
    int inSpace = 1;
4273
4274
0
    xmlSBufInit(&buf, maxLength);
4275
4276
0
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4277
0
                              ctxt->inputNr, /* check */ 0);
4278
4279
0
    if ((normalize) && (inSpace) && (buf.size > 0))
4280
0
        buf.size--;
4281
4282
0
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4283
0
}
4284
4285
/**
4286
 * xmlParseAttValueInternal:
4287
 * @ctxt:  an XML parser context
4288
 * @attlen:  attribute len result
4289
 * @alloc:  whether the attribute was reallocated as a new string
4290
 * @normalize:  if 1 then further non-CDATA normalization must be done
4291
 *
4292
 * parse a value for an attribute.
4293
 * NOTE: if no normalization is needed, the routine will return pointers
4294
 *       directly from the data buffer.
4295
 *
4296
 * 3.3.3 Attribute-Value Normalization:
4297
 * Before the value of an attribute is passed to the application or
4298
 * checked for validity, the XML processor must normalize it as follows:
4299
 * - a character reference is processed by appending the referenced
4300
 *   character to the attribute value
4301
 * - an entity reference is processed by recursively processing the
4302
 *   replacement text of the entity
4303
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4304
 *   appending #x20 to the normalized value, except that only a single
4305
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4306
 *   parsed entity or the literal entity value of an internal parsed entity
4307
 * - other characters are processed by appending them to the normalized value
4308
 * If the declared value is not CDATA, then the XML processor must further
4309
 * process the normalized attribute value by discarding any leading and
4310
 * trailing space (#x20) characters, and by replacing sequences of space
4311
 * (#x20) characters by a single space (#x20) character.
4312
 * All attributes for which no declaration has been read should be treated
4313
 * by a non-validating parser as if declared CDATA.
4314
 *
4315
 * Returns the AttValue parsed or NULL. The value has to be freed by the
4316
 *     caller if it was copied, this can be detected by val[*attlen] == 0.
4317
 */
4318
static xmlChar *
4319
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4320
21.7M
                         int normalize, int isNamespace) {
4321
21.7M
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4322
20.7M
                         XML_MAX_HUGE_LENGTH :
4323
21.7M
                         XML_MAX_TEXT_LENGTH;
4324
21.7M
    xmlSBuf buf;
4325
21.7M
    xmlChar *ret;
4326
21.7M
    int c, l, quote, flags, chunkSize;
4327
21.7M
    int inSpace = 1;
4328
21.7M
    int replaceEntities;
4329
4330
    /* Always expand namespace URIs */
4331
21.7M
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4332
4333
21.7M
    xmlSBufInit(&buf, maxLength);
4334
4335
21.7M
    GROW;
4336
4337
21.7M
    quote = CUR;
4338
21.7M
    if ((quote != '"') && (quote != '\'')) {
4339
3.47k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4340
3.47k
  return(NULL);
4341
3.47k
    }
4342
21.7M
    NEXTL(1);
4343
4344
21.7M
    if (ctxt->inSubset == 0)
4345
21.6M
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4346
36.3k
    else
4347
36.3k
        flags = XML_ENT_VALIDATED;
4348
4349
21.7M
    inSpace = 1;
4350
21.7M
    chunkSize = 0;
4351
4352
240M
    while (1) {
4353
240M
        if (PARSER_STOPPED(ctxt))
4354
0
            goto error;
4355
4356
240M
        if (CUR_PTR >= ctxt->input->end) {
4357
5.30k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4358
5.30k
                           "AttValue: ' expected\n");
4359
5.30k
            goto error;
4360
5.30k
        }
4361
4362
        /*
4363
         * TODO: Check growth threshold
4364
         */
4365
240M
        if (ctxt->input->end - CUR_PTR < 10)
4366
54.5k
            GROW;
4367
4368
240M
        c = CUR;
4369
4370
240M
        if (c >= 0x80) {
4371
5.70M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4372
5.70M
                    "invalid character in attribute value\n");
4373
5.70M
            if (l == 0) {
4374
1.80M
                if (chunkSize > 0) {
4375
211k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4376
211k
                    chunkSize = 0;
4377
211k
                }
4378
1.80M
                xmlSBufAddReplChar(&buf);
4379
1.80M
                NEXTL(1);
4380
3.90M
            } else {
4381
3.90M
                chunkSize += l;
4382
3.90M
                NEXTL(l);
4383
3.90M
            }
4384
4385
5.70M
            inSpace = 0;
4386
234M
        } else if (c != '&') {
4387
233M
            if (c > 0x20) {
4388
230M
                if (c == quote)
4389
21.7M
                    break;
4390
4391
208M
                if (c == '<')
4392
170k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4393
4394
208M
                chunkSize += 1;
4395
208M
                inSpace = 0;
4396
208M
            } else if (!IS_BYTE_CHAR(c)) {
4397
1.21M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4398
1.21M
                        "invalid character in attribute value\n");
4399
1.21M
                if (chunkSize > 0) {
4400
70.4k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4401
70.4k
                    chunkSize = 0;
4402
70.4k
                }
4403
1.21M
                xmlSBufAddReplChar(&buf);
4404
1.21M
                inSpace = 0;
4405
2.44M
            } else {
4406
                /* Whitespace */
4407
2.44M
                if ((normalize) && (inSpace)) {
4408
                    /* Skip char */
4409
21.1k
                    if (chunkSize > 0) {
4410
1.74k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4411
1.74k
                        chunkSize = 0;
4412
1.74k
                    }
4413
2.42M
                } else if (c < 0x20) {
4414
                    /* Convert to space */
4415
656k
                    if (chunkSize > 0) {
4416
163k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4417
163k
                        chunkSize = 0;
4418
163k
                    }
4419
4420
656k
                    xmlSBufAddCString(&buf, " ", 1);
4421
1.76M
                } else {
4422
1.76M
                    chunkSize += 1;
4423
1.76M
                }
4424
4425
2.44M
                inSpace = 1;
4426
4427
2.44M
                if ((c == 0xD) && (NXT(1) == 0xA))
4428
23.1k
                    CUR_PTR++;
4429
2.44M
            }
4430
4431
212M
            NEXTL(1);
4432
212M
        } else if (NXT(1) == '#') {
4433
75.2k
            int val;
4434
4435
75.2k
            if (chunkSize > 0) {
4436
35.0k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4437
35.0k
                chunkSize = 0;
4438
35.0k
            }
4439
4440
75.2k
            val = xmlParseCharRef(ctxt);
4441
75.2k
            if (val == 0)
4442
970
                goto error;
4443
4444
74.2k
            if ((val == '&') && (!replaceEntities)) {
4445
                /*
4446
                 * The reparsing will be done in xmlNodeParseContent()
4447
                 * called from SAX2.c
4448
                 */
4449
0
                xmlSBufAddCString(&buf, "&#38;", 5);
4450
0
                inSpace = 0;
4451
74.2k
            } else if (val == ' ') {
4452
43.7k
                if ((!normalize) || (!inSpace))
4453
36.8k
                    xmlSBufAddCString(&buf, " ", 1);
4454
43.7k
                inSpace = 1;
4455
43.7k
            } else {
4456
30.5k
                xmlSBufAddChar(&buf, val);
4457
30.5k
                inSpace = 0;
4458
30.5k
            }
4459
486k
        } else {
4460
486k
            const xmlChar *name;
4461
486k
            xmlEntityPtr ent;
4462
4463
486k
            if (chunkSize > 0) {
4464
255k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4465
255k
                chunkSize = 0;
4466
255k
            }
4467
4468
486k
            name = xmlParseEntityRefInternal(ctxt);
4469
486k
            if (name == NULL) {
4470
                /*
4471
                 * Probably a literal '&' which wasn't escaped.
4472
                 * TODO: Handle gracefully in recovery mode.
4473
                 */
4474
307k
                continue;
4475
307k
            }
4476
4477
179k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4478
179k
            if (ent == NULL)
4479
21.0k
                continue;
4480
4481
158k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4482
158k
                if ((ent->content[0] == '&') && (!replaceEntities))
4483
0
                    xmlSBufAddCString(&buf, "&#38;", 5);
4484
158k
                else
4485
158k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4486
158k
                inSpace = 0;
4487
158k
            } else if (replaceEntities) {
4488
0
                xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4489
0
                                          normalize, &inSpace, ctxt->inputNr,
4490
0
                                          /* check */ 1);
4491
14
            } else {
4492
14
                if ((ent->flags & flags) != flags)
4493
0
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4494
4495
14
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4496
0
                    ent->content[0] = 0;
4497
0
                    goto error;
4498
0
                }
4499
4500
                /*
4501
                 * Just output the reference
4502
                 */
4503
14
                xmlSBufAddCString(&buf, "&", 1);
4504
14
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4505
14
                xmlSBufAddCString(&buf, ";", 1);
4506
4507
14
                inSpace = 0;
4508
14
            }
4509
158k
  }
4510
240M
    }
4511
4512
21.7M
    if ((buf.mem == NULL) && (alloc != NULL)) {
4513
21.4M
        ret = (xmlChar *) CUR_PTR - chunkSize;
4514
4515
21.4M
        if (attlen != NULL)
4516
21.4M
            *attlen = chunkSize;
4517
21.4M
        if ((normalize) && (inSpace) && (chunkSize > 0))
4518
243
            *attlen -= 1;
4519
21.4M
        *alloc = 0;
4520
4521
        /* Report potential error */
4522
21.4M
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4523
21.4M
    } else {
4524
231k
        if (chunkSize > 0)
4525
159k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4526
4527
231k
        if ((normalize) && (inSpace) && (buf.size > 0))
4528
3.25k
            buf.size--;
4529
4530
231k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4531
4532
231k
        if (ret != NULL) {
4533
229k
            if (attlen != NULL)
4534
194k
                *attlen = buf.size;
4535
229k
            if (alloc != NULL)
4536
194k
                *alloc = 1;
4537
229k
        }
4538
231k
    }
4539
4540
21.7M
    NEXTL(1);
4541
4542
21.7M
    return(ret);
4543
4544
6.27k
error:
4545
6.27k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4546
6.27k
    return(NULL);
4547
21.7M
}
4548
4549
/**
4550
 * xmlParseAttValue:
4551
 * @ctxt:  an XML parser context
4552
 *
4553
 * DEPRECATED: Internal function, don't use.
4554
 *
4555
 * parse a value for an attribute
4556
 * Note: the parser won't do substitution of entities here, this
4557
 * will be handled later in xmlStringGetNodeList
4558
 *
4559
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4560
 *                   "'" ([^<&'] | Reference)* "'"
4561
 *
4562
 * 3.3.3 Attribute-Value Normalization:
4563
 * Before the value of an attribute is passed to the application or
4564
 * checked for validity, the XML processor must normalize it as follows:
4565
 * - a character reference is processed by appending the referenced
4566
 *   character to the attribute value
4567
 * - an entity reference is processed by recursively processing the
4568
 *   replacement text of the entity
4569
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4570
 *   appending #x20 to the normalized value, except that only a single
4571
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4572
 *   parsed entity or the literal entity value of an internal parsed entity
4573
 * - other characters are processed by appending them to the normalized value
4574
 * If the declared value is not CDATA, then the XML processor must further
4575
 * process the normalized attribute value by discarding any leading and
4576
 * trailing space (#x20) characters, and by replacing sequences of space
4577
 * (#x20) characters by a single space (#x20) character.
4578
 * All attributes for which no declaration has been read should be treated
4579
 * by a non-validating parser as if declared CDATA.
4580
 *
4581
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4582
 */
4583
4584
4585
xmlChar *
4586
36.1k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4587
36.1k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4588
36.1k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4589
36.1k
}
4590
4591
/**
4592
 * xmlParseSystemLiteral:
4593
 * @ctxt:  an XML parser context
4594
 *
4595
 * DEPRECATED: Internal function, don't use.
4596
 *
4597
 * parse an XML SystemLiteral
4598
 *
4599
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4600
 *
4601
 * Returns the SystemLiteral parsed or NULL
4602
 */
4603
4604
xmlChar *
4605
10.2k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4606
10.2k
    xmlChar *buf = NULL;
4607
10.2k
    int len = 0;
4608
10.2k
    int size = XML_PARSER_BUFFER_SIZE;
4609
10.2k
    int cur, l;
4610
10.2k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4611
10.2k
                    XML_MAX_TEXT_LENGTH :
4612
10.2k
                    XML_MAX_NAME_LENGTH;
4613
10.2k
    xmlChar stop;
4614
4615
10.2k
    if (RAW == '"') {
4616
7.97k
        NEXT;
4617
7.97k
  stop = '"';
4618
7.97k
    } else if (RAW == '\'') {
4619
1.68k
        NEXT;
4620
1.68k
  stop = '\'';
4621
1.68k
    } else {
4622
636
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4623
636
  return(NULL);
4624
636
    }
4625
4626
9.66k
    buf = xmlMalloc(size);
4627
9.66k
    if (buf == NULL) {
4628
0
        xmlErrMemory(ctxt);
4629
0
  return(NULL);
4630
0
    }
4631
9.66k
    cur = xmlCurrentCharRecover(ctxt, &l);
4632
447k
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4633
437k
  if (len + 5 >= size) {
4634
3.30k
      xmlChar *tmp;
4635
3.30k
            int newSize;
4636
4637
3.30k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4638
3.30k
            if (newSize < 0) {
4639
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4640
0
                xmlFree(buf);
4641
0
                return(NULL);
4642
0
            }
4643
3.30k
      tmp = xmlRealloc(buf, newSize);
4644
3.30k
      if (tmp == NULL) {
4645
0
          xmlFree(buf);
4646
0
    xmlErrMemory(ctxt);
4647
0
    return(NULL);
4648
0
      }
4649
3.30k
      buf = tmp;
4650
3.30k
            size = newSize;
4651
3.30k
  }
4652
437k
  COPY_BUF(buf, len, cur);
4653
437k
  NEXTL(l);
4654
437k
  cur = xmlCurrentCharRecover(ctxt, &l);
4655
437k
    }
4656
9.66k
    buf[len] = 0;
4657
9.66k
    if (!IS_CHAR(cur)) {
4658
179
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4659
9.48k
    } else {
4660
9.48k
  NEXT;
4661
9.48k
    }
4662
9.66k
    return(buf);
4663
9.66k
}
4664
4665
/**
4666
 * xmlParsePubidLiteral:
4667
 * @ctxt:  an XML parser context
4668
 *
4669
 * DEPRECATED: Internal function, don't use.
4670
 *
4671
 * parse an XML public literal
4672
 *
4673
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4674
 *
4675
 * Returns the PubidLiteral parsed or NULL.
4676
 */
4677
4678
xmlChar *
4679
4.12k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4680
4.12k
    xmlChar *buf = NULL;
4681
4.12k
    int len = 0;
4682
4.12k
    int size = XML_PARSER_BUFFER_SIZE;
4683
4.12k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4684
4.12k
                    XML_MAX_TEXT_LENGTH :
4685
4.12k
                    XML_MAX_NAME_LENGTH;
4686
4.12k
    xmlChar cur;
4687
4.12k
    xmlChar stop;
4688
4689
4.12k
    if (RAW == '"') {
4690
1.60k
        NEXT;
4691
1.60k
  stop = '"';
4692
2.51k
    } else if (RAW == '\'') {
4693
2.12k
        NEXT;
4694
2.12k
  stop = '\'';
4695
2.12k
    } else {
4696
394
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4697
394
  return(NULL);
4698
394
    }
4699
3.72k
    buf = xmlMalloc(size);
4700
3.72k
    if (buf == NULL) {
4701
0
  xmlErrMemory(ctxt);
4702
0
  return(NULL);
4703
0
    }
4704
3.72k
    cur = CUR;
4705
73.2k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4706
69.5k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4707
69.5k
  if (len + 1 >= size) {
4708
107
      xmlChar *tmp;
4709
107
            int newSize;
4710
4711
107
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4712
107
            if (newSize < 0) {
4713
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4714
0
                xmlFree(buf);
4715
0
                return(NULL);
4716
0
            }
4717
107
      tmp = xmlRealloc(buf, newSize);
4718
107
      if (tmp == NULL) {
4719
0
    xmlErrMemory(ctxt);
4720
0
    xmlFree(buf);
4721
0
    return(NULL);
4722
0
      }
4723
107
      buf = tmp;
4724
107
            size = newSize;
4725
107
  }
4726
69.5k
  buf[len++] = cur;
4727
69.5k
  NEXT;
4728
69.5k
  cur = CUR;
4729
69.5k
    }
4730
3.72k
    buf[len] = 0;
4731
3.72k
    if (cur != stop) {
4732
133
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4733
3.59k
    } else {
4734
3.59k
  NEXTL(1);
4735
3.59k
    }
4736
3.72k
    return(buf);
4737
3.72k
}
4738
4739
/*
 * Forward declaration: xmlParseCharDataInternal() calls this function,
 * which is defined after it.
 */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4740
4741
/*
4742
 * Lookup table used for the test in the inner loop of the char data
 * parsing, see the "while (test_char_data[*in])" loop of
 * xmlParseCharDataInternal(). The table is indexed by the input byte:
 * a non-zero entry lets the loop step over the byte, a zero entry
 * stops it.
 *
 * The zero entries are the control characters other than 0x9
 * (including LF 0xA and CR 0xD), '&', '<', ']' and every byte
 * from 0x80 upwards.
4743
 */
4744
static const unsigned char test_char_data[256] = {
4745
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4746
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4747
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4748
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4749
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4750
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4751
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4752
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4753
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4754
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4755
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4756
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4757
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4758
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4759
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4760
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4761
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4762
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4763
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4764
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4765
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4766
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4767
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4768
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4769
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4770
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4771
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4772
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4773
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4774
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4775
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4776
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4777
};
4778
4779
static void
4780
xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size,
4781
12.0M
              int isBlank) {
4782
12.0M
    int checkBlanks;
4783
4784
12.0M
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
4785
67.5k
        return;
4786
4787
11.9M
    checkBlanks = (!ctxt->keepBlanks) ||
4788
11.9M
                  (ctxt->sax->ignorableWhitespace != ctxt->sax->characters);
4789
4790
    /*
4791
     * Calling areBlanks with only parts of a text node
4792
     * is fundamentally broken, making the NOBLANKS option
4793
     * essentially unusable.
4794
     */
4795
11.9M
    if ((checkBlanks) &&
4796
11.6M
        (areBlanks(ctxt, buf, size, isBlank))) {
4797
0
        if ((ctxt->sax->ignorableWhitespace != NULL) &&
4798
0
            (ctxt->keepBlanks))
4799
0
            ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
4800
11.9M
    } else {
4801
11.9M
        if (ctxt->sax->characters != NULL)
4802
11.9M
            ctxt->sax->characters(ctxt->userData, buf, size);
4803
4804
        /*
4805
         * The old code used to update this value for "complex" data
4806
         * even if checkBlanks was false. This was probably a bug.
4807
         */
4808
11.9M
        if ((checkBlanks) && (*ctxt->space == -1))
4809
7.00M
            *ctxt->space = -2;
4810
11.9M
    }
4811
11.9M
}
4812
4813
/**
4814
 * xmlParseCharDataInternal:
4815
 * @ctxt:  an XML parser context
4816
 * @partial:  buffer may contain partial UTF-8 sequences
4817
 *
4818
 * Parse character data. Always makes progress if the first char isn't
4819
 * '<' or '&'.
4820
 *
4821
 * The right angle bracket (>) may be represented using the string "&gt;",
4822
 * and must, for compatibility, be escaped using "&gt;" or a character
4823
 * reference when it appears in the string "]]>" in content, when that
4824
 * string is not marking the end of a CDATA section.
4825
 *
4826
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4827
 */
4828
static void
4829
11.2M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4830
11.2M
    const xmlChar *in;
4831
11.2M
    int nbchar = 0;
4832
11.2M
    int line = ctxt->input->line;
4833
11.2M
    int col = ctxt->input->col;
4834
11.2M
    int ccol;
4835
4836
11.2M
    GROW;
4837
    /*
4838
     * Accelerated common case where input don't need to be
4839
     * modified before passing it to the handler.
4840
     */
4841
11.2M
    in = ctxt->input->cur;
4842
11.6M
    do {
4843
15.1M
get_more_space:
4844
35.3M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4845
15.1M
        if (*in == 0xA) {
4846
3.68M
            do {
4847
3.68M
                ctxt->input->line++; ctxt->input->col = 1;
4848
3.68M
                in++;
4849
3.68M
            } while (*in == 0xA);
4850
3.48M
            goto get_more_space;
4851
3.48M
        }
4852
11.6M
        if (*in == '<') {
4853
3.53M
            nbchar = in - ctxt->input->cur;
4854
3.53M
            if (nbchar > 0) {
4855
3.53M
                const xmlChar *tmp = ctxt->input->cur;
4856
3.53M
                ctxt->input->cur = in;
4857
4858
3.53M
                xmlCharacters(ctxt, tmp, nbchar, 1);
4859
3.53M
            }
4860
3.53M
            return;
4861
3.53M
        }
4862
4863
9.02M
get_more:
4864
9.02M
        ccol = ctxt->input->col;
4865
117M
        while (test_char_data[*in]) {
4866
108M
            in++;
4867
108M
            ccol++;
4868
108M
        }
4869
9.02M
        ctxt->input->col = ccol;
4870
9.02M
        if (*in == 0xA) {
4871
856k
            do {
4872
856k
                ctxt->input->line++; ctxt->input->col = 1;
4873
856k
                in++;
4874
856k
            } while (*in == 0xA);
4875
718k
            goto get_more;
4876
718k
        }
4877
8.30M
        if (*in == ']') {
4878
211k
            if ((in[1] == ']') && (in[2] == '>')) {
4879
8
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4880
8
                ctxt->input->cur = in + 1;
4881
8
                return;
4882
8
            }
4883
211k
            if ((!partial) || (ctxt->input->end - in >= 2)) {
4884
211k
                in++;
4885
211k
                ctxt->input->col++;
4886
211k
                goto get_more;
4887
211k
            }
4888
211k
        }
4889
8.09M
        nbchar = in - ctxt->input->cur;
4890
8.09M
        if (nbchar > 0) {
4891
7.43M
            const xmlChar *tmp = ctxt->input->cur;
4892
7.43M
            ctxt->input->cur = in;
4893
4894
7.43M
            xmlCharacters(ctxt, tmp, nbchar, 0);
4895
4896
7.43M
            line = ctxt->input->line;
4897
7.43M
            col = ctxt->input->col;
4898
7.43M
        }
4899
8.09M
        ctxt->input->cur = in;
4900
8.09M
        if (*in == 0xD) {
4901
419k
            in++;
4902
419k
            if (*in == 0xA) {
4903
390k
                ctxt->input->cur = in;
4904
390k
                in++;
4905
390k
                ctxt->input->line++; ctxt->input->col = 1;
4906
390k
                continue; /* while */
4907
390k
            }
4908
28.5k
            in--;
4909
28.5k
        }
4910
7.70M
        if (*in == '<') {
4911
6.62M
            return;
4912
6.62M
        }
4913
1.07M
        if (*in == '&') {
4914
207k
            return;
4915
207k
        }
4916
869k
        if ((partial) && (*in == ']') && (ctxt->input->end - in < 2)) {
4917
3
            return;
4918
3
        }
4919
869k
        SHRINK;
4920
869k
        GROW;
4921
869k
        in = ctxt->input->cur;
4922
1.25M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4923
1.11M
             (*in == 0x09) || (*in == 0x0a));
4924
889k
    ctxt->input->line = line;
4925
889k
    ctxt->input->col = col;
4926
889k
    xmlParseCharDataComplex(ctxt, partial);
4927
889k
}
4928
4929
/**
4930
 * xmlParseCharDataComplex:
4931
 * @ctxt:  an XML parser context
4932
 * @cdata:  int indicating whether we are within a CDATA section
4933
 *
4934
 * Always makes progress if the first char isn't '<' or '&'.
4935
 *
4936
 * parse a CharData section.this is the fallback function
4937
 * of xmlParseCharData() when the parsing requires handling
4938
 * of non-ASCII characters.
4939
 */
4940
static void
4941
889k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4942
889k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4943
889k
    int nbchar = 0;
4944
889k
    int cur, l;
4945
4946
889k
    cur = xmlCurrentCharRecover(ctxt, &l);
4947
65.0M
    while ((cur != '<') && /* checked */
4948
64.2M
           (cur != '&') &&
4949
64.2M
           ((!partial) || (cur != ']') ||
4950
45.1k
            (ctxt->input->end - ctxt->input->cur >= 2)) &&
4951
64.2M
     (IS_CHAR(cur))) {
4952
64.2M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4953
386
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4954
386
  }
4955
64.2M
  COPY_BUF(buf, nbchar, cur);
4956
  /* move current position before possible calling of ctxt->sax->characters */
4957
64.2M
  NEXTL(l);
4958
64.2M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4959
183k
      buf[nbchar] = 0;
4960
4961
183k
            xmlCharacters(ctxt, buf, nbchar, 0);
4962
183k
      nbchar = 0;
4963
183k
            SHRINK;
4964
183k
  }
4965
64.2M
  cur = xmlCurrentCharRecover(ctxt, &l);
4966
64.2M
    }
4967
889k
    if (nbchar != 0) {
4968
881k
        buf[nbchar] = 0;
4969
4970
881k
        xmlCharacters(ctxt, buf, nbchar, 0);
4971
881k
    }
4972
    /*
4973
     * cur == 0 can mean
4974
     *
4975
     * - End of buffer.
4976
     * - An actual 0 character.
4977
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4978
     */
4979
889k
    if (ctxt->input->cur < ctxt->input->end) {
4980
881k
        if ((cur == 0) && (CUR != 0)) {
4981
238
            if (partial == 0) {
4982
231
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4983
231
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4984
231
                NEXTL(1);
4985
231
            }
4986
881k
        } else if ((cur != '<') && (cur != '&') && (cur != ']')) {
4987
            /* Generate the error and skip the offending character */
4988
2.30k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4989
2.30k
                              "PCDATA invalid Char value %d\n", cur);
4990
2.30k
            NEXTL(l);
4991
2.30k
        }
4992
881k
    }
4993
889k
}
4994
4995
/**
4996
 * xmlParseCharData:
4997
 * @ctxt:  an XML parser context
4998
 * @cdata:  unused
4999
 *
5000
 * DEPRECATED: Internal function, don't use.
5001
 */
5002
void
5003
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
5004
0
    xmlParseCharDataInternal(ctxt, 0);
5005
0
}
5006
5007
/**
5008
 * xmlParseExternalID:
5009
 * @ctxt:  an XML parser context
5010
 * @publicID:  a xmlChar** receiving PubidLiteral
5011
 * @strict: indicate whether we should restrict parsing to only
5012
 *          production [75], see NOTE below
5013
 *
5014
 * DEPRECATED: Internal function, don't use.
5015
 *
5016
 * Parse an External ID or a Public ID
5017
 *
5018
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
5019
 *       'PUBLIC' S PubidLiteral S SystemLiteral
5020
 *
5021
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5022
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
5023
 *
5024
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5025
 *
5026
 * Returns the function returns SystemLiteral and in the second
5027
 *                case publicID receives PubidLiteral, is strict is off
5028
 *                it is possible to return NULL and have publicID set.
5029
 */
5030
5031
xmlChar *
5032
19.9k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5033
19.9k
    xmlChar *URI = NULL;
5034
5035
19.9k
    *publicID = NULL;
5036
19.9k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5037
7.59k
        SKIP(6);
5038
7.59k
  if (SKIP_BLANKS == 0) {
5039
726
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5040
726
                     "Space required after 'SYSTEM'\n");
5041
726
  }
5042
7.59k
  URI = xmlParseSystemLiteral(ctxt);
5043
7.59k
  if (URI == NULL) {
5044
200
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5045
200
        }
5046
12.3k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5047
4.12k
        SKIP(6);
5048
4.12k
  if (SKIP_BLANKS == 0) {
5049
26
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5050
26
        "Space required after 'PUBLIC'\n");
5051
26
  }
5052
4.12k
  *publicID = xmlParsePubidLiteral(ctxt);
5053
4.12k
  if (*publicID == NULL) {
5054
394
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5055
394
  }
5056
4.12k
  if (strict) {
5057
      /*
5058
       * We don't handle [83] so "S SystemLiteral" is required.
5059
       */
5060
2.27k
      if (SKIP_BLANKS == 0) {
5061
453
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5062
453
      "Space required after the Public Identifier\n");
5063
453
      }
5064
2.27k
  } else {
5065
      /*
5066
       * We handle [83] so we return immediately, if
5067
       * "S SystemLiteral" is not detected. We skip blanks if no
5068
             * system literal was found, but this is harmless since we must
5069
             * be at the end of a NotationDecl.
5070
       */
5071
1.84k
      if (SKIP_BLANKS == 0) return(NULL);
5072
986
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
5073
986
  }
5074
2.70k
  URI = xmlParseSystemLiteral(ctxt);
5075
2.70k
  if (URI == NULL) {
5076
436
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5077
436
        }
5078
2.70k
    }
5079
18.5k
    return(URI);
5080
19.9k
}
5081
5082
/**
5083
 * xmlParseCommentComplex:
5084
 * @ctxt:  an XML parser context
5085
 * @buf:  the already parsed part of the buffer
5086
 * @len:  number of bytes in the buffer
5087
 * @size:  allocated size of the buffer
5088
 *
5089
 * Skip an XML (SGML) comment <!-- .... -->
5090
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5091
 *  must not occur within comments. "
5092
 * This is the slow routine in case the accelerator for ascii didn't work
5093
 *
5094
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5095
 */
5096
static void
5097
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5098
35.4k
                       size_t len, size_t size) {
5099
35.4k
    int q, ql;
5100
35.4k
    int r, rl;
5101
35.4k
    int cur, l;
5102
35.4k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5103
35.4k
                    XML_MAX_HUGE_LENGTH :
5104
35.4k
                    XML_MAX_TEXT_LENGTH;
5105
5106
35.4k
    if (buf == NULL) {
5107
10.1k
        len = 0;
5108
10.1k
  size = XML_PARSER_BUFFER_SIZE;
5109
10.1k
  buf = xmlMalloc(size);
5110
10.1k
  if (buf == NULL) {
5111
0
      xmlErrMemory(ctxt);
5112
0
      return;
5113
0
  }
5114
10.1k
    }
5115
35.4k
    q = xmlCurrentCharRecover(ctxt, &ql);
5116
35.4k
    if (q == 0)
5117
279
        goto not_terminated;
5118
35.1k
    if (!IS_CHAR(q)) {
5119
52
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5120
52
                          "xmlParseComment: invalid xmlChar value %d\n",
5121
52
                    q);
5122
52
  xmlFree (buf);
5123
52
  return;
5124
52
    }
5125
35.1k
    NEXTL(ql);
5126
35.1k
    r = xmlCurrentCharRecover(ctxt, &rl);
5127
35.1k
    if (r == 0)
5128
34
        goto not_terminated;
5129
35.0k
    if (!IS_CHAR(r)) {
5130
28
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5131
28
                          "xmlParseComment: invalid xmlChar value %d\n",
5132
28
                    r);
5133
28
  xmlFree (buf);
5134
28
  return;
5135
28
    }
5136
35.0k
    NEXTL(rl);
5137
35.0k
    cur = xmlCurrentCharRecover(ctxt, &l);
5138
35.0k
    if (cur == 0)
5139
27
        goto not_terminated;
5140
2.64M
    while (IS_CHAR(cur) && /* checked */
5141
2.64M
           ((cur != '>') ||
5142
2.60M
      (r != '-') || (q != '-'))) {
5143
2.60M
  if ((r == '-') && (q == '-')) {
5144
19.2k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5145
19.2k
  }
5146
2.60M
  if (len + 5 >= size) {
5147
16.1k
      xmlChar *tmp;
5148
16.1k
            int newSize;
5149
5150
16.1k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5151
16.1k
            if (newSize < 0) {
5152
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5153
0
                             "Comment too big found", NULL);
5154
0
                xmlFree (buf);
5155
0
                return;
5156
0
            }
5157
16.1k
      tmp = xmlRealloc(buf, newSize);
5158
16.1k
      if (tmp == NULL) {
5159
0
    xmlErrMemory(ctxt);
5160
0
    xmlFree(buf);
5161
0
    return;
5162
0
      }
5163
16.1k
      buf = tmp;
5164
16.1k
            size = newSize;
5165
16.1k
  }
5166
2.60M
  COPY_BUF(buf, len, q);
5167
5168
2.60M
  q = r;
5169
2.60M
  ql = rl;
5170
2.60M
  r = cur;
5171
2.60M
  rl = l;
5172
5173
2.60M
  NEXTL(l);
5174
2.60M
  cur = xmlCurrentCharRecover(ctxt, &l);
5175
5176
2.60M
    }
5177
35.0k
    buf[len] = 0;
5178
35.0k
    if (cur == 0) {
5179
510
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5180
510
                       "Comment not terminated \n<!--%.50s\n", buf);
5181
34.5k
    } else if (!IS_CHAR(cur)) {
5182
106
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5183
106
                          "xmlParseComment: invalid xmlChar value %d\n",
5184
106
                    cur);
5185
34.4k
    } else {
5186
34.4k
        NEXT;
5187
34.4k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5188
0
      (!ctxt->disableSAX))
5189
0
      ctxt->sax->comment(ctxt->userData, buf);
5190
34.4k
    }
5191
35.0k
    xmlFree(buf);
5192
35.0k
    return;
5193
340
not_terminated:
5194
340
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5195
340
       "Comment not terminated\n", NULL);
5196
340
    xmlFree(buf);
5197
340
}
5198
5199
/**
5200
 * xmlParseComment:
5201
 * @ctxt:  an XML parser context
5202
 *
5203
 * DEPRECATED: Internal function, don't use.
5204
 *
5205
 * Parse an XML (SGML) comment. Always consumes '<!'.
5206
 *
5207
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5208
 *  must not occur within comments. "
5209
 *
5210
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5211
 */
5212
void
5213
79.3k
xmlParseComment(xmlParserCtxtPtr ctxt) {
5214
79.3k
    xmlChar *buf = NULL;
5215
79.3k
    size_t size = XML_PARSER_BUFFER_SIZE;
5216
79.3k
    size_t len = 0;
5217
79.3k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5218
79.3k
                       XML_MAX_HUGE_LENGTH :
5219
79.3k
                       XML_MAX_TEXT_LENGTH;
5220
79.3k
    const xmlChar *in;
5221
79.3k
    size_t nbchar = 0;
5222
79.3k
    int ccol;
5223
5224
    /*
5225
     * Check that there is a comment right here.
5226
     */
5227
79.3k
    if ((RAW != '<') || (NXT(1) != '!'))
5228
0
        return;
5229
79.3k
    SKIP(2);
5230
79.3k
    if ((RAW != '-') || (NXT(1) != '-'))
5231
19
        return;
5232
79.3k
    SKIP(2);
5233
79.3k
    GROW;
5234
5235
    /*
5236
     * Accelerated common case where input don't need to be
5237
     * modified before passing it to the handler.
5238
     */
5239
79.3k
    in = ctxt->input->cur;
5240
79.3k
    do {
5241
79.3k
  if (*in == 0xA) {
5242
9.51k
      do {
5243
9.51k
    ctxt->input->line++; ctxt->input->col = 1;
5244
9.51k
    in++;
5245
9.51k
      } while (*in == 0xA);
5246
4.29k
  }
5247
156k
get_more:
5248
156k
        ccol = ctxt->input->col;
5249
1.78M
  while (((*in > '-') && (*in <= 0x7F)) ||
5250
407k
         ((*in >= 0x20) && (*in < '-')) ||
5251
1.63M
         (*in == 0x09)) {
5252
1.63M
        in++;
5253
1.63M
        ccol++;
5254
1.63M
  }
5255
156k
  ctxt->input->col = ccol;
5256
156k
  if (*in == 0xA) {
5257
26.7k
      do {
5258
26.7k
    ctxt->input->line++; ctxt->input->col = 1;
5259
26.7k
    in++;
5260
26.7k
      } while (*in == 0xA);
5261
16.5k
      goto get_more;
5262
16.5k
  }
5263
140k
  nbchar = in - ctxt->input->cur;
5264
  /*
5265
   * save current set of data
5266
   */
5267
140k
  if (nbchar > 0) {
5268
111k
            if (nbchar > maxLength - len) {
5269
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5270
0
                                  "Comment too big found", NULL);
5271
0
                xmlFree(buf);
5272
0
                return;
5273
0
            }
5274
111k
            if (buf == NULL) {
5275
63.2k
                if ((*in == '-') && (in[1] == '-'))
5276
34.5k
                    size = nbchar + 1;
5277
28.6k
                else
5278
28.6k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5279
63.2k
                buf = xmlMalloc(size);
5280
63.2k
                if (buf == NULL) {
5281
0
                    xmlErrMemory(ctxt);
5282
0
                    return;
5283
0
                }
5284
63.2k
                len = 0;
5285
63.2k
            } else if (len + nbchar + 1 >= size) {
5286
3.27k
                xmlChar *new_buf;
5287
3.27k
                size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5288
3.27k
                new_buf = xmlRealloc(buf, size);
5289
3.27k
                if (new_buf == NULL) {
5290
0
                    xmlErrMemory(ctxt);
5291
0
                    xmlFree(buf);
5292
0
                    return;
5293
0
                }
5294
3.27k
                buf = new_buf;
5295
3.27k
            }
5296
111k
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5297
111k
            len += nbchar;
5298
111k
            buf[len] = 0;
5299
111k
  }
5300
140k
  ctxt->input->cur = in;
5301
140k
  if (*in == 0xA) {
5302
0
      in++;
5303
0
      ctxt->input->line++; ctxt->input->col = 1;
5304
0
  }
5305
140k
  if (*in == 0xD) {
5306
13.2k
      in++;
5307
13.2k
      if (*in == 0xA) {
5308
6.77k
    ctxt->input->cur = in;
5309
6.77k
    in++;
5310
6.77k
    ctxt->input->line++; ctxt->input->col = 1;
5311
6.77k
    goto get_more;
5312
6.77k
      }
5313
6.46k
      in--;
5314
6.46k
  }
5315
133k
  SHRINK;
5316
133k
  GROW;
5317
133k
  in = ctxt->input->cur;
5318
133k
  if (*in == '-') {
5319
97.8k
      if (in[1] == '-') {
5320
69.4k
          if (in[2] == '>') {
5321
43.9k
        SKIP(3);
5322
43.9k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5323
0
            (!ctxt->disableSAX)) {
5324
0
      if (buf != NULL)
5325
0
          ctxt->sax->comment(ctxt->userData, buf);
5326
0
      else
5327
0
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5328
0
        }
5329
43.9k
        if (buf != NULL)
5330
37.9k
            xmlFree(buf);
5331
43.9k
        return;
5332
43.9k
    }
5333
25.5k
    if (buf != NULL) {
5334
22.4k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5335
22.4k
                          "Double hyphen within comment: "
5336
22.4k
                                      "<!--%.50s\n",
5337
22.4k
              buf);
5338
22.4k
    } else
5339
3.16k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5340
3.16k
                          "Double hyphen within comment\n", NULL);
5341
25.5k
    in++;
5342
25.5k
    ctxt->input->col++;
5343
25.5k
      }
5344
53.9k
      in++;
5345
53.9k
      ctxt->input->col++;
5346
53.9k
      goto get_more;
5347
97.8k
  }
5348
133k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5349
35.4k
    xmlParseCommentComplex(ctxt, buf, len, size);
5350
35.4k
}
5351
5352
5353
/**
5354
 * xmlParsePITarget:
5355
 * @ctxt:  an XML parser context
5356
 *
5357
 * DEPRECATED: Internal function, don't use.
5358
 *
5359
 * parse the name of a PI
5360
 *
5361
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5362
 *
5363
 * Returns the PITarget name or NULL
5364
 */
5365
5366
const xmlChar *
5367
77.0k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5368
77.0k
    const xmlChar *name;
5369
5370
77.0k
    name = xmlParseName(ctxt);
5371
77.0k
    if ((name != NULL) &&
5372
75.1k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5373
37.6k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5374
32.7k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5375
24.2k
  int i;
5376
24.2k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5377
18.1k
      (name[2] == 'l') && (name[3] == 0)) {
5378
1.43k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5379
1.43k
     "XML declaration allowed only at the start of the document\n");
5380
1.43k
      return(name);
5381
22.8k
  } else if (name[3] == 0) {
5382
1.75k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5383
1.75k
      return(name);
5384
1.75k
  }
5385
61.5k
  for (i = 0;;i++) {
5386
61.5k
      if (xmlW3CPIs[i] == NULL) break;
5387
41.3k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5388
908
          return(name);
5389
41.3k
  }
5390
20.1k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5391
20.1k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5392
20.1k
          NULL, NULL);
5393
20.1k
    }
5394
72.9k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5395
6.58k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5396
6.58k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5397
6.58k
    }
5398
72.9k
    return(name);
5399
77.0k
}
5400
5401
#ifdef LIBXML_CATALOG_ENABLED
5402
/**
5403
 * xmlParseCatalogPI:
5404
 * @ctxt:  an XML parser context
5405
 * @catalog:  the PI value string
5406
 *
5407
 * parse an XML Catalog Processing Instruction.
5408
 *
5409
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5410
 *
5411
 * Occurs only if allowed by the user and if happening in the Misc
5412
 * part of the document before any doctype information
5413
 * This will add the given catalog to the parsing context in order
5414
 * to be used if there is a resolution need further down in the document
5415
 */
5416
5417
static void
5418
0
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5419
0
    xmlChar *URL = NULL;
5420
0
    const xmlChar *tmp, *base;
5421
0
    xmlChar marker;
5422
5423
0
    tmp = catalog;
5424
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5425
0
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5426
0
  goto error;
5427
0
    tmp += 7;
5428
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5429
0
    if (*tmp != '=') {
5430
0
  return;
5431
0
    }
5432
0
    tmp++;
5433
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5434
0
    marker = *tmp;
5435
0
    if ((marker != '\'') && (marker != '"'))
5436
0
  goto error;
5437
0
    tmp++;
5438
0
    base = tmp;
5439
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5440
0
    if (*tmp == 0)
5441
0
  goto error;
5442
0
    URL = xmlStrndup(base, tmp - base);
5443
0
    tmp++;
5444
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5445
0
    if (*tmp != 0)
5446
0
  goto error;
5447
5448
0
    if (URL != NULL) {
5449
        /*
5450
         * Unfortunately, the catalog API doesn't report OOM errors.
5451
         * xmlGetLastError isn't very helpful since we don't know
5452
         * where the last error came from. We'd have to reset it
5453
         * before this call and restore it afterwards.
5454
         */
5455
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5456
0
  xmlFree(URL);
5457
0
    }
5458
0
    return;
5459
5460
0
error:
5461
0
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5462
0
            "Catalog PI syntax error: %s\n",
5463
0
      catalog, NULL);
5464
0
    if (URL != NULL)
5465
0
  xmlFree(URL);
5466
0
}
5467
#endif
5468
5469
/**
5470
 * xmlParsePI:
5471
 * @ctxt:  an XML parser context
5472
 *
5473
 * DEPRECATED: Internal function, don't use.
5474
 *
5475
 * parse an XML Processing Instruction.
5476
 *
5477
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5478
 *
5479
 * The processing is transferred to SAX once parsed.
5480
 */
5481
5482
void
5483
77.0k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5484
77.0k
    xmlChar *buf = NULL;
5485
77.0k
    size_t len = 0;
5486
77.0k
    size_t size = XML_PARSER_BUFFER_SIZE;
5487
77.0k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5488
76.3k
                       XML_MAX_HUGE_LENGTH :
5489
77.0k
                       XML_MAX_TEXT_LENGTH;
5490
77.0k
    int cur, l;
5491
77.0k
    const xmlChar *target;
5492
5493
77.0k
    if ((RAW == '<') && (NXT(1) == '?')) {
5494
  /*
5495
   * this is a Processing Instruction.
5496
   */
5497
77.0k
  SKIP(2);
5498
5499
  /*
5500
   * Parse the target name and check for special support like
5501
   * namespace.
5502
   */
5503
77.0k
        target = xmlParsePITarget(ctxt);
5504
77.0k
  if (target != NULL) {
5505
75.1k
      if ((RAW == '?') && (NXT(1) == '>')) {
5506
13.4k
    SKIP(2);
5507
5508
    /*
5509
     * SAX: PI detected.
5510
     */
5511
13.4k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5512
6.46k
        (ctxt->sax->processingInstruction != NULL))
5513
6.46k
        ctxt->sax->processingInstruction(ctxt->userData,
5514
6.46k
                                         target, NULL);
5515
13.4k
    return;
5516
13.4k
      }
5517
61.7k
      buf = xmlMalloc(size);
5518
61.7k
      if (buf == NULL) {
5519
0
    xmlErrMemory(ctxt);
5520
0
    return;
5521
0
      }
5522
61.7k
      if (SKIP_BLANKS == 0) {
5523
9.72k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5524
9.72k
        "ParsePI: PI %s space expected\n", target);
5525
9.72k
      }
5526
61.7k
      cur = xmlCurrentCharRecover(ctxt, &l);
5527
5.84M
      while (IS_CHAR(cur) && /* checked */
5528
5.84M
       ((cur != '?') || (NXT(1) != '>'))) {
5529
5.78M
    if (len + 5 >= size) {
5530
35.8k
        xmlChar *tmp;
5531
35.8k
                    int newSize;
5532
5533
35.8k
                    newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5534
35.8k
                    if (newSize < 0) {
5535
0
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5536
0
                                          "PI %s too big found", target);
5537
0
                        xmlFree(buf);
5538
0
                        return;
5539
0
                    }
5540
35.8k
        tmp = xmlRealloc(buf, newSize);
5541
35.8k
        if (tmp == NULL) {
5542
0
      xmlErrMemory(ctxt);
5543
0
      xmlFree(buf);
5544
0
      return;
5545
0
        }
5546
35.8k
        buf = tmp;
5547
35.8k
                    size = newSize;
5548
35.8k
    }
5549
5.78M
    COPY_BUF(buf, len, cur);
5550
5.78M
    NEXTL(l);
5551
5.78M
    cur = xmlCurrentCharRecover(ctxt, &l);
5552
5.78M
      }
5553
61.7k
      buf[len] = 0;
5554
61.7k
      if (cur != '?') {
5555
1.87k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5556
1.87k
          "ParsePI: PI %s never end ...\n", target);
5557
59.8k
      } else {
5558
59.8k
    SKIP(2);
5559
5560
59.8k
#ifdef LIBXML_CATALOG_ENABLED
5561
59.8k
    if ((ctxt->inSubset == 0) &&
5562
47.8k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5563
14.1k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5564
5565
14.1k
        if ((ctxt->options & XML_PARSE_CATALOG_PI) &&
5566
0
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5567
0
       (allow == XML_CATA_ALLOW_ALL)))
5568
0
      xmlParseCatalogPI(ctxt, buf);
5569
14.1k
    }
5570
59.8k
#endif
5571
5572
    /*
5573
     * SAX: PI detected.
5574
     */
5575
59.8k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5576
49.0k
        (ctxt->sax->processingInstruction != NULL))
5577
49.0k
        ctxt->sax->processingInstruction(ctxt->userData,
5578
49.0k
                                         target, buf);
5579
59.8k
      }
5580
61.7k
      xmlFree(buf);
5581
61.7k
  } else {
5582
1.88k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5583
1.88k
  }
5584
77.0k
    }
5585
77.0k
}
5586
5587
/**
5588
 * xmlParseNotationDecl:
5589
 * @ctxt:  an XML parser context
5590
 *
5591
 * DEPRECATED: Internal function, don't use.
5592
 *
5593
 * Parse a notation declaration. Always consumes '<!'.
5594
 *
5595
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5596
 *
5597
 * Hence there is actually 3 choices:
5598
 *     'PUBLIC' S PubidLiteral
5599
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5600
 * and 'SYSTEM' S SystemLiteral
5601
 *
5602
 * See the NOTE on xmlParseExternalID().
5603
 */
5604
5605
void
5606
2.86k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5607
2.86k
    const xmlChar *name;
5608
2.86k
    xmlChar *Pubid;
5609
2.86k
    xmlChar *Systemid;
5610
5611
2.86k
    if ((CUR != '<') || (NXT(1) != '!'))
5612
0
        return;
5613
2.86k
    SKIP(2);
5614
5615
2.86k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5616
2.78k
  int inputid = ctxt->input->id;
5617
2.78k
  SKIP(8);
5618
2.78k
  if (SKIP_BLANKS_PE == 0) {
5619
30
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5620
30
         "Space required after '<!NOTATION'\n");
5621
30
      return;
5622
30
  }
5623
5624
2.75k
        name = xmlParseName(ctxt);
5625
2.75k
  if (name == NULL) {
5626
58
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5627
58
      return;
5628
58
  }
5629
2.70k
  if (xmlStrchr(name, ':') != NULL) {
5630
6
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5631
6
         "colons are forbidden from notation names '%s'\n",
5632
6
         name, NULL, NULL);
5633
6
  }
5634
2.70k
  if (SKIP_BLANKS_PE == 0) {
5635
24
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5636
24
         "Space required after the NOTATION name'\n");
5637
24
      return;
5638
24
  }
5639
5640
  /*
5641
   * Parse the IDs.
5642
   */
5643
2.67k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5644
2.67k
  SKIP_BLANKS_PE;
5645
5646
2.67k
  if (RAW == '>') {
5647
2.39k
      if (inputid != ctxt->input->id) {
5648
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5649
0
                         "Notation declaration doesn't start and stop"
5650
0
                               " in the same entity\n");
5651
0
      }
5652
2.39k
      NEXT;
5653
2.39k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5654
1.90k
    (ctxt->sax->notationDecl != NULL))
5655
0
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5656
2.39k
  } else {
5657
279
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5658
279
  }
5659
2.67k
  if (Systemid != NULL) xmlFree(Systemid);
5660
2.67k
  if (Pubid != NULL) xmlFree(Pubid);
5661
2.67k
    }
5662
2.86k
}
5663
5664
/**
5665
 * xmlParseEntityDecl:
5666
 * @ctxt:  an XML parser context
5667
 *
5668
 * DEPRECATED: Internal function, don't use.
5669
 *
5670
 * Parse an entity declaration. Always consumes '<!'.
5671
 *
5672
 * [70] EntityDecl ::= GEDecl | PEDecl
5673
 *
5674
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5675
 *
5676
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5677
 *
5678
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5679
 *
5680
 * [74] PEDef ::= EntityValue | ExternalID
5681
 *
5682
 * [76] NDataDecl ::= S 'NDATA' S Name
5683
 *
5684
 * [ VC: Notation Declared ]
5685
 * The Name must match the declared name of a notation.
5686
 */
5687
5688
void
5689
54.7k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5690
54.7k
    const xmlChar *name = NULL;
5691
54.7k
    xmlChar *value = NULL;
5692
54.7k
    xmlChar *URI = NULL, *literal = NULL;
5693
54.7k
    const xmlChar *ndata = NULL;
5694
54.7k
    int isParameter = 0;
5695
54.7k
    xmlChar *orig = NULL;
5696
5697
54.7k
    if ((CUR != '<') || (NXT(1) != '!'))
5698
0
        return;
5699
54.7k
    SKIP(2);
5700
5701
    /* GROW; done in the caller */
5702
54.7k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5703
54.5k
  int inputid = ctxt->input->id;
5704
54.5k
  SKIP(6);
5705
54.5k
  if (SKIP_BLANKS_PE == 0) {
5706
12.7k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5707
12.7k
         "Space required after '<!ENTITY'\n");
5708
12.7k
  }
5709
5710
54.5k
  if (RAW == '%') {
5711
11.4k
      NEXT;
5712
11.4k
      if (SKIP_BLANKS_PE == 0) {
5713
6.32k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5714
6.32k
             "Space required after '%%'\n");
5715
6.32k
      }
5716
11.4k
      isParameter = 1;
5717
11.4k
  }
5718
5719
54.5k
        name = xmlParseName(ctxt);
5720
54.5k
  if (name == NULL) {
5721
211
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5722
211
                     "xmlParseEntityDecl: no name\n");
5723
211
            return;
5724
211
  }
5725
54.3k
  if (xmlStrchr(name, ':') != NULL) {
5726
2.30k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5727
2.30k
         "colons are forbidden from entities names '%s'\n",
5728
2.30k
         name, NULL, NULL);
5729
2.30k
  }
5730
54.3k
  if (SKIP_BLANKS_PE == 0) {
5731
21.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5732
21.9k
         "Space required after the entity name\n");
5733
21.9k
  }
5734
5735
  /*
5736
   * handle the various case of definitions...
5737
   */
5738
54.3k
  if (isParameter) {
5739
11.3k
      if ((RAW == '"') || (RAW == '\'')) {
5740
10.2k
          value = xmlParseEntityValue(ctxt, &orig);
5741
10.2k
    if (value) {
5742
10.2k
        if ((ctxt->sax != NULL) &&
5743
10.2k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5744
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5745
0
                        XML_INTERNAL_PARAMETER_ENTITY,
5746
0
            NULL, NULL, value);
5747
10.2k
    }
5748
10.2k
      } else {
5749
1.09k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5750
1.09k
    if ((URI == NULL) && (literal == NULL)) {
5751
149
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5752
149
    }
5753
1.09k
    if (URI) {
5754
866
                    if (xmlStrchr(URI, '#')) {
5755
5
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5756
861
                    } else {
5757
861
                        if ((ctxt->sax != NULL) &&
5758
861
                            (!ctxt->disableSAX) &&
5759
523
                            (ctxt->sax->entityDecl != NULL))
5760
0
                            ctxt->sax->entityDecl(ctxt->userData, name,
5761
0
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5762
0
                                        literal, URI, NULL);
5763
861
                    }
5764
866
    }
5765
1.09k
      }
5766
43.0k
  } else {
5767
43.0k
      if ((RAW == '"') || (RAW == '\'')) {
5768
34.4k
          value = xmlParseEntityValue(ctxt, &orig);
5769
34.4k
    if ((ctxt->sax != NULL) &&
5770
34.4k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5771
0
        ctxt->sax->entityDecl(ctxt->userData, name,
5772
0
        XML_INTERNAL_GENERAL_ENTITY,
5773
0
        NULL, NULL, value);
5774
    /*
5775
     * For expat compatibility in SAX mode.
5776
     */
5777
34.4k
    if ((ctxt->myDoc == NULL) ||
5778
34.4k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5779
34.4k
        if (ctxt->myDoc == NULL) {
5780
1.35k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5781
1.35k
      if (ctxt->myDoc == NULL) {
5782
0
          xmlErrMemory(ctxt);
5783
0
          goto done;
5784
0
      }
5785
1.35k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5786
1.35k
        }
5787
34.4k
        if (ctxt->myDoc->intSubset == NULL) {
5788
1.35k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5789
1.35k
              BAD_CAST "fake", NULL, NULL);
5790
1.35k
                        if (ctxt->myDoc->intSubset == NULL) {
5791
0
                            xmlErrMemory(ctxt);
5792
0
                            goto done;
5793
0
                        }
5794
1.35k
                    }
5795
5796
34.4k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5797
34.4k
                    NULL, NULL, value);
5798
34.4k
    }
5799
34.4k
      } else {
5800
8.53k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5801
8.53k
    if ((URI == NULL) && (literal == NULL)) {
5802
1.11k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5803
1.11k
    }
5804
8.53k
    if (URI) {
5805
7.37k
                    if (xmlStrchr(URI, '#')) {
5806
792
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5807
792
                    }
5808
7.37k
    }
5809
8.53k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5810
1.85k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5811
1.85k
           "Space required before 'NDATA'\n");
5812
1.85k
    }
5813
8.53k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5814
3.88k
        SKIP(5);
5815
3.88k
        if (SKIP_BLANKS_PE == 0) {
5816
122
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5817
122
               "Space required after 'NDATA'\n");
5818
122
        }
5819
3.88k
        ndata = xmlParseName(ctxt);
5820
3.88k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5821
955
            (ctxt->sax->unparsedEntityDecl != NULL))
5822
0
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5823
0
            literal, URI, ndata);
5824
4.64k
    } else {
5825
4.64k
        if ((ctxt->sax != NULL) &&
5826
4.64k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5827
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5828
0
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5829
0
            literal, URI, NULL);
5830
        /*
5831
         * For expat compatibility in SAX mode.
5832
         * assuming the entity replacement was asked for
5833
         */
5834
4.64k
        if ((ctxt->replaceEntities != 0) &&
5835
4.64k
      ((ctxt->myDoc == NULL) ||
5836
4.64k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5837
4.64k
      if (ctxt->myDoc == NULL) {
5838
309
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5839
309
          if (ctxt->myDoc == NULL) {
5840
0
              xmlErrMemory(ctxt);
5841
0
        goto done;
5842
0
          }
5843
309
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5844
309
      }
5845
5846
4.64k
      if (ctxt->myDoc->intSubset == NULL) {
5847
309
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5848
309
            BAD_CAST "fake", NULL, NULL);
5849
309
                            if (ctxt->myDoc->intSubset == NULL) {
5850
0
                                xmlErrMemory(ctxt);
5851
0
                                goto done;
5852
0
                            }
5853
309
                        }
5854
4.64k
      xmlSAX2EntityDecl(ctxt, name,
5855
4.64k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5856
4.64k
                  literal, URI, NULL);
5857
4.64k
        }
5858
4.64k
    }
5859
8.53k
      }
5860
43.0k
  }
5861
54.3k
  SKIP_BLANKS_PE;
5862
54.3k
  if (RAW != '>') {
5863
1.28k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5864
1.28k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5865
1.28k
      xmlHaltParser(ctxt);
5866
53.1k
  } else {
5867
53.1k
      if (inputid != ctxt->input->id) {
5868
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5869
0
                         "Entity declaration doesn't start and stop in"
5870
0
                               " the same entity\n");
5871
0
      }
5872
53.1k
      NEXT;
5873
53.1k
  }
5874
54.3k
  if (orig != NULL) {
5875
      /*
5876
       * Ugly mechanism to save the raw entity value.
5877
       */
5878
44.2k
      xmlEntityPtr cur = NULL;
5879
5880
44.2k
      if (isParameter) {
5881
10.2k
          if ((ctxt->sax != NULL) &&
5882
10.2k
        (ctxt->sax->getParameterEntity != NULL))
5883
0
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5884
34.0k
      } else {
5885
34.0k
          if ((ctxt->sax != NULL) &&
5886
34.0k
        (ctxt->sax->getEntity != NULL))
5887
34.0k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5888
34.0k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5889
0
        cur = xmlSAX2GetEntity(ctxt, name);
5890
0
    }
5891
34.0k
      }
5892
44.2k
            if ((cur != NULL) && (cur->orig == NULL)) {
5893
0
    cur->orig = orig;
5894
0
                orig = NULL;
5895
0
      }
5896
44.2k
  }
5897
5898
54.3k
done:
5899
54.3k
  if (value != NULL) xmlFree(value);
5900
54.3k
  if (URI != NULL) xmlFree(URI);
5901
54.3k
  if (literal != NULL) xmlFree(literal);
5902
54.3k
        if (orig != NULL) xmlFree(orig);
5903
54.3k
    }
5904
54.7k
}
5905
5906
/**
5907
 * xmlParseDefaultDecl:
5908
 * @ctxt:  an XML parser context
5909
 * @value:  Receive a possible fixed default value for the attribute
5910
 *
5911
 * DEPRECATED: Internal function, don't use.
5912
 *
5913
 * Parse an attribute default declaration
5914
 *
5915
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5916
 *
5917
 * [ VC: Required Attribute ]
5918
 * if the default declaration is the keyword #REQUIRED, then the
5919
 * attribute must be specified for all elements of the type in the
5920
 * attribute-list declaration.
5921
 *
5922
 * [ VC: Attribute Default Legal ]
5923
 * The declared default value must meet the lexical constraints of
5924
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5925
 *
5926
 * [ VC: Fixed Attribute Default ]
5927
 * if an attribute has a default value declared with the #FIXED
5928
 * keyword, instances of that attribute must match the default value.
5929
 *
5930
 * [ WFC: No < in Attribute Values ]
5931
 * handled in xmlParseAttValue()
5932
 *
5933
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5934
 *          or XML_ATTRIBUTE_FIXED.
5935
 */
5936
5937
int
5938
42.0k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5939
42.0k
    int val;
5940
42.0k
    xmlChar *ret;
5941
5942
42.0k
    *value = NULL;
5943
42.0k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5944
2.08k
  SKIP(9);
5945
2.08k
  return(XML_ATTRIBUTE_REQUIRED);
5946
2.08k
    }
5947
39.9k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5948
3.83k
  SKIP(8);
5949
3.83k
  return(XML_ATTRIBUTE_IMPLIED);
5950
3.83k
    }
5951
36.1k
    val = XML_ATTRIBUTE_NONE;
5952
36.1k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5953
1.01k
  SKIP(6);
5954
1.01k
  val = XML_ATTRIBUTE_FIXED;
5955
1.01k
  if (SKIP_BLANKS_PE == 0) {
5956
85
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5957
85
         "Space required after '#FIXED'\n");
5958
85
  }
5959
1.01k
    }
5960
36.1k
    ret = xmlParseAttValue(ctxt);
5961
36.1k
    if (ret == NULL) {
5962
680
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5963
680
           "Attribute default value declaration error\n");
5964
680
    } else
5965
35.4k
        *value = ret;
5966
36.1k
    return(val);
5967
39.9k
}
5968
5969
/**
5970
 * xmlParseNotationType:
5971
 * @ctxt:  an XML parser context
5972
 *
5973
 * DEPRECATED: Internal function, don't use.
5974
 *
5975
 * parse an Notation attribute type.
5976
 *
5977
 * Note: the leading 'NOTATION' S part has already being parsed...
5978
 *
5979
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5980
 *
5981
 * [ VC: Notation Attributes ]
5982
 * Values of this type must match one of the notation names included
5983
 * in the declaration; all notation names in the declaration must be declared.
5984
 *
5985
 * Returns: the notation attribute tree built while parsing
5986
 */
5987
5988
xmlEnumerationPtr
5989
634
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5990
634
    const xmlChar *name;
5991
634
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5992
5993
634
    if (RAW != '(') {
5994
9
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5995
9
  return(NULL);
5996
9
    }
5997
1.42k
    do {
5998
1.42k
        NEXT;
5999
1.42k
  SKIP_BLANKS_PE;
6000
1.42k
        name = xmlParseName(ctxt);
6001
1.42k
  if (name == NULL) {
6002
25
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6003
25
         "Name expected in NOTATION declaration\n");
6004
25
            xmlFreeEnumeration(ret);
6005
25
      return(NULL);
6006
25
  }
6007
1.39k
        tmp = NULL;
6008
1.39k
#ifdef LIBXML_VALID_ENABLED
6009
1.39k
        if (ctxt->validate) {
6010
0
            tmp = ret;
6011
0
            while (tmp != NULL) {
6012
0
                if (xmlStrEqual(name, tmp->name)) {
6013
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6014
0
              "standalone: attribute notation value token %s duplicated\n",
6015
0
                                     name, NULL);
6016
0
                    if (!xmlDictOwns(ctxt->dict, name))
6017
0
                        xmlFree((xmlChar *) name);
6018
0
                    break;
6019
0
                }
6020
0
                tmp = tmp->next;
6021
0
            }
6022
0
        }
6023
1.39k
#endif /* LIBXML_VALID_ENABLED */
6024
1.39k
  if (tmp == NULL) {
6025
1.39k
      cur = xmlCreateEnumeration(name);
6026
1.39k
      if (cur == NULL) {
6027
0
                xmlErrMemory(ctxt);
6028
0
                xmlFreeEnumeration(ret);
6029
0
                return(NULL);
6030
0
            }
6031
1.39k
      if (last == NULL) ret = last = cur;
6032
786
      else {
6033
786
    last->next = cur;
6034
786
    last = cur;
6035
786
      }
6036
1.39k
  }
6037
1.39k
  SKIP_BLANKS_PE;
6038
1.39k
    } while (RAW == '|');
6039
600
    if (RAW != ')') {
6040
72
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
6041
72
        xmlFreeEnumeration(ret);
6042
72
  return(NULL);
6043
72
    }
6044
528
    NEXT;
6045
528
    return(ret);
6046
600
}
6047
6048
/**
6049
 * xmlParseEnumerationType:
6050
 * @ctxt:  an XML parser context
6051
 *
6052
 * DEPRECATED: Internal function, don't use.
6053
 *
6054
 * parse an Enumeration attribute type.
6055
 *
6056
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
6057
 *
6058
 * [ VC: Enumeration ]
6059
 * Values of this type must match one of the Nmtoken tokens in
6060
 * the declaration
6061
 *
6062
 * Returns: the enumeration attribute tree built while parsing
6063
 */
6064
6065
xmlEnumerationPtr
6066
6.66k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
6067
6.66k
    xmlChar *name;
6068
6.66k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6069
6070
6.66k
    if (RAW != '(') {
6071
238
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
6072
238
  return(NULL);
6073
238
    }
6074
8.25k
    do {
6075
8.25k
        NEXT;
6076
8.25k
  SKIP_BLANKS_PE;
6077
8.25k
        name = xmlParseNmtoken(ctxt);
6078
8.25k
  if (name == NULL) {
6079
54
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
6080
54
      return(ret);
6081
54
  }
6082
8.20k
        tmp = NULL;
6083
8.20k
#ifdef LIBXML_VALID_ENABLED
6084
8.20k
        if (ctxt->validate) {
6085
0
            tmp = ret;
6086
0
            while (tmp != NULL) {
6087
0
                if (xmlStrEqual(name, tmp->name)) {
6088
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6089
0
              "standalone: attribute enumeration value token %s duplicated\n",
6090
0
                                     name, NULL);
6091
0
                    if (!xmlDictOwns(ctxt->dict, name))
6092
0
                        xmlFree(name);
6093
0
                    break;
6094
0
                }
6095
0
                tmp = tmp->next;
6096
0
            }
6097
0
        }
6098
8.20k
#endif /* LIBXML_VALID_ENABLED */
6099
8.20k
  if (tmp == NULL) {
6100
8.20k
      cur = xmlCreateEnumeration(name);
6101
8.20k
      if (!xmlDictOwns(ctxt->dict, name))
6102
8.20k
    xmlFree(name);
6103
8.20k
      if (cur == NULL) {
6104
0
                xmlErrMemory(ctxt);
6105
0
                xmlFreeEnumeration(ret);
6106
0
                return(NULL);
6107
0
            }
6108
8.20k
      if (last == NULL) ret = last = cur;
6109
1.78k
      else {
6110
1.78k
    last->next = cur;
6111
1.78k
    last = cur;
6112
1.78k
      }
6113
8.20k
  }
6114
8.20k
  SKIP_BLANKS_PE;
6115
8.20k
    } while (RAW == '|');
6116
6.37k
    if (RAW != ')') {
6117
67
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
6118
67
  return(ret);
6119
67
    }
6120
6.30k
    NEXT;
6121
6.30k
    return(ret);
6122
6.37k
}
6123
6124
/**
6125
 * xmlParseEnumeratedType:
6126
 * @ctxt:  an XML parser context
6127
 * @tree:  the enumeration tree built while parsing
6128
 *
6129
 * DEPRECATED: Internal function, don't use.
6130
 *
6131
 * parse an Enumerated attribute type.
6132
 *
6133
 * [57] EnumeratedType ::= NotationType | Enumeration
6134
 *
6135
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6136
 *
6137
 *
6138
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6139
 */
6140
6141
int
6142
7.30k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6143
7.30k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6144
642
  SKIP(8);
6145
642
  if (SKIP_BLANKS_PE == 0) {
6146
8
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6147
8
         "Space required after 'NOTATION'\n");
6148
8
      return(0);
6149
8
  }
6150
634
  *tree = xmlParseNotationType(ctxt);
6151
634
  if (*tree == NULL) return(0);
6152
528
  return(XML_ATTRIBUTE_NOTATION);
6153
634
    }
6154
6.66k
    *tree = xmlParseEnumerationType(ctxt);
6155
6.66k
    if (*tree == NULL) return(0);
6156
6.41k
    return(XML_ATTRIBUTE_ENUMERATION);
6157
6.66k
}
6158
6159
/**
6160
 * xmlParseAttributeType:
6161
 * @ctxt:  an XML parser context
6162
 * @tree:  the enumeration tree built while parsing
6163
 *
6164
 * DEPRECATED: Internal function, don't use.
6165
 *
6166
 * parse the Attribute list def for an element
6167
 *
6168
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6169
 *
6170
 * [55] StringType ::= 'CDATA'
6171
 *
6172
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6173
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6174
 *
6175
 * Validity constraints for attribute values syntax are checked in
6176
 * xmlValidateAttributeValue()
6177
 *
6178
 * [ VC: ID ]
6179
 * Values of type ID must match the Name production. A name must not
6180
 * appear more than once in an XML document as a value of this type;
6181
 * i.e., ID values must uniquely identify the elements which bear them.
6182
 *
6183
 * [ VC: One ID per Element Type ]
6184
 * No element type may have more than one ID attribute specified.
6185
 *
6186
 * [ VC: ID Attribute Default ]
6187
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6188
 *
6189
 * [ VC: IDREF ]
6190
 * Values of type IDREF must match the Name production, and values
6191
 * of type IDREFS must match Names; each IDREF Name must match the value
6192
 * of an ID attribute on some element in the XML document; i.e. IDREF
6193
 * values must match the value of some ID attribute.
6194
 *
6195
 * [ VC: Entity Name ]
6196
 * Values of type ENTITY must match the Name production, values
6197
 * of type ENTITIES must match Names; each Entity Name must match the
6198
 * name of an unparsed entity declared in the DTD.
6199
 *
6200
 * [ VC: Name Token ]
6201
 * Values of type NMTOKEN must match the Nmtoken production; values
6202
 * of type NMTOKENS must match Nmtokens.
6203
 *
6204
 * Returns the attribute type
6205
 */
6206
int
6207
42.5k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6208
42.5k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6209
3.65k
  SKIP(5);
6210
3.65k
  return(XML_ATTRIBUTE_CDATA);
6211
38.9k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6212
570
  SKIP(6);
6213
570
  return(XML_ATTRIBUTE_IDREFS);
6214
38.3k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6215
570
  SKIP(5);
6216
570
  return(XML_ATTRIBUTE_IDREF);
6217
37.7k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6218
23.8k
        SKIP(2);
6219
23.8k
  return(XML_ATTRIBUTE_ID);
6220
23.8k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6221
477
  SKIP(6);
6222
477
  return(XML_ATTRIBUTE_ENTITY);
6223
13.4k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6224
5.13k
  SKIP(8);
6225
5.13k
  return(XML_ATTRIBUTE_ENTITIES);
6226
8.32k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6227
328
  SKIP(8);
6228
328
  return(XML_ATTRIBUTE_NMTOKENS);
6229
7.99k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6230
688
  SKIP(7);
6231
688
  return(XML_ATTRIBUTE_NMTOKEN);
6232
688
     }
6233
7.30k
     return(xmlParseEnumeratedType(ctxt, tree));
6234
42.5k
}
6235
6236
/**
6237
 * xmlParseAttributeListDecl:
6238
 * @ctxt:  an XML parser context
6239
 *
6240
 * DEPRECATED: Internal function, don't use.
6241
 *
6242
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6243
 *
6244
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6245
 *
6246
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6247
 *
6248
 */
6249
void
6250
12.6k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6251
12.6k
    const xmlChar *elemName;
6252
12.6k
    const xmlChar *attrName;
6253
12.6k
    xmlEnumerationPtr tree;
6254
6255
12.6k
    if ((CUR != '<') || (NXT(1) != '!'))
6256
0
        return;
6257
12.6k
    SKIP(2);
6258
6259
12.6k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6260
12.5k
  int inputid = ctxt->input->id;
6261
6262
12.5k
  SKIP(7);
6263
12.5k
  if (SKIP_BLANKS_PE == 0) {
6264
268
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6265
268
                     "Space required after '<!ATTLIST'\n");
6266
268
  }
6267
12.5k
        elemName = xmlParseName(ctxt);
6268
12.5k
  if (elemName == NULL) {
6269
56
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6270
56
         "ATTLIST: no name for Element\n");
6271
56
      return;
6272
56
  }
6273
12.5k
  SKIP_BLANKS_PE;
6274
12.5k
  GROW;
6275
53.6k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6276
43.1k
      int type;
6277
43.1k
      int def;
6278
43.1k
      xmlChar *defaultValue = NULL;
6279
6280
43.1k
      GROW;
6281
43.1k
            tree = NULL;
6282
43.1k
      attrName = xmlParseName(ctxt);
6283
43.1k
      if (attrName == NULL) {
6284
195
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6285
195
             "ATTLIST: no name for Attribute\n");
6286
195
    break;
6287
195
      }
6288
42.9k
      GROW;
6289
42.9k
      if (SKIP_BLANKS_PE == 0) {
6290
372
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6291
372
            "Space required after the attribute name\n");
6292
372
    break;
6293
372
      }
6294
6295
42.5k
      type = xmlParseAttributeType(ctxt, &tree);
6296
42.5k
      if (type <= 0) {
6297
359
          break;
6298
359
      }
6299
6300
42.2k
      GROW;
6301
42.2k
      if (SKIP_BLANKS_PE == 0) {
6302
195
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6303
195
             "Space required after the attribute type\n");
6304
195
          if (tree != NULL)
6305
130
        xmlFreeEnumeration(tree);
6306
195
    break;
6307
195
      }
6308
6309
42.0k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6310
42.0k
      if (def <= 0) {
6311
0
                if (defaultValue != NULL)
6312
0
        xmlFree(defaultValue);
6313
0
          if (tree != NULL)
6314
0
        xmlFreeEnumeration(tree);
6315
0
          break;
6316
0
      }
6317
42.0k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6318
32.2k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6319
6320
42.0k
      GROW;
6321
42.0k
            if (RAW != '>') {
6322
34.8k
    if (SKIP_BLANKS_PE == 0) {
6323
849
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6324
849
      "Space required after the attribute default value\n");
6325
849
        if (defaultValue != NULL)
6326
160
      xmlFree(defaultValue);
6327
849
        if (tree != NULL)
6328
91
      xmlFreeEnumeration(tree);
6329
849
        break;
6330
849
    }
6331
34.8k
      }
6332
41.1k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6333
34.5k
    (ctxt->sax->attributeDecl != NULL))
6334
0
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6335
0
                          type, def, defaultValue, tree);
6336
41.1k
      else if (tree != NULL)
6337
6.72k
    xmlFreeEnumeration(tree);
6338
6339
41.1k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6340
35.2k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6341
35.2k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6342
35.2k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6343
35.2k
      }
6344
41.1k
      if (ctxt->sax2) {
6345
41.1k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6346
41.1k
      }
6347
41.1k
      if (defaultValue != NULL)
6348
35.2k
          xmlFree(defaultValue);
6349
41.1k
      GROW;
6350
41.1k
  }
6351
12.5k
  if (RAW == '>') {
6352
10.5k
      if (inputid != ctxt->input->id) {
6353
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6354
0
                               "Attribute list declaration doesn't start and"
6355
0
                               " stop in the same entity\n");
6356
0
      }
6357
10.5k
      NEXT;
6358
10.5k
  }
6359
12.5k
    }
6360
12.6k
}
6361
6362
/**
6363
 * xmlParseElementMixedContentDecl:
6364
 * @ctxt:  an XML parser context
6365
 * @inputchk:  the input used for the current entity, needed for boundary checks
6366
 *
6367
 * DEPRECATED: Internal function, don't use.
6368
 *
6369
 * parse the declaration for a Mixed Element content
6370
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6371
 *
6372
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6373
 *                '(' S? '#PCDATA' S? ')'
6374
 *
6375
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6376
 *
6377
 * [ VC: No Duplicate Types ]
6378
 * The same name must not appear more than once in a single
6379
 * mixed-content declaration.
6380
 *
6381
 * returns: the list of the xmlElementContentPtr describing the element choices
6382
 */
6383
xmlElementContentPtr
6384
4.72k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6385
4.72k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6386
4.72k
    const xmlChar *elem = NULL;
6387
6388
4.72k
    GROW;
6389
4.72k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6390
4.72k
  SKIP(7);
6391
4.72k
  SKIP_BLANKS_PE;
6392
4.72k
  if (RAW == ')') {
6393
1.96k
      if (ctxt->input->id != inputchk) {
6394
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6395
0
                               "Element content declaration doesn't start and"
6396
0
                               " stop in the same entity\n");
6397
0
      }
6398
1.96k
      NEXT;
6399
1.96k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6400
1.96k
      if (ret == NULL)
6401
0
                goto mem_error;
6402
1.96k
      if (RAW == '*') {
6403
464
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6404
464
    NEXT;
6405
464
      }
6406
1.96k
      return(ret);
6407
1.96k
  }
6408
2.76k
  if ((RAW == '(') || (RAW == '|')) {
6409
2.66k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6410
2.66k
      if (ret == NULL)
6411
0
                goto mem_error;
6412
2.66k
  }
6413
8.56k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6414
5.89k
      NEXT;
6415
5.89k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6416
5.89k
            if (n == NULL)
6417
0
                goto mem_error;
6418
5.89k
      if (elem == NULL) {
6419
2.66k
    n->c1 = cur;
6420
2.66k
    if (cur != NULL)
6421
2.66k
        cur->parent = n;
6422
2.66k
    ret = cur = n;
6423
3.23k
      } else {
6424
3.23k
          cur->c2 = n;
6425
3.23k
    n->parent = cur;
6426
3.23k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6427
3.23k
                if (n->c1 == NULL)
6428
0
                    goto mem_error;
6429
3.23k
    n->c1->parent = n;
6430
3.23k
    cur = n;
6431
3.23k
      }
6432
5.89k
      SKIP_BLANKS_PE;
6433
5.89k
      elem = xmlParseName(ctxt);
6434
5.89k
      if (elem == NULL) {
6435
92
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6436
92
      "xmlParseElementMixedContentDecl : Name expected\n");
6437
92
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6438
92
    return(NULL);
6439
92
      }
6440
5.80k
      SKIP_BLANKS_PE;
6441
5.80k
      GROW;
6442
5.80k
  }
6443
2.67k
  if ((RAW == ')') && (NXT(1) == '*')) {
6444
1.77k
      if (elem != NULL) {
6445
1.77k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6446
1.77k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6447
1.77k
    if (cur->c2 == NULL)
6448
0
                    goto mem_error;
6449
1.77k
    cur->c2->parent = cur;
6450
1.77k
            }
6451
1.77k
            if (ret != NULL)
6452
1.77k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6453
1.77k
      if (ctxt->input->id != inputchk) {
6454
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6455
0
                               "Element content declaration doesn't start and"
6456
0
                               " stop in the same entity\n");
6457
0
      }
6458
1.77k
      SKIP(2);
6459
1.77k
  } else {
6460
901
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6461
901
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6462
901
      return(NULL);
6463
901
  }
6464
6465
2.67k
    } else {
6466
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6467
0
    }
6468
1.77k
    return(ret);
6469
6470
0
mem_error:
6471
0
    xmlErrMemory(ctxt);
6472
0
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6473
0
    return(NULL);
6474
4.72k
}
6475
6476
/**
6477
 * xmlParseElementChildrenContentDeclPriv:
6478
 * @ctxt:  an XML parser context
6479
 * @inputchk:  the input used for the current entity, needed for boundary checks
6480
 * @depth: the level of recursion
6481
 *
6482
 * parse the declaration for a Mixed Element content
6483
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6484
 *
6485
 *
6486
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6487
 *
6488
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6489
 *
6490
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6491
 *
6492
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6493
 *
6494
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6495
 * TODO Parameter-entity replacement text must be properly nested
6496
 *  with parenthesized groups. That is to say, if either of the
6497
 *  opening or closing parentheses in a choice, seq, or Mixed
6498
 *  construct is contained in the replacement text for a parameter
6499
 *  entity, both must be contained in the same replacement text. For
6500
 *  interoperability, if a parameter-entity reference appears in a
6501
 *  choice, seq, or Mixed construct, its replacement text should not
6502
 *  be empty, and neither the first nor last non-blank character of
6503
 *  the replacement text should be a connector (| or ,).
6504
 *
6505
 * Returns the tree of xmlElementContentPtr describing the element
6506
 *          hierarchy.
6507
 */
6508
static xmlElementContentPtr
6509
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6510
122k
                                       int depth) {
6511
122k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6512
122k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6513
122k
    const xmlChar *elem;
6514
122k
    xmlChar type = 0;
6515
6516
122k
    if (depth > maxDepth) {
6517
5
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6518
5
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6519
5
                "use XML_PARSE_HUGE\n", depth);
6520
5
  return(NULL);
6521
5
    }
6522
122k
    SKIP_BLANKS_PE;
6523
122k
    GROW;
6524
122k
    if (RAW == '(') {
6525
105k
  int inputid = ctxt->input->id;
6526
6527
        /* Recurse on first child */
6528
105k
  NEXT;
6529
105k
  SKIP_BLANKS_PE;
6530
105k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6531
105k
                                                           depth + 1);
6532
105k
        if (cur == NULL)
6533
82.3k
            return(NULL);
6534
23.3k
  SKIP_BLANKS_PE;
6535
23.3k
  GROW;
6536
23.3k
    } else {
6537
17.0k
  elem = xmlParseName(ctxt);
6538
17.0k
  if (elem == NULL) {
6539
188
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6540
188
      return(NULL);
6541
188
  }
6542
16.9k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6543
16.9k
  if (cur == NULL) {
6544
0
      xmlErrMemory(ctxt);
6545
0
      return(NULL);
6546
0
  }
6547
16.9k
  GROW;
6548
16.9k
  if (RAW == '?') {
6549
1.98k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6550
1.98k
      NEXT;
6551
14.9k
  } else if (RAW == '*') {
6552
1.12k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6553
1.12k
      NEXT;
6554
13.8k
  } else if (RAW == '+') {
6555
2.18k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6556
2.18k
      NEXT;
6557
11.6k
  } else {
6558
11.6k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6559
11.6k
  }
6560
16.9k
  GROW;
6561
16.9k
    }
6562
40.3k
    SKIP_BLANKS_PE;
6563
111k
    while ((RAW != ')') && (PARSER_STOPPED(ctxt) == 0)) {
6564
        /*
6565
   * Each loop we parse one separator and one element.
6566
   */
6567
79.1k
        if (RAW == ',') {
6568
61.3k
      if (type == 0) type = CUR;
6569
6570
      /*
6571
       * Detect "Name | Name , Name" error
6572
       */
6573
56.6k
      else if (type != CUR) {
6574
5
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6575
5
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6576
5
                      type);
6577
5
    if ((last != NULL) && (last != ret))
6578
5
        xmlFreeDocElementContent(ctxt->myDoc, last);
6579
5
    if (ret != NULL)
6580
5
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6581
5
    return(NULL);
6582
5
      }
6583
61.3k
      NEXT;
6584
6585
61.3k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6586
61.3k
      if (op == NULL) {
6587
0
                xmlErrMemory(ctxt);
6588
0
    if ((last != NULL) && (last != ret))
6589
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6590
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6591
0
    return(NULL);
6592
0
      }
6593
61.3k
      if (last == NULL) {
6594
4.68k
    op->c1 = ret;
6595
4.68k
    if (ret != NULL)
6596
4.68k
        ret->parent = op;
6597
4.68k
    ret = cur = op;
6598
56.6k
      } else {
6599
56.6k
          cur->c2 = op;
6600
56.6k
    if (op != NULL)
6601
56.6k
        op->parent = cur;
6602
56.6k
    op->c1 = last;
6603
56.6k
    if (last != NULL)
6604
56.6k
        last->parent = op;
6605
56.6k
    cur =op;
6606
56.6k
    last = NULL;
6607
56.6k
      }
6608
61.3k
  } else if (RAW == '|') {
6609
16.7k
      if (type == 0) type = CUR;
6610
6611
      /*
6612
       * Detect "Name , Name | Name" error
6613
       */
6614
7.32k
      else if (type != CUR) {
6615
3
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6616
3
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6617
3
          type);
6618
3
    if ((last != NULL) && (last != ret))
6619
3
        xmlFreeDocElementContent(ctxt->myDoc, last);
6620
3
    if (ret != NULL)
6621
3
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6622
3
    return(NULL);
6623
3
      }
6624
16.7k
      NEXT;
6625
6626
16.7k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6627
16.7k
      if (op == NULL) {
6628
0
                xmlErrMemory(ctxt);
6629
0
    if ((last != NULL) && (last != ret))
6630
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6631
0
    if (ret != NULL)
6632
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6633
0
    return(NULL);
6634
0
      }
6635
16.7k
      if (last == NULL) {
6636
9.44k
    op->c1 = ret;
6637
9.44k
    if (ret != NULL)
6638
9.44k
        ret->parent = op;
6639
9.44k
    ret = cur = op;
6640
9.44k
      } else {
6641
7.32k
          cur->c2 = op;
6642
7.32k
    if (op != NULL)
6643
7.32k
        op->parent = cur;
6644
7.32k
    op->c1 = last;
6645
7.32k
    if (last != NULL)
6646
7.32k
        last->parent = op;
6647
7.32k
    cur =op;
6648
7.32k
    last = NULL;
6649
7.32k
      }
6650
16.7k
  } else {
6651
1.03k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6652
1.03k
      if ((last != NULL) && (last != ret))
6653
688
          xmlFreeDocElementContent(ctxt->myDoc, last);
6654
1.03k
      if (ret != NULL)
6655
1.03k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6656
1.03k
      return(NULL);
6657
1.03k
  }
6658
78.1k
  GROW;
6659
78.1k
  SKIP_BLANKS_PE;
6660
78.1k
  GROW;
6661
78.1k
  if (RAW == '(') {
6662
11.5k
      int inputid = ctxt->input->id;
6663
      /* Recurse on second child */
6664
11.5k
      NEXT;
6665
11.5k
      SKIP_BLANKS_PE;
6666
11.5k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6667
11.5k
                                                          depth + 1);
6668
11.5k
            if (last == NULL) {
6669
6.48k
    if (ret != NULL)
6670
6.48k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6671
6.48k
    return(NULL);
6672
6.48k
            }
6673
5.02k
      SKIP_BLANKS_PE;
6674
66.6k
  } else {
6675
66.6k
      elem = xmlParseName(ctxt);
6676
66.6k
      if (elem == NULL) {
6677
78
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6678
78
    if (ret != NULL)
6679
78
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6680
78
    return(NULL);
6681
78
      }
6682
66.5k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6683
66.5k
      if (last == NULL) {
6684
0
                xmlErrMemory(ctxt);
6685
0
    if (ret != NULL)
6686
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6687
0
    return(NULL);
6688
0
      }
6689
66.5k
      if (RAW == '?') {
6690
685
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6691
685
    NEXT;
6692
65.8k
      } else if (RAW == '*') {
6693
460
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6694
460
    NEXT;
6695
65.4k
      } else if (RAW == '+') {
6696
1.11k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6697
1.11k
    NEXT;
6698
64.3k
      } else {
6699
64.3k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6700
64.3k
      }
6701
66.5k
  }
6702
71.5k
  SKIP_BLANKS_PE;
6703
71.5k
  GROW;
6704
71.5k
    }
6705
32.7k
    if ((cur != NULL) && (last != NULL)) {
6706
6.87k
        cur->c2 = last;
6707
6.87k
  if (last != NULL)
6708
6.87k
      last->parent = cur;
6709
6.87k
    }
6710
32.7k
    if (ctxt->input->id != inputchk) {
6711
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6712
0
                       "Element content declaration doesn't start and stop in"
6713
0
                       " the same entity\n");
6714
0
    }
6715
32.7k
    NEXT;
6716
32.7k
    if (RAW == '?') {
6717
3.60k
  if (ret != NULL) {
6718
3.60k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6719
3.45k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6720
975
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6721
2.62k
      else
6722
2.62k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6723
3.60k
  }
6724
3.60k
  NEXT;
6725
29.1k
    } else if (RAW == '*') {
6726
3.49k
  if (ret != NULL) {
6727
3.49k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6728
3.49k
      cur = ret;
6729
      /*
6730
       * Some normalization:
6731
       * (a | b* | c?)* == (a | b | c)*
6732
       */
6733
6.80k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6734
3.30k
    if ((cur->c1 != NULL) &&
6735
3.30k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6736
3.28k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6737
1.16k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6738
3.30k
    if ((cur->c2 != NULL) &&
6739
3.30k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6740
3.29k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6741
109
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6742
3.30k
    cur = cur->c2;
6743
3.30k
      }
6744
3.49k
  }
6745
3.49k
  NEXT;
6746
25.6k
    } else if (RAW == '+') {
6747
8.02k
  if (ret != NULL) {
6748
8.02k
      int found = 0;
6749
6750
8.02k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6751
5.80k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6752
4.06k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6753
3.96k
      else
6754
3.96k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6755
      /*
6756
       * Some normalization:
6757
       * (a | b*)+ == (a | b)*
6758
       * (a | b?)+ == (a | b)*
6759
       */
6760
12.1k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6761
4.09k
    if ((cur->c1 != NULL) &&
6762
4.09k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6763
3.94k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6764
824
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6765
824
        found = 1;
6766
824
    }
6767
4.09k
    if ((cur->c2 != NULL) &&
6768
4.09k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6769
3.65k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6770
688
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6771
688
        found = 1;
6772
688
    }
6773
4.09k
    cur = cur->c2;
6774
4.09k
      }
6775
8.02k
      if (found)
6776
1.35k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6777
8.02k
  }
6778
8.02k
  NEXT;
6779
8.02k
    }
6780
32.7k
    return(ret);
6781
40.3k
}
6782
6783
/**
6784
 * xmlParseElementChildrenContentDecl:
6785
 * @ctxt:  an XML parser context
6786
 * @inputchk:  the input used for the current entity, needed for boundary checks
6787
 *
6788
 * DEPRECATED: Internal function, don't use.
6789
 *
6790
 * parse the declaration for a Mixed Element content
6791
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6792
 *
6793
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6794
 *
6795
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6796
 *
6797
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6798
 *
6799
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6800
 *
6801
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6802
 * TODO Parameter-entity replacement text must be properly nested
6803
 *  with parenthesized groups. That is to say, if either of the
6804
 *  opening or closing parentheses in a choice, seq, or Mixed
6805
 *  construct is contained in the replacement text for a parameter
6806
 *  entity, both must be contained in the same replacement text. For
6807
 *  interoperability, if a parameter-entity reference appears in a
6808
 *  choice, seq, or Mixed construct, its replacement text should not
6809
 *  be empty, and neither the first nor last non-blank character of
6810
 *  the replacement text should be a connector (| or ,).
6811
 *
6812
 * Returns the tree of xmlElementContentPtr describing the element
6813
 *          hierarchy.
6814
 */
6815
xmlElementContentPtr
6816
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6817
    /* stub left for API/ABI compat */
6818
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6819
0
}
6820
6821
/**
6822
 * xmlParseElementContentDecl:
6823
 * @ctxt:  an XML parser context
6824
 * @name:  the name of the element being defined.
6825
 * @result:  the Element Content pointer will be stored here if any
6826
 *
6827
 * DEPRECATED: Internal function, don't use.
6828
 *
6829
 * parse the declaration for an Element content either Mixed or Children,
6830
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6831
 *
6832
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6833
 *
6834
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6835
 */
6836
6837
int
6838
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6839
10.3k
                           xmlElementContentPtr *result) {
6840
6841
10.3k
    xmlElementContentPtr tree = NULL;
6842
10.3k
    int inputid = ctxt->input->id;
6843
10.3k
    int res;
6844
6845
10.3k
    *result = NULL;
6846
6847
10.3k
    if (RAW != '(') {
6848
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6849
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6850
0
  return(-1);
6851
0
    }
6852
10.3k
    NEXT;
6853
10.3k
    GROW;
6854
10.3k
    SKIP_BLANKS_PE;
6855
10.3k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6856
4.72k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6857
4.72k
  res = XML_ELEMENT_TYPE_MIXED;
6858
5.59k
    } else {
6859
5.59k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6860
5.59k
  res = XML_ELEMENT_TYPE_ELEMENT;
6861
5.59k
    }
6862
10.3k
    SKIP_BLANKS_PE;
6863
10.3k
    *result = tree;
6864
10.3k
    return(res);
6865
10.3k
}
6866
6867
/**
6868
 * xmlParseElementDecl:
6869
 * @ctxt:  an XML parser context
6870
 *
6871
 * DEPRECATED: Internal function, don't use.
6872
 *
6873
 * Parse an element declaration. Always consumes '<!'.
6874
 *
6875
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6876
 *
6877
 * [ VC: Unique Element Type Declaration ]
6878
 * No element type may be declared more than once
6879
 *
6880
 * Returns the type of the element, or -1 in case of error
6881
 */
6882
int
6883
11.7k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6884
11.7k
    const xmlChar *name;
6885
11.7k
    int ret = -1;
6886
11.7k
    xmlElementContentPtr content  = NULL;
6887
6888
11.7k
    if ((CUR != '<') || (NXT(1) != '!'))
6889
0
        return(ret);
6890
11.7k
    SKIP(2);
6891
6892
    /* GROW; done in the caller */
6893
11.7k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6894
11.6k
  int inputid = ctxt->input->id;
6895
6896
11.6k
  SKIP(7);
6897
11.6k
  if (SKIP_BLANKS_PE == 0) {
6898
22
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6899
22
               "Space required after 'ELEMENT'\n");
6900
22
      return(-1);
6901
22
  }
6902
11.6k
        name = xmlParseName(ctxt);
6903
11.6k
  if (name == NULL) {
6904
49
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6905
49
         "xmlParseElementDecl: no name for Element\n");
6906
49
      return(-1);
6907
49
  }
6908
11.5k
  if (SKIP_BLANKS_PE == 0) {
6909
136
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6910
136
         "Space required after the element name\n");
6911
136
  }
6912
11.5k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6913
384
      SKIP(5);
6914
      /*
6915
       * Element must always be empty.
6916
       */
6917
384
      ret = XML_ELEMENT_TYPE_EMPTY;
6918
11.2k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6919
536
             (NXT(2) == 'Y')) {
6920
532
      SKIP(3);
6921
      /*
6922
       * Element is a generic container.
6923
       */
6924
532
      ret = XML_ELEMENT_TYPE_ANY;
6925
10.6k
  } else if (RAW == '(') {
6926
10.3k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6927
10.3k
  } else {
6928
      /*
6929
       * [ WFC: PEs in Internal Subset ] error handling.
6930
       */
6931
356
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6932
356
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6933
356
      return(-1);
6934
356
  }
6935
6936
11.2k
  SKIP_BLANKS_PE;
6937
6938
11.2k
  if (RAW != '>') {
6939
1.10k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6940
1.10k
      if (content != NULL) {
6941
74
    xmlFreeDocElementContent(ctxt->myDoc, content);
6942
74
      }
6943
10.1k
  } else {
6944
10.1k
      if (inputid != ctxt->input->id) {
6945
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6946
0
                               "Element declaration doesn't start and stop in"
6947
0
                               " the same entity\n");
6948
0
      }
6949
6950
10.1k
      NEXT;
6951
10.1k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6952
4.11k
    (ctxt->sax->elementDecl != NULL)) {
6953
0
    if (content != NULL)
6954
0
        content->parent = NULL;
6955
0
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6956
0
                           content);
6957
0
    if ((content != NULL) && (content->parent == NULL)) {
6958
        /*
6959
         * this is a trick: if xmlAddElementDecl is called,
6960
         * instead of copying the full tree it is plugged directly
6961
         * if called from the parser. Avoid duplicating the
6962
         * interfaces or change the API/ABI
6963
         */
6964
0
        xmlFreeDocElementContent(ctxt->myDoc, content);
6965
0
    }
6966
10.1k
      } else if (content != NULL) {
6967
7.94k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6968
7.94k
      }
6969
10.1k
  }
6970
11.2k
    }
6971
11.3k
    return(ret);
6972
11.7k
}
6973
6974
/**
6975
 * xmlParseConditionalSections
6976
 * @ctxt:  an XML parser context
6977
 *
6978
 * Parse a conditional section. Always consumes '<!['.
6979
 *
6980
 * [61] conditionalSect ::= includeSect | ignoreSect
6981
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6982
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6983
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6984
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6985
 */
6986
6987
static void
6988
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6989
0
    int *inputIds = NULL;
6990
0
    size_t inputIdsSize = 0;
6991
0
    size_t depth = 0;
6992
6993
0
    while (PARSER_STOPPED(ctxt) == 0) {
6994
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6995
0
            int id = ctxt->input->id;
6996
6997
0
            SKIP(3);
6998
0
            SKIP_BLANKS_PE;
6999
7000
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
7001
0
                SKIP(7);
7002
0
                SKIP_BLANKS_PE;
7003
0
                if (RAW != '[') {
7004
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7005
0
                    xmlHaltParser(ctxt);
7006
0
                    goto error;
7007
0
                }
7008
0
                if (ctxt->input->id != id) {
7009
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7010
0
                                   "All markup of the conditional section is"
7011
0
                                   " not in the same entity\n");
7012
0
                }
7013
0
                NEXT;
7014
7015
0
                if (inputIdsSize <= depth) {
7016
0
                    int *tmp;
7017
0
                    int newSize;
7018
7019
0
                    newSize = xmlGrowCapacity(inputIdsSize, sizeof(tmp[0]),
7020
0
                                              4, 1000);
7021
0
                    if (newSize < 0) {
7022
0
                        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
7023
0
                                       "Maximum conditional section nesting"
7024
0
                                       " depth exceeded\n");
7025
0
                        goto error;
7026
0
                    }
7027
0
                    tmp = xmlRealloc(inputIds, newSize * sizeof(tmp[0]));
7028
0
                    if (tmp == NULL) {
7029
0
                        xmlErrMemory(ctxt);
7030
0
                        goto error;
7031
0
                    }
7032
0
                    inputIds = tmp;
7033
0
                    inputIdsSize = newSize;
7034
0
                }
7035
0
                inputIds[depth] = id;
7036
0
                depth++;
7037
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
7038
0
                size_t ignoreDepth = 0;
7039
7040
0
                SKIP(6);
7041
0
                SKIP_BLANKS_PE;
7042
0
                if (RAW != '[') {
7043
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7044
0
                    xmlHaltParser(ctxt);
7045
0
                    goto error;
7046
0
                }
7047
0
                if (ctxt->input->id != id) {
7048
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7049
0
                                   "All markup of the conditional section is"
7050
0
                                   " not in the same entity\n");
7051
0
                }
7052
0
                NEXT;
7053
7054
0
                while (PARSER_STOPPED(ctxt) == 0) {
7055
0
                    if (RAW == 0) {
7056
0
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
7057
0
                        goto error;
7058
0
                    }
7059
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7060
0
                        SKIP(3);
7061
0
                        ignoreDepth++;
7062
                        /* Check for integer overflow */
7063
0
                        if (ignoreDepth == 0) {
7064
0
                            xmlErrMemory(ctxt);
7065
0
                            goto error;
7066
0
                        }
7067
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
7068
0
                               (NXT(2) == '>')) {
7069
0
                        SKIP(3);
7070
0
                        if (ignoreDepth == 0)
7071
0
                            break;
7072
0
                        ignoreDepth--;
7073
0
                    } else {
7074
0
                        NEXT;
7075
0
                    }
7076
0
                }
7077
7078
0
                if (ctxt->input->id != id) {
7079
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7080
0
                                   "All markup of the conditional section is"
7081
0
                                   " not in the same entity\n");
7082
0
                }
7083
0
            } else {
7084
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
7085
0
                xmlHaltParser(ctxt);
7086
0
                goto error;
7087
0
            }
7088
0
        } else if ((depth > 0) &&
7089
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
7090
0
            depth--;
7091
0
            if (ctxt->input->id != inputIds[depth]) {
7092
0
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7093
0
                               "All markup of the conditional section is not"
7094
0
                               " in the same entity\n");
7095
0
            }
7096
0
            SKIP(3);
7097
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7098
0
            xmlParseMarkupDecl(ctxt);
7099
0
        } else {
7100
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7101
0
            xmlHaltParser(ctxt);
7102
0
            goto error;
7103
0
        }
7104
7105
0
        if (depth == 0)
7106
0
            break;
7107
7108
0
        SKIP_BLANKS_PE;
7109
0
        SHRINK;
7110
0
        GROW;
7111
0
    }
7112
7113
0
error:
7114
0
    xmlFree(inputIds);
7115
0
}
7116
7117
/**
7118
 * xmlParseMarkupDecl:
7119
 * @ctxt:  an XML parser context
7120
 *
7121
 * DEPRECATED: Internal function, don't use.
7122
 *
7123
 * Parse markup declarations. Always consumes '<!' or '<?'.
7124
 *
7125
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7126
 *                     NotationDecl | PI | Comment
7127
 *
7128
 * [ VC: Proper Declaration/PE Nesting ]
7129
 * Parameter-entity replacement text must be properly nested with
7130
 * markup declarations. That is to say, if either the first character
7131
 * or the last character of a markup declaration (markupdecl above) is
7132
 * contained in the replacement text for a parameter-entity reference,
7133
 * both must be contained in the same replacement text.
7134
 *
7135
 * [ WFC: PEs in Internal Subset ]
7136
 * In the internal DTD subset, parameter-entity references can occur
7137
 * only where markup declarations can occur, not within markup declarations.
7138
 * (This does not apply to references that occur in external parameter
7139
 * entities or to the external subset.)
7140
 */
7141
void
7142
127k
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7143
127k
    GROW;
7144
127k
    if (CUR == '<') {
7145
127k
        if (NXT(1) == '!') {
7146
105k
      switch (NXT(2)) {
7147
66.5k
          case 'E':
7148
66.5k
        if (NXT(3) == 'L')
7149
11.7k
      xmlParseElementDecl(ctxt);
7150
54.7k
        else if (NXT(3) == 'N')
7151
54.7k
      xmlParseEntityDecl(ctxt);
7152
47
                    else
7153
47
                        SKIP(2);
7154
66.5k
        break;
7155
12.6k
          case 'A':
7156
12.6k
        xmlParseAttributeListDecl(ctxt);
7157
12.6k
        break;
7158
2.86k
          case 'N':
7159
2.86k
        xmlParseNotationDecl(ctxt);
7160
2.86k
        break;
7161
23.4k
          case '-':
7162
23.4k
        xmlParseComment(ctxt);
7163
23.4k
        break;
7164
189
    default:
7165
189
                    xmlFatalErr(ctxt,
7166
189
                                ctxt->inSubset == 2 ?
7167
0
                                    XML_ERR_EXT_SUBSET_NOT_FINISHED :
7168
189
                                    XML_ERR_INT_SUBSET_NOT_FINISHED,
7169
189
                                NULL);
7170
189
                    SKIP(2);
7171
189
        break;
7172
105k
      }
7173
105k
  } else if (NXT(1) == '?') {
7174
21.3k
      xmlParsePI(ctxt);
7175
21.3k
  }
7176
127k
    }
7177
127k
}
7178
7179
/**
7180
 * xmlParseTextDecl:
7181
 * @ctxt:  an XML parser context
7182
 *
7183
 * DEPRECATED: Internal function, don't use.
7184
 *
7185
 * parse an XML declaration header for external entities
7186
 *
7187
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7188
 */
7189
7190
void
7191
0
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7192
0
    xmlChar *version;
7193
7194
    /*
7195
     * We know that '<?xml' is here.
7196
     */
7197
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7198
0
  SKIP(5);
7199
0
    } else {
7200
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7201
0
  return;
7202
0
    }
7203
7204
0
    if (SKIP_BLANKS == 0) {
7205
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7206
0
           "Space needed after '<?xml'\n");
7207
0
    }
7208
7209
    /*
7210
     * We may have the VersionInfo here.
7211
     */
7212
0
    version = xmlParseVersionInfo(ctxt);
7213
0
    if (version == NULL) {
7214
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7215
0
        if (version == NULL) {
7216
0
            xmlErrMemory(ctxt);
7217
0
            return;
7218
0
        }
7219
0
    } else {
7220
0
  if (SKIP_BLANKS == 0) {
7221
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7222
0
               "Space needed here\n");
7223
0
  }
7224
0
    }
7225
0
    ctxt->input->version = version;
7226
7227
    /*
7228
     * We must have the encoding declaration
7229
     */
7230
0
    xmlParseEncodingDecl(ctxt);
7231
7232
0
    SKIP_BLANKS;
7233
0
    if ((RAW == '?') && (NXT(1) == '>')) {
7234
0
        SKIP(2);
7235
0
    } else if (RAW == '>') {
7236
        /* Deprecated old WD ... */
7237
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7238
0
  NEXT;
7239
0
    } else {
7240
0
        int c;
7241
7242
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7243
0
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7244
0
            NEXT;
7245
0
            if (c == '>')
7246
0
                break;
7247
0
        }
7248
0
    }
7249
0
}
7250
7251
/**
7252
 * xmlParseExternalSubset:
7253
 * @ctxt:  an XML parser context
7254
 * @ExternalID: the external identifier
7255
 * @SystemID: the system identifier (or URL)
7256
 *
7257
 * DEPRECATED: Internal function, don't use.
7258
 *
7259
 * parse Markup declarations from an external subset
7260
 *
7261
 * [30] extSubset ::= textDecl? extSubsetDecl
7262
 *
7263
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7264
 */
7265
void
7266
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7267
0
                       const xmlChar *SystemID) {
7268
0
    int oldInputNr;
7269
7270
0
    xmlCtxtInitializeLate(ctxt);
7271
7272
0
    xmlDetectEncoding(ctxt);
7273
7274
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7275
0
  xmlParseTextDecl(ctxt);
7276
0
    }
7277
0
    if (ctxt->myDoc == NULL) {
7278
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7279
0
  if (ctxt->myDoc == NULL) {
7280
0
      xmlErrMemory(ctxt);
7281
0
      return;
7282
0
  }
7283
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7284
0
    }
7285
0
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) &&
7286
0
        (xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID) == NULL)) {
7287
0
        xmlErrMemory(ctxt);
7288
0
    }
7289
7290
0
    ctxt->inSubset = 2;
7291
0
    oldInputNr = ctxt->inputNr;
7292
7293
0
    SKIP_BLANKS_PE;
7294
0
    while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) &&
7295
0
           (!PARSER_STOPPED(ctxt))) {
7296
0
  GROW;
7297
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7298
0
            xmlParseConditionalSections(ctxt);
7299
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7300
0
            xmlParseMarkupDecl(ctxt);
7301
0
        } else {
7302
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7303
0
            xmlHaltParser(ctxt);
7304
0
            return;
7305
0
        }
7306
0
        SKIP_BLANKS_PE;
7307
0
        SHRINK;
7308
0
    }
7309
7310
0
    while (ctxt->inputNr > oldInputNr)
7311
0
        xmlPopPE(ctxt);
7312
7313
0
    xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
7314
0
}
7315
7316
/**
7317
 * xmlParseReference:
7318
 * @ctxt:  an XML parser context
7319
 *
7320
 * DEPRECATED: Internal function, don't use.
7321
 *
7322
 * parse and handle entity references in content, depending on the SAX
7323
 * interface, this may end-up in a call to character() if this is a
7324
 * CharRef, a predefined entity, if there is no reference() callback.
7325
 * or if the parser was asked to switch to that mode.
7326
 *
7327
 * Always consumes '&'.
7328
 *
7329
 * [67] Reference ::= EntityRef | CharRef
7330
 */
7331
void
7332
340k
xmlParseReference(xmlParserCtxtPtr ctxt) {
7333
340k
    xmlEntityPtr ent = NULL;
7334
340k
    const xmlChar *name;
7335
340k
    xmlChar *val;
7336
7337
340k
    if (RAW != '&')
7338
0
        return;
7339
7340
    /*
7341
     * Simple case of a CharRef
7342
     */
7343
340k
    if (NXT(1) == '#') {
7344
71.6k
  int i = 0;
7345
71.6k
  xmlChar out[16];
7346
71.6k
  int value = xmlParseCharRef(ctxt);
7347
7348
71.6k
  if (value == 0)
7349
159
      return;
7350
7351
        /*
7352
         * Just encode the value in UTF-8
7353
         */
7354
71.4k
        COPY_BUF(out, i, value);
7355
71.4k
        out[i] = 0;
7356
71.4k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7357
71.4k
            (!ctxt->disableSAX))
7358
71.4k
            ctxt->sax->characters(ctxt->userData, out, i);
7359
71.4k
  return;
7360
71.6k
    }
7361
7362
    /*
7363
     * We are seeing an entity reference
7364
     */
7365
268k
    name = xmlParseEntityRefInternal(ctxt);
7366
268k
    if (name == NULL)
7367
230
        return;
7368
268k
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7369
268k
    if (ent == NULL) {
7370
        /*
7371
         * Create a reference for undeclared entities.
7372
         */
7373
1.12k
        if ((ctxt->replaceEntities == 0) &&
7374
0
            (ctxt->sax != NULL) &&
7375
0
            (ctxt->disableSAX == 0) &&
7376
0
            (ctxt->sax->reference != NULL)) {
7377
0
            ctxt->sax->reference(ctxt->userData, name);
7378
0
        }
7379
1.12k
        return;
7380
1.12k
    }
7381
267k
    if (!ctxt->wellFormed)
7382
0
  return;
7383
7384
    /* special case of predefined entities */
7385
267k
    if ((ent->name == NULL) ||
7386
267k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7387
267k
  val = ent->content;
7388
267k
  if (val == NULL) return;
7389
  /*
7390
   * inline the entity.
7391
   */
7392
267k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7393
267k
      (!ctxt->disableSAX))
7394
267k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7395
267k
  return;
7396
267k
    }
7397
7398
    /*
7399
     * Some users try to parse entities on their own and used to set
7400
     * the renamed "checked" member. Fix the flags to cover this
7401
     * case.
7402
     */
7403
1
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7404
0
        ent->flags |= XML_ENT_PARSED;
7405
7406
    /*
7407
     * The first reference to the entity trigger a parsing phase
7408
     * where the ent->children is filled with the result from
7409
     * the parsing.
7410
     * Note: external parsed entities will not be loaded, it is not
7411
     * required for a non-validating parser, unless the parsing option
7412
     * of validating, or substituting entities were given. Doing so is
7413
     * far more secure as the parser will only process data coming from
7414
     * the document entity by default.
7415
     *
7416
     * FIXME: This doesn't work correctly since entities can be
7417
     * expanded with different namespace declarations in scope.
7418
     * For example:
7419
     *
7420
     * <!DOCTYPE doc [
7421
     *   <!ENTITY ent "<ns:elem/>">
7422
     * ]>
7423
     * <doc>
7424
     *   <decl1 xmlns:ns="urn:ns1">
7425
     *     &ent;
7426
     *   </decl1>
7427
     *   <decl2 xmlns:ns="urn:ns2">
7428
     *     &ent;
7429
     *   </decl2>
7430
     * </doc>
7431
     *
7432
     * Proposed fix:
7433
     *
7434
     * - Ignore current namespace declarations when parsing the
7435
     *   entity. If a prefix can't be resolved, don't report an error
7436
     *   but mark it as unresolved.
7437
     * - Try to resolve these prefixes when expanding the entity.
7438
     *   This will require a specialized version of xmlStaticCopyNode
7439
     *   which can also make use of the namespace hash table to avoid
7440
     *   quadratic behavior.
7441
     *
7442
     * Alternatively, we could simply reparse the entity on each
7443
     * expansion like we already do with custom SAX callbacks.
7444
     * External entity content should be cached in this case.
7445
     */
7446
1
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7447
0
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7448
0
         ((ctxt->replaceEntities) ||
7449
0
          (ctxt->validate)))) {
7450
0
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7451
0
            xmlCtxtParseEntity(ctxt, ent);
7452
0
        } else if (ent->children == NULL) {
7453
            /*
7454
             * Probably running in SAX mode and the callbacks don't
7455
             * build the entity content. Parse the entity again.
7456
             *
7457
             * This will also be triggered in normal tree builder mode
7458
             * if an entity happens to be empty, causing unnecessary
7459
             * reloads. It's hard to come up with a reliable check in
7460
             * which mode we're running.
7461
             */
7462
0
            xmlCtxtParseEntity(ctxt, ent);
7463
0
        }
7464
0
    }
7465
7466
    /*
7467
     * We also check for amplification if entities aren't substituted.
7468
     * They might be expanded later.
7469
     */
7470
1
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7471
0
        return;
7472
7473
1
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7474
0
        return;
7475
7476
1
    if (ctxt->replaceEntities == 0) {
7477
  /*
7478
   * Create a reference
7479
   */
7480
0
        if (ctxt->sax->reference != NULL)
7481
0
      ctxt->sax->reference(ctxt->userData, ent->name);
7482
1
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7483
0
        xmlNodePtr copy, cur;
7484
7485
        /*
7486
         * Seems we are generating the DOM content, copy the tree
7487
   */
7488
0
        cur = ent->children;
7489
7490
        /*
7491
         * Handle first text node with SAX to coalesce text efficiently
7492
         */
7493
0
        if ((cur->type == XML_TEXT_NODE) ||
7494
0
            (cur->type == XML_CDATA_SECTION_NODE)) {
7495
0
            int len = xmlStrlen(cur->content);
7496
7497
0
            if ((cur->type == XML_TEXT_NODE) ||
7498
0
                (ctxt->options & XML_PARSE_NOCDATA)) {
7499
0
                if (ctxt->sax->characters != NULL)
7500
0
                    ctxt->sax->characters(ctxt, cur->content, len);
7501
0
            } else {
7502
0
                if (ctxt->sax->cdataBlock != NULL)
7503
0
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7504
0
            }
7505
7506
0
            cur = cur->next;
7507
0
        }
7508
7509
0
        while (cur != NULL) {
7510
0
            xmlNodePtr last;
7511
7512
            /*
7513
             * Handle last text node with SAX to coalesce text efficiently
7514
             */
7515
0
            if ((cur->next == NULL) &&
7516
0
                ((cur->type == XML_TEXT_NODE) ||
7517
0
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7518
0
                int len = xmlStrlen(cur->content);
7519
7520
0
                if ((cur->type == XML_TEXT_NODE) ||
7521
0
                    (ctxt->options & XML_PARSE_NOCDATA)) {
7522
0
                    if (ctxt->sax->characters != NULL)
7523
0
                        ctxt->sax->characters(ctxt, cur->content, len);
7524
0
                } else {
7525
0
                    if (ctxt->sax->cdataBlock != NULL)
7526
0
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7527
0
                }
7528
7529
0
                break;
7530
0
            }
7531
7532
            /*
7533
             * Reset coalesce buffer stats only for non-text nodes.
7534
             */
7535
0
            ctxt->nodemem = 0;
7536
0
            ctxt->nodelen = 0;
7537
7538
0
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7539
7540
0
            if (copy == NULL) {
7541
0
                xmlErrMemory(ctxt);
7542
0
                break;
7543
0
            }
7544
7545
0
            if (ctxt->parseMode == XML_PARSE_READER) {
7546
                /* Needed for reader */
7547
0
                copy->extra = cur->extra;
7548
                /* Maybe needed for reader */
7549
0
                copy->_private = cur->_private;
7550
0
            }
7551
7552
0
            copy->parent = ctxt->node;
7553
0
            last = ctxt->node->last;
7554
0
            if (last == NULL) {
7555
0
                ctxt->node->children = copy;
7556
0
            } else {
7557
0
                last->next = copy;
7558
0
                copy->prev = last;
7559
0
            }
7560
0
            ctxt->node->last = copy;
7561
7562
0
            cur = cur->next;
7563
0
        }
7564
0
    }
7565
1
}
7566
7567
static void
7568
30.4k
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7569
    /*
7570
     * [ WFC: Entity Declared ]
7571
     * In a document without any DTD, a document with only an
7572
     * internal DTD subset which contains no parameter entity
7573
     * references, or a document with "standalone='yes'", the
7574
     * Name given in the entity reference must match that in an
7575
     * entity declaration, except that well-formed documents
7576
     * need not declare any of the following entities: amp, lt,
7577
     * gt, apos, quot.
7578
     * The declaration of a parameter entity must precede any
7579
     * reference to it.
7580
     * Similarly, the declaration of a general entity must
7581
     * precede any reference to it which appears in a default
7582
     * value in an attribute-list declaration. Note that if
7583
     * entities are declared in the external subset or in
7584
     * external parameter entities, a non-validating processor
7585
     * is not obligated to read and process their declarations;
7586
     * for such documents, the rule that an entity must be
7587
     * declared is a well-formedness constraint only if
7588
     * standalone='yes'.
7589
     */
7590
30.4k
    if ((ctxt->standalone == 1) ||
7591
28.6k
        ((ctxt->hasExternalSubset == 0) &&
7592
27.7k
         (ctxt->hasPErefs == 0))) {
7593
20.8k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7594
20.8k
                          "Entity '%s' not defined\n", name);
7595
20.8k
    } else if (ctxt->validate) {
7596
        /*
7597
         * [ VC: Entity Declared ]
7598
         * In a document with an external subset or external
7599
         * parameter entities with "standalone='no'", ...
7600
         * ... The declaration of a parameter entity must
7601
         * precede any reference to it...
7602
         */
7603
0
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7604
0
                         "Entity '%s' not defined\n", name, NULL);
7605
9.52k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7606
9.52k
               ((ctxt->replaceEntities) &&
7607
9.52k
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7608
        /*
7609
         * Also raise a non-fatal error
7610
         *
7611
         * - if the external subset is loaded and all entity declarations
7612
         *   should be available, or
7613
         * - entity substition was requested without restricting
7614
         *   external entity access.
7615
         */
7616
9.52k
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7617
9.52k
                     "Entity '%s' not defined\n", name);
7618
9.52k
    } else {
7619
0
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7620
0
                      "Entity '%s' not defined\n", name, NULL);
7621
0
    }
7622
7623
30.4k
    ctxt->valid = 0;
7624
30.4k
}
7625
7626
static xmlEntityPtr
7627
447k
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7628
447k
    xmlEntityPtr ent = NULL;
7629
7630
    /*
7631
     * Predefined entities override any extra definition
7632
     */
7633
447k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7634
447k
        ent = xmlGetPredefinedEntity(name);
7635
447k
        if (ent != NULL)
7636
425k
            return(ent);
7637
447k
    }
7638
7639
    /*
7640
     * Ask first SAX for entity resolution, otherwise try the
7641
     * entities which may have stored in the parser context.
7642
     */
7643
22.1k
    if (ctxt->sax != NULL) {
7644
22.1k
  if (ctxt->sax->getEntity != NULL)
7645
22.1k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7646
22.1k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7647
1.45k
      (ctxt->options & XML_PARSE_OLDSAX))
7648
0
      ent = xmlGetPredefinedEntity(name);
7649
22.1k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7650
1.45k
      (ctxt->userData==ctxt)) {
7651
0
      ent = xmlSAX2GetEntity(ctxt, name);
7652
0
  }
7653
22.1k
    }
7654
7655
22.1k
    if (ent == NULL) {
7656
22.1k
        xmlHandleUndeclaredEntity(ctxt, name);
7657
22.1k
    }
7658
7659
    /*
7660
     * [ WFC: Parsed Entity ]
7661
     * An entity reference must not contain the name of an
7662
     * unparsed entity
7663
     */
7664
1
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7665
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7666
0
     "Entity reference to unparsed entity %s\n", name);
7667
0
        ent = NULL;
7668
0
    }
7669
7670
    /*
7671
     * [ WFC: No External Entity References ]
7672
     * Attribute values cannot contain direct or indirect
7673
     * entity references to external entities.
7674
     */
7675
1
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7676
0
        if (inAttr) {
7677
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7678
0
                 "Attribute references external entity '%s'\n", name);
7679
0
            ent = NULL;
7680
0
        }
7681
0
    }
7682
7683
22.1k
    return(ent);
7684
447k
}
7685
7686
/**
7687
 * xmlParseEntityRefInternal:
7688
 * @ctxt:  an XML parser context
7689
 * @inAttr:  whether we are in an attribute value
7690
 *
7691
 * Parse an entity reference. Always consumes '&'.
7692
 *
7693
 * [68] EntityRef ::= '&' Name ';'
7694
 *
7695
 * Returns the name, or NULL in case of error.
7696
 */
7697
static const xmlChar *
7698
754k
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7699
754k
    const xmlChar *name;
7700
7701
754k
    GROW;
7702
7703
754k
    if (RAW != '&')
7704
0
        return(NULL);
7705
754k
    NEXT;
7706
754k
    name = xmlParseName(ctxt);
7707
754k
    if (name == NULL) {
7708
174k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7709
174k
           "xmlParseEntityRef: no name\n");
7710
174k
        return(NULL);
7711
174k
    }
7712
580k
    if (RAW != ';') {
7713
132k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7714
132k
  return(NULL);
7715
132k
    }
7716
447k
    NEXT;
7717
7718
447k
    return(name);
7719
580k
}
7720
7721
/**
7722
 * xmlParseEntityRef:
7723
 * @ctxt:  an XML parser context
7724
 *
7725
 * DEPRECATED: Internal function, don't use.
7726
 *
7727
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7728
 */
7729
xmlEntityPtr
7730
0
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7731
0
    const xmlChar *name;
7732
7733
0
    if (ctxt == NULL)
7734
0
        return(NULL);
7735
7736
0
    name = xmlParseEntityRefInternal(ctxt);
7737
0
    if (name == NULL)
7738
0
        return(NULL);
7739
7740
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7741
0
}
7742
7743
/**
7744
 * xmlParseStringEntityRef:
7745
 * @ctxt:  an XML parser context
7746
 * @str:  a pointer to an index in the string
7747
 *
7748
 * parse ENTITY references declarations, but this version parses it from
7749
 * a string value.
7750
 *
7751
 * [68] EntityRef ::= '&' Name ';'
7752
 *
7753
 * [ WFC: Entity Declared ]
7754
 * In a document without any DTD, a document with only an internal DTD
7755
 * subset which contains no parameter entity references, or a document
7756
 * with "standalone='yes'", the Name given in the entity reference
7757
 * must match that in an entity declaration, except that well-formed
7758
 * documents need not declare any of the following entities: amp, lt,
7759
 * gt, apos, quot.  The declaration of a parameter entity must precede
7760
 * any reference to it.  Similarly, the declaration of a general entity
7761
 * must precede any reference to it which appears in a default value in an
7762
 * attribute-list declaration. Note that if entities are declared in the
7763
 * external subset or in external parameter entities, a non-validating
7764
 * processor is not obligated to read and process their declarations;
7765
 * for such documents, the rule that an entity must be declared is a
7766
 * well-formedness constraint only if standalone='yes'.
7767
 *
7768
 * [ WFC: Parsed Entity ]
7769
 * An entity reference must not contain the name of an unparsed entity
7770
 *
7771
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7772
 * is updated to the current location in the string.
7773
 */
7774
static xmlChar *
7775
0
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7776
0
    xmlChar *name;
7777
0
    const xmlChar *ptr;
7778
0
    xmlChar cur;
7779
7780
0
    if ((str == NULL) || (*str == NULL))
7781
0
        return(NULL);
7782
0
    ptr = *str;
7783
0
    cur = *ptr;
7784
0
    if (cur != '&')
7785
0
  return(NULL);
7786
7787
0
    ptr++;
7788
0
    name = xmlParseStringName(ctxt, &ptr);
7789
0
    if (name == NULL) {
7790
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7791
0
           "xmlParseStringEntityRef: no name\n");
7792
0
  *str = ptr;
7793
0
  return(NULL);
7794
0
    }
7795
0
    if (*ptr != ';') {
7796
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7797
0
        xmlFree(name);
7798
0
  *str = ptr;
7799
0
  return(NULL);
7800
0
    }
7801
0
    ptr++;
7802
7803
0
    *str = ptr;
7804
0
    return(name);
7805
0
}
7806
7807
/**
7808
 * xmlParsePEReference:
7809
 * @ctxt:  an XML parser context
7810
 *
7811
 * DEPRECATED: Internal function, don't use.
7812
 *
7813
 * Parse a parameter entity reference. Always consumes '%'.
7814
 *
7815
 * The entity content is handled directly by pushing its content as
7816
 * a new input stream.
7817
 *
7818
 * [69] PEReference ::= '%' Name ';'
7819
 *
7820
 * [ WFC: No Recursion ]
7821
 * A parsed entity must not contain a recursive
7822
 * reference to itself, either directly or indirectly.
7823
 *
7824
 * [ WFC: Entity Declared ]
7825
 * In a document without any DTD, a document with only an internal DTD
7826
 * subset which contains no parameter entity references, or a document
7827
 * with "standalone='yes'", ...  ... The declaration of a parameter
7828
 * entity must precede any reference to it...
7829
 *
7830
 * [ VC: Entity Declared ]
7831
 * In a document with an external subset or external parameter entities
7832
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7833
 * must precede any reference to it...
7834
 *
7835
 * [ WFC: In DTD ]
7836
 * Parameter-entity references may only appear in the DTD.
7837
 * NOTE: misleading but this is handled.
7838
 */
7839
void
7840
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7841
10.4k
{
7842
10.4k
    const xmlChar *name;
7843
10.4k
    xmlEntityPtr entity = NULL;
7844
10.4k
    xmlParserInputPtr input;
7845
7846
10.4k
    if (RAW != '%')
7847
0
        return;
7848
10.4k
    NEXT;
7849
10.4k
    name = xmlParseName(ctxt);
7850
10.4k
    if (name == NULL) {
7851
3.37k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7852
3.37k
  return;
7853
3.37k
    }
7854
7.06k
    if (RAW != ';') {
7855
211
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7856
211
        return;
7857
211
    }
7858
7859
6.85k
    NEXT;
7860
7861
    /* Must be set before xmlHandleUndeclaredEntity */
7862
6.85k
    ctxt->hasPErefs = 1;
7863
7864
    /*
7865
     * Request the entity from SAX
7866
     */
7867
6.85k
    if ((ctxt->sax != NULL) &&
7868
6.85k
  (ctxt->sax->getParameterEntity != NULL))
7869
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7870
7871
6.85k
    if (entity == NULL) {
7872
6.85k
        xmlHandleUndeclaredEntity(ctxt, name);
7873
6.85k
    } else {
7874
  /*
7875
   * Internal checking in case the entity quest barfed
7876
   */
7877
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7878
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7879
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7880
0
      "Internal: %%%s; is not a parameter entity\n",
7881
0
        name, NULL);
7882
0
  } else {
7883
0
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7884
0
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7885
0
     ((ctxt->loadsubset == 0) &&
7886
0
      (ctxt->replaceEntities == 0) &&
7887
0
      (ctxt->validate == 0))))
7888
0
    return;
7889
7890
0
            if (entity->flags & XML_ENT_EXPANDING) {
7891
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7892
0
                xmlHaltParser(ctxt);
7893
0
                return;
7894
0
            }
7895
7896
0
      input = xmlNewEntityInputStream(ctxt, entity);
7897
0
      if (xmlCtxtPushInput(ctxt, input) < 0) {
7898
0
                xmlFreeInputStream(input);
7899
0
    return;
7900
0
            }
7901
7902
0
            entity->flags |= XML_ENT_EXPANDING;
7903
7904
0
            GROW;
7905
7906
0
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7907
0
                xmlDetectEncoding(ctxt);
7908
7909
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7910
0
                    (IS_BLANK_CH(NXT(5)))) {
7911
0
                    xmlParseTextDecl(ctxt);
7912
0
                }
7913
0
            }
7914
0
  }
7915
0
    }
7916
6.85k
}
7917
7918
/**
7919
 * xmlLoadEntityContent:
7920
 * @ctxt:  an XML parser context
7921
 * @entity: an unloaded system entity
7922
 *
7923
 * Load the content of an entity.
7924
 *
7925
 * Returns 0 in case of success and -1 in case of failure
7926
 */
7927
static int
7928
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7929
0
    xmlParserInputPtr oldinput, input = NULL;
7930
0
    xmlParserInputPtr *oldinputTab;
7931
0
    const xmlChar *oldencoding;
7932
0
    xmlChar *content = NULL;
7933
0
    xmlResourceType rtype;
7934
0
    size_t length, i;
7935
0
    int oldinputNr, oldinputMax;
7936
0
    int ret = -1;
7937
0
    int res;
7938
7939
0
    if ((ctxt == NULL) || (entity == NULL) ||
7940
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7941
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7942
0
  (entity->content != NULL)) {
7943
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7944
0
              "xmlLoadEntityContent parameter error");
7945
0
        return(-1);
7946
0
    }
7947
7948
0
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7949
0
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7950
0
    else
7951
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7952
7953
0
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7954
0
                            (char *) entity->ExternalID, rtype);
7955
0
    if (input == NULL)
7956
0
        return(-1);
7957
7958
0
    oldinput = ctxt->input;
7959
0
    oldinputNr = ctxt->inputNr;
7960
0
    oldinputMax = ctxt->inputMax;
7961
0
    oldinputTab = ctxt->inputTab;
7962
0
    oldencoding = ctxt->encoding;
7963
7964
0
    ctxt->input = NULL;
7965
0
    ctxt->inputNr = 0;
7966
0
    ctxt->inputMax = 1;
7967
0
    ctxt->encoding = NULL;
7968
0
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7969
0
    if (ctxt->inputTab == NULL) {
7970
0
        xmlErrMemory(ctxt);
7971
0
        xmlFreeInputStream(input);
7972
0
        goto error;
7973
0
    }
7974
7975
0
    xmlBufResetInput(input->buf->buffer, input);
7976
7977
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
7978
0
        xmlFreeInputStream(input);
7979
0
        goto error;
7980
0
    }
7981
7982
0
    xmlDetectEncoding(ctxt);
7983
7984
    /*
7985
     * Parse a possible text declaration first
7986
     */
7987
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7988
0
  xmlParseTextDecl(ctxt);
7989
        /*
7990
         * An XML-1.0 document can't reference an entity not XML-1.0
7991
         */
7992
0
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7993
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7994
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7995
0
                           "Version mismatch between document and entity\n");
7996
0
        }
7997
0
    }
7998
7999
0
    length = input->cur - input->base;
8000
0
    xmlBufShrink(input->buf->buffer, length);
8001
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
8002
8003
0
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
8004
0
        ;
8005
8006
0
    xmlBufResetInput(input->buf->buffer, input);
8007
8008
0
    if (res < 0) {
8009
0
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
8010
0
        goto error;
8011
0
    }
8012
8013
0
    length = xmlBufUse(input->buf->buffer);
8014
0
    if (length > INT_MAX) {
8015
0
        xmlErrMemory(ctxt);
8016
0
        goto error;
8017
0
    }
8018
8019
0
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
8020
0
    if (content == NULL) {
8021
0
        xmlErrMemory(ctxt);
8022
0
        goto error;
8023
0
    }
8024
8025
0
    for (i = 0; i < length; ) {
8026
0
        int clen = length - i;
8027
0
        int c = xmlGetUTF8Char(content + i, &clen);
8028
8029
0
        if ((c < 0) || (!IS_CHAR(c))) {
8030
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8031
0
                              "xmlLoadEntityContent: invalid char value %d\n",
8032
0
                              content[i]);
8033
0
            goto error;
8034
0
        }
8035
0
        i += clen;
8036
0
    }
8037
8038
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
8039
0
    entity->content = content;
8040
0
    entity->length = length;
8041
0
    content = NULL;
8042
0
    ret = 0;
8043
8044
0
error:
8045
0
    while (ctxt->inputNr > 0)
8046
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
8047
0
    xmlFree(ctxt->inputTab);
8048
0
    xmlFree((xmlChar *) ctxt->encoding);
8049
8050
0
    ctxt->input = oldinput;
8051
0
    ctxt->inputNr = oldinputNr;
8052
0
    ctxt->inputMax = oldinputMax;
8053
0
    ctxt->inputTab = oldinputTab;
8054
0
    ctxt->encoding = oldencoding;
8055
8056
0
    xmlFree(content);
8057
8058
0
    return(ret);
8059
0
}
8060
8061
/**
8062
 * xmlParseStringPEReference:
8063
 * @ctxt:  an XML parser context
8064
 * @str:  a pointer to an index in the string
8065
 *
8066
 * Parse a parameter entity reference from a string.
8067
 *
8068
 * [69] PEReference ::= '%' Name ';'
8069
 *
8070
 * [ WFC: No Recursion ]
8071
 * A parsed entity must not contain a recursive
8072
 * reference to itself, either directly or indirectly.
8073
 *
8074
 * [ WFC: Entity Declared ]
8075
 * In a document without any DTD, a document with only an internal DTD
8076
 * subset which contains no parameter entity references, or a document
8077
 * with "standalone='yes'", ...  ... The declaration of a parameter
8078
 * entity must precede any reference to it...
8079
 *
8080
 * [ VC: Entity Declared ]
8081
 * In a document with an external subset or external parameter entities
8082
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8083
 * must precede any reference to it...
8084
 *
8085
 * [ WFC: In DTD ]
8086
 * Parameter-entity references may only appear in the DTD.
8087
 * NOTE: misleading but this is handled.
8088
 *
8089
 * Returns the referenced entity if found, or NULL otherwise.
8090
 *         str is updated to the current value of the index
8091
 */
8092
static xmlEntityPtr
8093
3.40k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8094
3.40k
    const xmlChar *ptr;
8095
3.40k
    xmlChar cur;
8096
3.40k
    xmlChar *name;
8097
3.40k
    xmlEntityPtr entity = NULL;
8098
8099
3.40k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8100
3.40k
    ptr = *str;
8101
3.40k
    cur = *ptr;
8102
3.40k
    if (cur != '%')
8103
0
        return(NULL);
8104
3.40k
    ptr++;
8105
3.40k
    name = xmlParseStringName(ctxt, &ptr);
8106
3.40k
    if (name == NULL) {
8107
1.08k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8108
1.08k
           "xmlParseStringPEReference: no name\n");
8109
1.08k
  *str = ptr;
8110
1.08k
  return(NULL);
8111
1.08k
    }
8112
2.32k
    cur = *ptr;
8113
2.32k
    if (cur != ';') {
8114
932
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8115
932
  xmlFree(name);
8116
932
  *str = ptr;
8117
932
  return(NULL);
8118
932
    }
8119
1.38k
    ptr++;
8120
8121
    /* Must be set before xmlHandleUndeclaredEntity */
8122
1.38k
    ctxt->hasPErefs = 1;
8123
8124
    /*
8125
     * Request the entity from SAX
8126
     */
8127
1.38k
    if ((ctxt->sax != NULL) &&
8128
1.38k
  (ctxt->sax->getParameterEntity != NULL))
8129
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8130
8131
1.38k
    if (entity == NULL) {
8132
1.38k
        xmlHandleUndeclaredEntity(ctxt, name);
8133
1.38k
    } else {
8134
  /*
8135
   * Internal checking in case the entity quest barfed
8136
   */
8137
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8138
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8139
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8140
0
        "%%%s; is not a parameter entity\n",
8141
0
        name, NULL);
8142
0
  }
8143
0
    }
8144
8145
1.38k
    xmlFree(name);
8146
1.38k
    *str = ptr;
8147
1.38k
    return(entity);
8148
2.32k
}
8149
8150
/**
8151
 * xmlParseDocTypeDecl:
8152
 * @ctxt:  an XML parser context
8153
 *
8154
 * DEPRECATED: Internal function, don't use.
8155
 *
8156
 * parse a DOCTYPE declaration
8157
 *
8158
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8159
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8160
 *
8161
 * [ VC: Root Element Type ]
8162
 * The Name in the document type declaration must match the element
8163
 * type of the root element.
8164
 */
8165
8166
void
8167
7.66k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8168
7.66k
    const xmlChar *name = NULL;
8169
7.66k
    xmlChar *ExternalID = NULL;
8170
7.66k
    xmlChar *URI = NULL;
8171
8172
    /*
8173
     * We know that '<!DOCTYPE' has been detected.
8174
     */
8175
7.66k
    SKIP(9);
8176
8177
7.66k
    if (SKIP_BLANKS == 0) {
8178
133
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8179
133
                       "Space required after 'DOCTYPE'\n");
8180
133
    }
8181
8182
    /*
8183
     * Parse the DOCTYPE name.
8184
     */
8185
7.66k
    name = xmlParseName(ctxt);
8186
7.66k
    if (name == NULL) {
8187
14
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8188
14
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8189
14
    }
8190
7.66k
    ctxt->intSubName = name;
8191
8192
7.66k
    SKIP_BLANKS;
8193
8194
    /*
8195
     * Check for SystemID and ExternalID
8196
     */
8197
7.66k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8198
8199
7.66k
    if ((URI != NULL) || (ExternalID != NULL)) {
8200
324
        ctxt->hasExternalSubset = 1;
8201
324
    }
8202
7.66k
    ctxt->extSubURI = URI;
8203
7.66k
    ctxt->extSubSystem = ExternalID;
8204
8205
7.66k
    SKIP_BLANKS;
8206
8207
    /*
8208
     * Create and update the internal subset.
8209
     */
8210
7.66k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8211
0
  (!ctxt->disableSAX))
8212
0
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8213
8214
7.66k
    if ((RAW != '[') && (RAW != '>')) {
8215
215
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8216
215
    }
8217
7.66k
}
8218
8219
/**
8220
 * xmlParseInternalSubset:
8221
 * @ctxt:  an XML parser context
8222
 *
8223
 * parse the internal subset declaration
8224
 *
8225
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8226
 */
8227
8228
static void
8229
7.15k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8230
    /*
8231
     * Is there any DTD definition ?
8232
     */
8233
7.15k
    if (RAW == '[') {
8234
7.15k
        int oldInputNr = ctxt->inputNr;
8235
8236
7.15k
        NEXT;
8237
  /*
8238
   * Parse the succession of Markup declarations and
8239
   * PEReferences.
8240
   * Subsequence (markupdecl | PEReference | S)*
8241
   */
8242
7.15k
  SKIP_BLANKS;
8243
144k
  while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) &&
8244
142k
               (PARSER_STOPPED(ctxt) == 0)) {
8245
8246
            /*
8247
             * Conditional sections are allowed from external entities included
8248
             * by PE References in the internal subset.
8249
             */
8250
140k
            if ((PARSER_EXTERNAL(ctxt)) &&
8251
0
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8252
0
                xmlParseConditionalSections(ctxt);
8253
140k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8254
127k
          xmlParseMarkupDecl(ctxt);
8255
127k
            } else if (RAW == '%') {
8256
10.4k
          xmlParsePEReference(ctxt);
8257
10.4k
            } else {
8258
3.47k
    xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8259
3.47k
                break;
8260
3.47k
            }
8261
137k
      SKIP_BLANKS_PE;
8262
137k
            SHRINK;
8263
137k
            GROW;
8264
137k
  }
8265
8266
7.15k
        while (ctxt->inputNr > oldInputNr)
8267
0
            xmlPopPE(ctxt);
8268
8269
7.15k
  if (RAW == ']') {
8270
2.41k
      NEXT;
8271
2.41k
      SKIP_BLANKS;
8272
2.41k
  }
8273
7.15k
    }
8274
8275
    /*
8276
     * We should be at the end of the DOCTYPE declaration.
8277
     */
8278
7.15k
    if ((ctxt->wellFormed) && (RAW != '>')) {
8279
23
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8280
23
  return;
8281
23
    }
8282
7.13k
    NEXT;
8283
7.13k
}
8284
8285
#ifdef LIBXML_SAX1_ENABLED
8286
/**
8287
 * xmlParseAttribute:
8288
 * @ctxt:  an XML parser context
8289
 * @value:  a xmlChar ** used to store the value of the attribute
8290
 *
8291
 * DEPRECATED: Internal function, don't use.
8292
 *
8293
 * parse an attribute
8294
 *
8295
 * [41] Attribute ::= Name Eq AttValue
8296
 *
8297
 * [ WFC: No External Entity References ]
8298
 * Attribute values cannot contain direct or indirect entity references
8299
 * to external entities.
8300
 *
8301
 * [ WFC: No < in Attribute Values ]
8302
 * The replacement text of any entity referred to directly or indirectly in
8303
 * an attribute value (other than "&lt;") must not contain a <.
8304
 *
8305
 * [ VC: Attribute Value Type ]
8306
 * The attribute must have been declared; the value must be of the type
8307
 * declared for it.
8308
 *
8309
 * [25] Eq ::= S? '=' S?
8310
 *
8311
 * With namespace:
8312
 *
8313
 * [NS 11] Attribute ::= QName Eq AttValue
8314
 *
8315
 * Also the case QName == xmlns:??? is handled independently as a namespace
8316
 * definition.
8317
 *
8318
 * Returns the attribute name, and the value in *value.
8319
 */
8320
8321
const xmlChar *
8322
0
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8323
0
    const xmlChar *name;
8324
0
    xmlChar *val;
8325
8326
0
    *value = NULL;
8327
0
    GROW;
8328
0
    name = xmlParseName(ctxt);
8329
0
    if (name == NULL) {
8330
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8331
0
                 "error parsing attribute name\n");
8332
0
        return(NULL);
8333
0
    }
8334
8335
    /*
8336
     * read the value
8337
     */
8338
0
    SKIP_BLANKS;
8339
0
    if (RAW == '=') {
8340
0
        NEXT;
8341
0
  SKIP_BLANKS;
8342
0
  val = xmlParseAttValue(ctxt);
8343
0
    } else {
8344
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8345
0
         "Specification mandates value for attribute %s\n", name);
8346
0
  return(name);
8347
0
    }
8348
8349
    /*
8350
     * Check that xml:lang conforms to the specification
8351
     * No more registered as an error, just generate a warning now
8352
     * since this was deprecated in XML second edition
8353
     */
8354
0
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8355
0
  if (!xmlCheckLanguageID(val)) {
8356
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8357
0
              "Malformed value for xml:lang : %s\n",
8358
0
        val, NULL);
8359
0
  }
8360
0
    }
8361
8362
    /*
8363
     * Check that xml:space conforms to the specification
8364
     */
8365
0
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8366
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8367
0
      *(ctxt->space) = 0;
8368
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8369
0
      *(ctxt->space) = 1;
8370
0
  else {
8371
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8372
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8373
0
                                 val, NULL);
8374
0
  }
8375
0
    }
8376
8377
0
    *value = val;
8378
0
    return(name);
8379
0
}
8380
8381
/**
8382
 * xmlParseStartTag:
8383
 * @ctxt:  an XML parser context
8384
 *
8385
 * DEPRECATED: Internal function, don't use.
8386
 *
8387
 * Parse a start tag. Always consumes '<'.
8388
 *
8389
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8390
 *
8391
 * [ WFC: Unique Att Spec ]
8392
 * No attribute name may appear more than once in the same start-tag or
8393
 * empty-element tag.
8394
 *
8395
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8396
 *
8397
 * [ WFC: Unique Att Spec ]
8398
 * No attribute name may appear more than once in the same start-tag or
8399
 * empty-element tag.
8400
 *
8401
 * With namespace:
8402
 *
8403
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8404
 *
8405
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8406
 *
8407
 * Returns the element name parsed
8408
 */
8409
8410
const xmlChar *
8411
0
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8412
0
    const xmlChar *name;
8413
0
    const xmlChar *attname;
8414
0
    xmlChar *attvalue;
8415
0
    const xmlChar **atts = ctxt->atts;
8416
0
    int nbatts = 0;
8417
0
    int maxatts = ctxt->maxatts;
8418
0
    int i;
8419
8420
0
    if (RAW != '<') return(NULL);
8421
0
    NEXT1;
8422
8423
0
    name = xmlParseName(ctxt);
8424
0
    if (name == NULL) {
8425
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8426
0
       "xmlParseStartTag: invalid element name\n");
8427
0
        return(NULL);
8428
0
    }
8429
8430
    /*
8431
     * Now parse the attributes, it ends up with the ending
8432
     *
8433
     * (S Attribute)* S?
8434
     */
8435
0
    SKIP_BLANKS;
8436
0
    GROW;
8437
8438
0
    while (((RAW != '>') &&
8439
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8440
0
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8441
0
  attname = xmlParseAttribute(ctxt, &attvalue);
8442
0
        if (attname == NULL)
8443
0
      break;
8444
0
        if (attvalue != NULL) {
8445
      /*
8446
       * [ WFC: Unique Att Spec ]
8447
       * No attribute name may appear more than once in the same
8448
       * start-tag or empty-element tag.
8449
       */
8450
0
      for (i = 0; i < nbatts;i += 2) {
8451
0
          if (xmlStrEqual(atts[i], attname)) {
8452
0
        xmlErrAttributeDup(ctxt, NULL, attname);
8453
0
        goto failed;
8454
0
    }
8455
0
      }
8456
      /*
8457
       * Add the pair to atts
8458
       */
8459
0
      if (nbatts + 4 > maxatts) {
8460
0
          const xmlChar **n;
8461
0
                int newSize;
8462
8463
0
                newSize = xmlGrowCapacity(maxatts, sizeof(n[0]) * 2,
8464
0
                                          11, XML_MAX_ATTRS);
8465
0
                if (newSize < 0) {
8466
0
        xmlErrMemory(ctxt);
8467
0
        goto failed;
8468
0
    }
8469
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
8470
0
                if (newSize < 2)
8471
0
                    newSize = 2;
8472
0
#endif
8473
0
          n = xmlRealloc(atts, newSize * sizeof(n[0]) * 2);
8474
0
    if (n == NULL) {
8475
0
        xmlErrMemory(ctxt);
8476
0
        goto failed;
8477
0
    }
8478
0
    atts = n;
8479
0
                maxatts = newSize * 2;
8480
0
    ctxt->atts = atts;
8481
0
    ctxt->maxatts = maxatts;
8482
0
      }
8483
8484
0
      atts[nbatts++] = attname;
8485
0
      atts[nbatts++] = attvalue;
8486
0
      atts[nbatts] = NULL;
8487
0
      atts[nbatts + 1] = NULL;
8488
8489
0
            attvalue = NULL;
8490
0
  }
8491
8492
0
failed:
8493
8494
0
        if (attvalue != NULL)
8495
0
            xmlFree(attvalue);
8496
8497
0
  GROW
8498
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8499
0
      break;
8500
0
  if (SKIP_BLANKS == 0) {
8501
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8502
0
         "attributes construct error\n");
8503
0
  }
8504
0
  SHRINK;
8505
0
        GROW;
8506
0
    }
8507
8508
    /*
8509
     * SAX: Start of Element !
8510
     */
8511
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8512
0
  (!ctxt->disableSAX)) {
8513
0
  if (nbatts > 0)
8514
0
      ctxt->sax->startElement(ctxt->userData, name, atts);
8515
0
  else
8516
0
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8517
0
    }
8518
8519
0
    if (atts != NULL) {
8520
        /* Free only the content strings */
8521
0
        for (i = 1;i < nbatts;i+=2)
8522
0
      if (atts[i] != NULL)
8523
0
         xmlFree((xmlChar *) atts[i]);
8524
0
    }
8525
0
    return(name);
8526
0
}
8527
8528
/**
8529
 * xmlParseEndTag1:
8530
 * @ctxt:  an XML parser context
8531
 * @line:  line of the start tag
8532
 * @nsNr:  number of namespaces on the start tag
8533
 *
8534
 * Parse an end tag. Always consumes '</'.
8535
 *
8536
 * [42] ETag ::= '</' Name S? '>'
8537
 *
8538
 * With namespace
8539
 *
8540
 * [NS 9] ETag ::= '</' QName S? '>'
8541
 */
8542
8543
static void
8544
0
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8545
0
    const xmlChar *name;
8546
8547
0
    GROW;
8548
0
    if ((RAW != '<') || (NXT(1) != '/')) {
8549
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8550
0
           "xmlParseEndTag: '</' not found\n");
8551
0
  return;
8552
0
    }
8553
0
    SKIP(2);
8554
8555
0
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8556
8557
    /*
8558
     * We should definitely be at the ending "S? '>'" part
8559
     */
8560
0
    GROW;
8561
0
    SKIP_BLANKS;
8562
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8563
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8564
0
    } else
8565
0
  NEXT1;
8566
8567
    /*
8568
     * [ WFC: Element Type Match ]
8569
     * The Name in an element's end-tag must match the element type in the
8570
     * start-tag.
8571
     *
8572
     */
8573
0
    if (name != (xmlChar*)1) {
8574
0
        if (name == NULL) name = BAD_CAST "unparsable";
8575
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8576
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
8577
0
                    ctxt->name, line, name);
8578
0
    }
8579
8580
    /*
8581
     * SAX: End of Tag
8582
     */
8583
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8584
0
  (!ctxt->disableSAX))
8585
0
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8586
8587
0
    namePop(ctxt);
8588
0
    spacePop(ctxt);
8589
0
}
8590
8591
/**
8592
 * xmlParseEndTag:
8593
 * @ctxt:  an XML parser context
8594
 *
8595
 * DEPRECATED: Internal function, don't use.
8596
 *
8597
 * parse an end of tag
8598
 *
8599
 * [42] ETag ::= '</' Name S? '>'
8600
 *
8601
 * With namespace
8602
 *
8603
 * [NS 9] ETag ::= '</' QName S? '>'
8604
 */
8605
8606
void
8607
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8608
0
    xmlParseEndTag1(ctxt, 0);
8609
0
}
8610
#endif /* LIBXML_SAX1_ENABLED */
8611
8612
/************************************************************************
8613
 *                  *
8614
 *          SAX 2 specific operations       *
8615
 *                  *
8616
 ************************************************************************/
8617
8618
/**
8619
 * xmlParseQNameHashed:
8620
 * @ctxt:  an XML parser context
8621
 * @prefix:  pointer to store the prefix part
8622
 *
8623
 * parse an XML Namespace QName
8624
 *
8625
 * [6]  QName  ::= (Prefix ':')? LocalPart
8626
 * [7]  Prefix  ::= NCName
8627
 * [8]  LocalPart  ::= NCName
8628
 *
8629
 * Returns the hashed Name parsed; its name field is NULL on failure
8630
 */
8631
8632
static xmlHashedString
8633
55.0M
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8634
55.0M
    xmlHashedString l, p;
8635
55.0M
    int start, isNCName = 0;
8636
8637
55.0M
    l.name = NULL;
8638
55.0M
    p.name = NULL;
8639
8640
55.0M
    GROW;
8641
55.0M
    start = CUR_PTR - BASE_PTR;
8642
8643
55.0M
    l = xmlParseNCName(ctxt);
8644
55.0M
    if (l.name != NULL) {
8645
54.8M
        isNCName = 1;
8646
54.8M
        if (CUR == ':') {
8647
19.9M
            NEXT;
8648
19.9M
            p = l;
8649
19.9M
            l = xmlParseNCName(ctxt);
8650
19.9M
        }
8651
54.8M
    }
8652
55.0M
    if ((l.name == NULL) || (CUR == ':')) {
8653
549k
        xmlChar *tmp;
8654
8655
549k
        l.name = NULL;
8656
549k
        p.name = NULL;
8657
549k
        if ((isNCName == 0) && (CUR != ':'))
8658
58.7k
            return(l);
8659
490k
        tmp = xmlParseNmtoken(ctxt);
8660
490k
        if (tmp != NULL)
8661
413k
            xmlFree(tmp);
8662
490k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8663
490k
                                CUR_PTR - (BASE_PTR + start));
8664
490k
        if (l.name == NULL) {
8665
0
            xmlErrMemory(ctxt);
8666
0
            return(l);
8667
0
        }
8668
490k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8669
490k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8670
490k
    }
8671
8672
54.9M
    *prefix = p;
8673
54.9M
    return(l);
8674
55.0M
}
8675
8676
/**
8677
 * xmlParseQName:
8678
 * @ctxt:  an XML parser context
8679
 * @prefix:  pointer to store the prefix part
8680
 *
8681
 * parse an XML Namespace QName
8682
 *
8683
 * [6]  QName  ::= (Prefix ':')? LocalPart
8684
 * [7]  Prefix  ::= NCName
8685
 * [8]  LocalPart  ::= NCName
8686
 *
8687
 * Returns the Name parsed or NULL
8688
 */
8689
8690
static const xmlChar *
8691
10.9k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8692
10.9k
    xmlHashedString n, p;
8693
8694
10.9k
    n = xmlParseQNameHashed(ctxt, &p);
8695
10.9k
    if (n.name == NULL)
8696
518
        return(NULL);
8697
10.3k
    *prefix = p.name;
8698
10.3k
    return(n.name);
8699
10.9k
}
8700
8701
/**
8702
 * xmlParseQNameAndCompare:
8703
 * @ctxt:  an XML parser context
8704
 * @name:  the localname
8705
 * @prefix:  the prefix, if any.
8706
 *
8707
 * parse an XML name and compares for match
8708
 * (specialized for endtag parsing)
8709
 *
8710
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8711
 * and the name for mismatch
8712
 */
8713
8714
static const xmlChar *
8715
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8716
5.23M
                        xmlChar const *prefix) {
8717
5.23M
    const xmlChar *cmp;
8718
5.23M
    const xmlChar *in;
8719
5.23M
    const xmlChar *ret;
8720
5.23M
    const xmlChar *prefix2;
8721
8722
5.23M
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8723
8724
5.23M
    GROW;
8725
5.23M
    in = ctxt->input->cur;
8726
8727
5.23M
    cmp = prefix;
8728
16.3M
    while (*in != 0 && *in == *cmp) {
8729
11.1M
  ++in;
8730
11.1M
  ++cmp;
8731
11.1M
    }
8732
5.23M
    if ((*cmp == 0) && (*in == ':')) {
8733
5.23M
        in++;
8734
5.23M
  cmp = name;
8735
35.9M
  while (*in != 0 && *in == *cmp) {
8736
30.7M
      ++in;
8737
30.7M
      ++cmp;
8738
30.7M
  }
8739
5.23M
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8740
      /* success */
8741
5.22M
            ctxt->input->col += in - ctxt->input->cur;
8742
5.22M
      ctxt->input->cur = in;
8743
5.22M
      return((const xmlChar*) 1);
8744
5.22M
  }
8745
5.23M
    }
8746
    /*
8747
     * all strings come from the dictionary, equality can be done directly
8748
     */
8749
10.9k
    ret = xmlParseQName (ctxt, &prefix2);
8750
10.9k
    if (ret == NULL)
8751
518
        return(NULL);
8752
10.3k
    if ((ret == name) && (prefix == prefix2))
8753
208
  return((const xmlChar*) 1);
8754
10.1k
    return ret;
8755
10.3k
}
8756
8757
/**
8758
 * xmlParseAttribute2:
8759
 * @ctxt:  an XML parser context
8760
 * @pref:  the element prefix
8761
 * @elem:  the element name
8762
 * @hprefix:  a xmlHashedString * used to store the attribute prefix
8763
 * @value:  a xmlChar ** used to store the value of the attribute
8764
 * @len:  an int * to save the length of the attribute
8765
 * @alloc:  an int * to indicate if the attribute was allocated
8766
 *
8767
 * parse an attribute in the new SAX2 framework.
8768
 *
8769
 * Returns the attribute name, and the value in *value.
8770
 */
8771
8772
static xmlHashedString
8773
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8774
                   const xmlChar * pref, const xmlChar * elem,
8775
                   xmlHashedString * hprefix, xmlChar ** value,
8776
                   int *len, int *alloc)
8777
21.7M
{
8778
21.7M
    xmlHashedString hname;
8779
21.7M
    const xmlChar *prefix, *name;
8780
21.7M
    xmlChar *val = NULL, *internal_val = NULL;
8781
21.7M
    int normalize = 0;
8782
21.7M
    int isNamespace;
8783
8784
21.7M
    *value = NULL;
8785
21.7M
    GROW;
8786
21.7M
    hname = xmlParseQNameHashed(ctxt, hprefix);
8787
21.7M
    if (hname.name == NULL) {
8788
46.3k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8789
46.3k
                       "error parsing attribute name\n");
8790
46.3k
        return(hname);
8791
46.3k
    }
8792
21.7M
    name = hname.name;
8793
21.7M
    prefix = hprefix->name;
8794
8795
    /*
8796
     * get the type if needed
8797
     */
8798
21.7M
    if (ctxt->attsSpecial != NULL) {
8799
26.4k
        int type;
8800
8801
26.4k
        type = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
8802
26.4k
                                              prefix, name));
8803
26.4k
        if (type != 0)
8804
11.0k
            normalize = 1;
8805
26.4k
    }
8806
8807
    /*
8808
     * read the value
8809
     */
8810
21.7M
    SKIP_BLANKS;
8811
21.7M
    if (RAW == '=') {
8812
21.7M
        NEXT;
8813
21.7M
        SKIP_BLANKS;
8814
21.7M
        isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8815
21.3M
                       (prefix == ctxt->str_xmlns));
8816
21.7M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize,
8817
21.7M
                                       isNamespace);
8818
21.7M
        if (val == NULL)
8819
9.06k
            goto error;
8820
21.7M
    } else {
8821
14.0k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8822
14.0k
                          "Specification mandates value for attribute %s\n",
8823
14.0k
                          name);
8824
14.0k
        goto error;
8825
14.0k
    }
8826
8827
21.6M
    if (prefix == ctxt->str_xml) {
8828
        /*
8829
         * Check that xml:lang conforms to the specification
8830
         * No more registered as an error, just generate a warning now
8831
         * since this was deprecated in XML second edition
8832
         */
8833
93.4k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8834
0
            internal_val = xmlStrndup(val, *len);
8835
0
            if (internal_val == NULL)
8836
0
                goto mem_error;
8837
0
            if (!xmlCheckLanguageID(internal_val)) {
8838
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8839
0
                              "Malformed value for xml:lang : %s\n",
8840
0
                              internal_val, NULL);
8841
0
            }
8842
0
        }
8843
8844
        /*
8845
         * Check that xml:space conforms to the specification
8846
         */
8847
93.4k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8848
73.9k
            internal_val = xmlStrndup(val, *len);
8849
73.9k
            if (internal_val == NULL)
8850
0
                goto mem_error;
8851
73.9k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8852
317
                *(ctxt->space) = 0;
8853
73.6k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8854
71.4k
                *(ctxt->space) = 1;
8855
2.19k
            else {
8856
2.19k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8857
2.19k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8858
2.19k
                              internal_val, NULL);
8859
2.19k
            }
8860
73.9k
        }
8861
93.4k
        if (internal_val) {
8862
73.9k
            xmlFree(internal_val);
8863
73.9k
        }
8864
93.4k
    }
8865
8866
21.6M
    *value = val;
8867
21.6M
    return (hname);
8868
8869
0
mem_error:
8870
0
    xmlErrMemory(ctxt);
8871
24.5k
error:
8872
24.5k
    if ((val != NULL) && (*alloc != 0))
8873
0
        xmlFree(val);
8874
24.5k
    return(hname);
8875
0
}
8876
8877
/**
8878
 * xmlAttrHashInsert:
8879
 * @ctxt: parser context
8880
 * @size: size of the hash table
8881
 * @name: attribute name
8882
 * @uri: namespace uri
8883
 * @hashValue: combined hash value of name and uri
8884
 * @aindex: attribute index (this is a multiple of 5)
8885
 *
8886
 * Inserts a new attribute into the hash table.
8887
 *
8888
 * Returns INT_MAX if no existing attribute was found, the attribute
8889
 * index if an attribute was found, -1 if a memory allocation failed.
8890
 */
8891
static int
8892
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8893
16.0M
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8894
16.0M
    xmlAttrHashBucket *table = ctxt->attrHash;
8895
16.0M
    xmlAttrHashBucket *bucket;
8896
16.0M
    unsigned hindex;
8897
8898
16.0M
    hindex = hashValue & (size - 1);
8899
16.0M
    bucket = &table[hindex];
8900
8901
18.8M
    while (bucket->index >= 0) {
8902
2.87M
        const xmlChar **atts = &ctxt->atts[bucket->index];
8903
8904
2.87M
        if (name == atts[0]) {
8905
176k
            int nsIndex = XML_PTR_TO_INT(atts[2]);
8906
8907
176k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8908
176k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8909
3.33k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8910
73.7k
                return(bucket->index);
8911
176k
        }
8912
8913
2.80M
        hindex++;
8914
2.80M
        bucket++;
8915
2.80M
        if (hindex >= size) {
8916
973k
            hindex = 0;
8917
973k
            bucket = table;
8918
973k
        }
8919
2.80M
    }
8920
8921
16.0M
    bucket->index = aindex;
8922
8923
16.0M
    return(INT_MAX);
8924
16.0M
}
8925
8926
static int
8927
xmlAttrHashInsertQName(xmlParserCtxtPtr ctxt, unsigned size,
8928
                       const xmlChar *name, const xmlChar *prefix,
8929
538
                       unsigned hashValue, int aindex) {
8930
538
    xmlAttrHashBucket *table = ctxt->attrHash;
8931
538
    xmlAttrHashBucket *bucket;
8932
538
    unsigned hindex;
8933
8934
538
    hindex = hashValue & (size - 1);
8935
538
    bucket = &table[hindex];
8936
8937
581
    while (bucket->index >= 0) {
8938
497
        const xmlChar **atts = &ctxt->atts[bucket->index];
8939
8940
497
        if ((name == atts[0]) && (prefix == atts[1]))
8941
454
            return(bucket->index);
8942
8943
43
        hindex++;
8944
43
        bucket++;
8945
43
        if (hindex >= size) {
8946
2
            hindex = 0;
8947
2
            bucket = table;
8948
2
        }
8949
43
    }
8950
8951
84
    bucket->index = aindex;
8952
8953
84
    return(INT_MAX);
8954
538
}
8955
/**
8956
 * xmlParseStartTag2:
8957
 * @ctxt:  an XML parser context
8958
 *
8959
 * Parse a start tag. Always consumes '<'.
8960
 *
8961
 * This routine is called when running SAX2 parsing
8962
 *
8963
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8964
 *
8965
 * [ WFC: Unique Att Spec ]
8966
 * No attribute name may appear more than once in the same start-tag or
8967
 * empty-element tag.
8968
 *
8969
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8970
 *
8971
 * [ WFC: Unique Att Spec ]
8972
 * No attribute name may appear more than once in the same start-tag or
8973
 * empty-element tag.
8974
 *
8975
 * With namespace:
8976
 *
8977
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8978
 *
8979
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8980
 *
8981
 * Returns the element name parsed
8982
 */
8983
8984
static const xmlChar *
8985
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8986
33.2M
                  const xmlChar **URI, int *nbNsPtr) {
8987
33.2M
    xmlHashedString hlocalname;
8988
33.2M
    xmlHashedString hprefix;
8989
33.2M
    xmlHashedString hattname;
8990
33.2M
    xmlHashedString haprefix;
8991
33.2M
    const xmlChar *localname;
8992
33.2M
    const xmlChar *prefix;
8993
33.2M
    const xmlChar *attname;
8994
33.2M
    const xmlChar *aprefix;
8995
33.2M
    const xmlChar *uri;
8996
33.2M
    xmlChar *attvalue = NULL;
8997
33.2M
    const xmlChar **atts = ctxt->atts;
8998
33.2M
    unsigned attrHashSize = 0;
8999
33.2M
    int maxatts = ctxt->maxatts;
9000
33.2M
    int nratts, nbatts, nbdef;
9001
33.2M
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
9002
33.2M
    int alloc = 0;
9003
33.2M
    int numNsErr = 0;
9004
33.2M
    int numDupErr = 0;
9005
9006
33.2M
    if (RAW != '<') return(NULL);
9007
33.2M
    NEXT1;
9008
9009
33.2M
    nbatts = 0;
9010
33.2M
    nratts = 0;
9011
33.2M
    nbdef = 0;
9012
33.2M
    nbNs = 0;
9013
33.2M
    nbTotalDef = 0;
9014
33.2M
    attval = 0;
9015
9016
33.2M
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
9017
0
        xmlErrMemory(ctxt);
9018
0
        return(NULL);
9019
0
    }
9020
9021
33.2M
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
9022
33.2M
    if (hlocalname.name == NULL) {
9023
11.8k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9024
11.8k
           "StartTag: invalid element name\n");
9025
11.8k
        return(NULL);
9026
11.8k
    }
9027
33.2M
    localname = hlocalname.name;
9028
33.2M
    prefix = hprefix.name;
9029
9030
    /*
9031
     * Now parse the attributes, it ends up with the ending
9032
     *
9033
     * (S Attribute)* S?
9034
     */
9035
33.2M
    SKIP_BLANKS;
9036
33.2M
    GROW;
9037
9038
    /*
9039
     * The ctxt->atts array will be ultimately passed to the SAX callback
9040
     * containing five xmlChar pointers for each attribute:
9041
     *
9042
     * [0] attribute name
9043
     * [1] attribute prefix
9044
     * [2] namespace URI
9045
     * [3] attribute value
9046
     * [4] end of attribute value
9047
     *
9048
     * To save memory, we reuse this array temporarily and store integers
9049
     * in these pointer variables.
9050
     *
9051
     * [0] attribute name
9052
     * [1] attribute prefix
9053
     * [2] hash value of attribute prefix, and later namespace index
9054
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
9055
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
9056
     *
9057
     * The ctxt->attallocs array contains an additional unsigned int for
9058
     * each attribute, containing the hash value of the attribute name
9059
     * and the alloc flag in bit 31.
9060
     */
9061
9062
45.1M
    while (((RAW != '>') &&
9063
24.1M
     ((RAW != '/') || (NXT(1) != '>')) &&
9064
21.7M
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
9065
21.7M
  int len = -1;
9066
9067
21.7M
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
9068
21.7M
                                          &haprefix, &attvalue, &len,
9069
21.7M
                                          &alloc);
9070
21.7M
        if (hattname.name == NULL)
9071
46.3k
      break;
9072
21.7M
        if (attvalue == NULL)
9073
24.5k
            goto next_attr;
9074
21.6M
        attname = hattname.name;
9075
21.6M
        aprefix = haprefix.name;
9076
21.6M
  if (len < 0) len = xmlStrlen(attvalue);
9077
9078
21.6M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9079
325k
            xmlHashedString huri;
9080
325k
            xmlURIPtr parsedUri;
9081
9082
325k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9083
325k
            uri = huri.name;
9084
325k
            if (uri == NULL) {
9085
0
                xmlErrMemory(ctxt);
9086
0
                goto next_attr;
9087
0
            }
9088
325k
            if (*uri != 0) {
9089
320k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9090
0
                    xmlErrMemory(ctxt);
9091
0
                    goto next_attr;
9092
0
                }
9093
320k
                if (parsedUri == NULL) {
9094
119k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9095
119k
                             "xmlns: '%s' is not a valid URI\n",
9096
119k
                                       uri, NULL, NULL);
9097
201k
                } else {
9098
201k
                    if (parsedUri->scheme == NULL) {
9099
67.7k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9100
67.7k
                                  "xmlns: URI %s is not absolute\n",
9101
67.7k
                                  uri, NULL, NULL);
9102
67.7k
                    }
9103
201k
                    xmlFreeURI(parsedUri);
9104
201k
                }
9105
320k
                if (uri == ctxt->str_xml_ns) {
9106
317
                    if (attname != ctxt->str_xml) {
9107
317
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9108
317
                     "xml namespace URI cannot be the default namespace\n",
9109
317
                                 NULL, NULL, NULL);
9110
317
                    }
9111
317
                    goto next_attr;
9112
317
                }
9113
320k
                if ((len == 29) &&
9114
7.36k
                    (xmlStrEqual(uri,
9115
7.36k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9116
327
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9117
327
                         "reuse of the xmlns namespace name is forbidden\n",
9118
327
                             NULL, NULL, NULL);
9119
327
                    goto next_attr;
9120
327
                }
9121
320k
            }
9122
9123
324k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9124
205k
                nbNs++;
9125
21.3M
        } else if (aprefix == ctxt->str_xmlns) {
9126
855k
            xmlHashedString huri;
9127
855k
            xmlURIPtr parsedUri;
9128
9129
855k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9130
855k
            uri = huri.name;
9131
855k
            if (uri == NULL) {
9132
0
                xmlErrMemory(ctxt);
9133
0
                goto next_attr;
9134
0
            }
9135
9136
855k
            if (attname == ctxt->str_xml) {
9137
419
                if (uri != ctxt->str_xml_ns) {
9138
268
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9139
268
                             "xml namespace prefix mapped to wrong URI\n",
9140
268
                             NULL, NULL, NULL);
9141
268
                }
9142
                /*
9143
                 * Do not keep a namespace definition node
9144
                 */
9145
419
                goto next_attr;
9146
419
            }
9147
854k
            if (uri == ctxt->str_xml_ns) {
9148
18
                if (attname != ctxt->str_xml) {
9149
18
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9150
18
                             "xml namespace URI mapped to wrong prefix\n",
9151
18
                             NULL, NULL, NULL);
9152
18
                }
9153
18
                goto next_attr;
9154
18
            }
9155
854k
            if (attname == ctxt->str_xmlns) {
9156
192
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9157
192
                         "redefinition of the xmlns prefix is forbidden\n",
9158
192
                         NULL, NULL, NULL);
9159
192
                goto next_attr;
9160
192
            }
9161
854k
            if ((len == 29) &&
9162
16.9k
                (xmlStrEqual(uri,
9163
16.9k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9164
131
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9165
131
                         "reuse of the xmlns namespace name is forbidden\n",
9166
131
                         NULL, NULL, NULL);
9167
131
                goto next_attr;
9168
131
            }
9169
854k
            if ((uri == NULL) || (uri[0] == 0)) {
9170
365
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9171
365
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9172
365
                              attname, NULL, NULL);
9173
365
                goto next_attr;
9174
854k
            } else {
9175
854k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9176
0
                    xmlErrMemory(ctxt);
9177
0
                    goto next_attr;
9178
0
                }
9179
854k
                if (parsedUri == NULL) {
9180
93.2k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9181
93.2k
                         "xmlns:%s: '%s' is not a valid URI\n",
9182
93.2k
                                       attname, uri, NULL);
9183
760k
                } else {
9184
760k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9185
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9186
0
                                  "xmlns:%s: URI %s is not absolute\n",
9187
0
                                  attname, uri, NULL);
9188
0
                    }
9189
760k
                    xmlFreeURI(parsedUri);
9190
760k
                }
9191
854k
            }
9192
9193
854k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9194
804k
                nbNs++;
9195
20.5M
        } else {
9196
            /*
9197
             * Populate attributes array, see above for repurposing
9198
             * of xmlChar pointers.
9199
             */
9200
20.5M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9201
560k
                int res = xmlCtxtGrowAttrs(ctxt);
9202
9203
560k
                maxatts = ctxt->maxatts;
9204
560k
                atts = ctxt->atts;
9205
9206
560k
                if (res < 0)
9207
0
                    goto next_attr;
9208
560k
            }
9209
20.5M
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9210
20.5M
                                        ((unsigned) alloc << 31);
9211
20.5M
            atts[nbatts++] = attname;
9212
20.5M
            atts[nbatts++] = aprefix;
9213
20.5M
            atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9214
20.5M
            if (alloc) {
9215
93.0k
                atts[nbatts++] = attvalue;
9216
93.0k
                attvalue += len;
9217
93.0k
                atts[nbatts++] = attvalue;
9218
20.4M
            } else {
9219
                /*
9220
                 * attvalue points into the input buffer which can be
9221
                 * reallocated. Store differences to input->base instead.
9222
                 * The pointers will be reconstructed later.
9223
                 */
9224
20.4M
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9225
20.4M
                attvalue += len;
9226
20.4M
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9227
20.4M
            }
9228
            /*
9229
             * tag if some deallocation is needed
9230
             */
9231
20.5M
            if (alloc != 0) attval = 1;
9232
20.5M
            attvalue = NULL; /* moved into atts */
9233
20.5M
        }
9234
9235
21.7M
next_attr:
9236
21.7M
        if ((attvalue != NULL) && (alloc != 0)) {
9237
101k
            xmlFree(attvalue);
9238
101k
            attvalue = NULL;
9239
101k
        }
9240
9241
21.7M
  GROW
9242
21.7M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9243
9.80M
      break;
9244
11.9M
  if (SKIP_BLANKS == 0) {
9245
32.4k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9246
32.4k
         "attributes construct error\n");
9247
32.4k
      break;
9248
32.4k
  }
9249
11.8M
        GROW;
9250
11.8M
    }
9251
9252
    /*
9253
     * Namespaces from default attributes
9254
     */
9255
33.2M
    if (ctxt->attsDefault != NULL) {
9256
117k
        xmlDefAttrsPtr defaults;
9257
9258
117k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9259
117k
  if (defaults != NULL) {
9260
828k
      for (i = 0; i < defaults->nbAttrs; i++) {
9261
742k
                xmlDefAttr *attr = &defaults->attrs[i];
9262
9263
742k
          attname = attr->name.name;
9264
742k
    aprefix = attr->prefix.name;
9265
9266
742k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9267
25.0k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9268
9269
25.0k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9270
20.9k
                        nbNs++;
9271
717k
    } else if (aprefix == ctxt->str_xmlns) {
9272
325k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9273
9274
325k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9275
325k
                                      NULL, 1) > 0)
9276
325k
                        nbNs++;
9277
392k
    } else {
9278
392k
                    if (nratts + nbTotalDef >= XML_MAX_ATTRS) {
9279
0
                        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
9280
0
                                    "Maximum number of attributes exceeded");
9281
0
                        break;
9282
0
                    }
9283
392k
                    nbTotalDef += 1;
9284
392k
                }
9285
742k
      }
9286
85.6k
  }
9287
117k
    }
9288
9289
    /*
9290
     * Resolve attribute namespaces
9291
     */
9292
53.7M
    for (i = 0; i < nbatts; i += 5) {
9293
20.5M
        attname = atts[i];
9294
20.5M
        aprefix = atts[i+1];
9295
9296
        /*
9297
  * The default namespace does not apply to attribute names.
9298
  */
9299
20.5M
  if (aprefix == NULL) {
9300
13.4M
            nsIndex = NS_INDEX_EMPTY;
9301
13.4M
        } else if (aprefix == ctxt->str_xml) {
9302
93.4k
            nsIndex = NS_INDEX_XML;
9303
6.98M
        } else {
9304
6.98M
            haprefix.name = aprefix;
9305
6.98M
            haprefix.hashValue = (size_t) atts[i+2];
9306
6.98M
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9307
9308
6.98M
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9309
385k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9310
385k
        "Namespace prefix %s for %s on %s is not defined\n",
9311
385k
        aprefix, attname, localname);
9312
385k
                nsIndex = NS_INDEX_EMPTY;
9313
385k
            }
9314
6.98M
        }
9315
9316
20.5M
        atts[i+2] = XML_INT_TO_PTR(nsIndex);
9317
20.5M
    }
9318
9319
    /*
9320
     * Maximum number of attributes including default attributes.
9321
     */
9322
33.2M
    maxAtts = nratts + nbTotalDef;
9323
9324
    /*
9325
     * Verify that attribute names are unique.
9326
     */
9327
33.2M
    if (maxAtts > 1) {
9328
5.39M
        attrHashSize = 4;
9329
8.54M
        while (attrHashSize / 2 < (unsigned) maxAtts)
9330
3.15M
            attrHashSize *= 2;
9331
9332
5.39M
        if (attrHashSize > ctxt->attrHashMax) {
9333
228k
            xmlAttrHashBucket *tmp;
9334
9335
228k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9336
228k
            if (tmp == NULL) {
9337
0
                xmlErrMemory(ctxt);
9338
0
                goto done;
9339
0
            }
9340
9341
228k
            ctxt->attrHash = tmp;
9342
228k
            ctxt->attrHashMax = attrHashSize;
9343
228k
        }
9344
9345
5.39M
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9346
9347
21.4M
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9348
16.0M
            const xmlChar *nsuri;
9349
16.0M
            unsigned hashValue, nameHashValue, uriHashValue;
9350
16.0M
            int res;
9351
9352
16.0M
            attname = atts[i];
9353
16.0M
            aprefix = atts[i+1];
9354
16.0M
            nsIndex = XML_PTR_TO_INT(atts[i+2]);
9355
            /* Hash values always have bit 31 set, see dict.c */
9356
16.0M
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9357
9358
16.0M
            if (nsIndex == NS_INDEX_EMPTY) {
9359
                /*
9360
                 * Prefix with empty namespace means an undeclared
9361
                 * prefix which was already reported above.
9362
                 */
9363
10.9M
                if (aprefix != NULL)
9364
311k
                    continue;
9365
10.6M
                nsuri = NULL;
9366
10.6M
                uriHashValue = URI_HASH_EMPTY;
9367
10.6M
            } else if (nsIndex == NS_INDEX_XML) {
9368
2.61k
                nsuri = ctxt->str_xml_ns;
9369
2.61k
                uriHashValue = URI_HASH_XML;
9370
5.03M
            } else {
9371
5.03M
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9372
5.03M
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9373
5.03M
            }
9374
9375
15.7M
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9376
15.7M
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9377
15.7M
                                    hashValue, i);
9378
15.7M
            if (res < 0)
9379
0
                continue;
9380
9381
            /*
9382
             * [ WFC: Unique Att Spec ]
9383
             * No attribute name may appear more than once in the same
9384
             * start-tag or empty-element tag.
9385
             * As extended by the Namespace in XML REC.
9386
             */
9387
15.7M
            if (res < INT_MAX) {
9388
65.0k
                if (aprefix == atts[res+1]) {
9389
64.4k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9390
64.4k
                    numDupErr += 1;
9391
64.4k
                } else {
9392
600
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9393
600
                             "Namespaced Attribute %s in '%s' redefined\n",
9394
600
                             attname, nsuri, NULL);
9395
600
                    numNsErr += 1;
9396
600
                }
9397
65.0k
            }
9398
15.7M
        }
9399
5.39M
    }
9400
9401
    /*
9402
     * Default attributes
9403
     */
9404
33.2M
    if (ctxt->attsDefault != NULL) {
9405
117k
        xmlDefAttrsPtr defaults;
9406
9407
117k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9408
117k
  if (defaults != NULL) {
9409
828k
      for (i = 0; i < defaults->nbAttrs; i++) {
9410
742k
                xmlDefAttr *attr = &defaults->attrs[i];
9411
742k
                const xmlChar *nsuri = NULL;
9412
742k
                unsigned hashValue, uriHashValue = 0;
9413
742k
                int res;
9414
9415
742k
          attname = attr->name.name;
9416
742k
    aprefix = attr->prefix.name;
9417
9418
742k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9419
25.0k
                    continue;
9420
717k
    if (aprefix == ctxt->str_xmlns)
9421
325k
                    continue;
9422
9423
392k
                if (aprefix == NULL) {
9424
215k
                    nsIndex = NS_INDEX_EMPTY;
9425
215k
                    nsuri = NULL;
9426
215k
                    uriHashValue = URI_HASH_EMPTY;
9427
215k
                } else if (aprefix == ctxt->str_xml) {
9428
44.6k
                    nsIndex = NS_INDEX_XML;
9429
44.6k
                    nsuri = ctxt->str_xml_ns;
9430
44.6k
                    uriHashValue = URI_HASH_XML;
9431
132k
                } else {
9432
132k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9433
132k
                    if ((nsIndex == INT_MAX) ||
9434
132k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9435
132k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9436
132k
                                 "Namespace prefix %s for %s on %s is not "
9437
132k
                                 "defined\n",
9438
132k
                                 aprefix, attname, localname);
9439
132k
                        nsIndex = NS_INDEX_EMPTY;
9440
132k
                        nsuri = NULL;
9441
132k
                        uriHashValue = URI_HASH_EMPTY;
9442
132k
                    } else {
9443
571
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9444
571
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9445
571
                    }
9446
132k
                }
9447
9448
                /*
9449
                 * Check whether the attribute exists
9450
                 */
9451
392k
                if (maxAtts > 1) {
9452
376k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9453
376k
                                                   uriHashValue);
9454
376k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9455
376k
                                            hashValue, nbatts);
9456
376k
                    if (res < 0)
9457
0
                        continue;
9458
376k
                    if (res < INT_MAX) {
9459
8.70k
                        if (aprefix == atts[res+1])
9460
876
                            continue;
9461
7.82k
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9462
7.82k
                                 "Namespaced Attribute %s in '%s' redefined\n",
9463
7.82k
                                 attname, nsuri, NULL);
9464
7.82k
                    }
9465
376k
                }
9466
9467
391k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9468
9469
391k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9470
6.05k
                    res = xmlCtxtGrowAttrs(ctxt);
9471
9472
6.05k
                    maxatts = ctxt->maxatts;
9473
6.05k
                    atts = ctxt->atts;
9474
9475
6.05k
                    if (res < 0) {
9476
0
                        localname = NULL;
9477
0
                        goto done;
9478
0
                    }
9479
6.05k
                }
9480
9481
391k
                atts[nbatts++] = attname;
9482
391k
                atts[nbatts++] = aprefix;
9483
391k
                atts[nbatts++] = XML_INT_TO_PTR(nsIndex);
9484
391k
                atts[nbatts++] = attr->value.name;
9485
391k
                atts[nbatts++] = attr->valueEnd;
9486
391k
                if ((ctxt->standalone == 1) && (attr->external != 0)) {
9487
0
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9488
0
                            "standalone: attribute %s on %s defaulted "
9489
0
                            "from external subset\n",
9490
0
                            attname, localname);
9491
0
                }
9492
391k
                nbdef++;
9493
391k
      }
9494
85.6k
  }
9495
117k
    }
9496
9497
    /*
9498
     * Using a single hash table for nsUri/localName pairs cannot
9499
     * detect duplicate QNames reliably. The following example will
9500
     * only result in two namespace errors.
9501
     *
9502
     * <doc xmlns:a="a" xmlns:b="a">
9503
     *   <elem a:a="" b:a="" b:a=""/>
9504
     * </doc>
9505
     *
9506
     * If we saw more than one namespace error but no duplicate QNames
9507
     * were found, we have to scan for duplicate QNames.
9508
     */
9509
33.2M
    if ((numDupErr == 0) && (numNsErr > 1)) {
9510
25
        memset(ctxt->attrHash, -1,
9511
25
               attrHashSize * sizeof(ctxt->attrHash[0]));
9512
9513
593
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9514
568
            unsigned hashValue, nameHashValue, prefixHashValue;
9515
568
            int res;
9516
9517
568
            aprefix = atts[i+1];
9518
568
            if (aprefix == NULL)
9519
30
                continue;
9520
9521
538
            attname = atts[i];
9522
            /* Hash values always have bit 31 set, see dict.c */
9523
538
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9524
538
            prefixHashValue = xmlDictComputeHash(ctxt->dict, aprefix);
9525
9526
538
            hashValue = xmlDictCombineHash(nameHashValue, prefixHashValue);
9527
538
            res = xmlAttrHashInsertQName(ctxt, attrHashSize, attname,
9528
538
                                         aprefix, hashValue, i);
9529
538
            if (res < INT_MAX)
9530
454
                xmlErrAttributeDup(ctxt, aprefix, attname);
9531
538
        }
9532
25
    }
9533
9534
    /*
9535
     * Reconstruct attribute pointers
9536
     */
9537
54.1M
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9538
        /* namespace URI */
9539
20.9M
        nsIndex = XML_PTR_TO_INT(atts[i+2]);
9540
20.9M
        if (nsIndex == INT_MAX)
9541
14.1M
            atts[i+2] = NULL;
9542
6.73M
        else if (nsIndex == INT_MAX - 1)
9543
138k
            atts[i+2] = ctxt->str_xml_ns;
9544
6.59M
        else
9545
6.59M
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9546
9547
20.9M
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9548
20.4M
            atts[i+3] = BASE_PTR + XML_PTR_TO_INT(atts[i+3]);  /* value */
9549
20.4M
            atts[i+4] = BASE_PTR + XML_PTR_TO_INT(atts[i+4]);  /* valuend */
9550
20.4M
        }
9551
20.9M
    }
9552
9553
33.2M
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9554
33.2M
    if ((prefix != NULL) && (uri == NULL)) {
9555
567k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9556
567k
           "Namespace prefix %s on %s is not defined\n",
9557
567k
     prefix, localname, NULL);
9558
567k
    }
9559
33.2M
    *pref = prefix;
9560
33.2M
    *URI = uri;
9561
9562
    /*
9563
     * SAX callback
9564
     */
9565
33.2M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9566
33.2M
  (!ctxt->disableSAX)) {
9567
33.1M
  if (nbNs > 0)
9568
435k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9569
435k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9570
435k
        nbatts / 5, nbdef, atts);
9571
32.6M
  else
9572
32.6M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9573
32.6M
                          0, NULL, nbatts / 5, nbdef, atts);
9574
33.1M
    }
9575
9576
33.2M
done:
9577
    /*
9578
     * Free allocated attribute values
9579
     */
9580
33.2M
    if (attval != 0) {
9581
377k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9582
294k
      if (ctxt->attallocs[j] & 0x80000000)
9583
93.0k
          xmlFree((xmlChar *) atts[i+3]);
9584
82.7k
    }
9585
9586
33.2M
    *nbNsPtr = nbNs;
9587
33.2M
    return(localname);
9588
33.2M
}
9589
9590
/**
9591
 * xmlParseEndTag2:
9592
 * @ctxt:  an XML parser context
9593
 * @line:  line of the start tag
9594
 * @nsNr:  number of namespaces on the start tag
9595
 *
9596
 * Parse an end tag. Always consumes '</'.
9597
 *
9598
 * [42] ETag ::= '</' Name S? '>'
9599
 *
9600
 * With namespace
9601
 *
9602
 * [NS 9] ETag ::= '</' QName S? '>'
9603
 */
9604
9605
static void
9606
7.95M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9607
7.95M
    const xmlChar *name;
9608
9609
7.95M
    GROW;
9610
7.95M
    if ((RAW != '<') || (NXT(1) != '/')) {
9611
4
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9612
4
  return;
9613
4
    }
9614
7.95M
    SKIP(2);
9615
9616
7.95M
    if (tag->prefix == NULL)
9617
2.72M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9618
5.23M
    else
9619
5.23M
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9620
9621
    /*
9622
     * We should definitely be at the ending "S? '>'" part
9623
     */
9624
7.95M
    GROW;
9625
7.95M
    SKIP_BLANKS;
9626
7.95M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9627
8.12k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9628
8.12k
    } else
9629
7.95M
  NEXT1;
9630
9631
    /*
9632
     * [ WFC: Element Type Match ]
9633
     * The Name in an element's end-tag must match the element type in the
9634
     * start-tag.
9635
     *
9636
     */
9637
7.95M
    if (name != (xmlChar*)1) {
9638
16.0k
        if (name == NULL) name = BAD_CAST "unparsable";
9639
16.0k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9640
16.0k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9641
16.0k
                    ctxt->name, tag->line, name);
9642
16.0k
    }
9643
9644
    /*
9645
     * SAX: End of Tag
9646
     */
9647
7.95M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9648
7.95M
  (!ctxt->disableSAX))
9649
7.93M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9650
7.93M
                                tag->URI);
9651
9652
7.95M
    spacePop(ctxt);
9653
7.95M
    if (tag->nsNr != 0)
9654
162k
  xmlParserNsPop(ctxt, tag->nsNr);
9655
7.95M
}
9656
9657
/**
9658
 * xmlParseCDSect:
9659
 * @ctxt:  an XML parser context
9660
 *
9661
 * DEPRECATED: Internal function, don't use.
9662
 *
9663
 * Parse escaped pure raw content. Always consumes '<!['.
9664
 *
9665
 * [18] CDSect ::= CDStart CData CDEnd
9666
 *
9667
 * [19] CDStart ::= '<![CDATA['
9668
 *
9669
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9670
 *
9671
 * [21] CDEnd ::= ']]>'
9672
 */
9673
void
9674
13.0k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9675
13.0k
    xmlChar *buf = NULL;
9676
13.0k
    int len = 0;
9677
13.0k
    int size = XML_PARSER_BUFFER_SIZE;
9678
13.0k
    int r, rl;
9679
13.0k
    int s, sl;
9680
13.0k
    int cur, l;
9681
13.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9682
13.0k
                    XML_MAX_HUGE_LENGTH :
9683
13.0k
                    XML_MAX_TEXT_LENGTH;
9684
9685
13.0k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9686
0
        return;
9687
13.0k
    SKIP(3);
9688
9689
13.0k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9690
0
        return;
9691
13.0k
    SKIP(6);
9692
9693
13.0k
    r = xmlCurrentCharRecover(ctxt, &rl);
9694
13.0k
    if (!IS_CHAR(r)) {
9695
22
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9696
22
        goto out;
9697
22
    }
9698
13.0k
    NEXTL(rl);
9699
13.0k
    s = xmlCurrentCharRecover(ctxt, &sl);
9700
13.0k
    if (!IS_CHAR(s)) {
9701
24
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9702
24
        goto out;
9703
24
    }
9704
12.9k
    NEXTL(sl);
9705
12.9k
    cur = xmlCurrentCharRecover(ctxt, &l);
9706
12.9k
    buf = xmlMalloc(size);
9707
12.9k
    if (buf == NULL) {
9708
0
  xmlErrMemory(ctxt);
9709
0
        goto out;
9710
0
    }
9711
44.4M
    while (IS_CHAR(cur) &&
9712
44.4M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9713
44.4M
  if (len + 5 >= size) {
9714
56.5k
      xmlChar *tmp;
9715
56.5k
            int newSize;
9716
9717
56.5k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9718
56.5k
            if (newSize < 0) {
9719
0
                xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9720
0
                               "CData section too big found\n");
9721
0
                goto out;
9722
0
            }
9723
56.5k
      tmp = xmlRealloc(buf, newSize);
9724
56.5k
      if (tmp == NULL) {
9725
0
    xmlErrMemory(ctxt);
9726
0
                goto out;
9727
0
      }
9728
56.5k
      buf = tmp;
9729
56.5k
      size = newSize;
9730
56.5k
  }
9731
44.4M
  COPY_BUF(buf, len, r);
9732
44.4M
  r = s;
9733
44.4M
  rl = sl;
9734
44.4M
  s = cur;
9735
44.4M
  sl = l;
9736
44.4M
  NEXTL(l);
9737
44.4M
  cur = xmlCurrentCharRecover(ctxt, &l);
9738
44.4M
    }
9739
12.9k
    buf[len] = 0;
9740
12.9k
    if (cur != '>') {
9741
335
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9742
335
                       "CData section not finished\n%.50s\n", buf);
9743
335
        goto out;
9744
335
    }
9745
12.6k
    NEXTL(l);
9746
9747
    /*
9748
     * OK the buffer is to be consumed as cdata.
9749
     */
9750
12.6k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9751
12.6k
        if ((ctxt->sax->cdataBlock != NULL) &&
9752
0
            ((ctxt->options & XML_PARSE_NOCDATA) == 0)) {
9753
0
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9754
12.6k
        } else if (ctxt->sax->characters != NULL) {
9755
12.6k
            ctxt->sax->characters(ctxt->userData, buf, len);
9756
12.6k
        }
9757
12.6k
    }
9758
9759
13.0k
out:
9760
13.0k
    xmlFree(buf);
9761
13.0k
}
9762
9763
/**
9764
 * xmlParseContentInternal:
9765
 * @ctxt:  an XML parser context
9766
 *
9767
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9768
 * unexpected EOF to the caller.
9769
 */
9770
9771
static void
9772
4.66k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9773
4.66k
    int oldNameNr = ctxt->nameNr;
9774
4.66k
    int oldSpaceNr = ctxt->spaceNr;
9775
4.66k
    int oldNodeNr = ctxt->nodeNr;
9776
9777
4.66k
    GROW;
9778
1.52M
    while ((ctxt->input->cur < ctxt->input->end) &&
9779
1.52M
     (PARSER_STOPPED(ctxt) == 0)) {
9780
1.52M
  const xmlChar *cur = ctxt->input->cur;
9781
9782
  /*
9783
   * First case : a Processing Instruction.
9784
   */
9785
1.52M
  if ((*cur == '<') && (cur[1] == '?')) {
9786
653
      xmlParsePI(ctxt);
9787
653
  }
9788
9789
  /*
9790
   * Second case : a CDSection
9791
   */
9792
  /* 2.6.0 test was *cur not RAW */
9793
1.52M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9794
0
      xmlParseCDSect(ctxt);
9795
0
  }
9796
9797
  /*
9798
   * Third case :  a comment
9799
   */
9800
1.52M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9801
0
     (NXT(2) == '-') && (NXT(3) == '-')) {
9802
0
      xmlParseComment(ctxt);
9803
0
  }
9804
9805
  /*
9806
   * Fourth case :  a sub-element.
9807
   */
9808
1.52M
  else if (*cur == '<') {
9809
1.18M
            if (NXT(1) == '/') {
9810
343k
                if (ctxt->nameNr <= oldNameNr)
9811
4.45k
                    break;
9812
338k
          xmlParseElementEnd(ctxt);
9813
846k
            } else {
9814
846k
          xmlParseElementStart(ctxt);
9815
846k
            }
9816
1.18M
  }
9817
9818
  /*
9819
   * Fifth case : a reference. If if has not been resolved,
9820
   *    parsing returns it's Name, create the node
9821
   */
9822
9823
336k
  else if (*cur == '&') {
9824
10
      xmlParseReference(ctxt);
9825
10
  }
9826
9827
  /*
9828
   * Last case, text. Note that References are handled directly.
9829
   */
9830
336k
  else {
9831
336k
      xmlParseCharDataInternal(ctxt, 0);
9832
336k
  }
9833
9834
1.52M
  SHRINK;
9835
1.52M
  GROW;
9836
1.52M
    }
9837
9838
4.66k
    if ((ctxt->nameNr > oldNameNr) &&
9839
204
        (ctxt->input->cur >= ctxt->input->end) &&
9840
200
        (ctxt->wellFormed)) {
9841
0
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9842
0
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9843
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9844
0
                "Premature end of data in tag %s line %d\n",
9845
0
                name, line, NULL);
9846
0
    }
9847
9848
    /*
9849
     * Clean up in error case
9850
     */
9851
9852
4.73k
    while (ctxt->nodeNr > oldNodeNr)
9853
66
        nodePop(ctxt);
9854
9855
15.9k
    while (ctxt->nameNr > oldNameNr) {
9856
11.3k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9857
9858
11.3k
        if (tag->nsNr != 0)
9859
12
            xmlParserNsPop(ctxt, tag->nsNr);
9860
9861
11.3k
        namePop(ctxt);
9862
11.3k
    }
9863
9864
15.9k
    while (ctxt->spaceNr > oldSpaceNr)
9865
11.3k
        spacePop(ctxt);
9866
4.66k
}
9867
9868
/**
9869
 * xmlParseContent:
9870
 * @ctxt:  an XML parser context
9871
 *
9872
 * Parse XML element content. This is useful if you're only interested
9873
 * in custom SAX callbacks. If you want a node list, use
9874
 * xmlCtxtParseContent.
9875
 */
9876
void
9877
0
xmlParseContent(xmlParserCtxtPtr ctxt) {
9878
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9879
0
        return;
9880
9881
0
    xmlCtxtInitializeLate(ctxt);
9882
9883
0
    xmlParseContentInternal(ctxt);
9884
9885
0
    xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
9886
0
}
9887
9888
/**
9889
 * xmlParseElement:
9890
 * @ctxt:  an XML parser context
9891
 *
9892
 * DEPRECATED: Internal function, don't use.
9893
 *
9894
 * parse an XML element
9895
 *
9896
 * [39] element ::= EmptyElemTag | STag content ETag
9897
 *
9898
 * [ WFC: Element Type Match ]
9899
 * The Name in an element's end-tag must match the element type in the
9900
 * start-tag.
9901
 *
9902
 */
9903
9904
void
9905
4.85k
xmlParseElement(xmlParserCtxtPtr ctxt) {
9906
4.85k
    if (xmlParseElementStart(ctxt) != 0)
9907
182
        return;
9908
9909
4.66k
    xmlParseContentInternal(ctxt);
9910
9911
4.66k
    if (ctxt->input->cur >= ctxt->input->end) {
9912
205
        if (ctxt->wellFormed) {
9913
0
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9914
0
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9915
0
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9916
0
                    "Premature end of data in tag %s line %d\n",
9917
0
                    name, line, NULL);
9918
0
        }
9919
205
        return;
9920
205
    }
9921
9922
4.46k
    xmlParseElementEnd(ctxt);
9923
4.46k
}
9924
9925
/**
9926
 * xmlParseElementStart:
9927
 * @ctxt:  an XML parser context
9928
 *
9929
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9930
 * opening tag was parsed, 1 if an empty element was parsed.
9931
 *
9932
 * Always consumes '<'.
9933
 */
9934
static int
9935
851k
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9936
851k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9937
851k
    const xmlChar *name;
9938
851k
    const xmlChar *prefix = NULL;
9939
851k
    const xmlChar *URI = NULL;
9940
851k
    xmlParserNodeInfo node_info;
9941
851k
    int line;
9942
851k
    xmlNodePtr cur;
9943
851k
    int nbNs = 0;
9944
9945
851k
    if (ctxt->nameNr > maxDepth) {
9946
4
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9947
4
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9948
4
                ctxt->nameNr);
9949
4
  xmlHaltParser(ctxt);
9950
4
  return(-1);
9951
4
    }
9952
9953
    /* Capture start position */
9954
851k
    if (ctxt->record_info) {
9955
0
        node_info.begin_pos = ctxt->input->consumed +
9956
0
                          (CUR_PTR - ctxt->input->base);
9957
0
  node_info.begin_line = ctxt->input->line;
9958
0
    }
9959
9960
851k
    if (ctxt->spaceNr == 0)
9961
4.85k
  spacePush(ctxt, -1);
9962
846k
    else if (*ctxt->space == -2)
9963
0
  spacePush(ctxt, -1);
9964
846k
    else
9965
846k
  spacePush(ctxt, *ctxt->space);
9966
9967
851k
    line = ctxt->input->line;
9968
851k
#ifdef LIBXML_SAX1_ENABLED
9969
851k
    if (ctxt->sax2)
9970
851k
#endif /* LIBXML_SAX1_ENABLED */
9971
851k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9972
0
#ifdef LIBXML_SAX1_ENABLED
9973
0
    else
9974
0
  name = xmlParseStartTag(ctxt);
9975
851k
#endif /* LIBXML_SAX1_ENABLED */
9976
851k
    if (name == NULL) {
9977
7.57k
  spacePop(ctxt);
9978
7.57k
        return(-1);
9979
7.57k
    }
9980
843k
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9981
843k
    cur = ctxt->node;
9982
9983
843k
#ifdef LIBXML_VALID_ENABLED
9984
    /*
9985
     * [ VC: Root Element Type ]
9986
     * The Name in the document type declaration must match the element
9987
     * type of the root element.
9988
     */
9989
843k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9990
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9991
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9992
843k
#endif /* LIBXML_VALID_ENABLED */
9993
9994
    /*
9995
     * Check for an Empty Element.
9996
     */
9997
843k
    if ((RAW == '/') && (NXT(1) == '>')) {
9998
440k
        SKIP(2);
9999
440k
  if (ctxt->sax2) {
10000
440k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10001
440k
    (!ctxt->disableSAX))
10002
426k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10003
440k
#ifdef LIBXML_SAX1_ENABLED
10004
440k
  } else {
10005
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10006
0
    (!ctxt->disableSAX))
10007
0
    ctxt->sax->endElement(ctxt->userData, name);
10008
0
#endif /* LIBXML_SAX1_ENABLED */
10009
0
  }
10010
440k
  namePop(ctxt);
10011
440k
  spacePop(ctxt);
10012
440k
  if (nbNs > 0)
10013
1.01k
      xmlParserNsPop(ctxt, nbNs);
10014
440k
  if (cur != NULL && ctxt->record_info) {
10015
0
            node_info.node = cur;
10016
0
            node_info.end_pos = ctxt->input->consumed +
10017
0
                                (CUR_PTR - ctxt->input->base);
10018
0
            node_info.end_line = ctxt->input->line;
10019
0
            xmlParserAddNodeInfo(ctxt, &node_info);
10020
0
  }
10021
440k
  return(1);
10022
440k
    }
10023
403k
    if (RAW == '>') {
10024
354k
        NEXT1;
10025
354k
        if (cur != NULL && ctxt->record_info) {
10026
0
            node_info.node = cur;
10027
0
            node_info.end_pos = 0;
10028
0
            node_info.end_line = 0;
10029
0
            xmlParserAddNodeInfo(ctxt, &node_info);
10030
0
        }
10031
354k
    } else {
10032
48.6k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10033
48.6k
         "Couldn't find end of Start Tag %s line %d\n",
10034
48.6k
                    name, line, NULL);
10035
10036
  /*
10037
   * end of parsing of this node.
10038
   */
10039
48.6k
  nodePop(ctxt);
10040
48.6k
  namePop(ctxt);
10041
48.6k
  spacePop(ctxt);
10042
48.6k
  if (nbNs > 0)
10043
185
      xmlParserNsPop(ctxt, nbNs);
10044
48.6k
  return(-1);
10045
48.6k
    }
10046
10047
354k
    return(0);
10048
403k
}
10049
10050
/**
10051
 * xmlParseElementEnd:
10052
 * @ctxt:  an XML parser context
10053
 *
10054
 * Parse the end of an XML element. Always consumes '</'.
10055
 */
10056
static void
10057
343k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10058
343k
    xmlNodePtr cur = ctxt->node;
10059
10060
343k
    if (ctxt->nameNr <= 0) {
10061
0
        if ((RAW == '<') && (NXT(1) == '/'))
10062
0
            SKIP(2);
10063
0
        return;
10064
0
    }
10065
10066
    /*
10067
     * parse the end of tag: '</' should be here.
10068
     */
10069
343k
    if (ctxt->sax2) {
10070
343k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10071
343k
  namePop(ctxt);
10072
343k
    }
10073
0
#ifdef LIBXML_SAX1_ENABLED
10074
0
    else
10075
0
  xmlParseEndTag1(ctxt, 0);
10076
343k
#endif /* LIBXML_SAX1_ENABLED */
10077
10078
    /*
10079
     * Capture end position
10080
     */
10081
343k
    if (cur != NULL && ctxt->record_info) {
10082
0
        xmlParserNodeInfoPtr node_info;
10083
10084
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10085
0
        if (node_info != NULL) {
10086
0
            node_info->end_pos = ctxt->input->consumed +
10087
0
                                 (CUR_PTR - ctxt->input->base);
10088
0
            node_info->end_line = ctxt->input->line;
10089
0
        }
10090
0
    }
10091
343k
}
10092
10093
/**
10094
 * xmlParseVersionNum:
10095
 * @ctxt:  an XML parser context
10096
 *
10097
 * DEPRECATED: Internal function, don't use.
10098
 *
10099
 * parse the XML version value.
10100
 *
10101
 * [26] VersionNum ::= '1.' [0-9]+
10102
 *
10103
 * In practice allow [0-9].[0-9]+ at that level
10104
 *
10105
 * Returns the string giving the XML version number, or NULL
10106
 */
10107
xmlChar *
10108
160k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10109
160k
    xmlChar *buf = NULL;
10110
160k
    int len = 0;
10111
160k
    int size = 10;
10112
160k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10113
156k
                    XML_MAX_TEXT_LENGTH :
10114
160k
                    XML_MAX_NAME_LENGTH;
10115
160k
    xmlChar cur;
10116
10117
160k
    buf = xmlMalloc(size);
10118
160k
    if (buf == NULL) {
10119
0
  xmlErrMemory(ctxt);
10120
0
  return(NULL);
10121
0
    }
10122
160k
    cur = CUR;
10123
160k
    if (!((cur >= '0') && (cur <= '9'))) {
10124
102
  xmlFree(buf);
10125
102
  return(NULL);
10126
102
    }
10127
160k
    buf[len++] = cur;
10128
160k
    NEXT;
10129
160k
    cur=CUR;
10130
160k
    if (cur != '.') {
10131
35
  xmlFree(buf);
10132
35
  return(NULL);
10133
35
    }
10134
160k
    buf[len++] = cur;
10135
160k
    NEXT;
10136
160k
    cur=CUR;
10137
333k
    while ((cur >= '0') && (cur <= '9')) {
10138
173k
  if (len + 1 >= size) {
10139
356
      xmlChar *tmp;
10140
356
            int newSize;
10141
10142
356
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10143
356
            if (newSize < 0) {
10144
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "VersionNum");
10145
0
                xmlFree(buf);
10146
0
                return(NULL);
10147
0
            }
10148
356
      tmp = xmlRealloc(buf, newSize);
10149
356
      if (tmp == NULL) {
10150
0
    xmlErrMemory(ctxt);
10151
0
          xmlFree(buf);
10152
0
    return(NULL);
10153
0
      }
10154
356
      buf = tmp;
10155
356
            size = newSize;
10156
356
  }
10157
173k
  buf[len++] = cur;
10158
173k
  NEXT;
10159
173k
  cur=CUR;
10160
173k
    }
10161
160k
    buf[len] = 0;
10162
160k
    return(buf);
10163
160k
}
10164
10165
/**
10166
 * xmlParseVersionInfo:
10167
 * @ctxt:  an XML parser context
10168
 *
10169
 * DEPRECATED: Internal function, don't use.
10170
 *
10171
 * parse the XML version.
10172
 *
10173
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10174
 *
10175
 * [25] Eq ::= S? '=' S?
10176
 *
10177
 * Returns the version string, e.g. "1.0"
10178
 */
10179
10180
xmlChar *
10181
161k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10182
161k
    xmlChar *version = NULL;
10183
10184
161k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10185
160k
  SKIP(7);
10186
160k
  SKIP_BLANKS;
10187
160k
  if (RAW != '=') {
10188
41
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10189
41
      return(NULL);
10190
41
        }
10191
160k
  NEXT;
10192
160k
  SKIP_BLANKS;
10193
160k
  if (RAW == '"') {
10194
160k
      NEXT;
10195
160k
      version = xmlParseVersionNum(ctxt);
10196
160k
      if (RAW != '"') {
10197
160
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10198
160
      } else
10199
160k
          NEXT;
10200
160k
  } else if (RAW == '\''){
10201
403
      NEXT;
10202
403
      version = xmlParseVersionNum(ctxt);
10203
403
      if (RAW != '\'') {
10204
16
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10205
16
      } else
10206
387
          NEXT;
10207
403
  } else {
10208
43
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10209
43
  }
10210
160k
    }
10211
161k
    return(version);
10212
161k
}
10213
10214
/**
10215
 * xmlParseEncName:
10216
 * @ctxt:  an XML parser context
10217
 *
10218
 * DEPRECATED: Internal function, don't use.
10219
 *
10220
 * parse the XML encoding name
10221
 *
10222
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10223
 *
10224
 * Returns the encoding name value or NULL
10225
 */
10226
xmlChar *
10227
134k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10228
134k
    xmlChar *buf = NULL;
10229
134k
    int len = 0;
10230
134k
    int size = 10;
10231
134k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10232
129k
                    XML_MAX_TEXT_LENGTH :
10233
134k
                    XML_MAX_NAME_LENGTH;
10234
134k
    xmlChar cur;
10235
10236
134k
    cur = CUR;
10237
134k
    if (((cur >= 'a') && (cur <= 'z')) ||
10238
134k
        ((cur >= 'A') && (cur <= 'Z'))) {
10239
134k
  buf = xmlMalloc(size);
10240
134k
  if (buf == NULL) {
10241
0
      xmlErrMemory(ctxt);
10242
0
      return(NULL);
10243
0
  }
10244
10245
134k
  buf[len++] = cur;
10246
134k
  NEXT;
10247
134k
  cur = CUR;
10248
683k
  while (((cur >= 'a') && (cur <= 'z')) ||
10249
676k
         ((cur >= 'A') && (cur <= 'Z')) ||
10250
411k
         ((cur >= '0') && (cur <= '9')) ||
10251
272k
         (cur == '.') || (cur == '_') ||
10252
548k
         (cur == '-')) {
10253
548k
      if (len + 1 >= size) {
10254
1.21k
          xmlChar *tmp;
10255
1.21k
                int newSize;
10256
10257
1.21k
                newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10258
1.21k
                if (newSize < 0) {
10259
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10260
0
                    xmlFree(buf);
10261
0
                    return(NULL);
10262
0
                }
10263
1.21k
    tmp = xmlRealloc(buf, newSize);
10264
1.21k
    if (tmp == NULL) {
10265
0
        xmlErrMemory(ctxt);
10266
0
        xmlFree(buf);
10267
0
        return(NULL);
10268
0
    }
10269
1.21k
    buf = tmp;
10270
1.21k
                size = newSize;
10271
1.21k
      }
10272
548k
      buf[len++] = cur;
10273
548k
      NEXT;
10274
548k
      cur = CUR;
10275
548k
        }
10276
134k
  buf[len] = 0;
10277
134k
    } else {
10278
38
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10279
38
    }
10280
134k
    return(buf);
10281
134k
}
10282
10283
/**
10284
 * xmlParseEncodingDecl:
10285
 * @ctxt:  an XML parser context
10286
 *
10287
 * DEPRECATED: Internal function, don't use.
10288
 *
10289
 * parse the XML encoding declaration
10290
 *
10291
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10292
 *
10293
 * this setups the conversion filters.
10294
 *
10295
 * Returns the encoding value or NULL
10296
 */
10297
10298
const xmlChar *
10299
136k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10300
136k
    xmlChar *encoding = NULL;
10301
10302
136k
    SKIP_BLANKS;
10303
136k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10304
2.27k
        return(NULL);
10305
10306
134k
    SKIP(8);
10307
134k
    SKIP_BLANKS;
10308
134k
    if (RAW != '=') {
10309
32
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10310
32
        return(NULL);
10311
32
    }
10312
134k
    NEXT;
10313
134k
    SKIP_BLANKS;
10314
134k
    if (RAW == '"') {
10315
134k
        NEXT;
10316
134k
        encoding = xmlParseEncName(ctxt);
10317
134k
        if (RAW != '"') {
10318
162
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10319
162
            xmlFree((xmlChar *) encoding);
10320
162
            return(NULL);
10321
162
        } else
10322
134k
            NEXT;
10323
134k
    } else if (RAW == '\''){
10324
45
        NEXT;
10325
45
        encoding = xmlParseEncName(ctxt);
10326
45
        if (RAW != '\'') {
10327
17
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10328
17
            xmlFree((xmlChar *) encoding);
10329
17
            return(NULL);
10330
17
        } else
10331
28
            NEXT;
10332
45
    } else {
10333
15
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10334
15
    }
10335
10336
134k
    if (encoding == NULL)
10337
26
        return(NULL);
10338
10339
134k
    xmlSetDeclaredEncoding(ctxt, encoding);
10340
10341
134k
    return(ctxt->encoding);
10342
134k
}
10343
10344
/**
10345
 * xmlParseSDDecl:
10346
 * @ctxt:  an XML parser context
10347
 *
10348
 * DEPRECATED: Internal function, don't use.
10349
 *
10350
 * parse the XML standalone declaration
10351
 *
10352
 * [32] SDDecl ::= S 'standalone' Eq
10353
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10354
 *
10355
 * [ VC: Standalone Document Declaration ]
10356
 * TODO The standalone document declaration must have the value "no"
10357
 * if any external markup declarations contain declarations of:
10358
 *  - attributes with default values, if elements to which these
10359
 *    attributes apply appear in the document without specifications
10360
 *    of values for these attributes, or
10361
 *  - entities (other than amp, lt, gt, apos, quot), if references
10362
 *    to those entities appear in the document, or
10363
 *  - attributes with values subject to normalization, where the
10364
 *    attribute appears in the document with a value which will change
10365
 *    as a result of normalization, or
10366
 *  - element types with element content, if white space occurs directly
10367
 *    within any instance of those types.
10368
 *
10369
 * Returns:
10370
 *   1 if standalone="yes"
10371
 *   0 if standalone="no"
10372
 *  -2 if standalone attribute is missing or invalid
10373
 *    (A standalone value of -2 means that the XML declaration was found,
10374
 *     but no value was specified for the standalone attribute).
10375
 */
10376
10377
int
10378
95.7k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10379
95.7k
    int standalone = -2;
10380
10381
95.7k
    SKIP_BLANKS;
10382
95.7k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10383
93.8k
  SKIP(10);
10384
93.8k
        SKIP_BLANKS;
10385
93.8k
  if (RAW != '=') {
10386
14
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10387
14
      return(standalone);
10388
14
        }
10389
93.8k
  NEXT;
10390
93.8k
  SKIP_BLANKS;
10391
93.8k
        if (RAW == '\''){
10392
46
      NEXT;
10393
46
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10394
29
          standalone = 0;
10395
29
                SKIP(2);
10396
29
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10397
7
                 (NXT(2) == 's')) {
10398
4
          standalone = 1;
10399
4
    SKIP(3);
10400
13
            } else {
10401
13
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10402
13
      }
10403
46
      if (RAW != '\'') {
10404
17
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10405
17
      } else
10406
29
          NEXT;
10407
93.8k
  } else if (RAW == '"'){
10408
93.7k
      NEXT;
10409
93.7k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10410
411
          standalone = 0;
10411
411
    SKIP(2);
10412
93.3k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10413
93.3k
                 (NXT(2) == 's')) {
10414
93.3k
          standalone = 1;
10415
93.3k
                SKIP(3);
10416
93.3k
            } else {
10417
44
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10418
44
      }
10419
93.7k
      if (RAW != '"') {
10420
56
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10421
56
      } else
10422
93.7k
          NEXT;
10423
93.7k
  } else {
10424
11
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10425
11
        }
10426
93.8k
    }
10427
95.7k
    return(standalone);
10428
95.7k
}
10429
10430
/**
10431
 * xmlParseXMLDecl:
10432
 * @ctxt:  an XML parser context
10433
 *
10434
 * DEPRECATED: Internal function, don't use.
10435
 *
10436
 * parse an XML declaration header
10437
 *
10438
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10439
 */
10440
10441
void
10442
161k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10443
161k
    xmlChar *version;
10444
10445
    /*
10446
     * This value for standalone indicates that the document has an
10447
     * XML declaration but it does not have a standalone attribute.
10448
     * It will be overwritten later if a standalone attribute is found.
10449
     */
10450
10451
161k
    ctxt->standalone = -2;
10452
10453
    /*
10454
     * We know that '<?xml' is here.
10455
     */
10456
161k
    SKIP(5);
10457
10458
161k
    if (!IS_BLANK_CH(RAW)) {
10459
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10460
0
                 "Blank needed after '<?xml'\n");
10461
0
    }
10462
161k
    SKIP_BLANKS;
10463
10464
    /*
10465
     * We must have the VersionInfo here.
10466
     */
10467
161k
    version = xmlParseVersionInfo(ctxt);
10468
161k
    if (version == NULL) {
10469
931
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10470
160k
    } else {
10471
160k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10472
      /*
10473
       * Changed here for XML-1.0 5th edition
10474
       */
10475
5.29k
      if (ctxt->options & XML_PARSE_OLD10) {
10476
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10477
0
                "Unsupported version '%s'\n",
10478
0
                version);
10479
5.29k
      } else {
10480
5.29k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10481
5.13k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10482
5.13k
                      "Unsupported version '%s'\n",
10483
5.13k
          version, NULL);
10484
5.13k
    } else {
10485
158
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10486
158
              "Unsupported version '%s'\n",
10487
158
              version);
10488
158
    }
10489
5.29k
      }
10490
5.29k
  }
10491
160k
  if (ctxt->version != NULL)
10492
0
      xmlFree((void *) ctxt->version);
10493
160k
  ctxt->version = version;
10494
160k
    }
10495
10496
    /*
10497
     * We may have the encoding declaration
10498
     */
10499
161k
    if (!IS_BLANK_CH(RAW)) {
10500
25.5k
        if ((RAW == '?') && (NXT(1) == '>')) {
10501
24.5k
      SKIP(2);
10502
24.5k
      return;
10503
24.5k
  }
10504
1.02k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10505
1.02k
    }
10506
136k
    xmlParseEncodingDecl(ctxt);
10507
10508
    /*
10509
     * We may have the standalone status.
10510
     */
10511
136k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10512
41.1k
        if ((RAW == '?') && (NXT(1) == '>')) {
10513
41.1k
      SKIP(2);
10514
41.1k
      return;
10515
41.1k
  }
10516
44
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10517
44
    }
10518
10519
    /*
10520
     * We can grow the input buffer freely at that point
10521
     */
10522
95.7k
    GROW;
10523
10524
95.7k
    SKIP_BLANKS;
10525
95.7k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10526
10527
95.7k
    SKIP_BLANKS;
10528
95.7k
    if ((RAW == '?') && (NXT(1) == '>')) {
10529
94.2k
        SKIP(2);
10530
94.2k
    } else if (RAW == '>') {
10531
        /* Deprecated old WD ... */
10532
54
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10533
54
  NEXT;
10534
1.47k
    } else {
10535
1.47k
        int c;
10536
10537
1.47k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10538
641k
        while ((PARSER_STOPPED(ctxt) == 0) &&
10539
641k
               ((c = CUR) != 0)) {
10540
641k
            NEXT;
10541
641k
            if (c == '>')
10542
963
                break;
10543
641k
        }
10544
1.47k
    }
10545
95.7k
}
10546
10547
/**
10548
 * xmlCtxtGetVersion:
10549
 * @ctxt:  parser context
10550
 *
10551
 * Available since 2.14.0.
10552
 *
10553
 * Returns the version from the XML declaration.
10554
 */
10555
const xmlChar *
10556
0
xmlCtxtGetVersion(xmlParserCtxtPtr ctxt) {
10557
0
    if (ctxt == NULL)
10558
0
        return(NULL);
10559
10560
0
    return(ctxt->version);
10561
0
}
10562
10563
/**
10564
 * xmlCtxtGetStandalone:
10565
 * @ctxt:  parser context
10566
 *
10567
 * Available since 2.14.0.
10568
 *
10569
 * Returns the value from the standalone document declaration.
10570
 */
10571
int
10572
0
xmlCtxtGetStandalone(xmlParserCtxtPtr ctxt) {
10573
0
    if (ctxt == NULL)
10574
0
        return(0);
10575
10576
0
    return(ctxt->standalone);
10577
0
}
10578
10579
/**
10580
 * xmlParseMisc:
10581
 * @ctxt:  an XML parser context
10582
 *
10583
 * DEPRECATED: Internal function, don't use.
10584
 *
10585
 * parse an XML Misc* optional field.
10586
 *
10587
 * [27] Misc ::= Comment | PI |  S
10588
 */
10589
10590
void
10591
9.70k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10592
9.77k
    while (PARSER_STOPPED(ctxt) == 0) {
10593
9.76k
        SKIP_BLANKS;
10594
9.76k
        GROW;
10595
9.76k
        if ((RAW == '<') && (NXT(1) == '?')) {
10596
66
      xmlParsePI(ctxt);
10597
9.70k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10598
0
      xmlParseComment(ctxt);
10599
9.70k
        } else {
10600
9.70k
            break;
10601
9.70k
        }
10602
9.76k
    }
10603
9.70k
}
10604
10605
static void
10606
142k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10607
142k
    xmlDocPtr doc;
10608
10609
    /*
10610
     * SAX: end of the document processing.
10611
     */
10612
142k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10613
4.85k
        ctxt->sax->endDocument(ctxt->userData);
10614
10615
142k
    doc = ctxt->myDoc;
10616
142k
    if (doc != NULL) {
10617
4.85k
        if (ctxt->wellFormed) {
10618
4.54k
            doc->properties |= XML_DOC_WELLFORMED;
10619
4.54k
            if (ctxt->valid)
10620
4.54k
                doc->properties |= XML_DOC_DTDVALID;
10621
4.54k
            if (ctxt->nsWellFormed)
10622
4.54k
                doc->properties |= XML_DOC_NSVALID;
10623
4.54k
        }
10624
10625
4.85k
        if (ctxt->options & XML_PARSE_OLD10)
10626
0
            doc->properties |= XML_DOC_OLD10;
10627
10628
        /*
10629
         * Remove locally kept entity definitions if the tree was not built
10630
         */
10631
4.85k
  if (xmlStrEqual(doc->version, SAX_COMPAT_MODE)) {
10632
11
            xmlFreeDoc(doc);
10633
11
            ctxt->myDoc = NULL;
10634
11
        }
10635
4.85k
    }
10636
142k
}
10637
10638
/**
10639
 * xmlParseDocument:
10640
 * @ctxt:  an XML parser context
10641
 *
10642
 * Parse an XML document and invoke the SAX handlers. This is useful
10643
 * if you're only interested in custom SAX callbacks. If you want a
10644
 * document tree, use xmlCtxtParseDocument.
10645
 *
10646
 * Returns 0, -1 in case of error.
10647
 */
10648
10649
int
10650
4.89k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10651
4.89k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10652
0
        return(-1);
10653
10654
4.89k
    GROW;
10655
10656
    /*
10657
     * SAX: detecting the level.
10658
     */
10659
4.89k
    xmlCtxtInitializeLate(ctxt);
10660
10661
4.89k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10662
4.89k
        ctxt->sax->setDocumentLocator(ctxt->userData,
10663
4.89k
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10664
4.89k
    }
10665
10666
4.89k
    xmlDetectEncoding(ctxt);
10667
10668
4.89k
    if (CUR == 0) {
10669
40
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10670
40
  return(-1);
10671
40
    }
10672
10673
4.85k
    GROW;
10674
4.85k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10675
10676
  /*
10677
   * Note that we will switch encoding on the fly.
10678
   */
10679
4.60k
  xmlParseXMLDecl(ctxt);
10680
4.60k
  SKIP_BLANKS;
10681
4.60k
    } else {
10682
249
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10683
249
        if (ctxt->version == NULL) {
10684
0
            xmlErrMemory(ctxt);
10685
0
            return(-1);
10686
0
        }
10687
249
    }
10688
4.85k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10689
4.84k
        ctxt->sax->startDocument(ctxt->userData);
10690
4.85k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10691
4.84k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10692
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10693
0
    }
10694
10695
    /*
10696
     * The Misc part of the Prolog
10697
     */
10698
4.85k
    xmlParseMisc(ctxt);
10699
10700
    /*
10701
     * Then possibly doc type declaration(s) and more Misc
10702
     * (doctypedecl Misc*)?
10703
     */
10704
4.85k
    GROW;
10705
4.85k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10706
10707
0
  ctxt->inSubset = 1;
10708
0
  xmlParseDocTypeDecl(ctxt);
10709
0
  if (RAW == '[') {
10710
0
      xmlParseInternalSubset(ctxt);
10711
0
  } else if (RAW == '>') {
10712
0
            NEXT;
10713
0
        }
10714
10715
  /*
10716
   * Create and update the external subset.
10717
   */
10718
0
  ctxt->inSubset = 2;
10719
0
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10720
0
      (!ctxt->disableSAX))
10721
0
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10722
0
                                ctxt->extSubSystem, ctxt->extSubURI);
10723
0
  ctxt->inSubset = 0;
10724
10725
0
        xmlCleanSpecialAttr(ctxt);
10726
10727
0
  xmlParseMisc(ctxt);
10728
0
    }
10729
10730
    /*
10731
     * Time to start parsing the tree itself
10732
     */
10733
4.85k
    GROW;
10734
4.85k
    if (RAW != '<') {
10735
4
        if (ctxt->wellFormed)
10736
1
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10737
1
                           "Start tag expected, '<' not found\n");
10738
4.85k
    } else {
10739
4.85k
  xmlParseElement(ctxt);
10740
10741
  /*
10742
   * The Misc part at the end
10743
   */
10744
4.85k
  xmlParseMisc(ctxt);
10745
10746
4.85k
        xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
10747
4.85k
    }
10748
10749
4.85k
    ctxt->instate = XML_PARSER_EOF;
10750
4.85k
    xmlFinishDocument(ctxt);
10751
10752
4.85k
    if (! ctxt->wellFormed) {
10753
308
  ctxt->valid = 0;
10754
308
  return(-1);
10755
308
    }
10756
10757
4.54k
    return(0);
10758
4.85k
}
10759
10760
/**
10761
 * xmlParseExtParsedEnt:
10762
 * @ctxt:  an XML parser context
10763
 *
10764
 * DEPRECATED: Internal function, don't use.
10765
 *
10766
 * parse a general parsed entity
10767
 * An external general parsed entity is well-formed if it matches the
10768
 * production labeled extParsedEnt.
10769
 *
10770
 * [78] extParsedEnt ::= TextDecl? content
10771
 *
10772
 * Returns 0, -1 in case of error. the parser context is augmented
10773
 *                as a result of the parsing.
10774
 */
10775
10776
int
10777
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10778
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10779
0
        return(-1);
10780
10781
0
    xmlCtxtInitializeLate(ctxt);
10782
10783
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10784
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10785
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10786
0
    }
10787
10788
0
    xmlDetectEncoding(ctxt);
10789
10790
0
    if (CUR == 0) {
10791
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10792
0
    }
10793
10794
    /*
10795
     * Check for the XMLDecl in the Prolog.
10796
     */
10797
0
    GROW;
10798
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10799
10800
  /*
10801
   * Note that we will switch encoding on the fly.
10802
   */
10803
0
  xmlParseXMLDecl(ctxt);
10804
0
  SKIP_BLANKS;
10805
0
    } else {
10806
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10807
0
    }
10808
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10809
0
        ctxt->sax->startDocument(ctxt->userData);
10810
10811
    /*
10812
     * Doing validity checking on chunk doesn't make sense
10813
     */
10814
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10815
0
    ctxt->validate = 0;
10816
0
    ctxt->depth = 0;
10817
10818
0
    xmlParseContentInternal(ctxt);
10819
10820
0
    if (ctxt->input->cur < ctxt->input->end)
10821
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10822
10823
    /*
10824
     * SAX: end of the document processing.
10825
     */
10826
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10827
0
        ctxt->sax->endDocument(ctxt->userData);
10828
10829
0
    if (! ctxt->wellFormed) return(-1);
10830
0
    return(0);
10831
0
}
10832
10833
#ifdef LIBXML_PUSH_ENABLED
10834
/************************************************************************
10835
 *                  *
10836
 *    Progressive parsing interfaces        *
10837
 *                  *
10838
 ************************************************************************/
10839
10840
/**
10841
 * xmlParseLookupChar:
10842
 * @ctxt:  an XML parser context
10843
 * @c:  character
10844
 *
10845
 * Check whether the input buffer contains a character.
10846
 */
10847
static int
10848
3.86M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10849
3.86M
    const xmlChar *cur;
10850
10851
3.86M
    if (ctxt->checkIndex == 0) {
10852
3.86M
        cur = ctxt->input->cur + 1;
10853
3.86M
    } else {
10854
551
        cur = ctxt->input->cur + ctxt->checkIndex;
10855
551
    }
10856
10857
3.86M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10858
716
        size_t index = ctxt->input->end - ctxt->input->cur;
10859
10860
716
        if (index > LONG_MAX) {
10861
0
            ctxt->checkIndex = 0;
10862
0
            return(1);
10863
0
        }
10864
716
        ctxt->checkIndex = index;
10865
716
        return(0);
10866
3.86M
    } else {
10867
3.86M
        ctxt->checkIndex = 0;
10868
3.86M
        return(1);
10869
3.86M
    }
10870
3.86M
}
10871
10872
/**
10873
 * xmlParseLookupString:
10874
 * @ctxt:  an XML parser context
10875
 * @startDelta: delta to apply at the start
10876
 * @str:  string
10877
 * @strLen:  length of string
10878
 *
10879
 * Check whether the input buffer contains a string.
10880
 */
10881
static const xmlChar *
10882
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10883
88.1k
                     const char *str, size_t strLen) {
10884
88.1k
    const xmlChar *cur, *term;
10885
10886
88.1k
    if (ctxt->checkIndex == 0) {
10887
87.2k
        cur = ctxt->input->cur + startDelta;
10888
87.2k
    } else {
10889
851
        cur = ctxt->input->cur + ctxt->checkIndex;
10890
851
    }
10891
10892
88.1k
    term = BAD_CAST strstr((const char *) cur, str);
10893
88.1k
    if (term == NULL) {
10894
1.16k
        const xmlChar *end = ctxt->input->end;
10895
1.16k
        size_t index;
10896
10897
        /* Rescan (strLen - 1) characters. */
10898
1.16k
        if ((size_t) (end - cur) < strLen)
10899
26
            end = cur;
10900
1.14k
        else
10901
1.14k
            end -= strLen - 1;
10902
1.16k
        index = end - ctxt->input->cur;
10903
1.16k
        if (index > LONG_MAX) {
10904
0
            ctxt->checkIndex = 0;
10905
0
            return(ctxt->input->end - strLen);
10906
0
        }
10907
1.16k
        ctxt->checkIndex = index;
10908
86.9k
    } else {
10909
86.9k
        ctxt->checkIndex = 0;
10910
86.9k
    }
10911
10912
88.1k
    return(term);
10913
88.1k
}
10914
10915
/**
10916
 * xmlParseLookupCharData:
10917
 * @ctxt:  an XML parser context
10918
 *
10919
 * Check whether the input buffer contains terminated char data.
10920
 */
10921
static int
10922
72.4k
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10923
72.4k
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10924
72.4k
    const xmlChar *end = ctxt->input->end;
10925
72.4k
    size_t index;
10926
10927
896k
    while (cur < end) {
10928
894k
        if ((*cur == '<') || (*cur == '&')) {
10929
70.2k
            ctxt->checkIndex = 0;
10930
70.2k
            return(1);
10931
70.2k
        }
10932
824k
        cur++;
10933
824k
    }
10934
10935
2.21k
    index = cur - ctxt->input->cur;
10936
2.21k
    if (index > LONG_MAX) {
10937
0
        ctxt->checkIndex = 0;
10938
0
        return(1);
10939
0
    }
10940
2.21k
    ctxt->checkIndex = index;
10941
2.21k
    return(0);
10942
2.21k
}
10943
10944
/**
10945
 * xmlParseLookupGt:
10946
 * @ctxt:  an XML parser context
10947
 *
10948
 * Check whether there's enough data in the input buffer to finish parsing
10949
 * a start tag. This has to take quotes into account.
10950
 */
10951
static int
10952
18.5M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10953
18.5M
    const xmlChar *cur;
10954
18.5M
    const xmlChar *end = ctxt->input->end;
10955
18.5M
    int state = ctxt->endCheckState;
10956
18.5M
    size_t index;
10957
10958
18.5M
    if (ctxt->checkIndex == 0)
10959
18.5M
        cur = ctxt->input->cur + 1;
10960
6.96k
    else
10961
6.96k
        cur = ctxt->input->cur + ctxt->checkIndex;
10962
10963
458M
    while (cur < end) {
10964
458M
        if (state) {
10965
120M
            if (*cur == state)
10966
10.8M
                state = 0;
10967
338M
        } else if (*cur == '\'' || *cur == '"') {
10968
10.8M
            state = *cur;
10969
327M
        } else if (*cur == '>') {
10970
18.5M
            ctxt->checkIndex = 0;
10971
18.5M
            ctxt->endCheckState = 0;
10972
18.5M
            return(1);
10973
18.5M
        }
10974
440M
        cur++;
10975
440M
    }
10976
10977
10.5k
    index = cur - ctxt->input->cur;
10978
10.5k
    if (index > LONG_MAX) {
10979
0
        ctxt->checkIndex = 0;
10980
0
        ctxt->endCheckState = 0;
10981
0
        return(1);
10982
0
    }
10983
10.5k
    ctxt->checkIndex = index;
10984
10.5k
    ctxt->endCheckState = state;
10985
10.5k
    return(0);
10986
10.5k
}
10987
10988
/**
10989
 * xmlParseLookupInternalSubset:
10990
 * @ctxt:  an XML parser context
10991
 *
10992
 * Check whether there's enough data in the input buffer to finish parsing
10993
 * the internal subset.
10994
 */
10995
static int
10996
829
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10997
    /*
10998
     * Sorry, but progressive parsing of the internal subset is not
10999
     * supported. We first check that the full content of the internal
11000
     * subset is available and parsing is launched only at that point.
11001
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11002
     * not in a ']]>' sequence which are conditional sections.
11003
     */
11004
829
    const xmlChar *cur, *start;
11005
829
    const xmlChar *end = ctxt->input->end;
11006
829
    int state = ctxt->endCheckState;
11007
829
    size_t index;
11008
11009
829
    if (ctxt->checkIndex == 0) {
11010
613
        cur = ctxt->input->cur + 1;
11011
613
    } else {
11012
216
        cur = ctxt->input->cur + ctxt->checkIndex;
11013
216
    }
11014
829
    start = cur;
11015
11016
16.0M
    while (cur < end) {
11017
16.0M
        if (state == '-') {
11018
2.49M
            if ((*cur == '-') &&
11019
155k
                (cur[1] == '-') &&
11020
79.3k
                (cur[2] == '>')) {
11021
36.0k
                state = 0;
11022
36.0k
                cur += 3;
11023
36.0k
                start = cur;
11024
36.0k
                continue;
11025
36.0k
            }
11026
2.49M
        }
11027
13.5M
        else if (state == ']') {
11028
11.2k
            if (*cur == '>') {
11029
128
                ctxt->checkIndex = 0;
11030
128
                ctxt->endCheckState = 0;
11031
128
                return(1);
11032
128
            }
11033
11.1k
            if (IS_BLANK_CH(*cur)) {
11034
4.62k
                state = ' ';
11035
6.51k
            } else if (*cur != ']') {
11036
3.74k
                state = 0;
11037
3.74k
                start = cur;
11038
3.74k
                continue;
11039
3.74k
            }
11040
11.1k
        }
11041
13.5M
        else if (state == ' ') {
11042
14.3k
            if (*cur == '>') {
11043
4
                ctxt->checkIndex = 0;
11044
4
                ctxt->endCheckState = 0;
11045
4
                return(1);
11046
4
            }
11047
14.3k
            if (!IS_BLANK_CH(*cur)) {
11048
4.62k
                state = 0;
11049
4.62k
                start = cur;
11050
4.62k
                continue;
11051
4.62k
            }
11052
14.3k
        }
11053
13.4M
        else if (state != 0) {
11054
9.30M
            if (*cur == state) {
11055
83.1k
                state = 0;
11056
83.1k
                start = cur + 1;
11057
83.1k
            }
11058
9.30M
        }
11059
4.19M
        else if (*cur == '<') {
11060
172k
            if ((cur[1] == '!') &&
11061
85.2k
                (cur[2] == '-') &&
11062
36.2k
                (cur[3] == '-')) {
11063
36.1k
                state = '-';
11064
36.1k
                cur += 4;
11065
                /* Don't treat <!--> as comment */
11066
36.1k
                start = cur;
11067
36.1k
                continue;
11068
36.1k
            }
11069
172k
        }
11070
4.02M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11071
91.9k
            state = *cur;
11072
91.9k
        }
11073
11074
15.9M
        cur++;
11075
15.9M
    }
11076
11077
    /*
11078
     * Rescan the three last characters to detect "<!--" and "-->"
11079
     * split across chunks.
11080
     */
11081
697
    if ((state == 0) || (state == '-')) {
11082
346
        if (cur - start < 3)
11083
26
            cur = start;
11084
320
        else
11085
320
            cur -= 3;
11086
346
    }
11087
697
    index = cur - ctxt->input->cur;
11088
697
    if (index > LONG_MAX) {
11089
0
        ctxt->checkIndex = 0;
11090
0
        ctxt->endCheckState = 0;
11091
0
        return(1);
11092
0
    }
11093
697
    ctxt->checkIndex = index;
11094
697
    ctxt->endCheckState = state;
11095
697
    return(0);
11096
697
}
11097
11098
/**
11099
 * xmlParseTryOrFinish:
11100
 * @ctxt:  an XML parser context
11101
 * @terminate:  last chunk indicator
11102
 *
11103
 * Try to progress on parsing
11104
 *
11105
 * Returns zero if no parsing was possible
11106
 */
11107
static int
11108
235k
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11109
235k
    int ret = 0;
11110
235k
    size_t avail;
11111
235k
    xmlChar cur, next;
11112
11113
235k
    if (ctxt->input == NULL)
11114
0
        return(0);
11115
11116
235k
    if ((ctxt->input != NULL) &&
11117
235k
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11118
19.7k
        xmlParserShrink(ctxt);
11119
19.7k
    }
11120
11121
92.1M
    while (ctxt->disableSAX == 0) {
11122
92.0M
        avail = ctxt->input->end - ctxt->input->cur;
11123
92.0M
        if (avail < 1)
11124
130k
      goto done;
11125
91.9M
        switch (ctxt->instate) {
11126
1.45k
            case XML_PARSER_EOF:
11127
          /*
11128
     * Document parsing is done !
11129
     */
11130
1.45k
          goto done;
11131
212k
            case XML_PARSER_START:
11132
                /*
11133
                 * Very first chars read from the document flow.
11134
                 */
11135
212k
                if ((!terminate) && (avail < 4))
11136
0
                    goto done;
11137
11138
                /*
11139
                 * We need more bytes to detect EBCDIC code pages.
11140
                 * See xmlDetectEBCDIC.
11141
                 */
11142
212k
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
11143
6
                    (!terminate) && (avail < 200))
11144
0
                    goto done;
11145
11146
212k
                xmlDetectEncoding(ctxt);
11147
212k
                ctxt->instate = XML_PARSER_XML_DECL;
11148
212k
    break;
11149
11150
212k
            case XML_PARSER_XML_DECL:
11151
212k
    if ((!terminate) && (avail < 2))
11152
0
        goto done;
11153
212k
    cur = ctxt->input->cur[0];
11154
212k
    next = ctxt->input->cur[1];
11155
212k
          if ((cur == '<') && (next == '?')) {
11156
        /* PI or XML decl */
11157
174k
        if ((!terminate) &&
11158
19.4k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11159
157
      goto done;
11160
174k
        if ((ctxt->input->cur[2] == 'x') &&
11161
170k
      (ctxt->input->cur[3] == 'm') &&
11162
169k
      (ctxt->input->cur[4] == 'l') &&
11163
168k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11164
156k
      ret += 5;
11165
156k
      xmlParseXMLDecl(ctxt);
11166
156k
        } else {
11167
17.4k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11168
17.4k
                        if (ctxt->version == NULL) {
11169
0
                            xmlErrMemory(ctxt);
11170
0
                            break;
11171
0
                        }
11172
17.4k
        }
11173
174k
    } else {
11174
37.7k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11175
37.7k
        if (ctxt->version == NULL) {
11176
0
            xmlErrMemory(ctxt);
11177
0
      break;
11178
0
        }
11179
37.7k
    }
11180
212k
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
11181
0
                    ctxt->sax->setDocumentLocator(ctxt->userData,
11182
0
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
11183
0
                }
11184
212k
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11185
0
                    (!ctxt->disableSAX))
11186
0
                    ctxt->sax->startDocument(ctxt->userData);
11187
212k
                ctxt->instate = XML_PARSER_MISC;
11188
212k
    break;
11189
32.4M
            case XML_PARSER_START_TAG: {
11190
32.4M
          const xmlChar *name;
11191
32.4M
    const xmlChar *prefix = NULL;
11192
32.4M
    const xmlChar *URI = NULL;
11193
32.4M
                int line = ctxt->input->line;
11194
32.4M
    int nbNs = 0;
11195
11196
32.4M
    if ((!terminate) && (avail < 2))
11197
2
        goto done;
11198
32.4M
    cur = ctxt->input->cur[0];
11199
32.4M
          if (cur != '<') {
11200
455
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11201
455
                                   "Start tag expected, '<' not found");
11202
455
                    ctxt->instate = XML_PARSER_EOF;
11203
455
                    xmlFinishDocument(ctxt);
11204
455
        goto done;
11205
455
    }
11206
32.4M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11207
10.4k
                    goto done;
11208
32.4M
    if (ctxt->spaceNr == 0)
11209
0
        spacePush(ctxt, -1);
11210
32.4M
    else if (*ctxt->space == -2)
11211
8.24M
        spacePush(ctxt, -1);
11212
24.1M
    else
11213
24.1M
        spacePush(ctxt, *ctxt->space);
11214
32.4M
#ifdef LIBXML_SAX1_ENABLED
11215
32.4M
    if (ctxt->sax2)
11216
32.4M
#endif /* LIBXML_SAX1_ENABLED */
11217
32.4M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
11218
532
#ifdef LIBXML_SAX1_ENABLED
11219
532
    else
11220
532
        name = xmlParseStartTag(ctxt);
11221
32.4M
#endif /* LIBXML_SAX1_ENABLED */
11222
32.4M
    if (name == NULL) {
11223
4.27k
        spacePop(ctxt);
11224
4.27k
                    ctxt->instate = XML_PARSER_EOF;
11225
4.27k
                    xmlFinishDocument(ctxt);
11226
4.27k
        goto done;
11227
4.27k
    }
11228
32.4M
#ifdef LIBXML_VALID_ENABLED
11229
    /*
11230
     * [ VC: Root Element Type ]
11231
     * The Name in the document type declaration must match
11232
     * the element type of the root element.
11233
     */
11234
32.4M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11235
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11236
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11237
32.4M
#endif /* LIBXML_VALID_ENABLED */
11238
11239
    /*
11240
     * Check for an Empty Element.
11241
     */
11242
32.4M
    if ((RAW == '/') && (NXT(1) == '>')) {
11243
8.07M
        SKIP(2);
11244
11245
8.07M
        if (ctxt->sax2) {
11246
8.07M
      if ((ctxt->sax != NULL) &&
11247
8.07M
          (ctxt->sax->endElementNs != NULL) &&
11248
8.07M
          (!ctxt->disableSAX))
11249
8.07M
          ctxt->sax->endElementNs(ctxt->userData, name,
11250
8.07M
                                  prefix, URI);
11251
8.07M
      if (nbNs > 0)
11252
138k
          xmlParserNsPop(ctxt, nbNs);
11253
8.07M
#ifdef LIBXML_SAX1_ENABLED
11254
18.4E
        } else {
11255
18.4E
      if ((ctxt->sax != NULL) &&
11256
0
          (ctxt->sax->endElement != NULL) &&
11257
0
          (!ctxt->disableSAX))
11258
0
          ctxt->sax->endElement(ctxt->userData, name);
11259
18.4E
#endif /* LIBXML_SAX1_ENABLED */
11260
18.4E
        }
11261
8.07M
        spacePop(ctxt);
11262
24.3M
    } else if (RAW == '>') {
11263
24.2M
        NEXT;
11264
24.2M
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11265
24.2M
    } else {
11266
50.5k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11267
50.5k
           "Couldn't find end of Start Tag %s\n",
11268
50.5k
           name);
11269
50.5k
        nodePop(ctxt);
11270
50.5k
        spacePop(ctxt);
11271
50.5k
                    if (nbNs > 0)
11272
5.20k
                        xmlParserNsPop(ctxt, nbNs);
11273
50.5k
    }
11274
11275
32.4M
                if (ctxt->nameNr == 0)
11276
9.66k
                    ctxt->instate = XML_PARSER_EPILOG;
11277
32.4M
                else
11278
32.4M
                    ctxt->instate = XML_PARSER_CONTENT;
11279
32.4M
                break;
11280
32.4M
      }
11281
51.1M
            case XML_PARSER_CONTENT: {
11282
51.1M
    cur = ctxt->input->cur[0];
11283
11284
51.1M
    if (cur == '<') {
11285
39.9M
                    if ((!terminate) && (avail < 2))
11286
518
                        goto done;
11287
39.9M
        next = ctxt->input->cur[1];
11288
11289
39.9M
                    if (next == '/') {
11290
7.61M
                        ctxt->instate = XML_PARSER_END_TAG;
11291
7.61M
                        break;
11292
32.2M
                    } else if (next == '?') {
11293
12.5k
                        if ((!terminate) &&
11294
6.94k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11295
89
                            goto done;
11296
12.4k
                        xmlParsePI(ctxt);
11297
12.4k
                        ctxt->instate = XML_PARSER_CONTENT;
11298
12.4k
                        break;
11299
32.2M
                    } else if (next == '!') {
11300
66.6k
                        if ((!terminate) && (avail < 3))
11301
8
                            goto done;
11302
66.6k
                        next = ctxt->input->cur[2];
11303
11304
66.6k
                        if (next == '-') {
11305
52.7k
                            if ((!terminate) && (avail < 4))
11306
9
                                goto done;
11307
52.7k
                            if (ctxt->input->cur[3] == '-') {
11308
52.6k
                                if ((!terminate) &&
11309
36.1k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11310
114
                                    goto done;
11311
52.5k
                                xmlParseComment(ctxt);
11312
52.5k
                                ctxt->instate = XML_PARSER_CONTENT;
11313
52.5k
                                break;
11314
52.6k
                            }
11315
52.7k
                        } else if (next == '[') {
11316
13.8k
                            if ((!terminate) && (avail < 9))
11317
5
                                goto done;
11318
13.8k
                            if ((ctxt->input->cur[2] == '[') &&
11319
13.8k
                                (ctxt->input->cur[3] == 'C') &&
11320
13.8k
                                (ctxt->input->cur[4] == 'D') &&
11321
13.8k
                                (ctxt->input->cur[5] == 'A') &&
11322
13.7k
                                (ctxt->input->cur[6] == 'T') &&
11323
13.7k
                                (ctxt->input->cur[7] == 'A') &&
11324
13.7k
                                (ctxt->input->cur[8] == '[')) {
11325
13.7k
                                if ((!terminate) &&
11326
6.23k
                                    (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11327
718
                                    goto done;
11328
13.0k
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11329
13.0k
                                xmlParseCDSect(ctxt);
11330
13.0k
                                ctxt->instate = XML_PARSER_CONTENT;
11331
13.0k
                                break;
11332
13.7k
                            }
11333
13.8k
                        }
11334
66.6k
                    }
11335
39.9M
    } else if (cur == '&') {
11336
340k
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11337
75
      goto done;
11338
340k
        xmlParseReference(ctxt);
11339
340k
                    break;
11340
10.9M
    } else {
11341
        /* TODO Avoid the extra copy, handle directly !!! */
11342
        /*
11343
         * Goal of the following test is:
11344
         *  - minimize calls to the SAX 'character' callback
11345
         *    when they are mergeable
11346
         *  - handle an problem for isBlank when we only parse
11347
         *    a sequence of blank chars and the next one is
11348
         *    not available to check against '<' presence.
11349
         *  - tries to homogenize the differences in SAX
11350
         *    callbacks between the push and pull versions
11351
         *    of the parser.
11352
         */
11353
10.9M
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11354
424k
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11355
2.21k
          goto done;
11356
424k
                    }
11357
10.9M
                    ctxt->checkIndex = 0;
11358
10.9M
        xmlParseCharDataInternal(ctxt, !terminate);
11359
10.9M
                    break;
11360
10.9M
    }
11361
11362
32.2M
                ctxt->instate = XML_PARSER_START_TAG;
11363
32.2M
    break;
11364
51.1M
      }
11365
7.61M
            case XML_PARSER_END_TAG:
11366
7.61M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11367
641
        goto done;
11368
7.61M
    if (ctxt->sax2) {
11369
7.61M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11370
7.61M
        nameNsPop(ctxt);
11371
7.61M
    }
11372
5
#ifdef LIBXML_SAX1_ENABLED
11373
5
      else
11374
5
        xmlParseEndTag1(ctxt, 0);
11375
7.61M
#endif /* LIBXML_SAX1_ENABLED */
11376
7.61M
    if (ctxt->nameNr == 0) {
11377
121k
        ctxt->instate = XML_PARSER_EPILOG;
11378
7.49M
    } else {
11379
7.49M
        ctxt->instate = XML_PARSER_CONTENT;
11380
7.49M
    }
11381
7.61M
    break;
11382
251k
            case XML_PARSER_MISC:
11383
254k
            case XML_PARSER_PROLOG:
11384
266k
            case XML_PARSER_EPILOG:
11385
266k
    SKIP_BLANKS;
11386
266k
                avail = ctxt->input->end - ctxt->input->cur;
11387
266k
    if (avail < 1)
11388
8.69k
        goto done;
11389
257k
    if (ctxt->input->cur[0] == '<') {
11390
256k
                    if ((!terminate) && (avail < 2))
11391
16
                        goto done;
11392
256k
                    next = ctxt->input->cur[1];
11393
256k
                    if (next == '?') {
11394
42.5k
                        if ((!terminate) &&
11395
17.8k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11396
53
                            goto done;
11397
42.5k
                        xmlParsePI(ctxt);
11398
42.5k
                        break;
11399
214k
                    } else if (next == '!') {
11400
11.2k
                        if ((!terminate) && (avail < 3))
11401
5
                            goto done;
11402
11403
11.2k
                        if (ctxt->input->cur[2] == '-') {
11404
3.41k
                            if ((!terminate) && (avail < 4))
11405
0
                                goto done;
11406
3.41k
                            if (ctxt->input->cur[3] == '-') {
11407
3.39k
                                if ((!terminate) &&
11408
1.50k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11409
36
                                    goto done;
11410
3.36k
                                xmlParseComment(ctxt);
11411
3.36k
                                break;
11412
3.39k
                            }
11413
7.81k
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11414
7.80k
                            if ((!terminate) && (avail < 9))
11415
0
                                goto done;
11416
7.80k
                            if ((ctxt->input->cur[2] == 'D') &&
11417
7.78k
                                (ctxt->input->cur[3] == 'O') &&
11418
7.78k
                                (ctxt->input->cur[4] == 'C') &&
11419
7.77k
                                (ctxt->input->cur[5] == 'T') &&
11420
7.75k
                                (ctxt->input->cur[6] == 'Y') &&
11421
7.75k
                                (ctxt->input->cur[7] == 'P') &&
11422
7.75k
                                (ctxt->input->cur[8] == 'E')) {
11423
7.74k
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11424
79
                                    goto done;
11425
7.66k
                                ctxt->inSubset = 1;
11426
7.66k
                                xmlParseDocTypeDecl(ctxt);
11427
7.66k
                                if (RAW == '[') {
11428
7.22k
                                    ctxt->instate = XML_PARSER_DTD;
11429
7.22k
                                } else {
11430
437
                                    if (RAW == '>')
11431
222
                                        NEXT;
11432
                                    /*
11433
                                     * Create and update the external subset.
11434
                                     */
11435
437
                                    ctxt->inSubset = 2;
11436
437
                                    if ((ctxt->sax != NULL) &&
11437
437
                                        (!ctxt->disableSAX) &&
11438
220
                                        (ctxt->sax->externalSubset != NULL))
11439
0
                                        ctxt->sax->externalSubset(
11440
0
                                                ctxt->userData,
11441
0
                                                ctxt->intSubName,
11442
0
                                                ctxt->extSubSystem,
11443
0
                                                ctxt->extSubURI);
11444
437
                                    ctxt->inSubset = 0;
11445
437
                                    xmlCleanSpecialAttr(ctxt);
11446
437
                                    ctxt->instate = XML_PARSER_PROLOG;
11447
437
                                }
11448
7.66k
                                break;
11449
7.74k
                            }
11450
7.80k
                        }
11451
11.2k
                    }
11452
256k
                }
11453
11454
204k
                if (ctxt->instate == XML_PARSER_EPILOG) {
11455
1.32k
                    if (ctxt->errNo == XML_ERR_OK)
11456
136
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11457
1.32k
        ctxt->instate = XML_PARSER_EOF;
11458
1.32k
                    xmlFinishDocument(ctxt);
11459
202k
                } else {
11460
202k
        ctxt->instate = XML_PARSER_START_TAG;
11461
202k
    }
11462
204k
    break;
11463
7.85k
            case XML_PARSER_DTD: {
11464
7.85k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11465
697
                    goto done;
11466
7.15k
    xmlParseInternalSubset(ctxt);
11467
7.15k
    ctxt->inSubset = 2;
11468
7.15k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11469
2.33k
        (ctxt->sax->externalSubset != NULL))
11470
0
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11471
0
          ctxt->extSubSystem, ctxt->extSubURI);
11472
7.15k
    ctxt->inSubset = 0;
11473
7.15k
    xmlCleanSpecialAttr(ctxt);
11474
7.15k
    ctxt->instate = XML_PARSER_PROLOG;
11475
7.15k
                break;
11476
7.85k
      }
11477
0
            default:
11478
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11479
0
      "PP: internal error\n");
11480
0
    ctxt->instate = XML_PARSER_EOF;
11481
0
    break;
11482
91.9M
  }
11483
91.9M
    }
11484
235k
done:
11485
235k
    return(ret);
11486
235k
}
11487
11488
/**
11489
 * xmlParseChunk:
11490
 * @ctxt:  an XML parser context
11491
 * @chunk:  chunk of memory
11492
 * @size:  size of chunk in bytes
11493
 * @terminate:  last chunk indicator
11494
 *
11495
 * Parse a chunk of memory in push parser mode.
11496
 *
11497
 * Assumes that the parser context was initialized with
11498
 * xmlCreatePushParserCtxt.
11499
 *
11500
 * The last chunk, which will often be empty, must be marked with
11501
 * the @terminate flag. With the default SAX callbacks, the resulting
11502
 * document will be available in ctxt->myDoc. This pointer will not
11503
 * be freed when calling xmlFreeParserCtxt and must be freed by the
11504
 * caller. If the document isn't well-formed, it will still be returned
11505
 * in ctxt->myDoc.
11506
 *
11507
 * As an exception, xmlCtxtResetPush will free the document in
11508
 * ctxt->myDoc. So ctxt->myDoc should be set to NULL after extracting
11509
 * the document.
11510
 *
11511
 * Returns an xmlParserErrors code (0 on success).
11512
 */
11513
int
11514
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11515
235k
              int terminate) {
11516
235k
    size_t curBase;
11517
235k
    size_t maxLength;
11518
235k
    size_t pos;
11519
235k
    int end_in_lf = 0;
11520
235k
    int res;
11521
11522
235k
    if ((ctxt == NULL) || (size < 0))
11523
0
        return(XML_ERR_ARGUMENT);
11524
235k
    if ((chunk == NULL) && (size > 0))
11525
0
        return(XML_ERR_ARGUMENT);
11526
235k
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11527
0
        return(XML_ERR_ARGUMENT);
11528
235k
    if (ctxt->disableSAX != 0)
11529
0
        return(ctxt->errNo);
11530
11531
235k
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11532
235k
    if (ctxt->instate == XML_PARSER_START)
11533
212k
        xmlCtxtInitializeLate(ctxt);
11534
235k
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11535
33.4k
        (chunk[size - 1] == '\r')) {
11536
107
  end_in_lf = 1;
11537
107
  size--;
11538
107
    }
11539
11540
    /*
11541
     * Also push an empty chunk to make sure that the raw buffer
11542
     * will be flushed if there is an encoder.
11543
     */
11544
235k
    pos = ctxt->input->cur - ctxt->input->base;
11545
235k
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11546
235k
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11547
235k
    if (res < 0) {
11548
3
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11549
3
        xmlHaltParser(ctxt);
11550
3
        return(ctxt->errNo);
11551
3
    }
11552
11553
235k
    xmlParseTryOrFinish(ctxt, terminate);
11554
11555
235k
    curBase = ctxt->input->cur - ctxt->input->base;
11556
235k
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11557
235k
                XML_MAX_HUGE_LENGTH :
11558
235k
                XML_MAX_LOOKUP_LIMIT;
11559
235k
    if (curBase > maxLength) {
11560
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11561
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11562
0
        xmlHaltParser(ctxt);
11563
0
    }
11564
11565
235k
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11566
79.1k
        return(ctxt->errNo);
11567
11568
156k
    if (end_in_lf == 1) {
11569
93
  pos = ctxt->input->cur - ctxt->input->base;
11570
93
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11571
93
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11572
93
        if (res < 0) {
11573
0
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11574
0
            xmlHaltParser(ctxt);
11575
0
            return(ctxt->errNo);
11576
0
        }
11577
93
    }
11578
156k
    if (terminate) {
11579
  /*
11580
   * Check for termination
11581
   */
11582
132k
        if ((ctxt->instate != XML_PARSER_EOF) &&
11583
131k
            (ctxt->instate != XML_PARSER_EPILOG)) {
11584
10.0k
            if (ctxt->nameNr > 0) {
11585
9.91k
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11586
9.91k
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11587
9.91k
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11588
9.91k
                        "Premature end of data in tag %s line %d\n",
11589
9.91k
                        name, line, NULL);
11590
9.91k
            } else if (ctxt->instate == XML_PARSER_START) {
11591
0
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11592
159
            } else {
11593
159
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11594
159
                               "Start tag expected, '<' not found\n");
11595
159
            }
11596
122k
        } else {
11597
122k
            xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
11598
122k
        }
11599
132k
  if (ctxt->instate != XML_PARSER_EOF) {
11600
131k
            ctxt->instate = XML_PARSER_EOF;
11601
131k
            xmlFinishDocument(ctxt);
11602
131k
  }
11603
132k
    }
11604
156k
    if (ctxt->wellFormed == 0)
11605
10.0k
  return((xmlParserErrors) ctxt->errNo);
11606
145k
    else
11607
145k
        return(0);
11608
156k
}
11609
11610
/************************************************************************
11611
 *                  *
11612
 *    I/O front end functions to the parser     *
11613
 *                  *
11614
 ************************************************************************/
11615
11616
/**
11617
 * xmlCreatePushParserCtxt:
11618
 * @sax:  a SAX handler (optional)
11619
 * @user_data:  user data for SAX callbacks (optional)
11620
 * @chunk:  initial chunk (optional, deprecated)
11621
 * @size:  size of initial chunk in bytes
11622
 * @filename:  file name or URI (optional)
11623
 *
11624
 * Create a parser context for using the XML parser in push mode.
11625
 * See xmlParseChunk.
11626
 *
11627
 * Passing an initial chunk is useless and deprecated.
11628
 *
11629
 * The push parser doesn't support recovery mode or the
11630
 * XML_PARSE_NOBLANKS option.
11631
 *
11632
 * @filename is used as base URI to fetch external entities and for
11633
 * error reports.
11634
 *
11635
 * Returns the new parser context or NULL if a memory allocation
11636
 * failed.
11637
 */
11638
11639
xmlParserCtxtPtr
11640
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11641
212k
                        const char *chunk, int size, const char *filename) {
11642
212k
    xmlParserCtxtPtr ctxt;
11643
212k
    xmlParserInputPtr input;
11644
11645
212k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11646
212k
    if (ctxt == NULL)
11647
0
  return(NULL);
11648
11649
212k
    ctxt->options &= ~XML_PARSE_NODICT;
11650
212k
    ctxt->dictNames = 1;
11651
11652
212k
    input = xmlNewPushInput(filename, chunk, size);
11653
212k
    if (input == NULL) {
11654
0
  xmlFreeParserCtxt(ctxt);
11655
0
  return(NULL);
11656
0
    }
11657
212k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11658
0
        xmlFreeInputStream(input);
11659
0
        xmlFreeParserCtxt(ctxt);
11660
0
        return(NULL);
11661
0
    }
11662
11663
212k
    return(ctxt);
11664
212k
}
11665
#endif /* LIBXML_PUSH_ENABLED */
11666
11667
/**
11668
 * xmlStopParser:
11669
 * @ctxt:  an XML parser context
11670
 *
11671
 * Blocks further parser processing
11672
 */
11673
void
11674
0
xmlStopParser(xmlParserCtxtPtr ctxt) {
11675
0
    if (ctxt == NULL)
11676
0
        return;
11677
0
    xmlHaltParser(ctxt);
11678
0
    if (ctxt->errNo != XML_ERR_NO_MEMORY)
11679
0
        ctxt->errNo = XML_ERR_USER_STOP;
11680
0
}
11681
11682
/**
11683
 * xmlCreateIOParserCtxt:
11684
 * @sax:  a SAX handler (optional)
11685
 * @user_data:  user data for SAX callbacks (optional)
11686
 * @ioread:  an I/O read function
11687
 * @ioclose:  an I/O close function (optional)
11688
 * @ioctx:  an I/O handler
11689
 * @enc:  the charset encoding if known (deprecated)
11690
 *
11691
 * Create a parser context for using the XML parser with an existing
11692
 * I/O stream
11693
 *
11694
 * Returns the new parser context or NULL
11695
 */
11696
xmlParserCtxtPtr
11697
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11698
                      xmlInputReadCallback ioread,
11699
                      xmlInputCloseCallback ioclose,
11700
0
                      void *ioctx, xmlCharEncoding enc) {
11701
0
    xmlParserCtxtPtr ctxt;
11702
0
    xmlParserInputPtr input;
11703
0
    const char *encoding;
11704
11705
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11706
0
    if (ctxt == NULL)
11707
0
  return(NULL);
11708
11709
0
    encoding = xmlGetCharEncodingName(enc);
11710
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11711
0
                                  encoding, 0);
11712
0
    if (input == NULL) {
11713
0
  xmlFreeParserCtxt(ctxt);
11714
0
        return (NULL);
11715
0
    }
11716
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11717
0
        xmlFreeInputStream(input);
11718
0
        xmlFreeParserCtxt(ctxt);
11719
0
        return(NULL);
11720
0
    }
11721
11722
0
    return(ctxt);
11723
0
}
11724
11725
#ifdef LIBXML_VALID_ENABLED
11726
/************************************************************************
11727
 *                  *
11728
 *    Front ends when parsing a DTD       *
11729
 *                  *
11730
 ************************************************************************/
11731
11732
/**
11733
 * xmlCtxtParseDtd:
11734
 * @ctxt:  a parser context
11735
 * @input:  a parser input
11736
 * @publicId:  public ID of the DTD (optional)
11737
 * @systemId:  system ID of the DTD (optional)
11738
 *
11739
 * Parse a DTD.
11740
 *
11741
 * Option XML_PARSE_DTDLOAD should be enabled in the parser context
11742
 * to make external entities work.
11743
 *
11744
 * Availabe since 2.14.0.
11745
 *
11746
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11747
 * @input will be freed by the function in any case.
11748
 */
11749
xmlDtdPtr
11750
xmlCtxtParseDtd(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11751
0
                const xmlChar *publicId, const xmlChar *systemId) {
11752
0
    xmlDtdPtr ret = NULL;
11753
11754
0
    if ((ctxt == NULL) || (input == NULL)) {
11755
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
11756
0
        xmlFreeInputStream(input);
11757
0
        return(NULL);
11758
0
    }
11759
11760
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11761
0
        xmlFreeInputStream(input);
11762
0
        return(NULL);
11763
0
    }
11764
11765
0
    if (publicId == NULL)
11766
0
        publicId = BAD_CAST "none";
11767
0
    if (systemId == NULL)
11768
0
        systemId = BAD_CAST "none";
11769
11770
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11771
0
    if (ctxt->myDoc == NULL) {
11772
0
        xmlErrMemory(ctxt);
11773
0
        goto error;
11774
0
    }
11775
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11776
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11777
0
                                       publicId, systemId);
11778
0
    if (ctxt->myDoc->extSubset == NULL) {
11779
0
        xmlErrMemory(ctxt);
11780
0
        xmlFreeDoc(ctxt->myDoc);
11781
0
        goto error;
11782
0
    }
11783
11784
0
    xmlParseExternalSubset(ctxt, publicId, systemId);
11785
11786
0
    if (ctxt->wellFormed) {
11787
0
        ret = ctxt->myDoc->extSubset;
11788
0
        ctxt->myDoc->extSubset = NULL;
11789
0
        if (ret != NULL) {
11790
0
            xmlNodePtr tmp;
11791
11792
0
            ret->doc = NULL;
11793
0
            tmp = ret->children;
11794
0
            while (tmp != NULL) {
11795
0
                tmp->doc = NULL;
11796
0
                tmp = tmp->next;
11797
0
            }
11798
0
        }
11799
0
    } else {
11800
0
        ret = NULL;
11801
0
    }
11802
0
    xmlFreeDoc(ctxt->myDoc);
11803
0
    ctxt->myDoc = NULL;
11804
11805
0
error:
11806
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
11807
11808
0
    return(ret);
11809
0
}
11810
11811
/**
11812
 * xmlIOParseDTD:
11813
 * @sax:  the SAX handler block or NULL
11814
 * @input:  an Input Buffer
11815
 * @enc:  the charset encoding if known
11816
 *
11817
 * DEPRECATED: Use xmlCtxtParseDtd.
11818
 *
11819
 * Load and parse a DTD
11820
 *
11821
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11822
 * @input will be freed by the function in any case.
11823
 */
11824
11825
xmlDtdPtr
11826
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11827
0
        xmlCharEncoding enc) {
11828
0
    xmlDtdPtr ret = NULL;
11829
0
    xmlParserCtxtPtr ctxt;
11830
0
    xmlParserInputPtr pinput = NULL;
11831
11832
0
    if (input == NULL)
11833
0
  return(NULL);
11834
11835
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11836
0
    if (ctxt == NULL) {
11837
0
        xmlFreeParserInputBuffer(input);
11838
0
  return(NULL);
11839
0
    }
11840
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11841
11842
    /*
11843
     * generate a parser input from the I/O handler
11844
     */
11845
11846
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11847
0
    if (pinput == NULL) {
11848
0
        xmlFreeParserInputBuffer(input);
11849
0
  xmlFreeParserCtxt(ctxt);
11850
0
  return(NULL);
11851
0
    }
11852
11853
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11854
0
        xmlSwitchEncoding(ctxt, enc);
11855
0
    }
11856
11857
0
    ret = xmlCtxtParseDtd(ctxt, pinput, NULL, NULL);
11858
11859
0
    xmlFreeParserCtxt(ctxt);
11860
0
    return(ret);
11861
0
}
11862
11863
/**
11864
 * xmlSAXParseDTD:
11865
 * @sax:  the SAX handler block
11866
 * @ExternalID:  a NAME* containing the External ID of the DTD
11867
 * @SystemID:  a NAME* containing the URL to the DTD
11868
 *
11869
 * DEPRECATED: Use xmlCtxtParseDtd.
11870
 *
11871
 * Load and parse an external subset.
11872
 *
11873
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11874
 */
11875
11876
xmlDtdPtr
11877
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11878
0
                          const xmlChar *SystemID) {
11879
0
    xmlDtdPtr ret = NULL;
11880
0
    xmlParserCtxtPtr ctxt;
11881
0
    xmlParserInputPtr input = NULL;
11882
0
    xmlChar* systemIdCanonic;
11883
11884
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11885
11886
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11887
0
    if (ctxt == NULL) {
11888
0
  return(NULL);
11889
0
    }
11890
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11891
11892
    /*
11893
     * Canonicalise the system ID
11894
     */
11895
0
    systemIdCanonic = xmlCanonicPath(SystemID);
11896
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11897
0
  xmlFreeParserCtxt(ctxt);
11898
0
  return(NULL);
11899
0
    }
11900
11901
    /*
11902
     * Ask the Entity resolver to load the damn thing
11903
     */
11904
11905
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11906
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11907
0
                                   systemIdCanonic);
11908
0
    if (input == NULL) {
11909
0
  xmlFreeParserCtxt(ctxt);
11910
0
  if (systemIdCanonic != NULL)
11911
0
      xmlFree(systemIdCanonic);
11912
0
  return(NULL);
11913
0
    }
11914
11915
0
    if (input->filename == NULL)
11916
0
  input->filename = (char *) systemIdCanonic;
11917
0
    else
11918
0
  xmlFree(systemIdCanonic);
11919
11920
0
    ret = xmlCtxtParseDtd(ctxt, input, ExternalID, SystemID);
11921
11922
0
    xmlFreeParserCtxt(ctxt);
11923
0
    return(ret);
11924
0
}
11925
11926
11927
/**
11928
 * xmlParseDTD:
11929
 * @ExternalID:  a NAME* containing the External ID of the DTD
11930
 * @SystemID:  a NAME* containing the URL to the DTD
11931
 *
11932
 * Load and parse an external subset.
11933
 *
11934
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11935
 */
11936
11937
xmlDtdPtr
11938
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11939
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11940
0
}
11941
#endif /* LIBXML_VALID_ENABLED */
11942
11943
/************************************************************************
11944
 *                  *
11945
 *    Front ends when parsing an Entity     *
11946
 *                  *
11947
 ************************************************************************/
11948
11949
static xmlNodePtr
11950
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11951
0
                            int hasTextDecl, int buildTree) {
11952
0
    xmlNodePtr root = NULL;
11953
0
    xmlNodePtr list = NULL;
11954
0
    xmlChar *rootName = BAD_CAST "#root";
11955
0
    int result;
11956
11957
0
    if (buildTree) {
11958
0
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11959
0
        if (root == NULL) {
11960
0
            xmlErrMemory(ctxt);
11961
0
            goto error;
11962
0
        }
11963
0
    }
11964
11965
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
11966
0
        goto error;
11967
11968
0
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11969
0
    spacePush(ctxt, -1);
11970
11971
0
    if (buildTree)
11972
0
        nodePush(ctxt, root);
11973
11974
0
    if (hasTextDecl) {
11975
0
        xmlDetectEncoding(ctxt);
11976
11977
        /*
11978
         * Parse a possible text declaration first
11979
         */
11980
0
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11981
0
            (IS_BLANK_CH(NXT(5)))) {
11982
0
            xmlParseTextDecl(ctxt);
11983
            /*
11984
             * An XML-1.0 document can't reference an entity not XML-1.0
11985
             */
11986
0
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11987
0
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11988
0
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11989
0
                               "Version mismatch between document and "
11990
0
                               "entity\n");
11991
0
            }
11992
0
        }
11993
0
    }
11994
11995
0
    xmlParseContentInternal(ctxt);
11996
11997
0
    if (ctxt->input->cur < ctxt->input->end)
11998
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11999
12000
0
    if ((ctxt->wellFormed) ||
12001
0
        ((ctxt->recovery) && (!xmlCtxtIsCatastrophicError(ctxt)))) {
12002
0
        if (root != NULL) {
12003
0
            xmlNodePtr cur;
12004
12005
            /*
12006
             * Unlink newly created node list.
12007
             */
12008
0
            list = root->children;
12009
0
            root->children = NULL;
12010
0
            root->last = NULL;
12011
0
            for (cur = list; cur != NULL; cur = cur->next)
12012
0
                cur->parent = NULL;
12013
0
        }
12014
0
    }
12015
12016
    /*
12017
     * Read the rest of the stream in case of errors. We want
12018
     * to account for the whole entity size.
12019
     */
12020
0
    do {
12021
0
        ctxt->input->cur = ctxt->input->end;
12022
0
        xmlParserShrink(ctxt);
12023
0
        result = xmlParserGrow(ctxt);
12024
0
    } while (result > 0);
12025
12026
0
    if (buildTree)
12027
0
        nodePop(ctxt);
12028
12029
0
    namePop(ctxt);
12030
0
    spacePop(ctxt);
12031
12032
0
    xmlCtxtPopInput(ctxt);
12033
12034
0
error:
12035
0
    xmlFreeNode(root);
12036
12037
0
    return(list);
12038
0
}
12039
12040
static void
12041
0
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
12042
0
    xmlParserInputPtr input;
12043
0
    xmlNodePtr list;
12044
0
    unsigned long consumed;
12045
0
    int isExternal;
12046
0
    int buildTree;
12047
0
    int oldMinNsIndex;
12048
0
    int oldNodelen, oldNodemem;
12049
12050
0
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
12051
0
    buildTree = (ctxt->node != NULL);
12052
12053
    /*
12054
     * Recursion check
12055
     */
12056
0
    if (ent->flags & XML_ENT_EXPANDING) {
12057
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
12058
0
        xmlHaltParser(ctxt);
12059
0
        goto error;
12060
0
    }
12061
12062
    /*
12063
     * Load entity
12064
     */
12065
0
    input = xmlNewEntityInputStream(ctxt, ent);
12066
0
    if (input == NULL)
12067
0
        goto error;
12068
12069
    /*
12070
     * When building a tree, we need to limit the scope of namespace
12071
     * declarations, so that entities don't reference xmlNs structs
12072
     * from the parent of a reference.
12073
     */
12074
0
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
12075
0
    if (buildTree)
12076
0
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
12077
12078
0
    oldNodelen = ctxt->nodelen;
12079
0
    oldNodemem = ctxt->nodemem;
12080
0
    ctxt->nodelen = 0;
12081
0
    ctxt->nodemem = 0;
12082
12083
    /*
12084
     * Parse content
12085
     *
12086
     * This initiates a recursive call chain:
12087
     *
12088
     * - xmlCtxtParseContentInternal
12089
     * - xmlParseContentInternal
12090
     * - xmlParseReference
12091
     * - xmlCtxtParseEntity
12092
     *
12093
     * The nesting depth is limited by the maximum number of inputs,
12094
     * see xmlCtxtPushInput.
12095
     *
12096
     * It's possible to make this non-recursive (minNsIndex must be
12097
     * stored in the input struct) at the expense of code readability.
12098
     */
12099
12100
0
    ent->flags |= XML_ENT_EXPANDING;
12101
12102
0
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
12103
12104
0
    ent->flags &= ~XML_ENT_EXPANDING;
12105
12106
0
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
12107
0
    ctxt->nodelen = oldNodelen;
12108
0
    ctxt->nodemem = oldNodemem;
12109
12110
    /*
12111
     * Entity size accounting
12112
     */
12113
0
    consumed = input->consumed;
12114
0
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
12115
12116
0
    if ((ent->flags & XML_ENT_CHECKED) == 0)
12117
0
        xmlSaturatedAdd(&ent->expandedSize, consumed);
12118
12119
0
    if ((ent->flags & XML_ENT_PARSED) == 0) {
12120
0
        if (isExternal)
12121
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
12122
12123
0
        ent->children = list;
12124
12125
0
        while (list != NULL) {
12126
0
            list->parent = (xmlNodePtr) ent;
12127
12128
            /*
12129
             * Downstream code like the nginx xslt module can set
12130
             * ctxt->myDoc->extSubset to a separate DTD, so the entity
12131
             * might have a different or a NULL document.
12132
             */
12133
0
            if (list->doc != ent->doc)
12134
0
                xmlSetTreeDoc(list, ent->doc);
12135
12136
0
            if (list->next == NULL)
12137
0
                ent->last = list;
12138
0
            list = list->next;
12139
0
        }
12140
0
    } else {
12141
0
        xmlFreeNodeList(list);
12142
0
    }
12143
12144
0
    xmlFreeInputStream(input);
12145
12146
0
error:
12147
0
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
12148
0
}
12149
12150
/**
12151
 * xmlParseCtxtExternalEntity:
12152
 * @ctxt:  the existing parsing context
12153
 * @URL:  the URL for the entity to load
12154
 * @ID:  the System ID for the entity to load
12155
 * @listOut:  the return value for the set of parsed nodes
12156
 *
12157
 * Parse an external general entity within an existing parsing context
12158
 * An external general parsed entity is well-formed if it matches the
12159
 * production labeled extParsedEnt.
12160
 *
12161
 * [78] extParsedEnt ::= TextDecl? content
12162
 *
12163
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12164
 *    the parser error code otherwise
12165
 */
12166
12167
int
12168
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt, const xmlChar *URL,
12169
0
                           const xmlChar *ID, xmlNodePtr *listOut) {
12170
0
    xmlParserInputPtr input;
12171
0
    xmlNodePtr list;
12172
12173
0
    if (listOut != NULL)
12174
0
        *listOut = NULL;
12175
12176
0
    if (ctxt == NULL)
12177
0
        return(XML_ERR_ARGUMENT);
12178
12179
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12180
0
                            XML_RESOURCE_GENERAL_ENTITY);
12181
0
    if (input == NULL)
12182
0
        return(ctxt->errNo);
12183
12184
0
    xmlCtxtInitializeLate(ctxt);
12185
12186
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
12187
0
    if (listOut != NULL)
12188
0
        *listOut = list;
12189
0
    else
12190
0
        xmlFreeNodeList(list);
12191
12192
0
    xmlFreeInputStream(input);
12193
0
    return(ctxt->errNo);
12194
0
}
12195
12196
#ifdef LIBXML_SAX1_ENABLED
12197
/**
12198
 * xmlParseExternalEntity:
12199
 * @doc:  the document the chunk pertains to
12200
 * @sax:  the SAX handler block (possibly NULL)
12201
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12202
 * @depth:  Used for loop detection, use 0
12203
 * @URL:  the URL for the entity to load
12204
 * @ID:  the System ID for the entity to load
12205
 * @list:  the return value for the set of parsed nodes
12206
 *
12207
 * DEPRECATED: Use xmlParseCtxtExternalEntity.
12208
 *
12209
 * Parse an external general entity
12210
 * An external general parsed entity is well-formed if it matches the
12211
 * production labeled extParsedEnt.
12212
 *
12213
 * [78] extParsedEnt ::= TextDecl? content
12214
 *
12215
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12216
 *    the parser error code otherwise
12217
 */
12218
12219
int
12220
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12221
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
12222
0
    xmlParserCtxtPtr ctxt;
12223
0
    int ret;
12224
12225
0
    if (list != NULL)
12226
0
        *list = NULL;
12227
12228
0
    if (doc == NULL)
12229
0
        return(XML_ERR_ARGUMENT);
12230
12231
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12232
0
    if (ctxt == NULL)
12233
0
        return(XML_ERR_NO_MEMORY);
12234
12235
0
    ctxt->depth = depth;
12236
0
    ctxt->myDoc = doc;
12237
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
12238
12239
0
    xmlFreeParserCtxt(ctxt);
12240
0
    return(ret);
12241
0
}
12242
12243
/**
12244
 * xmlParseBalancedChunkMemory:
12245
 * @doc:  the document the chunk pertains to (must not be NULL)
12246
 * @sax:  the SAX handler block (possibly NULL)
12247
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12248
 * @depth:  Used for loop detection, use 0
12249
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12250
 * @lst:  the return value for the set of parsed nodes
12251
 *
12252
 * Parse a well-balanced chunk of an XML document
12253
 * called by the parser
12254
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12255
 * the content production in the XML grammar:
12256
 *
12257
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12258
 *
12259
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12260
 *    the parser error code otherwise
12261
 */
12262
12263
int
12264
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12265
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12266
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12267
0
                                                depth, string, lst, 0 );
12268
0
}
12269
#endif /* LIBXML_SAX1_ENABLED */
12270
12271
/**
12272
 * xmlCtxtParseContent:
12273
 * @ctxt:  parser context
12274
 * @input:  parser input
12275
 * @node:  target node or document
12276
 * @hasTextDecl:  whether to parse text declaration
12277
 *
12278
 * Parse a well-balanced chunk of XML matching the 'content' production.
12279
 *
12280
 * Namespaces in scope of @node and entities of @node's document are
12281
 * recognized. When validating, the DTD of @node's document is used.
12282
 *
12283
 * Always consumes @input even in error case.
12284
 *
12285
 * Available since 2.14.0.
12286
 *
12287
 * Returns a node list or NULL in case of error.
12288
 */
12289
xmlNodePtr
12290
xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
12291
0
                    xmlNodePtr node, int hasTextDecl) {
12292
0
    xmlDocPtr doc;
12293
0
    xmlNodePtr cur, list = NULL;
12294
0
    int nsnr = 0;
12295
0
    xmlDictPtr oldDict;
12296
0
    int oldOptions, oldDictNames, oldLoadSubset;
12297
12298
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12299
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12300
0
        goto exit;
12301
0
    }
12302
12303
0
    doc = node->doc;
12304
0
    if (doc == NULL) {
12305
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12306
0
        goto exit;
12307
0
    }
12308
12309
0
    switch (node->type) {
12310
0
        case XML_ELEMENT_NODE:
12311
0
        case XML_DOCUMENT_NODE:
12312
0
        case XML_HTML_DOCUMENT_NODE:
12313
0
            break;
12314
12315
0
        case XML_ATTRIBUTE_NODE:
12316
0
        case XML_TEXT_NODE:
12317
0
        case XML_CDATA_SECTION_NODE:
12318
0
        case XML_ENTITY_REF_NODE:
12319
0
        case XML_PI_NODE:
12320
0
        case XML_COMMENT_NODE:
12321
0
            for (cur = node->parent; cur != NULL; cur = node->parent) {
12322
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12323
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12324
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12325
0
                    node = cur;
12326
0
                    break;
12327
0
                }
12328
0
            }
12329
0
            break;
12330
12331
0
        default:
12332
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12333
0
            goto exit;
12334
0
    }
12335
12336
0
#ifdef LIBXML_HTML_ENABLED
12337
0
    if (ctxt->html)
12338
0
        htmlCtxtReset(ctxt);
12339
0
    else
12340
0
#endif
12341
0
        xmlCtxtReset(ctxt);
12342
12343
0
    oldDict = ctxt->dict;
12344
0
    oldOptions = ctxt->options;
12345
0
    oldDictNames = ctxt->dictNames;
12346
0
    oldLoadSubset = ctxt->loadsubset;
12347
12348
    /*
12349
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12350
     */
12351
0
    if (doc->dict != NULL) {
12352
0
        ctxt->dict = doc->dict;
12353
0
    } else {
12354
0
        ctxt->options |= XML_PARSE_NODICT;
12355
0
        ctxt->dictNames = 0;
12356
0
    }
12357
12358
    /*
12359
     * Disable IDs
12360
     */
12361
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12362
12363
0
    ctxt->myDoc = doc;
12364
12365
0
#ifdef LIBXML_HTML_ENABLED
12366
0
    if (ctxt->html) {
12367
        /*
12368
         * When parsing in context, it makes no sense to add implied
12369
         * elements like html/body/etc...
12370
         */
12371
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12372
12373
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12374
0
    } else
12375
0
#endif
12376
0
    {
12377
0
        xmlCtxtInitializeLate(ctxt);
12378
12379
        /*
12380
         * initialize the SAX2 namespaces stack
12381
         */
12382
0
        cur = node;
12383
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12384
0
            xmlNsPtr ns = cur->nsDef;
12385
0
            xmlHashedString hprefix, huri;
12386
12387
0
            while (ns != NULL) {
12388
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12389
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12390
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12391
0
                    nsnr++;
12392
0
                ns = ns->next;
12393
0
            }
12394
0
            cur = cur->parent;
12395
0
        }
12396
12397
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12398
12399
0
        if (nsnr > 0)
12400
0
            xmlParserNsPop(ctxt, nsnr);
12401
0
    }
12402
12403
0
    ctxt->dict = oldDict;
12404
0
    ctxt->options = oldOptions;
12405
0
    ctxt->dictNames = oldDictNames;
12406
0
    ctxt->loadsubset = oldLoadSubset;
12407
0
    ctxt->myDoc = NULL;
12408
0
    ctxt->node = NULL;
12409
12410
0
exit:
12411
0
    xmlFreeInputStream(input);
12412
0
    return(list);
12413
0
}
12414
12415
/**
12416
 * xmlParseInNodeContext:
12417
 * @node:  the context node
12418
 * @data:  the input string
12419
 * @datalen:  the input string length in bytes
12420
 * @options:  a combination of xmlParserOption
12421
 * @listOut:  the return value for the set of parsed nodes
12422
 *
12423
 * Parse a well-balanced chunk of an XML document
12424
 * within the context (DTD, namespaces, etc ...) of the given node.
12425
 *
12426
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12427
 * the content production in the XML grammar:
12428
 *
12429
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12430
 *
12431
 * This function assumes the encoding of @node's document which is
12432
 * typically not what you want. A better alternative is
12433
 * xmlCtxtParseContent.
12434
 *
12435
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12436
 * error code otherwise
12437
 */
12438
xmlParserErrors
12439
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12440
0
                      int options, xmlNodePtr *listOut) {
12441
0
    xmlParserCtxtPtr ctxt;
12442
0
    xmlParserInputPtr input;
12443
0
    xmlDocPtr doc;
12444
0
    xmlNodePtr list;
12445
0
    xmlParserErrors ret;
12446
12447
0
    if (listOut == NULL)
12448
0
        return(XML_ERR_INTERNAL_ERROR);
12449
0
    *listOut = NULL;
12450
12451
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12452
0
        return(XML_ERR_INTERNAL_ERROR);
12453
12454
0
    doc = node->doc;
12455
0
    if (doc == NULL)
12456
0
        return(XML_ERR_INTERNAL_ERROR);
12457
12458
0
#ifdef LIBXML_HTML_ENABLED
12459
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12460
0
        ctxt = htmlNewParserCtxt();
12461
0
    }
12462
0
    else
12463
0
#endif
12464
0
        ctxt = xmlNewParserCtxt();
12465
12466
0
    if (ctxt == NULL)
12467
0
        return(XML_ERR_NO_MEMORY);
12468
12469
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12470
0
                                      (const char *) doc->encoding,
12471
0
                                      XML_INPUT_BUF_STATIC);
12472
0
    if (input == NULL) {
12473
0
        xmlFreeParserCtxt(ctxt);
12474
0
        return(XML_ERR_NO_MEMORY);
12475
0
    }
12476
12477
0
    xmlCtxtUseOptions(ctxt, options);
12478
12479
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12480
12481
0
    if (list == NULL) {
12482
0
        ret = ctxt->errNo;
12483
0
        if (ret == XML_ERR_ARGUMENT)
12484
0
            ret = XML_ERR_INTERNAL_ERROR;
12485
0
    } else {
12486
0
        ret = XML_ERR_OK;
12487
0
        *listOut = list;
12488
0
    }
12489
12490
0
    xmlFreeParserCtxt(ctxt);
12491
12492
0
    return(ret);
12493
0
}
12494
12495
#ifdef LIBXML_SAX1_ENABLED
12496
/**
12497
 * xmlParseBalancedChunkMemoryRecover:
12498
 * @doc:  the document the chunk pertains to (must not be NULL)
12499
 * @sax:  the SAX handler block (possibly NULL)
12500
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12501
 * @depth:  Used for loop detection, use 0
12502
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12503
 * @listOut:  the return value for the set of parsed nodes
12504
 * @recover: return nodes even if the data is broken (use 0)
12505
 *
12506
 * Parse a well-balanced chunk of an XML document
12507
 *
12508
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12509
 * the content production in the XML grammar:
12510
 *
12511
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12512
 *
12513
 * Returns 0 if the chunk is well balanced, or thehe parser error code
12514
 * otherwise.
12515
 *
12516
 * In case recover is set to 1, the nodelist will not be empty even if
12517
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12518
 * some extent.
12519
 */
12520
int
12521
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12522
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *listOut,
12523
0
     int recover) {
12524
0
    xmlParserCtxtPtr ctxt;
12525
0
    xmlParserInputPtr input;
12526
0
    xmlNodePtr list;
12527
0
    int ret;
12528
12529
0
    if (listOut != NULL)
12530
0
        *listOut = NULL;
12531
12532
0
    if (string == NULL)
12533
0
        return(XML_ERR_ARGUMENT);
12534
12535
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12536
0
    if (ctxt == NULL)
12537
0
        return(XML_ERR_NO_MEMORY);
12538
12539
0
    xmlCtxtInitializeLate(ctxt);
12540
12541
0
    ctxt->depth = depth;
12542
0
    ctxt->myDoc = doc;
12543
0
    if (recover) {
12544
0
        ctxt->options |= XML_PARSE_RECOVER;
12545
0
        ctxt->recovery = 1;
12546
0
    }
12547
12548
0
    input = xmlNewStringInputStream(ctxt, string);
12549
0
    if (input == NULL) {
12550
0
        ret = ctxt->errNo;
12551
0
        goto error;
12552
0
    }
12553
12554
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12555
0
    if (listOut != NULL)
12556
0
        *listOut = list;
12557
0
    else
12558
0
        xmlFreeNodeList(list);
12559
12560
0
    if (!ctxt->wellFormed)
12561
0
        ret = ctxt->errNo;
12562
0
    else
12563
0
        ret = XML_ERR_OK;
12564
12565
0
error:
12566
0
    xmlFreeInputStream(input);
12567
0
    xmlFreeParserCtxt(ctxt);
12568
0
    return(ret);
12569
0
}
12570
12571
/**
12572
 * xmlSAXParseEntity:
12573
 * @sax:  the SAX handler block
12574
 * @filename:  the filename
12575
 *
12576
 * DEPRECATED: Don't use.
12577
 *
12578
 * parse an XML external entity out of context and build a tree.
12579
 * It use the given SAX function block to handle the parsing callback.
12580
 * If sax is NULL, fallback to the default DOM tree building routines.
12581
 *
12582
 * [78] extParsedEnt ::= TextDecl? content
12583
 *
12584
 * This correspond to a "Well Balanced" chunk
12585
 *
12586
 * Returns the resulting document tree
12587
 */
12588
12589
xmlDocPtr
12590
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12591
0
    xmlDocPtr ret;
12592
0
    xmlParserCtxtPtr ctxt;
12593
12594
0
    ctxt = xmlCreateFileParserCtxt(filename);
12595
0
    if (ctxt == NULL) {
12596
0
  return(NULL);
12597
0
    }
12598
0
    if (sax != NULL) {
12599
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12600
0
            *ctxt->sax = *sax;
12601
0
        } else {
12602
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12603
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12604
0
        }
12605
0
        ctxt->userData = NULL;
12606
0
    }
12607
12608
0
    xmlParseExtParsedEnt(ctxt);
12609
12610
0
    if (ctxt->wellFormed) {
12611
0
  ret = ctxt->myDoc;
12612
0
    } else {
12613
0
        ret = NULL;
12614
0
        xmlFreeDoc(ctxt->myDoc);
12615
0
    }
12616
12617
0
    xmlFreeParserCtxt(ctxt);
12618
12619
0
    return(ret);
12620
0
}
12621
12622
/**
12623
 * xmlParseEntity:
12624
 * @filename:  the filename
12625
 *
12626
 * parse an XML external entity out of context and build a tree.
12627
 *
12628
 * [78] extParsedEnt ::= TextDecl? content
12629
 *
12630
 * This correspond to a "Well Balanced" chunk
12631
 *
12632
 * Returns the resulting document tree
12633
 */
12634
12635
xmlDocPtr
12636
0
xmlParseEntity(const char *filename) {
12637
0
    return(xmlSAXParseEntity(NULL, filename));
12638
0
}
12639
#endif /* LIBXML_SAX1_ENABLED */
12640
12641
/**
12642
 * xmlCreateEntityParserCtxt:
12643
 * @URL:  the entity URL
12644
 * @ID:  the entity PUBLIC ID
12645
 * @base:  a possible base for the target URI
12646
 *
12647
 * DEPRECATED: Don't use.
12648
 *
12649
 * Create a parser context for an external entity
12650
 * Automatic support for ZLIB/Compress compressed document is provided
12651
 * by default if found at compile-time.
12652
 *
12653
 * Returns the new parser context or NULL
12654
 */
12655
xmlParserCtxtPtr
12656
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12657
0
                    const xmlChar *base) {
12658
0
    xmlParserCtxtPtr ctxt;
12659
0
    xmlParserInputPtr input;
12660
0
    xmlChar *uri = NULL;
12661
12662
0
    ctxt = xmlNewParserCtxt();
12663
0
    if (ctxt == NULL)
12664
0
  return(NULL);
12665
12666
0
    if (base != NULL) {
12667
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12668
0
            goto error;
12669
0
        if (uri != NULL)
12670
0
            URL = uri;
12671
0
    }
12672
12673
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12674
0
                            XML_RESOURCE_UNKNOWN);
12675
0
    if (input == NULL)
12676
0
        goto error;
12677
12678
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12679
0
        xmlFreeInputStream(input);
12680
0
        goto error;
12681
0
    }
12682
12683
0
    xmlFree(uri);
12684
0
    return(ctxt);
12685
12686
0
error:
12687
0
    xmlFree(uri);
12688
0
    xmlFreeParserCtxt(ctxt);
12689
0
    return(NULL);
12690
0
}
12691
12692
/************************************************************************
12693
 *                  *
12694
 *    Front ends when parsing from a file     *
12695
 *                  *
12696
 ************************************************************************/
12697
12698
/**
12699
 * xmlCreateURLParserCtxt:
12700
 * @filename:  the filename or URL
12701
 * @options:  a combination of xmlParserOption
12702
 *
12703
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12704
 *
12705
 * Create a parser context for a file or URL content.
12706
 * Automatic support for ZLIB/Compress compressed document is provided
12707
 * by default if found at compile-time and for file accesses
12708
 *
12709
 * Returns the new parser context or NULL
12710
 */
12711
xmlParserCtxtPtr
12712
xmlCreateURLParserCtxt(const char *filename, int options)
12713
0
{
12714
0
    xmlParserCtxtPtr ctxt;
12715
0
    xmlParserInputPtr input;
12716
12717
0
    ctxt = xmlNewParserCtxt();
12718
0
    if (ctxt == NULL)
12719
0
  return(NULL);
12720
12721
0
    options |= XML_PARSE_UNZIP;
12722
12723
0
    xmlCtxtUseOptions(ctxt, options);
12724
0
    ctxt->linenumbers = 1;
12725
12726
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12727
0
    if (input == NULL) {
12728
0
  xmlFreeParserCtxt(ctxt);
12729
0
  return(NULL);
12730
0
    }
12731
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12732
0
        xmlFreeInputStream(input);
12733
0
        xmlFreeParserCtxt(ctxt);
12734
0
        return(NULL);
12735
0
    }
12736
12737
0
    return(ctxt);
12738
0
}
12739
12740
/**
12741
 * xmlCreateFileParserCtxt:
12742
 * @filename:  the filename
12743
 *
12744
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12745
 *
12746
 * Create a parser context for a file content.
12747
 * Automatic support for ZLIB/Compress compressed document is provided
12748
 * by default if found at compile-time.
12749
 *
12750
 * Returns the new parser context or NULL
12751
 */
12752
xmlParserCtxtPtr
12753
xmlCreateFileParserCtxt(const char *filename)
12754
0
{
12755
0
    return(xmlCreateURLParserCtxt(filename, 0));
12756
0
}
12757
12758
#ifdef LIBXML_SAX1_ENABLED
12759
/**
12760
 * xmlSAXParseFileWithData:
12761
 * @sax:  the SAX handler block
12762
 * @filename:  the filename
12763
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12764
 *             documents
12765
 * @data:  the userdata
12766
 *
12767
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12768
 *
12769
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12770
 * compressed document is provided by default if found at compile-time.
12771
 * It use the given SAX function block to handle the parsing callback.
12772
 * If sax is NULL, fallback to the default DOM tree building routines.
12773
 *
12774
 * User data (void *) is stored within the parser context in the
12775
 * context's _private member, so it is available nearly everywhere in libxml
12776
 *
12777
 * Returns the resulting document tree
12778
 */
12779
12780
xmlDocPtr
12781
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12782
0
                        int recovery, void *data) {
12783
0
    xmlDocPtr ret = NULL;
12784
0
    xmlParserCtxtPtr ctxt;
12785
0
    xmlParserInputPtr input;
12786
12787
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12788
0
    if (ctxt == NULL)
12789
0
  return(NULL);
12790
12791
0
    if (data != NULL)
12792
0
  ctxt->_private = data;
12793
12794
0
    if (recovery) {
12795
0
        ctxt->options |= XML_PARSE_RECOVER;
12796
0
        ctxt->recovery = 1;
12797
0
    }
12798
12799
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12800
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12801
0
    else
12802
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12803
12804
0
    if (input != NULL)
12805
0
        ret = xmlCtxtParseDocument(ctxt, input);
12806
12807
0
    xmlFreeParserCtxt(ctxt);
12808
0
    return(ret);
12809
0
}
12810
12811
/**
12812
 * xmlSAXParseFile:
12813
 * @sax:  the SAX handler block
12814
 * @filename:  the filename
12815
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12816
 *             documents
12817
 *
12818
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12819
 *
12820
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12821
 * compressed document is provided by default if found at compile-time.
12822
 * It use the given SAX function block to handle the parsing callback.
12823
 * If sax is NULL, fallback to the default DOM tree building routines.
12824
 *
12825
 * Returns the resulting document tree
12826
 */
12827
12828
xmlDocPtr
12829
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12830
0
                          int recovery) {
12831
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12832
0
}
12833
12834
/**
12835
 * xmlRecoverDoc:
12836
 * @cur:  a pointer to an array of xmlChar
12837
 *
12838
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
12839
 *
12840
 * parse an XML in-memory document and build a tree.
12841
 * In the case the document is not Well Formed, a attempt to build a
12842
 * tree is tried anyway
12843
 *
12844
 * Returns the resulting document tree or NULL in case of failure
12845
 */
12846
12847
xmlDocPtr
12848
0
xmlRecoverDoc(const xmlChar *cur) {
12849
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12850
0
}
12851
12852
/**
12853
 * xmlParseFile:
12854
 * @filename:  the filename
12855
 *
12856
 * DEPRECATED: Use xmlReadFile.
12857
 *
12858
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12859
 * compressed document is provided by default if found at compile-time.
12860
 *
12861
 * Returns the resulting document tree if the file was wellformed,
12862
 * NULL otherwise.
12863
 */
12864
12865
xmlDocPtr
12866
0
xmlParseFile(const char *filename) {
12867
0
    return(xmlSAXParseFile(NULL, filename, 0));
12868
0
}
12869
12870
/**
12871
 * xmlRecoverFile:
12872
 * @filename:  the filename
12873
 *
12874
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
12875
 *
12876
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12877
 * compressed document is provided by default if found at compile-time.
12878
 * In the case the document is not Well Formed, it attempts to build
12879
 * a tree anyway
12880
 *
12881
 * Returns the resulting document tree or NULL in case of failure
12882
 */
12883
12884
xmlDocPtr
12885
0
xmlRecoverFile(const char *filename) {
12886
0
    return(xmlSAXParseFile(NULL, filename, 1));
12887
0
}
12888
12889
12890
/**
12891
 * xmlSetupParserForBuffer:
12892
 * @ctxt:  an XML parser context
12893
 * @buffer:  a xmlChar * buffer
12894
 * @filename:  a file name
12895
 *
12896
 * DEPRECATED: Don't use.
12897
 *
12898
 * Setup the parser context to parse a new buffer; Clears any prior
12899
 * contents from the parser context. The buffer parameter must not be
12900
 * NULL, but the filename parameter can be
12901
 */
12902
void
12903
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12904
                             const char* filename)
12905
0
{
12906
0
    xmlParserInputPtr input;
12907
12908
0
    if ((ctxt == NULL) || (buffer == NULL))
12909
0
        return;
12910
12911
0
    xmlClearParserCtxt(ctxt);
12912
12913
0
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12914
0
                                      NULL, 0);
12915
0
    if (input == NULL)
12916
0
        return;
12917
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
12918
0
        xmlFreeInputStream(input);
12919
0
}
12920
12921
/**
12922
 * xmlSAXUserParseFile:
12923
 * @sax:  a SAX handler
12924
 * @user_data:  The user data returned on SAX callbacks
12925
 * @filename:  a file name
12926
 *
12927
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12928
 *
12929
 * parse an XML file and call the given SAX handler routines.
12930
 * Automatic support for ZLIB/Compress compressed document is provided
12931
 *
12932
 * Returns 0 in case of success or a error number otherwise
12933
 */
12934
int
12935
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12936
0
                    const char *filename) {
12937
0
    int ret = 0;
12938
0
    xmlParserCtxtPtr ctxt;
12939
12940
0
    ctxt = xmlCreateFileParserCtxt(filename);
12941
0
    if (ctxt == NULL) return -1;
12942
0
    if (sax != NULL) {
12943
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12944
0
            *ctxt->sax = *sax;
12945
0
        } else {
12946
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12947
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12948
0
        }
12949
0
  ctxt->userData = user_data;
12950
0
    }
12951
12952
0
    xmlParseDocument(ctxt);
12953
12954
0
    if (ctxt->wellFormed)
12955
0
  ret = 0;
12956
0
    else {
12957
0
        if (ctxt->errNo != 0)
12958
0
      ret = ctxt->errNo;
12959
0
  else
12960
0
      ret = -1;
12961
0
    }
12962
0
    if (ctxt->myDoc != NULL) {
12963
0
        xmlFreeDoc(ctxt->myDoc);
12964
0
  ctxt->myDoc = NULL;
12965
0
    }
12966
0
    xmlFreeParserCtxt(ctxt);
12967
12968
0
    return ret;
12969
0
}
12970
#endif /* LIBXML_SAX1_ENABLED */
12971
12972
/************************************************************************
12973
 *                  *
12974
 *    Front ends when parsing from memory     *
12975
 *                  *
12976
 ************************************************************************/
12977
12978
/**
12979
 * xmlCreateMemoryParserCtxt:
12980
 * @buffer:  a pointer to a char array
12981
 * @size:  the size of the array
12982
 *
12983
 * Create a parser context for an XML in-memory document. The input buffer
12984
 * must not contain a terminating null byte.
12985
 *
12986
 * Returns the new parser context or NULL
12987
 */
12988
xmlParserCtxtPtr
12989
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12990
0
    xmlParserCtxtPtr ctxt;
12991
0
    xmlParserInputPtr input;
12992
12993
0
    if (size < 0)
12994
0
  return(NULL);
12995
12996
0
    ctxt = xmlNewParserCtxt();
12997
0
    if (ctxt == NULL)
12998
0
  return(NULL);
12999
13000
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
13001
0
    if (input == NULL) {
13002
0
  xmlFreeParserCtxt(ctxt);
13003
0
  return(NULL);
13004
0
    }
13005
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13006
0
        xmlFreeInputStream(input);
13007
0
        xmlFreeParserCtxt(ctxt);
13008
0
        return(NULL);
13009
0
    }
13010
13011
0
    return(ctxt);
13012
0
}
13013
13014
#ifdef LIBXML_SAX1_ENABLED
13015
/**
13016
 * xmlSAXParseMemoryWithData:
13017
 * @sax:  the SAX handler block
13018
 * @buffer:  an pointer to a char array
13019
 * @size:  the size of the array
13020
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13021
 *             documents
13022
 * @data:  the userdata
13023
 *
13024
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13025
 *
13026
 * parse an XML in-memory block and use the given SAX function block
13027
 * to handle the parsing callback. If sax is NULL, fallback to the default
13028
 * DOM tree building routines.
13029
 *
13030
 * User data (void *) is stored within the parser context in the
13031
 * context's _private member, so it is available nearly everywhere in libxml
13032
 *
13033
 * Returns the resulting document tree
13034
 */
13035
13036
xmlDocPtr
13037
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13038
0
                          int size, int recovery, void *data) {
13039
0
    xmlDocPtr ret = NULL;
13040
0
    xmlParserCtxtPtr ctxt;
13041
0
    xmlParserInputPtr input;
13042
13043
0
    if (size < 0)
13044
0
        return(NULL);
13045
13046
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
13047
0
    if (ctxt == NULL)
13048
0
        return(NULL);
13049
13050
0
    if (data != NULL)
13051
0
  ctxt->_private=data;
13052
13053
0
    if (recovery) {
13054
0
        ctxt->options |= XML_PARSE_RECOVER;
13055
0
        ctxt->recovery = 1;
13056
0
    }
13057
13058
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
13059
0
                                      XML_INPUT_BUF_STATIC);
13060
13061
0
    if (input != NULL)
13062
0
        ret = xmlCtxtParseDocument(ctxt, input);
13063
13064
0
    xmlFreeParserCtxt(ctxt);
13065
0
    return(ret);
13066
0
}
13067
13068
/**
13069
 * xmlSAXParseMemory:
13070
 * @sax:  the SAX handler block
13071
 * @buffer:  an pointer to a char array
13072
 * @size:  the size of the array
13073
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13074
 *             documents
13075
 *
13076
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13077
 *
13078
 * parse an XML in-memory block and use the given SAX function block
13079
 * to handle the parsing callback. If sax is NULL, fallback to the default
13080
 * DOM tree building routines.
13081
 *
13082
 * Returns the resulting document tree
13083
 */
13084
xmlDocPtr
13085
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13086
0
            int size, int recovery) {
13087
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13088
0
}
13089
13090
/**
13091
 * xmlParseMemory:
13092
 * @buffer:  an pointer to a char array
13093
 * @size:  the size of the array
13094
 *
13095
 * DEPRECATED: Use xmlReadMemory.
13096
 *
13097
 * parse an XML in-memory block and build a tree.
13098
 *
13099
 * Returns the resulting document tree
13100
 */
13101
13102
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13103
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
13104
0
}
13105
13106
/**
13107
 * xmlRecoverMemory:
13108
 * @buffer:  an pointer to a char array
13109
 * @size:  the size of the array
13110
 *
13111
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13112
 *
13113
 * parse an XML in-memory block and build a tree.
13114
 * In the case the document is not Well Formed, an attempt to
13115
 * build a tree is tried anyway
13116
 *
13117
 * Returns the resulting document tree or NULL in case of error
13118
 */
13119
13120
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13121
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
13122
0
}
13123
13124
/**
13125
 * xmlSAXUserParseMemory:
13126
 * @sax:  a SAX handler
13127
 * @user_data:  The user data returned on SAX callbacks
13128
 * @buffer:  an in-memory XML document input
13129
 * @size:  the length of the XML document in bytes
13130
 *
13131
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13132
 *
13133
 * parse an XML in-memory buffer and call the given SAX handler routines.
13134
 *
13135
 * Returns 0 in case of success or a error number otherwise
13136
 */
13137
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13138
0
        const char *buffer, int size) {
13139
0
    int ret = 0;
13140
0
    xmlParserCtxtPtr ctxt;
13141
13142
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13143
0
    if (ctxt == NULL) return -1;
13144
0
    if (sax != NULL) {
13145
0
        if (sax->initialized == XML_SAX2_MAGIC) {
13146
0
            *ctxt->sax = *sax;
13147
0
        } else {
13148
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
13149
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
13150
0
        }
13151
0
  ctxt->userData = user_data;
13152
0
    }
13153
13154
0
    xmlParseDocument(ctxt);
13155
13156
0
    if (ctxt->wellFormed)
13157
0
  ret = 0;
13158
0
    else {
13159
0
        if (ctxt->errNo != 0)
13160
0
      ret = ctxt->errNo;
13161
0
  else
13162
0
      ret = -1;
13163
0
    }
13164
0
    if (ctxt->myDoc != NULL) {
13165
0
        xmlFreeDoc(ctxt->myDoc);
13166
0
  ctxt->myDoc = NULL;
13167
0
    }
13168
0
    xmlFreeParserCtxt(ctxt);
13169
13170
0
    return ret;
13171
0
}
13172
#endif /* LIBXML_SAX1_ENABLED */
13173
13174
/**
13175
 * xmlCreateDocParserCtxt:
13176
 * @str:  a pointer to an array of xmlChar
13177
 *
13178
 * Creates a parser context for an XML in-memory document.
13179
 *
13180
 * Returns the new parser context or NULL
13181
 */
13182
xmlParserCtxtPtr
13183
0
xmlCreateDocParserCtxt(const xmlChar *str) {
13184
0
    xmlParserCtxtPtr ctxt;
13185
0
    xmlParserInputPtr input;
13186
13187
0
    ctxt = xmlNewParserCtxt();
13188
0
    if (ctxt == NULL)
13189
0
  return(NULL);
13190
13191
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
13192
0
    if (input == NULL) {
13193
0
  xmlFreeParserCtxt(ctxt);
13194
0
  return(NULL);
13195
0
    }
13196
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13197
0
        xmlFreeInputStream(input);
13198
0
        xmlFreeParserCtxt(ctxt);
13199
0
        return(NULL);
13200
0
    }
13201
13202
0
    return(ctxt);
13203
0
}
13204
13205
#ifdef LIBXML_SAX1_ENABLED
13206
/**
13207
 * xmlSAXParseDoc:
13208
 * @sax:  the SAX handler block
13209
 * @cur:  a pointer to an array of xmlChar
13210
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13211
 *             documents
13212
 *
13213
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13214
 *
13215
 * parse an XML in-memory document and build a tree.
13216
 * It use the given SAX function block to handle the parsing callback.
13217
 * If sax is NULL, fallback to the default DOM tree building routines.
13218
 *
13219
 * Returns the resulting document tree
13220
 */
13221
13222
xmlDocPtr
13223
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13224
0
    xmlDocPtr ret;
13225
0
    xmlParserCtxtPtr ctxt;
13226
0
    xmlSAXHandlerPtr oldsax = NULL;
13227
13228
0
    if (cur == NULL) return(NULL);
13229
13230
13231
0
    ctxt = xmlCreateDocParserCtxt(cur);
13232
0
    if (ctxt == NULL) return(NULL);
13233
0
    if (sax != NULL) {
13234
0
        oldsax = ctxt->sax;
13235
0
        ctxt->sax = sax;
13236
0
        ctxt->userData = NULL;
13237
0
    }
13238
13239
0
    xmlParseDocument(ctxt);
13240
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13241
0
    else {
13242
0
       ret = NULL;
13243
0
       xmlFreeDoc(ctxt->myDoc);
13244
0
       ctxt->myDoc = NULL;
13245
0
    }
13246
0
    if (sax != NULL)
13247
0
  ctxt->sax = oldsax;
13248
0
    xmlFreeParserCtxt(ctxt);
13249
13250
0
    return(ret);
13251
0
}
13252
13253
/**
13254
 * xmlParseDoc:
13255
 * @cur:  a pointer to an array of xmlChar
13256
 *
13257
 * DEPRECATED: Use xmlReadDoc.
13258
 *
13259
 * parse an XML in-memory document and build a tree.
13260
 *
13261
 * Returns the resulting document tree
13262
 */
13263
13264
xmlDocPtr
13265
0
xmlParseDoc(const xmlChar *cur) {
13266
0
    return(xmlSAXParseDoc(NULL, cur, 0));
13267
0
}
13268
#endif /* LIBXML_SAX1_ENABLED */
13269
13270
/************************************************************************
13271
 *                  *
13272
 *  New set (2.6.0) of simpler and more flexible APIs   *
13273
 *                  *
13274
 ************************************************************************/
13275
13276
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded; a NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro expands
 * to exactly one statement and can be followed by a semicolon in any
 * context, including an unbraced if/else branch.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
13287
13288
/**
13289
 * xmlCtxtReset:
13290
 * @ctxt: an XML parser context
13291
 *
13292
 * Reset a parser context
13293
 */
13294
void
13295
xmlCtxtReset(xmlParserCtxtPtr ctxt)
13296
4.89k
{
13297
4.89k
    xmlParserInputPtr input;
13298
4.89k
    xmlDictPtr dict;
13299
13300
4.89k
    if (ctxt == NULL)
13301
0
        return;
13302
13303
4.89k
    dict = ctxt->dict;
13304
13305
4.89k
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
13306
0
        xmlFreeInputStream(input);
13307
0
    }
13308
4.89k
    ctxt->inputNr = 0;
13309
4.89k
    ctxt->input = NULL;
13310
13311
4.89k
    ctxt->spaceNr = 0;
13312
4.89k
    if (ctxt->spaceTab != NULL) {
13313
4.89k
  ctxt->spaceTab[0] = -1;
13314
4.89k
  ctxt->space = &ctxt->spaceTab[0];
13315
4.89k
    } else {
13316
0
        ctxt->space = NULL;
13317
0
    }
13318
13319
13320
4.89k
    ctxt->nodeNr = 0;
13321
4.89k
    ctxt->node = NULL;
13322
13323
4.89k
    ctxt->nameNr = 0;
13324
4.89k
    ctxt->name = NULL;
13325
13326
4.89k
    ctxt->nsNr = 0;
13327
4.89k
    xmlParserNsReset(ctxt->nsdb);
13328
13329
4.89k
    DICT_FREE(ctxt->version);
13330
4.89k
    ctxt->version = NULL;
13331
4.89k
    DICT_FREE(ctxt->encoding);
13332
4.89k
    ctxt->encoding = NULL;
13333
4.89k
    DICT_FREE(ctxt->extSubURI);
13334
4.89k
    ctxt->extSubURI = NULL;
13335
4.89k
    DICT_FREE(ctxt->extSubSystem);
13336
4.89k
    ctxt->extSubSystem = NULL;
13337
13338
4.89k
    if (ctxt->directory != NULL) {
13339
0
        xmlFree(ctxt->directory);
13340
0
        ctxt->directory = NULL;
13341
0
    }
13342
13343
4.89k
    if (ctxt->myDoc != NULL)
13344
0
        xmlFreeDoc(ctxt->myDoc);
13345
4.89k
    ctxt->myDoc = NULL;
13346
13347
4.89k
    ctxt->standalone = -1;
13348
4.89k
    ctxt->hasExternalSubset = 0;
13349
4.89k
    ctxt->hasPErefs = 0;
13350
4.89k
    ctxt->html = 0;
13351
4.89k
    ctxt->instate = XML_PARSER_START;
13352
13353
4.89k
    ctxt->wellFormed = 1;
13354
4.89k
    ctxt->nsWellFormed = 1;
13355
4.89k
    ctxt->disableSAX = 0;
13356
4.89k
    ctxt->valid = 1;
13357
4.89k
    ctxt->record_info = 0;
13358
4.89k
    ctxt->checkIndex = 0;
13359
4.89k
    ctxt->endCheckState = 0;
13360
4.89k
    ctxt->inSubset = 0;
13361
4.89k
    ctxt->errNo = XML_ERR_OK;
13362
4.89k
    ctxt->depth = 0;
13363
4.89k
    ctxt->catalogs = NULL;
13364
4.89k
    ctxt->sizeentities = 0;
13365
4.89k
    ctxt->sizeentcopy = 0;
13366
4.89k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13367
13368
4.89k
    if (ctxt->attsDefault != NULL) {
13369
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13370
0
        ctxt->attsDefault = NULL;
13371
0
    }
13372
4.89k
    if (ctxt->attsSpecial != NULL) {
13373
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13374
0
        ctxt->attsSpecial = NULL;
13375
0
    }
13376
13377
4.89k
#ifdef LIBXML_CATALOG_ENABLED
13378
4.89k
    if (ctxt->catalogs != NULL)
13379
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13380
4.89k
#endif
13381
4.89k
    ctxt->nbErrors = 0;
13382
4.89k
    ctxt->nbWarnings = 0;
13383
4.89k
    if (ctxt->lastError.code != XML_ERR_OK)
13384
0
        xmlResetError(&ctxt->lastError);
13385
4.89k
}
13386
13387
/**
13388
 * xmlCtxtResetPush:
13389
 * @ctxt: an XML parser context
13390
 * @chunk:  a pointer to an array of chars
13391
 * @size:  number of chars in the array
13392
 * @filename:  an optional file name or URI
13393
 * @encoding:  the document encoding, or NULL
13394
 *
13395
 * Reset a push parser context
13396
 *
13397
 * Returns 0 in case of success and 1 in case of error
13398
 */
13399
int
13400
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13401
                 int size, const char *filename, const char *encoding)
13402
0
{
13403
0
    xmlParserInputPtr input;
13404
13405
0
    if (ctxt == NULL)
13406
0
        return(1);
13407
13408
0
    xmlCtxtReset(ctxt);
13409
13410
0
    input = xmlNewPushInput(filename, chunk, size);
13411
0
    if (input == NULL)
13412
0
        return(1);
13413
13414
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13415
0
        xmlFreeInputStream(input);
13416
0
        return(1);
13417
0
    }
13418
13419
0
    if (encoding != NULL)
13420
0
        xmlSwitchEncodingName(ctxt, encoding);
13421
13422
0
    return(0);
13423
0
}
13424
13425
static int
13426
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13427
217k
{
13428
217k
    int allMask;
13429
13430
217k
    if (ctxt == NULL)
13431
0
        return(-1);
13432
13433
    /*
13434
     * XInclude options aren't handled by the parser.
13435
     *
13436
     * XML_PARSE_XINCLUDE
13437
     * XML_PARSE_NOXINCNODE
13438
     * XML_PARSE_NOBASEFIX
13439
     */
13440
217k
    allMask = XML_PARSE_RECOVER |
13441
217k
              XML_PARSE_NOENT |
13442
217k
              XML_PARSE_DTDLOAD |
13443
217k
              XML_PARSE_DTDATTR |
13444
217k
              XML_PARSE_DTDVALID |
13445
217k
              XML_PARSE_NOERROR |
13446
217k
              XML_PARSE_NOWARNING |
13447
217k
              XML_PARSE_PEDANTIC |
13448
217k
              XML_PARSE_NOBLANKS |
13449
217k
#ifdef LIBXML_SAX1_ENABLED
13450
217k
              XML_PARSE_SAX1 |
13451
217k
#endif
13452
217k
              XML_PARSE_NONET |
13453
217k
              XML_PARSE_NODICT |
13454
217k
              XML_PARSE_NSCLEAN |
13455
217k
              XML_PARSE_NOCDATA |
13456
217k
              XML_PARSE_COMPACT |
13457
217k
              XML_PARSE_OLD10 |
13458
217k
              XML_PARSE_HUGE |
13459
217k
              XML_PARSE_OLDSAX |
13460
217k
              XML_PARSE_IGNORE_ENC |
13461
217k
              XML_PARSE_BIG_LINES |
13462
217k
              XML_PARSE_NO_XXE |
13463
217k
              XML_PARSE_UNZIP |
13464
217k
              XML_PARSE_NO_SYS_CATALOG |
13465
217k
              XML_PARSE_CATALOG_PI;
13466
13467
217k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13468
13469
    /*
13470
     * For some options, struct members are historically the source
13471
     * of truth. The values are initalized from global variables and
13472
     * old code could also modify them directly. Several older API
13473
     * functions that don't take an options argument rely on these
13474
     * deprecated mechanisms.
13475
     *
13476
     * Once public access to struct members and the globals are
13477
     * disabled, we can use the options bitmask as source of
13478
     * truth, making all these struct members obsolete.
13479
     *
13480
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13481
     * loading of the external subset.
13482
     */
13483
217k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13484
217k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13485
217k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13486
217k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13487
217k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13488
217k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13489
217k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13490
217k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13491
13492
217k
    if (options & XML_PARSE_HUGE) {
13493
212k
        if (ctxt->dict != NULL)
13494
212k
            xmlDictSetLimit(ctxt->dict, 0);
13495
212k
    }
13496
13497
217k
    ctxt->linenumbers = 1;
13498
13499
217k
    return(options & ~allMask);
13500
217k
}
13501
13502
/**
13503
 * xmlCtxtSetOptions:
13504
 * @ctxt: an XML parser context
13505
 * @options:  a bitmask of xmlParserOption values
13506
 *
13507
 * Applies the options to the parser context. Unset options are
13508
 * cleared.
13509
 *
13510
 * Available since 2.13.0. With older versions, you can use
13511
 * xmlCtxtUseOptions.
13512
 *
13513
 * XML_PARSE_RECOVER
13514
 *
13515
 * Enable "recovery" mode which allows non-wellformed documents.
13516
 * How this mode behaves exactly is unspecified and may change
13517
 * without further notice. Use of this feature is DISCOURAGED.
13518
 *
13519
 * Not supported by the push parser.
13520
 *
13521
 * XML_PARSE_NOENT
13522
 *
13523
 * Despite the confusing name, this option enables substitution
13524
 * of entities. The resulting tree won't contain any entity
13525
 * reference nodes.
13526
 *
13527
 * This option also enables loading of external entities (both
13528
 * general and parameter entities) which is dangerous. If you
13529
 * process untrusted data, it's recommended to set the
13530
 * XML_PARSE_NO_XXE option to disable loading of external
13531
 * entities.
13532
 *
13533
 * XML_PARSE_DTDLOAD
13534
 *
13535
 * Enables loading of an external DTD and the loading and
13536
 * substitution of external parameter entities. Has no effect
13537
 * if XML_PARSE_NO_XXE is set.
13538
 *
13539
 * XML_PARSE_DTDATTR
13540
 *
13541
 * Adds default attributes from the DTD to the result document.
13542
 *
13543
 * Implies XML_PARSE_DTDLOAD, but loading of external content
13544
 * can be disabled with XML_PARSE_NO_XXE.
13545
 *
13546
 * XML_PARSE_DTDVALID
13547
 *
13548
 * This option enables DTD validation which requires to load
13549
 * external DTDs and external entities (both general and
13550
 * parameter entities) unless XML_PARSE_NO_XXE was set.
13551
 *
13552
 * XML_PARSE_NO_XXE
13553
 *
13554
 * Disables loading of external DTDs or entities.
13555
 *
13556
 * Available since 2.13.0.
13557
 *
13558
 * XML_PARSE_NOERROR
13559
 *
13560
 * Disable error and warning reports to the error handlers.
13561
 * Errors are still accessible with xmlCtxtGetLastError.
13562
 *
13563
 * XML_PARSE_NOWARNING
13564
 *
13565
 * Disable warning reports.
13566
 *
13567
 * XML_PARSE_PEDANTIC
13568
 *
13569
 * Enable some pedantic warnings.
13570
 *
13571
 * XML_PARSE_NOBLANKS
13572
 *
13573
 * Remove some whitespace from the result document. Where to
13574
 * remove whitespace depends on DTD element declarations or a
13575
 * broken heuristic with unfixable bugs. Use of this option is
13576
 * DISCOURAGED.
13577
 *
13578
 * Not supported by the push parser.
13579
 *
13580
 * XML_PARSE_SAX1
13581
 *
13582
 * Always invoke the deprecated SAX1 startElement and endElement
13583
 * handlers. This option is DEPRECATED.
13584
 *
13585
 * XML_PARSE_NONET
13586
 *
13587
 * Disable network access with the builtin HTTP client.
13588
 *
13589
 * XML_PARSE_NODICT
13590
 *
13591
 * Create a document without interned strings, making all
13592
 * strings separate memory allocations.
13593
 *
13594
 * XML_PARSE_NSCLEAN
13595
 *
13596
 * Remove redundant namespace declarations from the result
13597
 * document.
13598
 *
13599
 * XML_PARSE_NOCDATA
13600
 *
13601
 * Output normal text nodes instead of CDATA nodes.
13602
 *
13603
 * XML_PARSE_COMPACT
13604
 *
13605
 * Store small strings directly in the node struct to save
13606
 * memory.
13607
 *
13608
 * XML_PARSE_OLD10
13609
 *
13610
 * Use old Name productions from before XML 1.0 Fifth Edition.
13611
 * This option is DEPRECATED.
13612
 *
13613
 * XML_PARSE_HUGE
13614
 *
13615
 * Relax some internal limits.
13616
 *
13617
 * Maximum size of text nodes, tags, comments, processing instructions,
13618
 * CDATA sections, entity values
13619
 *
13620
 * normal: 10M
13621
 * huge:    1B
13622
 *
13623
 * Maximum size of names, system literals, pubid literals
13624
 *
13625
 * normal: 50K
13626
 * huge:   10M
13627
 *
13628
 * Maximum nesting depth of elements
13629
 *
13630
 * normal:  256
13631
 * huge:   2048
13632
 *
13633
 * Maximum nesting depth of entities
13634
 *
13635
 * normal: 20
13636
 * huge:   40
13637
 *
13638
 * XML_PARSE_OLDSAX
13639
 *
13640
 * Enable an unspecified legacy mode for SAX parsers. This
13641
 * option is DEPRECATED.
13642
 *
13643
 * XML_PARSE_IGNORE_ENC
13644
 *
13645
 * Ignore the encoding in the XML declaration. This option is
13646
 * mostly unneeded these days. The only effect is to enforce
13647
 * UTF-8 decoding of ASCII-like data.
13648
 *
13649
 * XML_PARSE_BIG_LINES
13650
 *
13651
 * Enable reporting of line numbers larger than 65535.
13652
 *
13653
 * XML_PARSE_UNZIP
13654
 *
13655
 * Enable input decompression. Setting this option is discouraged
13656
 * to avoid zip bombs.
13657
 *
13658
 * Available since 2.14.0.
13659
 *
13660
 * XML_PARSE_NO_SYS_CATALOG
13661
 *
13662
 * Disables the global system XML catalog.
13663
 *
13664
 * Available since 2.14.0.
13665
 *
13666
 * XML_PARSE_CATALOG_PI
13667
 *
13668
 * Enable XML catalog processing instructions.
13669
 *
13670
 * Available since 2.14.0.
13671
 *
13672
 * Returns 0 in case of success, the set of unknown or unimplemented options
13673
 *         in case of error.
13674
 */
13675
int
13676
xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
13677
0
{
13678
0
#ifdef LIBXML_HTML_ENABLED
13679
0
    if ((ctxt != NULL) && (ctxt->html))
13680
0
        return(htmlCtxtSetOptions(ctxt, options));
13681
0
#endif
13682
13683
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13684
0
}
13685
13686
/**
13687
 * xmlCtxtGetOptions:
13688
 * @ctxt: an XML parser context
13689
 *
13690
 * Get the current options of the parser context.
13691
 *
13692
 * Available since 2.14.0.
13693
 *
13694
 * Returns the current options set in the parser context, or -1 if ctxt is NULL.
13695
 */
13696
int
13697
xmlCtxtGetOptions(xmlParserCtxtPtr ctxt)
13698
0
{
13699
0
    if (ctxt == NULL)
13700
0
        return(-1);
13701
13702
0
    return(ctxt->options);
13703
0
}
13704
13705
/**
13706
 * xmlCtxtUseOptions:
13707
 * @ctxt: an XML parser context
13708
 * @options:  a combination of xmlParserOption
13709
 *
13710
 * DEPRECATED: Use xmlCtxtSetOptions.
13711
 *
13712
 * Applies the options to the parser context. The following options
13713
 * are never cleared and can only be enabled:
13714
 *
13715
 * XML_PARSE_NOERROR
13716
 * XML_PARSE_NOWARNING
13717
 * XML_PARSE_NONET
13718
 * XML_PARSE_NSCLEAN
13719
 * XML_PARSE_NOCDATA
13720
 * XML_PARSE_COMPACT
13721
 * XML_PARSE_OLD10
13722
 * XML_PARSE_HUGE
13723
 * XML_PARSE_OLDSAX
13724
 * XML_PARSE_IGNORE_ENC
13725
 * XML_PARSE_BIG_LINES
13726
 *
13727
 * Returns 0 in case of success, the set of unknown or unimplemented options
13728
 *         in case of error.
13729
 */
13730
int
13731
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13732
217k
{
13733
217k
    int keepMask;
13734
13735
217k
#ifdef LIBXML_HTML_ENABLED
13736
217k
    if ((ctxt != NULL) && (ctxt->html))
13737
0
        return(htmlCtxtUseOptions(ctxt, options));
13738
217k
#endif
13739
13740
    /*
13741
     * For historic reasons, some options can only be enabled.
13742
     */
13743
217k
    keepMask = XML_PARSE_NOERROR |
13744
217k
               XML_PARSE_NOWARNING |
13745
217k
               XML_PARSE_NONET |
13746
217k
               XML_PARSE_NSCLEAN |
13747
217k
               XML_PARSE_NOCDATA |
13748
217k
               XML_PARSE_COMPACT |
13749
217k
               XML_PARSE_OLD10 |
13750
217k
               XML_PARSE_HUGE |
13751
217k
               XML_PARSE_OLDSAX |
13752
217k
               XML_PARSE_IGNORE_ENC |
13753
217k
               XML_PARSE_BIG_LINES;
13754
13755
217k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13756
217k
}
13757
13758
/**
13759
 * xmlCtxtSetMaxAmplification:
13760
 * @ctxt: an XML parser context
13761
 * @maxAmpl:  maximum amplification factor
13762
 *
13763
 * To protect against exponential entity expansion ("billion laughs"), the
13764
 * size of serialized output is (roughly) limited to the input size
13765
 * multiplied by this factor. The default value is 5.
13766
 *
13767
 * When working with documents making heavy use of entity expansion, it can
13768
 * be necessary to increase the value. For security reasons, this should only
13769
 * be considered when processing trusted input.
13770
 */
13771
void
13772
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
13773
0
{
13774
0
    ctxt->maxAmpl = maxAmpl;
13775
0
}
13776
13777
/**
13778
 * xmlCtxtParseDocument:
13779
 * @ctxt:  an XML parser context
13780
 * @input:  parser input
13781
 *
13782
 * Parse an XML document and return the resulting document tree.
13783
 * Takes ownership of the input object.
13784
 *
13785
 * Available since 2.13.0.
13786
 *
13787
 * Returns the resulting document tree or NULL
13788
 */
13789
xmlDocPtr
13790
xmlCtxtParseDocument(xmlParserCtxtPtr ctxt, xmlParserInputPtr input)
13791
4.89k
{
13792
4.89k
    xmlDocPtr ret = NULL;
13793
13794
4.89k
    if ((ctxt == NULL) || (input == NULL)) {
13795
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
13796
0
        xmlFreeInputStream(input);
13797
0
        return(NULL);
13798
0
    }
13799
13800
    /* assert(ctxt->inputNr == 0); */
13801
4.89k
    while (ctxt->inputNr > 0)
13802
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13803
13804
4.89k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13805
0
        xmlFreeInputStream(input);
13806
0
        return(NULL);
13807
0
    }
13808
13809
4.89k
    xmlParseDocument(ctxt);
13810
13811
4.89k
    ret = xmlCtxtGetDocument(ctxt);
13812
13813
    /* assert(ctxt->inputNr == 1); */
13814
9.78k
    while (ctxt->inputNr > 0)
13815
4.89k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13816
13817
4.89k
    return(ret);
13818
4.89k
}
13819
13820
/**
13821
 * xmlReadDoc:
13822
 * @cur:  a pointer to a zero terminated string
13823
 * @URL:  base URL (optional)
13824
 * @encoding:  the document encoding (optional)
13825
 * @options:  a combination of xmlParserOption
13826
 *
13827
 * Convenience function to parse an XML document from a
13828
 * zero-terminated string.
13829
 *
13830
 * See xmlCtxtReadDoc for details.
13831
 *
13832
 * Returns the resulting document tree
13833
 */
13834
xmlDocPtr
13835
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13836
           int options)
13837
0
{
13838
0
    xmlParserCtxtPtr ctxt;
13839
0
    xmlParserInputPtr input;
13840
0
    xmlDocPtr doc = NULL;
13841
13842
0
    ctxt = xmlNewParserCtxt();
13843
0
    if (ctxt == NULL)
13844
0
        return(NULL);
13845
13846
0
    xmlCtxtUseOptions(ctxt, options);
13847
13848
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13849
0
                                      XML_INPUT_BUF_STATIC);
13850
13851
0
    if (input != NULL)
13852
0
        doc = xmlCtxtParseDocument(ctxt, input);
13853
13854
0
    xmlFreeParserCtxt(ctxt);
13855
0
    return(doc);
13856
0
}
13857
13858
/**
13859
 * xmlReadFile:
13860
 * @filename:  a file or URL
13861
 * @encoding:  the document encoding (optional)
13862
 * @options:  a combination of xmlParserOption
13863
 *
13864
 * Convenience function to parse an XML file from the filesystem,
13865
 * the network or a global user-define resource loader.
13866
 *
13867
 * This function always enables the XML_PARSE_UNZIP option for
13868
 * backward compatibility. If a "-" filename is passed, it will
13869
 * read from stdin. Both of these features are potentially
13870
 * insecure and might be removed from later versions.
13871
 *
13872
 * See xmlCtxtReadFile for details.
13873
 *
13874
 * Returns the resulting document tree
13875
 */
13876
xmlDocPtr
13877
xmlReadFile(const char *filename, const char *encoding, int options)
13878
0
{
13879
0
    xmlParserCtxtPtr ctxt;
13880
0
    xmlParserInputPtr input;
13881
0
    xmlDocPtr doc = NULL;
13882
13883
0
    ctxt = xmlNewParserCtxt();
13884
0
    if (ctxt == NULL)
13885
0
        return(NULL);
13886
13887
0
    options |= XML_PARSE_UNZIP;
13888
13889
0
    xmlCtxtUseOptions(ctxt, options);
13890
13891
    /*
13892
     * Backward compatibility for users of command line utilities like
13893
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13894
     * should be removed at some point.
13895
     */
13896
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13897
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13898
0
                                      encoding, 0);
13899
0
    else
13900
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13901
13902
0
    if (input != NULL)
13903
0
        doc = xmlCtxtParseDocument(ctxt, input);
13904
13905
0
    xmlFreeParserCtxt(ctxt);
13906
0
    return(doc);
13907
0
}
13908
13909
/**
13910
 * xmlReadMemory:
13911
 * @buffer:  a pointer to a char array
13912
 * @size:  the size of the array
13913
 * @url:  base URL (optional)
13914
 * @encoding:  the document encoding (optional)
13915
 * @options:  a combination of xmlParserOption
13916
 *
13917
 * Parse an XML in-memory document and build a tree. The input buffer must
13918
 * not contain a terminating null byte.
13919
 *
13920
 * See xmlCtxtReadMemory for details.
13921
 *
13922
 * Returns the resulting document tree
13923
 */
13924
xmlDocPtr
13925
xmlReadMemory(const char *buffer, int size, const char *url,
13926
              const char *encoding, int options)
13927
0
{
13928
0
    xmlParserCtxtPtr ctxt;
13929
0
    xmlParserInputPtr input;
13930
0
    xmlDocPtr doc = NULL;
13931
13932
0
    if (size < 0)
13933
0
  return(NULL);
13934
13935
0
    ctxt = xmlNewParserCtxt();
13936
0
    if (ctxt == NULL)
13937
0
        return(NULL);
13938
13939
0
    xmlCtxtUseOptions(ctxt, options);
13940
13941
0
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13942
0
                                      XML_INPUT_BUF_STATIC);
13943
13944
0
    if (input != NULL)
13945
0
        doc = xmlCtxtParseDocument(ctxt, input);
13946
13947
0
    xmlFreeParserCtxt(ctxt);
13948
0
    return(doc);
13949
0
}
13950
13951
/**
13952
 * xmlReadFd:
13953
 * @fd:  an open file descriptor
13954
 * @URL:  base URL (optional)
13955
 * @encoding:  the document encoding (optional)
13956
 * @options:  a combination of xmlParserOption
13957
 *
13958
 * Parse an XML from a file descriptor and build a tree.
13959
 *
13960
 * See xmlCtxtReadFd for details.
13961
 *
13962
 * NOTE that the file descriptor will not be closed when the
13963
 * context is freed or reset.
13964
 *
13965
 * Returns the resulting document tree
13966
 */
13967
xmlDocPtr
13968
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13969
0
{
13970
0
    xmlParserCtxtPtr ctxt;
13971
0
    xmlParserInputPtr input;
13972
0
    xmlDocPtr doc = NULL;
13973
13974
0
    ctxt = xmlNewParserCtxt();
13975
0
    if (ctxt == NULL)
13976
0
        return(NULL);
13977
13978
0
    xmlCtxtUseOptions(ctxt, options);
13979
13980
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13981
13982
0
    if (input != NULL)
13983
0
        doc = xmlCtxtParseDocument(ctxt, input);
13984
13985
0
    xmlFreeParserCtxt(ctxt);
13986
0
    return(doc);
13987
0
}
13988
13989
/**
13990
 * xmlReadIO:
13991
 * @ioread:  an I/O read function
13992
 * @ioclose:  an I/O close function (optional)
13993
 * @ioctx:  an I/O handler
13994
 * @URL:  base URL (optional)
13995
 * @encoding:  the document encoding (optional)
13996
 * @options:  a combination of xmlParserOption
13997
 *
13998
 * Parse an XML document from I/O functions and context and build a tree.
13999
 *
14000
 * See xmlCtxtReadIO for details.
14001
 *
14002
 * Returns the resulting document tree
14003
 */
14004
xmlDocPtr
14005
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14006
          void *ioctx, const char *URL, const char *encoding, int options)
14007
0
{
14008
0
    xmlParserCtxtPtr ctxt;
14009
0
    xmlParserInputPtr input;
14010
0
    xmlDocPtr doc = NULL;
14011
14012
0
    ctxt = xmlNewParserCtxt();
14013
0
    if (ctxt == NULL)
14014
0
        return(NULL);
14015
14016
0
    xmlCtxtUseOptions(ctxt, options);
14017
14018
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
14019
0
                                  encoding, 0);
14020
14021
0
    if (input != NULL)
14022
0
        doc = xmlCtxtParseDocument(ctxt, input);
14023
14024
0
    xmlFreeParserCtxt(ctxt);
14025
0
    return(doc);
14026
0
}
14027
14028
/**
14029
 * xmlCtxtReadDoc:
14030
 * @ctxt:  an XML parser context
14031
 * @str:  a pointer to a zero terminated string
14032
 * @URL:  base URL (optional)
14033
 * @encoding:  the document encoding (optional)
14034
 * @options:  a combination of xmlParserOption
14035
 *
14036
 * Parse an XML in-memory document and build a tree.
14037
 *
14038
 * @URL is used as base to resolve external entities and for error
14039
 * reporting.
14040
 *
14041
 * See xmlCtxtUseOptions for details.
14042
 *
14043
 * Returns the resulting document tree
14044
 */
14045
xmlDocPtr
14046
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
14047
               const char *URL, const char *encoding, int options)
14048
0
{
14049
0
    xmlParserInputPtr input;
14050
14051
0
    if (ctxt == NULL)
14052
0
        return(NULL);
14053
14054
0
    xmlCtxtReset(ctxt);
14055
0
    xmlCtxtUseOptions(ctxt, options);
14056
14057
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
14058
0
                                      XML_INPUT_BUF_STATIC);
14059
0
    if (input == NULL)
14060
0
        return(NULL);
14061
14062
0
    return(xmlCtxtParseDocument(ctxt, input));
14063
0
}
14064
14065
/**
14066
 * xmlCtxtReadFile:
14067
 * @ctxt:  an XML parser context
14068
 * @filename:  a file or URL
14069
 * @encoding:  the document encoding (optional)
14070
 * @options:  a combination of xmlParserOption
14071
 *
14072
 * Parse an XML file from the filesystem, the network or a user-defined
14073
 * resource loader.
14074
 *
14075
 * This function always enables the XML_PARSE_UNZIP option for
14076
 * backward compatibility. This feature is potentially insecure
14077
 * and might be removed from later versions.
14078
 *
14079
 * Returns the resulting document tree
14080
 */
14081
xmlDocPtr
14082
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14083
                const char *encoding, int options)
14084
0
{
14085
0
    xmlParserInputPtr input;
14086
14087
0
    if (ctxt == NULL)
14088
0
        return(NULL);
14089
14090
0
    options |= XML_PARSE_UNZIP;
14091
14092
0
    xmlCtxtReset(ctxt);
14093
0
    xmlCtxtUseOptions(ctxt, options);
14094
14095
0
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
14096
0
    if (input == NULL)
14097
0
        return(NULL);
14098
14099
0
    return(xmlCtxtParseDocument(ctxt, input));
14100
0
}
14101
14102
/**
14103
 * xmlCtxtReadMemory:
14104
 * @ctxt:  an XML parser context
14105
 * @buffer:  a pointer to a char array
14106
 * @size:  the size of the array
14107
 * @URL:  base URL (optional)
14108
 * @encoding:  the document encoding (optional)
14109
 * @options:  a combination of xmlParserOption
14110
 *
14111
 * Parse an XML in-memory document and build a tree. The input buffer must
14112
 * not contain a terminating null byte.
14113
 *
14114
 * @URL is used as base to resolve external entities and for error
14115
 * reporting.
14116
 *
14117
 * See xmlCtxtUseOptions for details.
14118
 *
14119
 * Returns the resulting document tree
14120
 */
14121
xmlDocPtr
14122
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14123
                  const char *URL, const char *encoding, int options)
14124
0
{
14125
0
    xmlParserInputPtr input;
14126
14127
0
    if ((ctxt == NULL) || (size < 0))
14128
0
        return(NULL);
14129
14130
0
    xmlCtxtReset(ctxt);
14131
0
    xmlCtxtUseOptions(ctxt, options);
14132
14133
0
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
14134
0
                                      XML_INPUT_BUF_STATIC);
14135
0
    if (input == NULL)
14136
0
        return(NULL);
14137
14138
0
    return(xmlCtxtParseDocument(ctxt, input));
14139
0
}
14140
14141
/**
14142
 * xmlCtxtReadFd:
14143
 * @ctxt:  an XML parser context
14144
 * @fd:  an open file descriptor
14145
 * @URL:  base URL (optional)
14146
 * @encoding:  the document encoding (optional)
14147
 * @options:  a combination of xmlParserOption
14148
 *
14149
 * Parse an XML document from a file descriptor and build a tree.
14150
 *
14151
 * NOTE that the file descriptor will not be closed when the
14152
 * context is freed or reset.
14153
 *
14154
 * @URL is used as base to resolve external entities and for error
14155
 * reporting.
14156
 *
14157
 * See xmlCtxtUseOptions for details.
14158
 *
14159
 * Returns the resulting document tree
14160
 */
14161
xmlDocPtr
14162
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14163
              const char *URL, const char *encoding, int options)
14164
0
{
14165
0
    xmlParserInputPtr input;
14166
14167
0
    if (ctxt == NULL)
14168
0
        return(NULL);
14169
14170
0
    xmlCtxtReset(ctxt);
14171
0
    xmlCtxtUseOptions(ctxt, options);
14172
14173
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
14174
0
    if (input == NULL)
14175
0
        return(NULL);
14176
14177
0
    return(xmlCtxtParseDocument(ctxt, input));
14178
0
}
14179
14180
/**
14181
 * xmlCtxtReadIO:
14182
 * @ctxt:  an XML parser context
14183
 * @ioread:  an I/O read function
14184
 * @ioclose:  an I/O close function
14185
 * @ioctx:  an I/O handler
14186
 * @URL:  the base URL to use for the document
14187
 * @encoding:  the document encoding, or NULL
14188
 * @options:  a combination of xmlParserOption
14189
 *
14190
 * parse an XML document from I/O functions and source and build a tree.
14191
 * This reuses the existing @ctxt parser context
14192
 *
14193
 * @URL is used as base to resolve external entities and for error
14194
 * reporting.
14195
 *
14196
 * See xmlCtxtUseOptions for details.
14197
 *
14198
 * Returns the resulting document tree
14199
 */
14200
xmlDocPtr
14201
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14202
              xmlInputCloseCallback ioclose, void *ioctx,
14203
        const char *URL,
14204
              const char *encoding, int options)
14205
4.89k
{
14206
4.89k
    xmlParserInputPtr input;
14207
14208
4.89k
    if (ctxt == NULL)
14209
0
        return(NULL);
14210
14211
4.89k
    xmlCtxtReset(ctxt);
14212
4.89k
    xmlCtxtUseOptions(ctxt, options);
14213
14214
4.89k
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
14215
4.89k
                                  encoding, 0);
14216
4.89k
    if (input == NULL)
14217
0
        return(NULL);
14218
14219
4.89k
    return(xmlCtxtParseDocument(ctxt, input));
14220
4.89k
}
14221