Coverage Report

Created: 2025-11-16 09:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/workdir/UnpackedTarball/libxml2/parser.c
Line
Count
Source
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#include <libxml/HTMLparser.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
70
#include "private/buf.h"
71
#include "private/dict.h"
72
#include "private/entities.h"
73
#include "private/error.h"
74
#include "private/html.h"
75
#include "private/io.h"
76
#include "private/memory.h"
77
#include "private/parser.h"
78
79
31.2M
/*
 * Reserved constants.
 * NOTE(review): judging by the names only, NS_INDEX_EMPTY / NS_INDEX_XML
 * look like reserved namespace index values and URI_HASH_EMPTY /
 * URI_HASH_XML like precomputed hash values for the empty and the XML
 * namespace URI -- confirm against their users further down the file.
 */
#define NS_INDEX_EMPTY  INT_MAX
80
5.05M
#define NS_INDEX_XML    (INT_MAX - 1)
81
11.4M
#define URI_HASH_EMPTY  0xD943A04E
82
40.0k
#define URI_HASH_XML    0xF0451F02
83
84
/* Fallback used only when the included headers do not define STDIN_FILENO. */
#ifndef STDIN_FILENO
85
0
  #define STDIN_FILENO 0
86
#endif
87
88
/* Fallback used only when the included headers do not define SIZE_MAX. */
#ifndef SIZE_MAX
89
  #define SIZE_MAX ((size_t) -1)
90
#endif
91
92
967k
#define XML_MAX_ATTRS 100000000 /* 100 million */
93
94
/*
 * Record describing a start tag.
 * NOTE(review): the field meanings below are inferred from the field names
 * only; the code using this struct is not in this part of the file.
 */
struct _xmlStartTag {
95
    const xmlChar *prefix; /* presumably the element's namespace prefix */
96
    const xmlChar *URI; /* presumably the element's namespace URI */
97
    int line; /* presumably the input line of the start tag */
98
    int nsNr; /* presumably a namespace count for the tag -- TODO confirm */
99
};
100
101
/*
 * Extra per-namespace data; struct _xmlParserNsData keeps a pointer to
 * entries of this type in its "extra" member.
 * NOTE(review): field semantics are not visible in this part of the file;
 * the names suggest cached hash values and bookkeeping for restoring a
 * previous binding -- confirm against the namespace code.
 */
typedef struct {
102
    void *saxData;
103
    unsigned prefixHashValue;
104
    unsigned uriHashValue;
105
    unsigned elementId;
106
    int oldIndex;
107
} xmlParserNsExtra;
108
109
/*
 * One slot of the hash array referenced by struct _xmlParserNsData ("hash"
 * member): a hash value paired with an integer index.
 * NOTE(review): what the index refers to is not visible here -- confirm.
 */
typedef struct {
110
    unsigned hashValue;
111
    int index;
112
} xmlParserNsBucket;
113
114
/*
 * Namespace bookkeeping data: a pointer to xmlParserNsExtra entries, a hash
 * array of xmlParserNsBucket with its size and element count, and a few
 * counters/indexes.
 * NOTE(review): the exact role of elementId, defaultNsIndex and minNsIndex
 * is not visible in this part of the file -- confirm against their users.
 */
struct _xmlParserNsData {
115
    xmlParserNsExtra *extra;
116
117
    unsigned hashSize;
118
    unsigned hashElems;
119
    xmlParserNsBucket *hash;
120
121
    unsigned elementId;
122
    int defaultNsIndex;
123
    int minNsIndex;
124
};
125
126
/*
 * Forward declarations of file-local (static) helpers. Their definitions
 * are not in this part of the file.
 */
static int
127
xmlParseElementStart(xmlParserCtxtPtr ctxt);
128
129
static void
130
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
131
132
static xmlEntityPtr
133
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
134
135
static const xmlChar *
136
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
137
138
/************************************************************************
139
 *                  *
140
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
141
 *                  *
142
 ************************************************************************/
143
144
/*
 * NOTE(review): XML_PARSER_BIG_ENTITY and XML_PARSER_LOT_ENTITY are not
 * referenced in this part of the file; confirm whether they are still used.
 */
#define XML_PARSER_BIG_ENTITY 1000
145
#define XML_PARSER_LOT_ENTITY 5000
146
147
/*
148
 * Constants for protection against abusive entity expansion
149
 * ("billion laughs").
150
 */
151
152
/*
153
 * A certain amount of entity expansion which is always allowed.
154
 */
155
708k
/*
 * xmlParserEntityCheck() only applies its amplification test once the
 * accumulated expanded size is greater than this value.
 */
#define XML_PARSER_ALLOWED_EXPANSION 1000000
156
157
/*
158
 * Fixed cost for each entity reference. This crudely models processing time
159
 * as well to protect, for example, against exponential expansion of empty
160
 * or very short entities.
161
 */
162
708k
/* Added to the accumulated size on every xmlParserEntityCheck() call. */
#define XML_ENT_FIXED_COST 20
163
164
69.1M
/*
 * NOTE(review): presumably buffer sizes in bytes for parsing code further
 * down the file -- not referenced in this part, confirm.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
165
199k
#define XML_PARSER_BUFFER_SIZE 100
166
33.0k
/* Fixed marker string; its users are not in this part of the file. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
167
168
/**
169
 * XML_PARSER_CHUNK_SIZE
170
 *
171
 * When calling GROW, this is the minimal amount of data
172
 * the parser expects to have received. It is not a hard
173
 * limit but an optimization when reading strings like Names.
174
 * It is not strictly needed as long as the input's available characters
175
 * are followed by 0, which should be provided by the I/O level.
176
 */
177
#define XML_PARSER_CHUNK_SIZE 100
178
179
/**
180
 * xmlParserVersion:
181
 *
182
 * Constant string describing the internal version of the library.
 * It is initialized from LIBXML_VERSION_STRING immediately followed by
 * LIBXML_VERSION_EXTRA; both the pointer and the characters are const.
183
 */
184
const char *const
185
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
186
187
/*
188
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
189
 */
190
191
static const char* const xmlW3CPIs[] = {
192
    "xml-stylesheet",
193
    "xml-model",
194
    NULL
195
};
196
197
198
/*
 * Further forward declarations of file-local (static) helpers. Their
 * definitions are not in this part of the file.
 */
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
199
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
200
                                              const xmlChar **str);
201
202
static void
203
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
204
205
static int
206
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
207
208
/************************************************************************
209
 *                  *
210
 *    Some factorized error routines        *
211
 *                  *
212
 ************************************************************************/
213
214
static void
215
0
xmlErrMemory(xmlParserCtxtPtr ctxt) {
216
0
    xmlCtxtErrMemory(ctxt);
217
0
}
218
219
/**
220
 * xmlErrAttributeDup:
221
 * @ctxt:  an XML parser context
222
 * @prefix:  the attribute prefix
223
 * @localname:  the attribute localname
224
 *
225
 * Handle a redefinition of attribute error
226
 */
227
static void
228
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
229
                   const xmlChar * localname)
230
178k
{
231
178k
    if (prefix == NULL)
232
122k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
233
122k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
234
122k
                   "Attribute %s redefined\n", localname);
235
56.0k
    else
236
56.0k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
237
56.0k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
238
56.0k
                   "Attribute %s:%s redefined\n", prefix, localname);
239
178k
}
240
241
/**
242
 * xmlFatalErrMsg:
243
 * @ctxt:  an XML parser context
244
 * @error:  the error number
245
 * @msg:  the error message
246
 *
247
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
248
 */
249
static void LIBXML_ATTR_FORMAT(3,0)
250
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
251
               const char *msg)
252
1.36M
{
253
1.36M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
254
1.36M
               NULL, NULL, NULL, 0, "%s", msg);
255
1.36M
}
256
257
/**
258
 * xmlWarningMsg:
259
 * @ctxt:  an XML parser context
260
 * @error:  the error number
261
 * @msg:  the error message
262
 * @str1:  extra data
263
 * @str2:  extra data
264
 *
265
 * Handle a warning.
266
 */
267
void LIBXML_ATTR_FORMAT(3,0)
268
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
269
              const char *msg, const xmlChar *str1, const xmlChar *str2)
270
22.7k
{
271
22.7k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
272
22.7k
               str1, str2, NULL, 0, msg, str1, str2);
273
22.7k
}
274
275
/**
276
 * xmlValidityError:
277
 * @ctxt:  an XML parser context
278
 * @error:  the error number
279
 * @msg:  the error message
280
 * @str1:  extra data
281
 *
282
 * Handle a validity error.
283
 */
284
static void LIBXML_ATTR_FORMAT(3,0)
285
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
286
              const char *msg, const xmlChar *str1, const xmlChar *str2)
287
0
{
288
0
    ctxt->valid = 0;
289
290
0
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
291
0
               str1, str2, NULL, 0, msg, str1, str2);
292
0
}
293
294
/**
295
 * xmlFatalErrMsgInt:
296
 * @ctxt:  an XML parser context
297
 * @error:  the error number
298
 * @msg:  the error message
299
 * @val:  an integer value
300
 *
301
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
302
 */
303
static void LIBXML_ATTR_FORMAT(3,0)
304
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
305
                  const char *msg, int val)
306
9.87k
{
307
9.87k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
308
9.87k
               NULL, NULL, NULL, val, msg, val);
309
9.87k
}
310
311
/**
312
 * xmlFatalErrMsgStrIntStr:
313
 * @ctxt:  an XML parser context
314
 * @error:  the error number
315
 * @msg:  the error message
316
 * @str1:  an string info
317
 * @val:  an integer value
318
 * @str2:  an string info
319
 *
320
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
321
 */
322
static void LIBXML_ATTR_FORMAT(3,0)
323
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
324
                  const char *msg, const xmlChar *str1, int val,
325
      const xmlChar *str2)
326
68.7k
{
327
68.7k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
328
68.7k
               str1, str2, NULL, val, msg, str1, val, str2);
329
68.7k
}
330
331
/**
332
 * xmlFatalErrMsgStr:
333
 * @ctxt:  an XML parser context
334
 * @error:  the error number
335
 * @msg:  the error message
336
 * @val:  a string value
337
 *
338
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
339
 */
340
static void LIBXML_ATTR_FORMAT(3,0)
341
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
342
                  const char *msg, const xmlChar * val)
343
103k
{
344
103k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
345
103k
               val, NULL, NULL, 0, msg, val);
346
103k
}
347
348
/**
349
 * xmlErrMsgStr:
350
 * @ctxt:  an XML parser context
351
 * @error:  the error number
352
 * @msg:  the error message
353
 * @val:  a string value
354
 *
355
 * Handle a non fatal parser error
356
 */
357
static void LIBXML_ATTR_FORMAT(3,0)
358
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
359
                  const char *msg, const xmlChar * val)
360
6.66k
{
361
6.66k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
362
6.66k
               val, NULL, NULL, 0, msg, val);
363
6.66k
}
364
365
/**
366
 * xmlNsErr:
367
 * @ctxt:  an XML parser context
368
 * @error:  the error number
369
 * @msg:  the message
370
 * @info1:  extra information string
371
 * @info2:  extra information string
372
 *
373
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
374
 */
375
static void LIBXML_ATTR_FORMAT(3,0)
376
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
377
         const char *msg,
378
         const xmlChar * info1, const xmlChar * info2,
379
         const xmlChar * info3)
380
1.58M
{
381
1.58M
    ctxt->nsWellFormed = 0;
382
383
1.58M
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
384
1.58M
               info1, info2, info3, 0, msg, info1, info2, info3);
385
1.58M
}
386
387
/**
388
 * xmlNsWarn
389
 * @ctxt:  an XML parser context
390
 * @error:  the error number
391
 * @msg:  the message
392
 * @info1:  extra information string
393
 * @info2:  extra information string
394
 *
395
 * Handle a namespace warning error
396
 */
397
static void LIBXML_ATTR_FORMAT(3,0)
398
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
399
         const char *msg,
400
         const xmlChar * info1, const xmlChar * info2,
401
         const xmlChar * info3)
402
57.5k
{
403
57.5k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
404
57.5k
               info1, info2, info3, 0, msg, info1, info2, info3);
405
57.5k
}
406
407
/**
 * xmlSaturatedAdd:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst without wrapping around: if the sum would exceed
 * ULONG_MAX the accumulator is pinned to ULONG_MAX instead.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* How much can still be added before reaching ULONG_MAX. */
    const unsigned long headroom = ULONG_MAX - *dst;

    *dst = (val > headroom) ? ULONG_MAX : (*dst + val);
}
414
415
/**
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator, updated in place
 * @val:  amount to add, given as a size_t
 *
 * Same as xmlSaturatedAdd() for a size_t operand: *@dst is pinned to
 * ULONG_MAX whenever @val does not fit into the remaining headroom.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    const unsigned long headroom = ULONG_MAX - *dst;

    if (val <= headroom) {
        *dst += val;
        return;
    }

    *dst = ULONG_MAX;
}
422
423
/**
424
 * xmlParserEntityCheck:
425
 * @ctxt:  parser context
426
 * @extra:  sum of unexpanded entity sizes
427
 *
428
 * Check for non-linear entity expansion behaviour.
429
 *
430
 * In some cases like xmlExpandEntityInAttValue, this function is called
431
 * for each, possibly nested entity and its unexpanded content length.
432
 *
433
 * In other cases like xmlParseReference, it's only called for each
434
 * top-level entity with its unexpanded content length plus the sum of
435
 * the unexpanded content lengths (plus fixed cost) of all nested
436
 * entities.
437
 *
438
 * Summing the unexpanded lengths also adds the length of the reference.
439
 * This is by design. Taking the length of the entity name into account
440
 * discourages attacks that try to waste CPU time with abusively long
441
 * entity names. See test/recurse/lol6.xml for example. Each call also
442
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
443
 * short entities.
444
 *
445
 * Returns 1 on error, 0 on success.
446
 */
447
static int
448
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
449
708k
{
450
708k
    unsigned long consumed;
451
708k
    unsigned long *expandedSize;
452
708k
    xmlParserInputPtr input = ctxt->input;
453
708k
    xmlEntityPtr entity = input->entity;
454
455
708k
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
456
0
        return(0);
457
458
    /*
459
     * Compute total consumed bytes so far, including input streams of
460
     * external entities.
461
     */
462
708k
    consumed = input->consumed;
463
708k
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
464
708k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
465
466
708k
    if (entity)
467
0
        expandedSize = &entity->expandedSize;
468
708k
    else
469
708k
        expandedSize = &ctxt->sizeentcopy;
470
471
    /*
472
     * Add extra cost and some fixed cost.
473
     */
474
708k
    xmlSaturatedAdd(expandedSize, extra);
475
708k
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
476
477
    /*
478
     * It's important to always use saturation arithmetic when tracking
479
     * entity sizes to make the size checks reliable. If "sizeentcopy"
480
     * overflows, we have to abort.
481
     */
482
708k
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
483
79
        ((*expandedSize >= ULONG_MAX) ||
484
79
         (*expandedSize / ctxt->maxAmpl > consumed))) {
485
79
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
486
79
                       "Maximum entity amplification factor exceeded, see "
487
79
                       "xmlCtxtSetMaxAmplification.\n");
488
79
        xmlHaltParser(ctxt);
489
79
        return(1);
490
79
    }
491
492
707k
    return(0);
493
708k
}
494
495
/************************************************************************
496
 *                  *
497
 *    Library wide options          *
498
 *                  *
499
 ************************************************************************/
500
501
/**
502
  * xmlHasFeature:
503
  * @feature: the feature to be examined
504
  *
505
  * Examines if the library has been compiled with a given feature.
506
  *
507
  * Returns a non-zero value if the feature exist, otherwise zero.
508
  * Returns zero (0) if the feature does not exist or an unknown
509
  * unknown feature is requested, non-zero otherwise.
510
  */
511
int
512
xmlHasFeature(xmlFeature feature)
513
0
{
514
0
    switch (feature) {
515
0
  case XML_WITH_THREAD:
516
0
#ifdef LIBXML_THREAD_ENABLED
517
0
      return(1);
518
#else
519
      return(0);
520
#endif
521
0
        case XML_WITH_TREE:
522
0
            return(1);
523
0
        case XML_WITH_OUTPUT:
524
0
#ifdef LIBXML_OUTPUT_ENABLED
525
0
            return(1);
526
#else
527
            return(0);
528
#endif
529
0
        case XML_WITH_PUSH:
530
0
#ifdef LIBXML_PUSH_ENABLED
531
0
            return(1);
532
#else
533
            return(0);
534
#endif
535
0
        case XML_WITH_READER:
536
0
#ifdef LIBXML_READER_ENABLED
537
0
            return(1);
538
#else
539
            return(0);
540
#endif
541
0
        case XML_WITH_PATTERN:
542
0
#ifdef LIBXML_PATTERN_ENABLED
543
0
            return(1);
544
#else
545
            return(0);
546
#endif
547
0
        case XML_WITH_WRITER:
548
0
#ifdef LIBXML_WRITER_ENABLED
549
0
            return(1);
550
#else
551
            return(0);
552
#endif
553
0
        case XML_WITH_SAX1:
554
0
#ifdef LIBXML_SAX1_ENABLED
555
0
            return(1);
556
#else
557
            return(0);
558
#endif
559
0
        case XML_WITH_HTTP:
560
#ifdef LIBXML_HTTP_ENABLED
561
            return(1);
562
#else
563
0
            return(0);
564
0
#endif
565
0
        case XML_WITH_VALID:
566
0
#ifdef LIBXML_VALID_ENABLED
567
0
            return(1);
568
#else
569
            return(0);
570
#endif
571
0
        case XML_WITH_HTML:
572
0
#ifdef LIBXML_HTML_ENABLED
573
0
            return(1);
574
#else
575
            return(0);
576
#endif
577
0
        case XML_WITH_LEGACY:
578
0
            return(0);
579
0
        case XML_WITH_C14N:
580
0
#ifdef LIBXML_C14N_ENABLED
581
0
            return(1);
582
#else
583
            return(0);
584
#endif
585
0
        case XML_WITH_CATALOG:
586
0
#ifdef LIBXML_CATALOG_ENABLED
587
0
            return(1);
588
#else
589
            return(0);
590
#endif
591
0
        case XML_WITH_XPATH:
592
0
#ifdef LIBXML_XPATH_ENABLED
593
0
            return(1);
594
#else
595
            return(0);
596
#endif
597
0
        case XML_WITH_XPTR:
598
0
#ifdef LIBXML_XPTR_ENABLED
599
0
            return(1);
600
#else
601
            return(0);
602
#endif
603
0
        case XML_WITH_XINCLUDE:
604
0
#ifdef LIBXML_XINCLUDE_ENABLED
605
0
            return(1);
606
#else
607
            return(0);
608
#endif
609
0
        case XML_WITH_ICONV:
610
#ifdef LIBXML_ICONV_ENABLED
611
            return(1);
612
#else
613
0
            return(0);
614
0
#endif
615
0
        case XML_WITH_ISO8859X:
616
0
#ifdef LIBXML_ISO8859X_ENABLED
617
0
            return(1);
618
#else
619
            return(0);
620
#endif
621
0
        case XML_WITH_UNICODE:
622
0
            return(0);
623
0
        case XML_WITH_REGEXP:
624
0
#ifdef LIBXML_REGEXP_ENABLED
625
0
            return(1);
626
#else
627
            return(0);
628
#endif
629
0
        case XML_WITH_AUTOMATA:
630
0
#ifdef LIBXML_REGEXP_ENABLED
631
0
            return(1);
632
#else
633
            return(0);
634
#endif
635
0
        case XML_WITH_EXPR:
636
#ifdef LIBXML_EXPR_ENABLED
637
            return(1);
638
#else
639
0
            return(0);
640
0
#endif
641
0
        case XML_WITH_RELAXNG:
642
0
#ifdef LIBXML_RELAXNG_ENABLED
643
0
            return(1);
644
#else
645
            return(0);
646
#endif
647
0
        case XML_WITH_SCHEMAS:
648
0
#ifdef LIBXML_SCHEMAS_ENABLED
649
0
            return(1);
650
#else
651
            return(0);
652
#endif
653
0
        case XML_WITH_SCHEMATRON:
654
0
#ifdef LIBXML_SCHEMATRON_ENABLED
655
0
            return(1);
656
#else
657
            return(0);
658
#endif
659
0
        case XML_WITH_MODULES:
660
0
#ifdef LIBXML_MODULES_ENABLED
661
0
            return(1);
662
#else
663
            return(0);
664
#endif
665
0
        case XML_WITH_DEBUG:
666
0
#ifdef LIBXML_DEBUG_ENABLED
667
0
            return(1);
668
#else
669
            return(0);
670
#endif
671
0
        case XML_WITH_DEBUG_MEM:
672
0
            return(0);
673
0
        case XML_WITH_ZLIB:
674
#ifdef LIBXML_ZLIB_ENABLED
675
            return(1);
676
#else
677
0
            return(0);
678
0
#endif
679
0
        case XML_WITH_LZMA:
680
#ifdef LIBXML_LZMA_ENABLED
681
            return(1);
682
#else
683
0
            return(0);
684
0
#endif
685
0
        case XML_WITH_ICU:
686
#ifdef LIBXML_ICU_ENABLED
687
            return(1);
688
#else
689
0
            return(0);
690
0
#endif
691
0
        default:
692
0
      break;
693
0
     }
694
0
     return(0);
695
0
}
696
697
/************************************************************************
698
 *                  *
699
 *      Simple string buffer        *
700
 *                  *
701
 ************************************************************************/
702
703
/*
 * Growable byte buffer with a hard size limit and a sticky error code.
 * The xmlSBufAdd* functions record failures in "code" instead of
 * returning them; xmlSBufFinish() / xmlSBufCleanup() report the error.
 */
typedef struct {
704
    xmlChar *mem; /* storage, NULL until the first xmlSBufGrow() */
705
    unsigned size; /* number of bytes stored so far */
706
    unsigned cap; /* size < cap */
707
    unsigned max; /* size <= max */
708
    xmlParserErrors code; /* XML_ERR_OK, or the recorded failure */
709
} xmlSBuf;
710
711
static void
712
22.2M
xmlSBufInit(xmlSBuf *buf, unsigned max) {
713
22.2M
    buf->mem = NULL;
714
22.2M
    buf->size = 0;
715
22.2M
    buf->cap = 0;
716
22.2M
    buf->max = max;
717
22.2M
    buf->code = XML_ERR_OK;
718
22.2M
}
719
720
static int
721
244k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
722
244k
    xmlChar *mem;
723
244k
    unsigned cap;
724
725
244k
    if (len >= UINT_MAX / 2 - buf->size) {
726
0
        if (buf->code == XML_ERR_OK)
727
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
728
0
        return(-1);
729
0
    }
730
731
244k
    cap = (buf->size + len) * 2;
732
244k
    if (cap < 240)
733
205k
        cap = 240;
734
735
244k
    mem = xmlRealloc(buf->mem, cap);
736
244k
    if (mem == NULL) {
737
0
        buf->code = XML_ERR_NO_MEMORY;
738
0
        return(-1);
739
0
    }
740
741
244k
    buf->mem = mem;
742
244k
    buf->cap = cap;
743
744
244k
    return(0);
745
244k
}
746
747
static void
748
4.53M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
749
4.53M
    if (buf->max - buf->size < len) {
750
0
        if (buf->code == XML_ERR_OK)
751
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
752
0
        return;
753
0
    }
754
755
4.53M
    if (buf->cap - buf->size <= len) {
756
242k
        if (xmlSBufGrow(buf, len) < 0)
757
0
            return;
758
242k
    }
759
760
4.53M
    if (len > 0)
761
4.53M
        memcpy(buf->mem + buf->size, str, len);
762
4.53M
    buf->size += len;
763
4.53M
}
764
765
static void
766
3.56M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
767
3.56M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
768
3.56M
}
769
770
static void
771
50.9k
xmlSBufAddChar(xmlSBuf *buf, int c) {
772
50.9k
    xmlChar *end;
773
774
50.9k
    if (buf->max - buf->size < 4) {
775
0
        if (buf->code == XML_ERR_OK)
776
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
777
0
        return;
778
0
    }
779
780
50.9k
    if (buf->cap - buf->size <= 4) {
781
2.22k
        if (xmlSBufGrow(buf, 4) < 0)
782
0
            return;
783
2.22k
    }
784
785
50.9k
    end = buf->mem + buf->size;
786
787
50.9k
    if (c < 0x80) {
788
15.8k
        *end = (xmlChar) c;
789
15.8k
        buf->size += 1;
790
35.1k
    } else {
791
35.1k
        buf->size += xmlCopyCharMultiByte(end, c);
792
35.1k
    }
793
50.9k
}
794
795
static void
796
2.99M
xmlSBufAddReplChar(xmlSBuf *buf) {
797
2.99M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
798
2.99M
}
799
800
static void
801
0
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
802
0
    if (buf->code == XML_ERR_NO_MEMORY)
803
0
        xmlCtxtErrMemory(ctxt);
804
0
    else
805
0
        xmlFatalErr(ctxt, buf->code, errMsg);
806
0
}
807
808
static xmlChar *
809
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
810
232k
              const char *errMsg) {
811
232k
    if (buf->mem == NULL) {
812
21.2k
        buf->mem = xmlMalloc(1);
813
21.2k
        if (buf->mem == NULL) {
814
0
            buf->code = XML_ERR_NO_MEMORY;
815
21.2k
        } else {
816
21.2k
            buf->mem[0] = 0;
817
21.2k
        }
818
211k
    } else {
819
211k
        buf->mem[buf->size] = 0;
820
211k
    }
821
822
232k
    if (buf->code == XML_ERR_OK) {
823
232k
        if (sizeOut != NULL)
824
173k
            *sizeOut = buf->size;
825
232k
        return(buf->mem);
826
232k
    }
827
828
0
    xmlSBufReportError(buf, ctxt, errMsg);
829
830
0
    xmlFree(buf->mem);
831
832
0
    if (sizeOut != NULL)
833
0
        *sizeOut = 0;
834
0
    return(NULL);
835
232k
}
836
837
static void
838
21.9M
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
839
21.9M
    if (buf->code != XML_ERR_OK)
840
0
        xmlSBufReportError(buf, ctxt, errMsg);
841
842
21.9M
    xmlFree(buf->mem);
843
21.9M
}
844
845
static int
846
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
847
5.46M
                    const char *errMsg) {
848
5.46M
    int c = str[0];
849
5.46M
    int c1 = str[1];
850
851
5.46M
    if ((c1 & 0xC0) != 0x80)
852
1.40M
        goto encoding_error;
853
854
4.06M
    if (c < 0xE0) {
855
        /* 2-byte sequence */
856
1.07M
        if (c < 0xC2)
857
527k
            goto encoding_error;
858
859
547k
        return(2);
860
2.98M
    } else {
861
2.98M
        int c2 = str[2];
862
863
2.98M
        if ((c2 & 0xC0) != 0x80)
864
17.8k
            goto encoding_error;
865
866
2.96M
        if (c < 0xF0) {
867
            /* 3-byte sequence */
868
2.86M
            if (c == 0xE0) {
869
                /* overlong */
870
144k
                if (c1 < 0xA0)
871
1.58k
                    goto encoding_error;
872
2.71M
            } else if (c == 0xED) {
873
                /* surrogate */
874
9.83k
                if (c1 >= 0xA0)
875
1.23k
                    goto encoding_error;
876
2.70M
            } else if (c == 0xEF) {
877
                /* U+FFFE and U+FFFF are invalid Chars */
878
1.24M
                if ((c1 == 0xBF) && (c2 >= 0xBE))
879
22.3k
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
880
1.24M
            }
881
882
2.85M
            return(3);
883
2.86M
        } else {
884
            /* 4-byte sequence */
885
107k
            if ((str[3] & 0xC0) != 0x80)
886
4.86k
                goto encoding_error;
887
102k
            if (c == 0xF0) {
888
                /* overlong */
889
2.89k
                if (c1 < 0x90)
890
976
                    goto encoding_error;
891
99.9k
            } else if (c >= 0xF4) {
892
                /* greater than 0x10FFFF */
893
12.3k
                if ((c > 0xF4) || (c1 >= 0x90))
894
2.36k
                    goto encoding_error;
895
12.3k
            }
896
897
99.5k
            return(4);
898
102k
        }
899
2.96M
    }
900
901
1.96M
encoding_error:
902
    /* Only report the first error */
903
1.96M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
904
4.43k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
905
4.43k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
906
4.43k
    }
907
908
1.96M
    return(0);
909
4.06M
}
910
911
/************************************************************************
912
 *                  *
913
 *    SAX2 defaulted attributes handling      *
914
 *                  *
915
 ************************************************************************/
916
917
/**
918
 * xmlCtxtInitializeLate:
919
 * @ctxt:  an XML parser context
920
 *
921
 * Final initialization of the parser context before starting to parse.
922
 *
923
 * This accounts for users modifying struct members of parser context
924
 * directly.
925
 */
926
static void
927
216k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
928
216k
    xmlSAXHandlerPtr sax;
929
930
    /* Avoid unused variable warning if features are disabled. */
931
216k
    (void) sax;
932
933
    /*
934
     * Changing the SAX struct directly is still widespread practice
935
     * in internal and external code.
936
     */
937
216k
    if (ctxt == NULL) return;
938
216k
    sax = ctxt->sax;
939
216k
#ifdef LIBXML_SAX1_ENABLED
940
    /*
941
     * Only enable SAX2 if there SAX2 element handlers, except when there
942
     * are no element handlers at all.
943
     */
944
216k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
945
216k
        (sax) &&
946
216k
        (sax->initialized == XML_SAX2_MAGIC) &&
947
216k
        ((sax->startElementNs != NULL) ||
948
0
         (sax->endElementNs != NULL) ||
949
0
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
950
216k
        ctxt->sax2 = 1;
951
#else
952
    ctxt->sax2 = 1;
953
#endif /* LIBXML_SAX1_ENABLED */
954
955
    /*
956
     * Some users replace the dictionary directly in the context struct.
957
     * We really need an API function to do that cleanly.
958
     */
959
216k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
960
216k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
961
216k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
962
216k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
963
216k
    (ctxt->str_xml_ns == NULL)) {
964
0
        xmlErrMemory(ctxt);
965
0
    }
966
967
216k
    xmlDictSetLimit(ctxt->dict,
968
216k
                    (ctxt->options & XML_PARSE_HUGE) ?
969
210k
                        0 :
970
216k
                        XML_MAX_DICTIONARY_LIMIT);
971
216k
}
972
973
/*
 * One defaulted attribute; element type of the attrs array in
 * struct _xmlDefAttrs.
 * NOTE(review): field meanings are inferred from their names only --
 * presumably valueEnd points at the end of the value string, external
 * flags a declaration from an external subset and expandedSize is an
 * entity-expansion cost. Confirm against xmlAddDefAttrs and the users.
 */
typedef struct {
974
    xmlHashedString prefix;
975
    xmlHashedString name;
976
    xmlHashedString value;
977
    const xmlChar *valueEnd;
978
    int external;
979
    int expandedSize;
980
} xmlDefAttr;
981
982
/*
 * Variable-length table of defaulted attributes: a header with the used
 * and allocated entry counts followed by the xmlDefAttr entries. With C99
 * or later the entries are a flexible array member, otherwise a
 * one-element array is declared as a stand-in.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
983
typedef xmlDefAttrs *xmlDefAttrsPtr;
984
struct _xmlDefAttrs {
985
    int nbAttrs;  /* number of defaulted attributes on that element */
986
    int maxAttrs;       /* the size of the array */
987
#if __STDC_VERSION__ >= 199901L
988
    /* Using a C99 flexible array member avoids UBSan errors. */
989
    xmlDefAttr attrs[] ATTRIBUTE_COUNTED_BY(maxAttrs);
990
#else
991
    xmlDefAttr attrs[1];
992
#endif
993
};
994
995
/**
 * xmlAttrNormalizeSpace:
 * @src: the source string
 * @dst: the target buffer
 *
 * Normalize the space in non CDATA attribute values:
 * If the attribute type is not CDATA, then the XML processor MUST further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 *
 * @dst must be at least as large as @src (terminator included). The
 * normalization may be done in place by passing the same writable buffer
 * as @src and @dst, provided it doesn't come from a dictionary or
 * read-only memory. The terminating 0 is always written to @dst.
 *
 * Returns NULL if an argument is NULL or if the final write position
 *         equals the final read position (an in-place call that changed
 *         nothing), otherwise a pointer to the terminating 0 written
 *         to @dst.
 */
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
    const xmlChar *in = src;
    xmlChar *out = dst;

    if ((in == NULL) || (out == NULL))
        return(NULL);

    /* drop leading spaces */
    while (*in == 0x20)
        in++;

    for (;;) {
        xmlChar c = *in;

        if (c == 0)
            break;

        if (c != 0x20) {
            *out++ = c;
            in++;
            continue;
        }

        /* collapse a run of spaces; a run ending the string is dropped */
        do {
            in++;
        } while (*in == 0x20);
        if (*in != 0)
            *out++ = 0x20;
    }

    *out = 0;
    return((out == in) ? NULL : out);
}
1033
1034
/**
1035
 * xmlAddDefAttrs:
1036
 * @ctxt:  an XML parser context
1037
 * @fullname:  the element fullname
1038
 * @fullattr:  the attribute fullname
1039
 * @value:  the attribute value
1040
 *
1041
 * Add a defaulted attribute for an element
1042
 */
1043
static void
1044
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1045
               const xmlChar *fullname,
1046
               const xmlChar *fullattr,
1047
28.9k
               const xmlChar *value) {
1048
28.9k
    xmlDefAttrsPtr defaults;
1049
28.9k
    xmlDefAttr *attr;
1050
28.9k
    int len, expandedSize;
1051
28.9k
    xmlHashedString name;
1052
28.9k
    xmlHashedString prefix;
1053
28.9k
    xmlHashedString hvalue;
1054
28.9k
    const xmlChar *localname;
1055
1056
    /*
1057
     * Allows to detect attribute redefinitions
1058
     */
1059
28.9k
    if (ctxt->attsSpecial != NULL) {
1060
26.4k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1061
7.59k
      return;
1062
26.4k
    }
1063
1064
21.3k
    if (ctxt->attsDefault == NULL) {
1065
2.60k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1066
2.60k
  if (ctxt->attsDefault == NULL)
1067
0
      goto mem_error;
1068
2.60k
    }
1069
1070
    /*
1071
     * split the element name into prefix:localname , the string found
1072
     * are within the DTD and then not associated to namespace names.
1073
     */
1074
21.3k
    localname = xmlSplitQName3(fullname, &len);
1075
21.3k
    if (localname == NULL) {
1076
19.7k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1077
19.7k
  prefix.name = NULL;
1078
19.7k
    } else {
1079
1.63k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1080
1.63k
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1081
1.63k
        if (prefix.name == NULL)
1082
0
            goto mem_error;
1083
1.63k
    }
1084
21.3k
    if (name.name == NULL)
1085
0
        goto mem_error;
1086
1087
    /*
1088
     * make sure there is some storage
1089
     */
1090
21.3k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1091
21.3k
    if ((defaults == NULL) ||
1092
16.7k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1093
7.20k
        xmlDefAttrsPtr temp;
1094
7.20k
        int newSize;
1095
1096
7.20k
        if (defaults == NULL) {
1097
4.64k
            newSize = 4;
1098
4.64k
        } else {
1099
2.55k
            if ((defaults->maxAttrs >= XML_MAX_ATTRS) ||
1100
2.55k
                ((size_t) defaults->maxAttrs >
1101
2.55k
                     SIZE_MAX / 2 / sizeof(temp[0]) - sizeof(*defaults)))
1102
0
                goto mem_error;
1103
1104
2.55k
            if (defaults->maxAttrs > XML_MAX_ATTRS / 2)
1105
0
                newSize = XML_MAX_ATTRS;
1106
2.55k
            else
1107
2.55k
                newSize = defaults->maxAttrs * 2;
1108
2.55k
        }
1109
7.20k
        temp = xmlRealloc(defaults,
1110
7.20k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1111
7.20k
  if (temp == NULL)
1112
0
      goto mem_error;
1113
7.20k
        if (defaults == NULL)
1114
4.64k
            temp->nbAttrs = 0;
1115
7.20k
  temp->maxAttrs = newSize;
1116
7.20k
        defaults = temp;
1117
7.20k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1118
7.20k
                          defaults, NULL) < 0) {
1119
0
      xmlFree(defaults);
1120
0
      goto mem_error;
1121
0
  }
1122
7.20k
    }
1123
1124
    /*
1125
     * Split the attribute name into prefix:localname , the string found
1126
     * are within the DTD and hen not associated to namespace names.
1127
     */
1128
21.3k
    localname = xmlSplitQName3(fullattr, &len);
1129
21.3k
    if (localname == NULL) {
1130
9.90k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1131
9.90k
  prefix.name = NULL;
1132
11.4k
    } else {
1133
11.4k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1134
11.4k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1135
11.4k
        if (prefix.name == NULL)
1136
0
            goto mem_error;
1137
11.4k
    }
1138
21.3k
    if (name.name == NULL)
1139
0
        goto mem_error;
1140
1141
    /* intern the string and precompute the end */
1142
21.3k
    len = strlen((const char *) value);
1143
21.3k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1144
21.3k
    if (hvalue.name == NULL)
1145
0
        goto mem_error;
1146
1147
21.3k
    expandedSize = strlen((const char *) name.name);
1148
21.3k
    if (prefix.name != NULL)
1149
11.4k
        expandedSize += strlen((const char *) prefix.name);
1150
21.3k
    expandedSize += len;
1151
1152
21.3k
    attr = &defaults->attrs[defaults->nbAttrs++];
1153
21.3k
    attr->name = name;
1154
21.3k
    attr->prefix = prefix;
1155
21.3k
    attr->value = hvalue;
1156
21.3k
    attr->valueEnd = hvalue.name + len;
1157
21.3k
    attr->external = PARSER_EXTERNAL(ctxt);
1158
21.3k
    attr->expandedSize = expandedSize;
1159
1160
21.3k
    return;
1161
1162
0
mem_error:
1163
0
    xmlErrMemory(ctxt);
1164
0
}
1165
1166
/**
1167
 * xmlAddSpecialAttr:
1168
 * @ctxt:  an XML parser context
1169
 * @fullname:  the element fullname
1170
 * @fullattr:  the attribute fullname
1171
 * @type:  the attribute type
1172
 *
1173
 * Register this attribute type
1174
 */
1175
static void
1176
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1177
      const xmlChar *fullname,
1178
      const xmlChar *fullattr,
1179
      int type)
1180
35.8k
{
1181
35.8k
    if (ctxt->attsSpecial == NULL) {
1182
2.92k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1183
2.92k
  if (ctxt->attsSpecial == NULL)
1184
0
      goto mem_error;
1185
2.92k
    }
1186
1187
35.8k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1188
35.8k
                    XML_INT_TO_PTR(type)) < 0)
1189
0
        goto mem_error;
1190
35.8k
    return;
1191
1192
35.8k
mem_error:
1193
0
    xmlErrMemory(ctxt);
1194
0
}
1195
1196
/**
1197
 * xmlCleanSpecialAttrCallback:
1198
 *
1199
 * Removes CDATA attributes from the special attribute table
1200
 */
1201
static void
1202
xmlCleanSpecialAttrCallback(void *payload, void *data,
1203
                            const xmlChar *fullname, const xmlChar *fullattr,
1204
24.4k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1205
24.4k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1206
1207
24.4k
    if (XML_PTR_TO_INT(payload) == XML_ATTRIBUTE_CDATA) {
1208
1.91k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1209
1.91k
    }
1210
24.4k
}
1211
1212
/**
1213
 * xmlCleanSpecialAttr:
1214
 * @ctxt:  an XML parser context
1215
 *
1216
 * Trim the list of attributes defined to remove all those of type
1217
 * CDATA as they are not special. This call should be done when finishing
1218
 * to parse the DTD and before starting to parse the document root.
1219
 */
1220
static void
1221
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1222
6.71k
{
1223
6.71k
    if (ctxt->attsSpecial == NULL)
1224
3.78k
        return;
1225
1226
2.92k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1227
1228
2.92k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1229
208
        xmlHashFree(ctxt->attsSpecial, NULL);
1230
208
        ctxt->attsSpecial = NULL;
1231
208
    }
1232
2.92k
}
1233
1234
/*
 * Advance over a run of ASCII letters ([A-Za-z]) and return the first
 * position that is not a letter.
 */
static const xmlChar *
xmlLangSkipAlpha(const xmlChar *cur) {
    while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
           ((cur[0] >= 'a') && (cur[0] <= 'z')))
        cur++;
    return(cur);
}

/*
 * Non-zero if @c is an ASCII digit.
 */
static int
xmlLangIsDigit(xmlChar c) {
    return((c >= '0') && (c <= '9'));
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * DEPRECATED: Internal function, do not use.
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC references the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * It sounds right to still allow irregular i-xxx IANA and user codes too.
 * The parser below doesn't try to cope with extension or privateuse
 * that could be added but that's not interoperable anyway. Only
 * alphabetic variants of 5 to 8 letters are recognized, and whatever
 * follows a '-' after a variant is accepted unchecked.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang, *nxt;
    ptrdiff_t len;

    if (cur == NULL)
        return (0);

    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) &&
        (cur[1] == '-')) {
        /*
         * Still allow IANA code and user code which were coming
         * from the previous version of the XML-1.0 specification
         * it's deprecated but we should not fail
         */
        nxt = xmlLangSkipAlpha(cur + 2);
        return(nxt[0] == 0);
    }

    /* primary language subtag */
    nxt = xmlLangSkipAlpha(cur);
    len = nxt - cur;
    if (len >= 4) {
        /*
         * Reserved or registered language subtag: at most 8 letters
         * and nothing may follow.
         */
        return((len <= 8) && (nxt[0] == 0));
    }
    if (len < 2)
        return(0);
    /* we got an ISO 639 code */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have extlang or script or region or variant */
    if (xmlLangIsDigit(nxt[0]))
        goto region_m49;

    nxt = xmlLangSkipAlpha(nxt);
    len = nxt - cur;
    if (len == 4)
        goto script;
    if (len == 2)
        goto region;
    if ((len >= 5) && (len <= 8))
        goto variant;
    if (len != 3)
        return(0);
    /* we parsed an extlang */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have script or region or variant */
    if (xmlLangIsDigit(nxt[0]))
        goto region_m49;

    nxt = xmlLangSkipAlpha(nxt);
    len = nxt - cur;
    if (len == 2)
        goto region;
    if ((len >= 5) && (len <= 8))
        goto variant;
    if (len != 4)
        return(0);
    /* we parsed a script */
script:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have region or variant */
    if (xmlLangIsDigit(nxt[0]))
        goto region_m49;

    nxt = xmlLangSkipAlpha(nxt);
    len = nxt - cur;
    if ((len >= 5) && (len <= 8))
        goto variant;
    if (len != 2)
        return(0);
    /* we parsed a region */
region:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can just have a variant */
    nxt = xmlLangSkipAlpha(nxt);
    len = nxt - cur;
    if ((len < 5) || (len > 8))
        return(0);

    /* we parsed a variant */
variant:
    /* extensions and private use subtags not checked */
    return((nxt[0] == 0) || (nxt[0] == '-'));

region_m49:
    /* nxt[0] is already known to be a digit; require two more */
    if (xmlLangIsDigit(nxt[1]) && xmlLangIsDigit(nxt[2])) {
        nxt += 3;
        goto region;
    }
    return(0);
}
1429
1430
/************************************************************************
1431
 *                  *
1432
 *    Parser stacks related functions and macros    *
1433
 *                  *
1434
 ************************************************************************/
1435
1436
static xmlChar *
1437
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1438
1439
/**
1440
 * xmlParserNsCreate:
1441
 *
1442
 * Create a new namespace database.
1443
 *
1444
 * Returns the new obejct.
1445
 */
1446
xmlParserNsData *
1447
216k
xmlParserNsCreate(void) {
1448
216k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1449
1450
216k
    if (nsdb == NULL)
1451
0
        return(NULL);
1452
216k
    memset(nsdb, 0, sizeof(*nsdb));
1453
216k
    nsdb->defaultNsIndex = INT_MAX;
1454
1455
216k
    return(nsdb);
1456
216k
}
1457
1458
/**
1459
 * xmlParserNsFree:
1460
 * @nsdb: namespace database
1461
 *
1462
 * Free a namespace database.
1463
 */
1464
void
1465
216k
xmlParserNsFree(xmlParserNsData *nsdb) {
1466
216k
    if (nsdb == NULL)
1467
0
        return;
1468
1469
216k
    xmlFree(nsdb->extra);
1470
216k
    xmlFree(nsdb->hash);
1471
216k
    xmlFree(nsdb);
1472
216k
}
1473
1474
/**
1475
 * xmlParserNsReset:
1476
 * @nsdb: namespace database
1477
 *
1478
 * Reset a namespace database.
1479
 */
1480
static void
1481
5.80k
xmlParserNsReset(xmlParserNsData *nsdb) {
1482
5.80k
    if (nsdb == NULL)
1483
0
        return;
1484
1485
5.80k
    nsdb->hashElems = 0;
1486
5.80k
    nsdb->elementId = 0;
1487
5.80k
    nsdb->defaultNsIndex = INT_MAX;
1488
1489
5.80k
    if (nsdb->hash)
1490
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1491
5.80k
}
1492
1493
/**
1494
 * xmlParserStartElement:
1495
 * @nsdb: namespace database
1496
 *
1497
 * Signal that a new element has started.
1498
 *
1499
 * Returns 0 on success, -1 if the element counter overflowed.
1500
 */
1501
static int
1502
31.0M
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1503
31.0M
    if (nsdb->elementId == UINT_MAX)
1504
0
        return(-1);
1505
31.0M
    nsdb->elementId++;
1506
1507
31.0M
    return(0);
1508
31.0M
}
1509
1510
/**
1511
 * xmlParserNsLookup:
1512
 * @ctxt: parser context
1513
 * @prefix: namespace prefix
1514
 * @bucketPtr: optional bucket (return value)
1515
 *
1516
 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1517
 * be set to the matching bucket, or the first empty bucket if no match
1518
 * was found.
1519
 *
1520
 * Returns the namespace index on success, INT_MAX if no namespace was
1521
 * found.
1522
 */
1523
static int
1524
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1525
40.7M
                  xmlParserNsBucket **bucketPtr) {
1526
40.7M
    xmlParserNsBucket *bucket, *tombstone;
1527
40.7M
    unsigned index, hashValue;
1528
1529
40.7M
    if (prefix->name == NULL)
1530
18.8M
        return(ctxt->nsdb->defaultNsIndex);
1531
1532
21.8M
    if (ctxt->nsdb->hashSize == 0)
1533
405k
        return(INT_MAX);
1534
1535
21.4M
    hashValue = prefix->hashValue;
1536
21.4M
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1537
21.4M
    bucket = &ctxt->nsdb->hash[index];
1538
21.4M
    tombstone = NULL;
1539
1540
22.6M
    while (bucket->hashValue) {
1541
21.1M
        if (bucket->index == INT_MAX) {
1542
239k
            if (tombstone == NULL)
1543
232k
                tombstone = bucket;
1544
20.9M
        } else if (bucket->hashValue == hashValue) {
1545
19.9M
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1546
19.9M
                if (bucketPtr != NULL)
1547
906k
                    *bucketPtr = bucket;
1548
19.9M
                return(bucket->index);
1549
19.9M
            }
1550
19.9M
        }
1551
1552
1.25M
        index++;
1553
1.25M
        bucket++;
1554
1.25M
        if (index == ctxt->nsdb->hashSize) {
1555
14.3k
            index = 0;
1556
14.3k
            bucket = ctxt->nsdb->hash;
1557
14.3k
        }
1558
1.25M
    }
1559
1560
1.49M
    if (bucketPtr != NULL)
1561
813k
        *bucketPtr = tombstone ? tombstone : bucket;
1562
1.49M
    return(INT_MAX);
1563
21.4M
}
1564
1565
/**
1566
 * xmlParserNsLookupUri:
1567
 * @ctxt: parser context
1568
 * @prefix: namespace prefix
1569
 *
1570
 * Lookup namespace URI with given prefix.
1571
 *
1572
 * Returns the namespace URI on success, NULL if no namespace was found.
1573
 */
1574
static const xmlChar *
1575
31.0M
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1576
31.0M
    const xmlChar *ret;
1577
31.0M
    int nsIndex;
1578
1579
31.0M
    if (prefix->name == ctxt->str_xml)
1580
25.8k
        return(ctxt->str_xml_ns);
1581
1582
    /*
1583
     * minNsIndex is used when building an entity tree. We must
1584
     * ignore namespaces declared outside the entity.
1585
     */
1586
30.9M
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1587
30.9M
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1588
14.5M
        return(NULL);
1589
1590
16.4M
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1591
16.4M
    if (ret[0] == 0)
1592
24.9k
        ret = NULL;
1593
16.4M
    return(ret);
1594
30.9M
}
1595
1596
/**
1597
 * xmlParserNsLookupSax:
1598
 * @ctxt: parser context
1599
 * @prefix: namespace prefix
1600
 *
1601
 * Lookup extra data for the given prefix. This returns data stored
1602
 * with xmlParserNsUdpateSax.
1603
 *
1604
 * Returns the data on success, NULL if no namespace was found.
1605
 */
1606
void *
1607
923k
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1608
923k
    xmlHashedString hprefix;
1609
923k
    int nsIndex;
1610
1611
923k
    if (prefix == ctxt->str_xml)
1612
0
        return(NULL);
1613
1614
923k
    hprefix.name = prefix;
1615
923k
    if (prefix != NULL)
1616
922k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1617
1.54k
    else
1618
1.54k
        hprefix.hashValue = 0;
1619
923k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1620
923k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1621
0
        return(NULL);
1622
1623
923k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1624
923k
}
1625
1626
/**
1627
 * xmlParserNsUpdateSax:
1628
 * @ctxt: parser context
1629
 * @prefix: namespace prefix
1630
 * @saxData: extra data for SAX handler
1631
 *
1632
 * Sets or updates extra data for the given prefix. This value will be
1633
 * returned by xmlParserNsLookupSax as long as the namespace with the
1634
 * given prefix is in scope.
1635
 *
1636
 * Returns the data on success, NULL if no namespace was found.
1637
 */
1638
int
1639
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1640
11.3k
                     void *saxData) {
1641
11.3k
    xmlHashedString hprefix;
1642
11.3k
    int nsIndex;
1643
1644
11.3k
    if (prefix == ctxt->str_xml)
1645
0
        return(-1);
1646
1647
11.3k
    hprefix.name = prefix;
1648
11.3k
    if (prefix != NULL)
1649
10.5k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1650
788
    else
1651
788
        hprefix.hashValue = 0;
1652
11.3k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1653
11.3k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1654
0
        return(-1);
1655
1656
11.3k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1657
11.3k
    return(0);
1658
11.3k
}
1659
1660
/**
1661
 * xmlParserNsGrow:
1662
 * @ctxt: parser context
1663
 *
1664
 * Grows the namespace tables.
1665
 *
1666
 * Returns 0 on success, -1 if a memory allocation failed.
1667
 */
1668
static int
1669
525k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1670
525k
    const xmlChar **table;
1671
525k
    xmlParserNsExtra *extra;
1672
525k
    int newSize;
1673
1674
525k
    newSize = xmlGrowCapacity(ctxt->nsMax,
1675
525k
                              sizeof(table[0]) + sizeof(extra[0]),
1676
525k
                              16, XML_MAX_ITEMS);
1677
525k
    if (newSize < 0)
1678
0
        goto error;
1679
1680
525k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1681
525k
    if (table == NULL)
1682
0
        goto error;
1683
525k
    ctxt->nsTab = table;
1684
1685
525k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1686
525k
    if (extra == NULL)
1687
0
        goto error;
1688
525k
    ctxt->nsdb->extra = extra;
1689
1690
525k
    ctxt->nsMax = newSize;
1691
525k
    return(0);
1692
1693
0
error:
1694
0
    xmlErrMemory(ctxt);
1695
0
    return(-1);
1696
525k
}
1697
1698
/**
1699
 * xmlParserNsPush:
1700
 * @ctxt: parser context
1701
 * @prefix: prefix with hash value
1702
 * @uri: uri with hash value
1703
 * @saxData: extra data for SAX handler
1704
 * @defAttr: whether the namespace comes from a default attribute
1705
 *
1706
 * Push a new namespace on the table.
1707
 *
1708
 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1709
 * -1 if a memory allocation failed.
1710
 */
1711
static int
1712
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1713
1.47M
                const xmlHashedString *uri, void *saxData, int defAttr) {
1714
1.47M
    xmlParserNsBucket *bucket = NULL;
1715
1.47M
    xmlParserNsExtra *extra;
1716
1.47M
    const xmlChar **ns;
1717
1.47M
    unsigned hashValue, nsIndex, oldIndex;
1718
1719
1.47M
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1720
71
        return(0);
1721
1722
1.47M
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1723
0
        xmlErrMemory(ctxt);
1724
0
        return(-1);
1725
0
    }
1726
1727
    /*
1728
     * Default namespace and 'xml' namespace
1729
     */
1730
1.47M
    if ((prefix == NULL) || (prefix->name == NULL)) {
1731
316k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1732
1733
316k
        if (oldIndex != INT_MAX) {
1734
180k
            extra = &ctxt->nsdb->extra[oldIndex];
1735
1736
180k
            if (extra->elementId == ctxt->nsdb->elementId) {
1737
93.0k
                if (defAttr == 0)
1738
88.9k
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1739
93.0k
                return(0);
1740
93.0k
            }
1741
1742
87.6k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1743
0
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1744
0
                return(0);
1745
87.6k
        }
1746
1747
223k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1748
223k
        goto populate_entry;
1749
316k
    }
1750
1751
    /*
1752
     * Hash table lookup
1753
     */
1754
1.15M
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1755
1.15M
    if (oldIndex != INT_MAX) {
1756
226k
        extra = &ctxt->nsdb->extra[oldIndex];
1757
1758
        /*
1759
         * Check for duplicate definitions on the same element.
1760
         */
1761
226k
        if (extra->elementId == ctxt->nsdb->elementId) {
1762
54.3k
            if (defAttr == 0)
1763
54.3k
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1764
54.3k
            return(0);
1765
54.3k
        }
1766
1767
172k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1768
0
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1769
0
            return(0);
1770
1771
172k
        bucket->index = ctxt->nsNr;
1772
172k
        goto populate_entry;
1773
172k
    }
1774
1775
    /*
1776
     * Insert new bucket
1777
     */
1778
1779
931k
    hashValue = prefix->hashValue;
1780
1781
    /*
1782
     * Grow hash table, 50% fill factor
1783
     */
1784
931k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1785
154k
        xmlParserNsBucket *newHash;
1786
154k
        unsigned newSize, i, index;
1787
1788
154k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1789
0
            xmlErrMemory(ctxt);
1790
0
            return(-1);
1791
0
        }
1792
154k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1793
154k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1794
154k
        if (newHash == NULL) {
1795
0
            xmlErrMemory(ctxt);
1796
0
            return(-1);
1797
0
        }
1798
154k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1799
1800
1.61M
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1801
1.45M
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1802
1.45M
            unsigned newIndex;
1803
1804
1.45M
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1805
1.04M
                continue;
1806
409k
            newIndex = hv & (newSize - 1);
1807
1808
423k
            while (newHash[newIndex].hashValue != 0) {
1809
13.6k
                newIndex++;
1810
13.6k
                if (newIndex == newSize)
1811
35
                    newIndex = 0;
1812
13.6k
            }
1813
1814
409k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1815
409k
        }
1816
1817
154k
        xmlFree(ctxt->nsdb->hash);
1818
154k
        ctxt->nsdb->hash = newHash;
1819
154k
        ctxt->nsdb->hashSize = newSize;
1820
1821
        /*
1822
         * Relookup
1823
         */
1824
154k
        index = hashValue & (newSize - 1);
1825
1826
157k
        while (newHash[index].hashValue != 0) {
1827
2.50k
            index++;
1828
2.50k
            if (index == newSize)
1829
23
                index = 0;
1830
2.50k
        }
1831
1832
154k
        bucket = &newHash[index];
1833
154k
    }
1834
1835
931k
    bucket->hashValue = hashValue;
1836
931k
    bucket->index = ctxt->nsNr;
1837
931k
    ctxt->nsdb->hashElems++;
1838
931k
    oldIndex = INT_MAX;
1839
1840
1.32M
populate_entry:
1841
1.32M
    nsIndex = ctxt->nsNr;
1842
1843
1.32M
    ns = &ctxt->nsTab[nsIndex * 2];
1844
1.32M
    ns[0] = prefix ? prefix->name : NULL;
1845
1.32M
    ns[1] = uri->name;
1846
1847
1.32M
    extra = &ctxt->nsdb->extra[nsIndex];
1848
1.32M
    extra->saxData = saxData;
1849
1.32M
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1850
1.32M
    extra->uriHashValue = uri->hashValue;
1851
1.32M
    extra->elementId = ctxt->nsdb->elementId;
1852
1.32M
    extra->oldIndex = oldIndex;
1853
1854
1.32M
    ctxt->nsNr++;
1855
1856
1.32M
    return(1);
1857
931k
}
1858
1859
/**
1860
 * xmlParserNsPop:
1861
 * @ctxt: an XML parser context
1862
 * @nr:  the number to pop
1863
 *
1864
 * Pops the top @nr namespaces and restores the hash table.
1865
 *
1866
 * Returns the number of namespaces popped.
1867
 */
1868
static int
1869
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1870
317k
{
1871
317k
    int i;
1872
1873
    /* assert(nr <= ctxt->nsNr); */
1874
1875
1.14M
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1876
829k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1877
829k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1878
1879
829k
        if (prefix == NULL) {
1880
149k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1881
680k
        } else {
1882
680k
            xmlHashedString hprefix;
1883
680k
            xmlParserNsBucket *bucket = NULL;
1884
1885
680k
            hprefix.name = prefix;
1886
680k
            hprefix.hashValue = extra->prefixHashValue;
1887
680k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1888
            /* assert(bucket && bucket->hashValue); */
1889
680k
            bucket->index = extra->oldIndex;
1890
680k
        }
1891
829k
    }
1892
1893
317k
    ctxt->nsNr -= nr;
1894
317k
    return(nr);
1895
317k
}
1896
1897
static int
1898
583k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt) {
1899
583k
    const xmlChar **atts;
1900
583k
    unsigned *attallocs;
1901
583k
    int newSize;
1902
1903
583k
    newSize = xmlGrowCapacity(ctxt->maxatts / 5,
1904
583k
                              sizeof(atts[0]) * 5 + sizeof(attallocs[0]),
1905
583k
                              10, XML_MAX_ATTRS);
1906
583k
    if (newSize < 0) {
1907
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
1908
0
                    "Maximum number of attributes exceeded");
1909
0
        return(-1);
1910
0
    }
1911
1912
583k
    atts = xmlRealloc(ctxt->atts, newSize * sizeof(atts[0]) * 5);
1913
583k
    if (atts == NULL)
1914
0
        goto mem_error;
1915
583k
    ctxt->atts = atts;
1916
1917
583k
    attallocs = xmlRealloc(ctxt->attallocs,
1918
583k
                           newSize * sizeof(attallocs[0]));
1919
583k
    if (attallocs == NULL)
1920
0
        goto mem_error;
1921
583k
    ctxt->attallocs = attallocs;
1922
1923
583k
    ctxt->maxatts = newSize * 5;
1924
1925
583k
    return(0);
1926
1927
0
mem_error:
1928
0
    xmlErrMemory(ctxt);
1929
0
    return(-1);
1930
583k
}
1931
1932
/**
1933
 * xmlCtxtPushInput:
1934
 * @ctxt:  an XML parser context
1935
 * @value:  the parser input
1936
 *
1937
 * Pushes a new parser input on top of the input stack
1938
 *
1939
 * Returns -1 in case of error, the index in the stack otherwise
1940
 */
1941
int
1942
xmlCtxtPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1943
216k
{
1944
216k
    char *directory = NULL;
1945
216k
    int maxDepth;
1946
1947
216k
    if ((ctxt == NULL) || (value == NULL))
1948
0
        return(-1);
1949
1950
216k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
1951
1952
216k
    if (ctxt->inputNr >= ctxt->inputMax) {
1953
0
        xmlParserInputPtr *tmp;
1954
0
        int newSize;
1955
1956
0
        newSize = xmlGrowCapacity(ctxt->inputMax, sizeof(tmp[0]),
1957
0
                                  5, maxDepth);
1958
0
        if (newSize < 0) {
1959
0
            xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
1960
0
                           "Maximum entity nesting depth exceeded");
1961
0
            xmlHaltParser(ctxt);
1962
0
            return(-1);
1963
0
        }
1964
0
        tmp = xmlRealloc(ctxt->inputTab, newSize * sizeof(tmp[0]));
1965
0
        if (tmp == NULL) {
1966
0
            xmlErrMemory(ctxt);
1967
0
            return(-1);
1968
0
        }
1969
0
        ctxt->inputTab = tmp;
1970
0
        ctxt->inputMax = newSize;
1971
0
    }
1972
1973
216k
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1974
0
        directory = xmlParserGetDirectory(value->filename);
1975
0
        if (directory == NULL) {
1976
0
            xmlErrMemory(ctxt);
1977
0
            return(-1);
1978
0
        }
1979
0
    }
1980
1981
216k
    if (ctxt->input_id >= INT_MAX) {
1982
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1983
0
        return(-1);
1984
0
    }
1985
1986
216k
    ctxt->inputTab[ctxt->inputNr] = value;
1987
216k
    ctxt->input = value;
1988
1989
216k
    if (ctxt->inputNr == 0) {
1990
216k
        xmlFree(ctxt->directory);
1991
216k
        ctxt->directory = directory;
1992
216k
    }
1993
1994
    /*
1995
     * Internally, the input ID is only used to detect parameter entity
1996
     * boundaries. But there are entity loaders in downstream code that
1997
     * detect the main document by checking for "input_id == 1".
1998
     */
1999
216k
    value->id = ctxt->input_id++;
2000
2001
216k
    return(ctxt->inputNr++);
2002
216k
}
2003
2004
/**
2005
 * xmlCtxtPopInput:
2006
 * @ctxt: an XML parser context
2007
 *
2008
 * Pops the top parser input from the input stack
2009
 *
2010
 * Returns the input just removed
2011
 */
2012
xmlParserInputPtr
2013
xmlCtxtPopInput(xmlParserCtxtPtr ctxt)
2014
655k
{
2015
655k
    xmlParserInputPtr ret;
2016
2017
655k
    if (ctxt == NULL)
2018
0
        return(NULL);
2019
655k
    if (ctxt->inputNr <= 0)
2020
438k
        return (NULL);
2021
216k
    ctxt->inputNr--;
2022
216k
    if (ctxt->inputNr > 0)
2023
0
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
2024
216k
    else
2025
216k
        ctxt->input = NULL;
2026
216k
    ret = ctxt->inputTab[ctxt->inputNr];
2027
216k
    ctxt->inputTab[ctxt->inputNr] = NULL;
2028
216k
    return (ret);
2029
655k
}
2030
2031
/**
2032
 * nodePush:
2033
 * @ctxt:  an XML parser context
2034
 * @value:  the element node
2035
 *
2036
 * DEPRECATED: Internal function, do not use.
2037
 *
2038
 * Pushes a new element node on top of the node stack
2039
 *
2040
 * Returns -1 in case of error, the index in the stack otherwise
2041
 */
2042
int
2043
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
2044
926k
{
2045
926k
    if (ctxt == NULL)
2046
0
        return(0);
2047
2048
926k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
2049
23.1k
        int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2050
23.1k
        xmlNodePtr *tmp;
2051
23.1k
        int newSize;
2052
2053
23.1k
        newSize = xmlGrowCapacity(ctxt->nodeMax, sizeof(tmp[0]),
2054
23.1k
                                  10, maxDepth);
2055
23.1k
        if (newSize < 0) {
2056
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2057
0
                    "Excessive depth in document: %d,"
2058
0
                    " use XML_PARSE_HUGE option\n",
2059
0
                    ctxt->nodeNr);
2060
0
            xmlHaltParser(ctxt);
2061
0
            return(-1);
2062
0
        }
2063
2064
23.1k
  tmp = xmlRealloc(ctxt->nodeTab, newSize * sizeof(tmp[0]));
2065
23.1k
        if (tmp == NULL) {
2066
0
            xmlErrMemory(ctxt);
2067
0
            return (-1);
2068
0
        }
2069
23.1k
        ctxt->nodeTab = tmp;
2070
23.1k
  ctxt->nodeMax = newSize;
2071
23.1k
    }
2072
2073
926k
    ctxt->nodeTab[ctxt->nodeNr] = value;
2074
926k
    ctxt->node = value;
2075
926k
    return (ctxt->nodeNr++);
2076
926k
}
2077
2078
/**
2079
 * nodePop:
2080
 * @ctxt: an XML parser context
2081
 *
2082
 * DEPRECATED: Internal function, do not use.
2083
 *
2084
 * Pops the top element node from the node stack
2085
 *
2086
 * Returns the node just removed
2087
 */
2088
xmlNodePtr
2089
nodePop(xmlParserCtxtPtr ctxt)
2090
1.01M
{
2091
1.01M
    xmlNodePtr ret;
2092
2093
1.01M
    if (ctxt == NULL) return(NULL);
2094
1.01M
    if (ctxt->nodeNr <= 0)
2095
92.9k
        return (NULL);
2096
926k
    ctxt->nodeNr--;
2097
926k
    if (ctxt->nodeNr > 0)
2098
921k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2099
5.70k
    else
2100
5.70k
        ctxt->node = NULL;
2101
926k
    ret = ctxt->nodeTab[ctxt->nodeNr];
2102
926k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2103
926k
    return (ret);
2104
1.01M
}
2105
2106
/**
2107
 * nameNsPush:
2108
 * @ctxt:  an XML parser context
2109
 * @value:  the element name
2110
 * @prefix:  the element prefix
2111
 * @URI:  the element namespace name
2112
 * @line:  the current line number for error messages
2113
 * @nsNr:  the number of namespaces pushed on the namespace table
2114
 *
2115
 * Pushes a new element name/prefix/URL on top of the name stack
2116
 *
2117
 * Returns -1 in case of error, the index in the stack otherwise
2118
 */
2119
static int
2120
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2121
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2122
22.8M
{
2123
22.8M
    xmlStartTag *tag;
2124
2125
22.8M
    if (ctxt->nameNr >= ctxt->nameMax) {
2126
714k
        const xmlChar **tmp;
2127
714k
        xmlStartTag *tmp2;
2128
714k
        int newSize;
2129
2130
714k
        newSize = xmlGrowCapacity(ctxt->nameMax,
2131
714k
                                  sizeof(tmp[0]) + sizeof(tmp2[0]),
2132
714k
                                  10, XML_MAX_ITEMS);
2133
714k
        if (newSize < 0)
2134
0
            goto mem_error;
2135
2136
714k
        tmp = xmlRealloc(ctxt->nameTab, newSize * sizeof(tmp[0]));
2137
714k
        if (tmp == NULL)
2138
0
      goto mem_error;
2139
714k
  ctxt->nameTab = tmp;
2140
2141
714k
        tmp2 = xmlRealloc(ctxt->pushTab, newSize * sizeof(tmp2[0]));
2142
714k
        if (tmp2 == NULL)
2143
0
      goto mem_error;
2144
714k
  ctxt->pushTab = tmp2;
2145
2146
714k
        ctxt->nameMax = newSize;
2147
22.0M
    } else if (ctxt->pushTab == NULL) {
2148
197k
        ctxt->pushTab = xmlMalloc(ctxt->nameMax * sizeof(ctxt->pushTab[0]));
2149
197k
        if (ctxt->pushTab == NULL)
2150
0
            goto mem_error;
2151
197k
    }
2152
22.8M
    ctxt->nameTab[ctxt->nameNr] = value;
2153
22.8M
    ctxt->name = value;
2154
22.8M
    tag = &ctxt->pushTab[ctxt->nameNr];
2155
22.8M
    tag->prefix = prefix;
2156
22.8M
    tag->URI = URI;
2157
22.8M
    tag->line = line;
2158
22.8M
    tag->nsNr = nsNr;
2159
22.8M
    return (ctxt->nameNr++);
2160
0
mem_error:
2161
0
    xmlErrMemory(ctxt);
2162
0
    return (-1);
2163
22.8M
}
2164
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Pops the top element/prefix/URI name from the name stack. ctxt->name
 * is set to the entry below it, or to NULL when the stack becomes
 * empty, and the vacated nameTab slot is cleared.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *top;
    int idx;

    if (ctxt->nameNr <= 0)
        return(NULL);

    idx = --ctxt->nameNr;
    ctxt->name = (idx > 0) ? ctxt->nameTab[idx - 1] : NULL;

    top = ctxt->nameTab[idx];
    ctxt->nameTab[idx] = NULL;
    return(top);
}
#endif /* LIBXML_PUSH_ENABLED */
2190
2191
/**
2192
 * namePop:
2193
 * @ctxt: an XML parser context
2194
 *
2195
 * DEPRECATED: Internal function, do not use.
2196
 *
2197
 * Pops the top element name from the name stack
2198
 *
2199
 * Returns the name just removed
2200
 */
2201
static const xmlChar *
2202
namePop(xmlParserCtxtPtr ctxt)
2203
1.00M
{
2204
1.00M
    const xmlChar *ret;
2205
2206
1.00M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2207
0
        return (NULL);
2208
1.00M
    ctxt->nameNr--;
2209
1.00M
    if (ctxt->nameNr > 0)
2210
1.00M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2211
5.57k
    else
2212
5.57k
        ctxt->name = NULL;
2213
1.00M
    ret = ctxt->nameTab[ctxt->nameNr];
2214
1.00M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2215
1.00M
    return (ret);
2216
1.00M
}
2217
2218
31.0M
/*
 * Push @val on the ctxt->spaceTab stack and point ctxt->space at the
 * new top entry. The table is enlarged on demand.
 *
 * Returns the index of the new entry, or -1 after reporting a memory
 * error.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;
        int capacity;

        capacity = xmlGrowCapacity(ctxt->spaceMax, sizeof(grown[0]),
                                   10, XML_MAX_ITEMS);
        if (capacity >= 0)
            grown = xmlRealloc(ctxt->spaceTab, capacity * sizeof(grown[0]));
        else
            grown = NULL;

        /* Both a refused capacity and a failed realloc end up here. */
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        ctxt->spaceTab = grown;
        ctxt->spaceMax = capacity;
    }

    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    *ctxt->space = val;
    return(ctxt->spaceNr++);
}
2243
2244
17.0M
/*
 * Pop the top entry of the ctxt->spaceTab stack. ctxt->space is moved
 * to the entry below it, or to slot 0 when the stack becomes empty, and
 * the vacated slot is overwritten with -1.
 *
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int idx;

    if (ctxt->spaceNr <= 0)
        return(0);

    idx = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(idx > 0) ? idx - 1 : 0];

    popped = ctxt->spaceTab[idx];
    ctxt->spaceTab[idx] = -1;
    return(popped);
}
2256
2257
/*
2258
 * Macros for accessing the content. Those should be used only by the parser,
2259
 * and not exported.
2260
 *
2261
 * Dirty macros, i.e. one often needs to make assumptions about the context to
2262
 * use them
2263
 *
2264
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2265
 *           To be used with extreme caution since operations consuming
2266
 *           characters may move the input buffer to a different location !
2267
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2268
 *           This should be used internally by the parser
2269
 *           only to compare to ASCII values otherwise it would break when
2270
 *           running with UTF-8 encoding.
2271
 *   RAW     same as CUR but in the input buffer, bypass any token
2272
 *           extraction that may have been done
2273
 *   NXT(n)  returns the n'th next xmlChar. Like CUR, it should be used only
2274
 *           to compare on ASCII based substring.
2275
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2276
 *           strings without newlines within the parser.
2277
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2278
 *           defined char within the parser.
2279
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2280
 *
2281
 *   NEXT    Skip to the next character, this does the proper decoding
2282
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2283
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2284
 *   CUR_SCHAR  same but operate on a string instead of the context
2285
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2286
 *            the index
2287
 *   GROW, SHRINK  handling of input buffers
2288
 */
2289
2290
237M
#define RAW (*ctxt->input->cur)
2291
372M
#define CUR (*ctxt->input->cur)
2292
31.0M
#define NXT(val) ctxt->input->cur[(val)]
2293
564M
#define CUR_PTR ctxt->input->cur
2294
140M
#define BASE_PTR ctxt->input->base
2295
2296
#define CMP4( s, c1, c2, c3, c4 ) \
2297
5.30M
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
2298
2.76M
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
2299
#define CMP5( s, c1, c2, c3, c4, c5 ) \
2300
4.99M
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
2301
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
2302
4.75M
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
2303
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
2304
4.40M
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
2305
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
2306
3.98M
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
2307
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
2308
1.99M
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
2309
1.99M
    ((unsigned char *) s)[ 8 ] == c9 )
2310
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
2311
101k
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
2312
101k
    ((unsigned char *) s)[ 9 ] == c10 )
2313
2314
18.4M
#define SKIP(val) do {             \
2315
18.4M
    ctxt->input->cur += (val),ctxt->input->col+=(val);      \
2316
18.4M
    if (*ctxt->input->cur == 0)           \
2317
18.4M
        xmlParserGrow(ctxt);           \
2318
18.4M
  } while (0)
2319
2320
/*
 * SKIPL(val): advance the input by val xmlChars one at a time, keeping
 * the line and column counters in step (a '\n' starts a new line at
 * column 1), then grow the input if the new position holds a zero byte.
 * The argument is parenthesized so that any expression can be passed.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)
2331
2332
#define SHRINK \
2333
2.90M
    if (!PARSER_PROGRESSIVE(ctxt)) \
2334
2.90M
  xmlParserShrink(ctxt);
2335
2336
#define GROW \
2337
203M
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
2338
203M
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2339
355k
  xmlParserGrow(ctxt);
2340
2341
97.3M
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2342
2343
730k
#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)
2344
2345
69.3M
#define NEXT xmlNextChar(ctxt)
2346
2347
39.6M
#define NEXT1 {               \
2348
39.6M
  ctxt->input->col++;           \
2349
39.6M
  ctxt->input->cur++;           \
2350
39.6M
  if (*ctxt->input->cur == 0)         \
2351
39.6M
      xmlParserGrow(ctxt);           \
2352
39.6M
    }
2353
2354
400M
#define NEXTL(l) do {             \
2355
400M
    if (*(ctxt->input->cur) == '\n') {         \
2356
1.18M
  ctxt->input->line++; ctxt->input->col = 1;      \
2357
398M
    } else ctxt->input->col++;           \
2358
400M
    ctxt->input->cur += l;        \
2359
400M
  } while (0)
2360
2361
341k
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2362
2363
/*
 * COPY_BUF(b, i, v): append the character v to buffer b at index i and
 * advance i. Values below 0x80 are stored as a single byte, everything
 * else goes through xmlCopyCharMultiByte. Arguments are parenthesized
 * and the body is a single statement; note that b, i and v are still
 * evaluated more than once, so they must be free of side effects.
 */
#define COPY_BUF(b, i, v) do {						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v));			\
  } while (0)
2366
2367
static int
2368
109M
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2369
109M
    int c = xmlCurrentChar(ctxt, len);
2370
2371
109M
    if (c == XML_INVALID_CHAR)
2372
325k
        c = 0xFFFD; /* replacement character */
2373
2374
109M
    return(c);
2375
109M
}
2376
2377
/**
2378
 * xmlSkipBlankChars:
2379
 * @ctxt:  the XML parser context
2380
 *
2381
 * DEPRECATED: Internal function, do not use.
2382
 *
2383
 * Skip whitespace in the input stream.
2384
 *
2385
 * Returns the number of space chars skipped
2386
 */
2387
int
2388
98.1M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2389
98.1M
    const xmlChar *cur;
2390
98.1M
    int res = 0;
2391
2392
98.1M
    cur = ctxt->input->cur;
2393
98.1M
    while (IS_BLANK_CH(*cur)) {
2394
29.3M
        if (*cur == '\n') {
2395
809k
            ctxt->input->line++; ctxt->input->col = 1;
2396
28.5M
        } else {
2397
28.5M
            ctxt->input->col++;
2398
28.5M
        }
2399
29.3M
        cur++;
2400
29.3M
        if (res < INT_MAX)
2401
29.3M
            res++;
2402
29.3M
        if (*cur == 0) {
2403
9.81k
            ctxt->input->cur = cur;
2404
9.81k
            xmlParserGrow(ctxt);
2405
9.81k
            cur = ctxt->input->cur;
2406
9.81k
        }
2407
29.3M
    }
2408
98.1M
    ctxt->input->cur = cur;
2409
2410
98.1M
    if (res > 4)
2411
306k
        GROW;
2412
2413
98.1M
    return(res);
2414
98.1M
}
2415
2416
static void
2417
0
xmlPopPE(xmlParserCtxtPtr ctxt) {
2418
0
    unsigned long consumed;
2419
0
    xmlEntityPtr ent;
2420
2421
0
    ent = ctxt->input->entity;
2422
2423
0
    ent->flags &= ~XML_ENT_EXPANDING;
2424
2425
0
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2426
0
        int result;
2427
2428
        /*
2429
         * Read the rest of the stream in case of errors. We want
2430
         * to account for the whole entity size.
2431
         */
2432
0
        do {
2433
0
            ctxt->input->cur = ctxt->input->end;
2434
0
            xmlParserShrink(ctxt);
2435
0
            result = xmlParserGrow(ctxt);
2436
0
        } while (result > 0);
2437
2438
0
        consumed = ctxt->input->consumed;
2439
0
        xmlSaturatedAddSizeT(&consumed,
2440
0
                             ctxt->input->end - ctxt->input->base);
2441
2442
0
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2443
2444
        /*
2445
         * Add to sizeentities when parsing an external entity
2446
         * for the first time.
2447
         */
2448
0
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2449
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2450
0
        }
2451
2452
0
        ent->flags |= XML_ENT_CHECKED;
2453
0
    }
2454
2455
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
2456
2457
0
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2458
2459
0
    GROW;
2460
0
}
2461
2462
/**
2463
 * xmlSkipBlankCharsPE:
2464
 * @ctxt:  the XML parser context
2465
 *
2466
 * Skip whitespace in the input stream, also handling parameter
2467
 * entities.
2468
 *
2469
 * Returns the number of space chars skipped
2470
 */
2471
static int
2472
730k
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2473
730k
    int res = 0;
2474
730k
    int inParam;
2475
730k
    int expandParam;
2476
2477
730k
    inParam = PARSER_IN_PE(ctxt);
2478
730k
    expandParam = PARSER_EXTERNAL(ctxt);
2479
2480
730k
    if (!inParam && !expandParam)
2481
730k
        return(xmlSkipBlankChars(ctxt));
2482
2483
    /*
2484
     * It's Okay to use CUR/NEXT here since all the blanks are on
2485
     * the ASCII range.
2486
     */
2487
0
    while (PARSER_STOPPED(ctxt) == 0) {
2488
0
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2489
0
            NEXT;
2490
0
        } else if (CUR == '%') {
2491
0
            if ((expandParam == 0) ||
2492
0
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2493
0
                break;
2494
2495
            /*
2496
             * Expand parameter entity. We continue to consume
2497
             * whitespace at the start of the entity and possible
2498
             * even consume the whole entity and pop it. We might
2499
             * even pop multiple PEs in this loop.
2500
             */
2501
0
            xmlParsePEReference(ctxt);
2502
2503
0
            inParam = PARSER_IN_PE(ctxt);
2504
0
            expandParam = PARSER_EXTERNAL(ctxt);
2505
0
        } else if (CUR == 0) {
2506
0
            if (inParam == 0)
2507
0
                break;
2508
2509
0
            xmlPopPE(ctxt);
2510
2511
0
            inParam = PARSER_IN_PE(ctxt);
2512
0
            expandParam = PARSER_EXTERNAL(ctxt);
2513
0
        } else {
2514
0
            break;
2515
0
        }
2516
2517
        /*
2518
         * Also increase the counter when entering or exiting a PERef.
2519
         * The spec says: "When a parameter-entity reference is recognized
2520
         * in the DTD and included, its replacement text MUST be enlarged
2521
         * by the attachment of one leading and one following space (#x20)
2522
         * character."
2523
         */
2524
0
        if (res < INT_MAX)
2525
0
            res++;
2526
0
    }
2527
2528
0
    return(res);
2529
730k
}
2530
2531
/************************************************************************
2532
 *                  *
2533
 *    Commodity functions to handle entities      *
2534
 *                  *
2535
 ************************************************************************/
2536
2537
/**
2538
 * xmlPopInput:
2539
 * @ctxt:  an XML parser context
2540
 *
2541
 * DEPRECATED: Internal function, don't use.
2542
 *
2543
 * Returns the current xmlChar in the parser context
2544
 */
2545
xmlChar
2546
0
xmlPopInput(xmlParserCtxtPtr ctxt) {
2547
0
    xmlParserInputPtr input;
2548
2549
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2550
0
    input = xmlCtxtPopInput(ctxt);
2551
0
    xmlFreeInputStream(input);
2552
0
    if (*ctxt->input->cur == 0)
2553
0
        xmlParserGrow(ctxt);
2554
0
    return(CUR);
2555
0
}
2556
2557
/**
2558
 * xmlPushInput:
2559
 * @ctxt:  an XML parser context
2560
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2561
 *
2562
 * DEPRECATED: Internal function, don't use.
2563
 *
2564
 * Push an input stream onto the stack.
2565
 *
2566
 * Returns -1 in case of error or the index in the input stack
2567
 */
2568
int
2569
0
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2570
0
    int ret;
2571
2572
0
    if ((ctxt == NULL) || (input == NULL))
2573
0
        return(-1);
2574
2575
0
    ret = xmlCtxtPushInput(ctxt, input);
2576
0
    if (ret >= 0)
2577
0
        GROW;
2578
0
    return(ret);
2579
0
}
2580
2581
/**
2582
 * xmlParseCharRef:
2583
 * @ctxt:  an XML parser context
2584
 *
2585
 * DEPRECATED: Internal function, don't use.
2586
 *
2587
 * Parse a numeric character reference. Always consumes '&'.
2588
 *
2589
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2590
 *                  '&#x' [0-9a-fA-F]+ ';'
2591
 *
2592
 * [ WFC: Legal Character ]
2593
 * Characters referred to using character references must match the
2594
 * production for Char.
2595
 *
2596
 * Returns the value parsed (as an int), 0 in case of error
2597
 */
2598
int
2599
130k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2600
130k
    int val = 0;
2601
130k
    int count = 0;
2602
2603
    /*
2604
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2605
     */
2606
130k
    if ((RAW == '&') && (NXT(1) == '#') &&
2607
130k
        (NXT(2) == 'x')) {
2608
83.8k
  SKIP(3);
2609
83.8k
  GROW;
2610
403k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2611
319k
      if (count++ > 20) {
2612
186
    count = 0;
2613
186
    GROW;
2614
186
      }
2615
319k
      if ((RAW >= '0') && (RAW <= '9'))
2616
159k
          val = val * 16 + (CUR - '0');
2617
160k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2618
123k
          val = val * 16 + (CUR - 'a') + 10;
2619
36.8k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2620
36.4k
          val = val * 16 + (CUR - 'A') + 10;
2621
312
      else {
2622
312
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2623
312
    val = 0;
2624
312
    break;
2625
312
      }
2626
319k
      if (val > 0x110000)
2627
2.16k
          val = 0x110000;
2628
2629
319k
      NEXT;
2630
319k
      count++;
2631
319k
  }
2632
83.8k
  if (RAW == ';') {
2633
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2634
83.4k
      ctxt->input->col++;
2635
83.4k
      ctxt->input->cur++;
2636
83.4k
  }
2637
83.8k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2638
46.8k
  SKIP(2);
2639
46.8k
  GROW;
2640
155k
  while (RAW != ';') { /* loop blocked by count */
2641
109k
      if (count++ > 20) {
2642
254
    count = 0;
2643
254
    GROW;
2644
254
      }
2645
109k
      if ((RAW >= '0') && (RAW <= '9'))
2646
108k
          val = val * 10 + (CUR - '0');
2647
290
      else {
2648
290
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2649
290
    val = 0;
2650
290
    break;
2651
290
      }
2652
108k
      if (val > 0x110000)
2653
2.37k
          val = 0x110000;
2654
2655
108k
      NEXT;
2656
108k
      count++;
2657
108k
  }
2658
46.8k
  if (RAW == ';') {
2659
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2660
46.6k
      ctxt->input->col++;
2661
46.6k
      ctxt->input->cur++;
2662
46.6k
  }
2663
46.8k
    } else {
2664
0
        if (RAW == '&')
2665
0
            SKIP(1);
2666
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2667
0
    }
2668
2669
    /*
2670
     * [ WFC: Legal Character ]
2671
     * Characters referred to using character references must match the
2672
     * production for Char.
2673
     */
2674
130k
    if (val >= 0x110000) {
2675
82
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2676
82
                "xmlParseCharRef: character reference out of bounds\n",
2677
82
          val);
2678
82
        val = 0xFFFD;
2679
130k
    } else if (!IS_CHAR(val)) {
2680
1.22k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2681
1.22k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2682
1.22k
                    val);
2683
1.22k
    }
2684
130k
    return(val);
2685
130k
}
2686
2687
/**
2688
 * xmlParseStringCharRef:
2689
 * @ctxt:  an XML parser context
2690
 * @str:  a pointer to an index in the string
2691
 *
2692
 * parse Reference declarations, variant parsing from a string rather
2693
 * than an an input flow.
2694
 *
2695
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2696
 *                  '&#x' [0-9a-fA-F]+ ';'
2697
 *
2698
 * [ WFC: Legal Character ]
2699
 * Characters referred to using character references must match the
2700
 * production for Char.
2701
 *
2702
 * Returns the value parsed (as an int), 0 in case of error, str will be
2703
 *         updated to the current value of the index
2704
 */
2705
static int
2706
31.5k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2707
31.5k
    const xmlChar *ptr;
2708
31.5k
    xmlChar cur;
2709
31.5k
    int val = 0;
2710
2711
31.5k
    if ((str == NULL) || (*str == NULL)) return(0);
2712
31.5k
    ptr = *str;
2713
31.5k
    cur = *ptr;
2714
31.5k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2715
9.76k
  ptr += 3;
2716
9.76k
  cur = *ptr;
2717
36.8k
  while (cur != ';') { /* Non input consuming loop */
2718
30.4k
      if ((cur >= '0') && (cur <= '9'))
2719
12.2k
          val = val * 16 + (cur - '0');
2720
18.1k
      else if ((cur >= 'a') && (cur <= 'f'))
2721
10.8k
          val = val * 16 + (cur - 'a') + 10;
2722
7.34k
      else if ((cur >= 'A') && (cur <= 'F'))
2723
3.98k
          val = val * 16 + (cur - 'A') + 10;
2724
3.35k
      else {
2725
3.35k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2726
3.35k
    val = 0;
2727
3.35k
    break;
2728
3.35k
      }
2729
27.0k
      if (val > 0x110000)
2730
4.08k
          val = 0x110000;
2731
2732
27.0k
      ptr++;
2733
27.0k
      cur = *ptr;
2734
27.0k
  }
2735
9.76k
  if (cur == ';')
2736
6.40k
      ptr++;
2737
21.8k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2738
21.8k
  ptr += 2;
2739
21.8k
  cur = *ptr;
2740
125k
  while (cur != ';') { /* Non input consuming loops */
2741
105k
      if ((cur >= '0') && (cur <= '9'))
2742
103k
          val = val * 10 + (cur - '0');
2743
1.35k
      else {
2744
1.35k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2745
1.35k
    val = 0;
2746
1.35k
    break;
2747
1.35k
      }
2748
103k
      if (val > 0x110000)
2749
1.53k
          val = 0x110000;
2750
2751
103k
      ptr++;
2752
103k
      cur = *ptr;
2753
103k
  }
2754
21.8k
  if (cur == ';')
2755
20.4k
      ptr++;
2756
21.8k
    } else {
2757
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2758
0
  return(0);
2759
0
    }
2760
31.5k
    *str = ptr;
2761
2762
    /*
2763
     * [ WFC: Legal Character ]
2764
     * Characters referred to using character references must match the
2765
     * production for Char.
2766
     */
2767
31.5k
    if (val >= 0x110000) {
2768
161
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2769
161
                "xmlParseStringCharRef: character reference out of bounds\n",
2770
161
                val);
2771
31.4k
    } else if (IS_CHAR(val)) {
2772
25.9k
        return(val);
2773
25.9k
    } else {
2774
5.50k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2775
5.50k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2776
5.50k
        val);
2777
5.50k
    }
2778
5.66k
    return(0);
2779
31.5k
}
2780
2781
/**
2782
 * xmlParserHandlePEReference:
2783
 * @ctxt:  the parser context
2784
 *
2785
 * DEPRECATED: Internal function, do not use.
2786
 *
2787
 * [69] PEReference ::= '%' Name ';'
2788
 *
2789
 * [ WFC: No Recursion ]
2790
 * A parsed entity must not contain a recursive
2791
 * reference to itself, either directly or indirectly.
2792
 *
2793
 * [ WFC: Entity Declared ]
2794
 * In a document without any DTD, a document with only an internal DTD
2795
 * subset which contains no parameter entity references, or a document
2796
 * with "standalone='yes'", ...  ... The declaration of a parameter
2797
 * entity must precede any reference to it...
2798
 *
2799
 * [ VC: Entity Declared ]
2800
 * In a document with an external subset or external parameter entities
2801
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2802
 * must precede any reference to it...
2803
 *
2804
 * [ WFC: In DTD ]
2805
 * Parameter-entity references may only appear in the DTD.
2806
 * NOTE: misleading but this is handled.
2807
 *
2808
 * A PEReference may have been detected in the current input stream
2809
 * the handling is done accordingly to
2810
 *      http://www.w3.org/TR/REC-xml#entproc
2811
 * i.e.
2812
 *   - Included in literal in entity values
2813
 *   - Included as Parameter Entity reference within DTDs
2814
 */
2815
void
2816
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2817
0
    xmlParsePEReference(ctxt);
2818
0
}
2819
2820
/**
2821
 * xmlStringLenDecodeEntities:
2822
 * @ctxt:  the parser context
2823
 * @str:  the input string
2824
 * @len: the string length
2825
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2826
 * @end:  an end marker xmlChar, 0 if none
2827
 * @end2:  an end marker xmlChar, 0 if none
2828
 * @end3:  an end marker xmlChar, 0 if none
2829
 *
2830
 * DEPRECATED: Internal function, don't use.
2831
 *
2832
 * Returns A newly allocated string with the substitution done. The caller
2833
 *      must deallocate it !
2834
 */
2835
xmlChar *
2836
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2837
                           int what ATTRIBUTE_UNUSED,
2838
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2839
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2840
0
        return(NULL);
2841
2842
0
    if ((str[len] != 0) ||
2843
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2844
0
        return(NULL);
2845
2846
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2847
0
}
2848
2849
/**
2850
 * xmlStringDecodeEntities:
2851
 * @ctxt:  the parser context
2852
 * @str:  the input string
2853
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2854
 * @end:  an end marker xmlChar, 0 if none
2855
 * @end2:  an end marker xmlChar, 0 if none
2856
 * @end3:  an end marker xmlChar, 0 if none
2857
 *
2858
 * DEPRECATED: Internal function, don't use.
2859
 *
2860
 * Returns A newly allocated string with the substitution done. The caller
2861
 *      must deallocate it !
2862
 */
2863
xmlChar *
2864
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2865
                        int what ATTRIBUTE_UNUSED,
2866
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2867
0
    if ((ctxt == NULL) || (str == NULL))
2868
0
        return(NULL);
2869
2870
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2871
0
        return(NULL);
2872
2873
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2874
0
}
2875
2876
/************************************************************************
2877
 *                  *
2878
 *    Commodity functions, cleanup needed ?     *
2879
 *                  *
2880
 ************************************************************************/
2881
2882
/**
2883
 * areBlanks:
2884
 * @ctxt:  an XML parser context
2885
 * @str:  a xmlChar *
2886
 * @len:  the size of @str
2887
 * @blank_chars: we know the chars are blanks
2888
 *
2889
 * Is this a sequence of blank chars that one can ignore ?
2890
 *
2891
 * Returns 1 if ignorable 0 otherwise.
2892
 */
2893
2894
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2895
10.6M
                     int blank_chars) {
2896
10.6M
    int i;
2897
10.6M
    xmlNodePtr lastChild;
2898
2899
    /*
2900
     * Check for xml:space value.
2901
     */
2902
10.6M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2903
10.5M
        (*(ctxt->space) == -2))
2904
4.37M
  return(0);
2905
2906
    /*
2907
     * Check that the string is made of blanks
2908
     */
2909
6.30M
    if (blank_chars == 0) {
2910
6.80M
  for (i = 0;i < len;i++)
2911
6.66M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2912
5.53M
    }
2913
2914
    /*
2915
     * Look if the element is mixed content in the DTD if available
2916
     */
2917
917k
    if (ctxt->node == NULL) return(0);
2918
18.4E
    if (ctxt->myDoc != NULL) {
2919
0
        xmlElementPtr elemDecl = NULL;
2920
0
        xmlDocPtr doc = ctxt->myDoc;
2921
0
        const xmlChar *prefix = NULL;
2922
2923
0
        if (ctxt->node->ns)
2924
0
            prefix = ctxt->node->ns->prefix;
2925
0
        if (doc->intSubset != NULL)
2926
0
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2927
0
                                      prefix);
2928
0
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2929
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2930
0
                                      prefix);
2931
0
        if (elemDecl != NULL) {
2932
0
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2933
0
                return(1);
2934
0
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2935
0
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2936
0
                return(0);
2937
0
        }
2938
0
    }
2939
2940
    /*
2941
     * Otherwise, heuristic :-\
2942
     *
2943
     * When push parsing, we could be at the end of a chunk.
2944
     * This makes the look-ahead and consequently the NOBLANKS
2945
     * option unreliable.
2946
     */
2947
18.4E
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2948
18.4E
    if ((ctxt->node->children == NULL) &&
2949
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2950
2951
18.4E
    lastChild = xmlGetLastChild(ctxt->node);
2952
18.4E
    if (lastChild == NULL) {
2953
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2954
0
            (ctxt->node->content != NULL)) return(0);
2955
18.4E
    } else if (xmlNodeIsText(lastChild))
2956
0
        return(0);
2957
18.4E
    else if ((ctxt->node->children != NULL) &&
2958
0
             (xmlNodeIsText(ctxt->node->children)))
2959
0
        return(0);
2960
18.4E
    return(1);
2961
18.4E
}
2962
2963
/************************************************************************
2964
 *                  *
2965
 *    Extra stuff for namespace support     *
2966
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2967
 *                  *
2968
 ************************************************************************/
2969
2970
/**
2971
 * xmlSplitQName:
2972
 * @ctxt:  an XML parser context
2973
 * @name:  an XML parser context
2974
 * @prefixOut:  a xmlChar **
2975
 *
2976
 * DEPRECATED: Don't use.
2977
 *
2978
 * parse an UTF8 encoded XML qualified name string
2979
 *
2980
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2981
 *
2982
 * [NS 6] Prefix ::= NCName
2983
 *
2984
 * [NS 7] LocalPart ::= NCName
2985
 *
2986
 * Returns the local part, and prefix is updated
2987
 *   to get the Prefix if any.
2988
 */
2989
2990
xmlChar *
2991
0
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
2992
0
    xmlChar buf[XML_MAX_NAMELEN + 5];
2993
0
    xmlChar *buffer = NULL;
2994
0
    int len = 0;
2995
0
    int max = XML_MAX_NAMELEN;
2996
0
    xmlChar *ret = NULL;
2997
0
    xmlChar *prefix;
2998
0
    const xmlChar *cur = name;
2999
0
    int c;
3000
3001
0
    if (prefixOut == NULL) return(NULL);
3002
0
    *prefixOut = NULL;
3003
3004
0
    if (cur == NULL) return(NULL);
3005
3006
    /* nasty but well=formed */
3007
0
    if (cur[0] == ':')
3008
0
  return(xmlStrdup(name));
3009
3010
0
    c = *cur++;
3011
0
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3012
0
  buf[len++] = c;
3013
0
  c = *cur++;
3014
0
    }
3015
0
    if (len >= max) {
3016
  /*
3017
   * Okay someone managed to make a huge name, so he's ready to pay
3018
   * for the processing speed.
3019
   */
3020
0
  max = len * 2;
3021
3022
0
  buffer = xmlMalloc(max);
3023
0
  if (buffer == NULL) {
3024
0
      xmlErrMemory(ctxt);
3025
0
      return(NULL);
3026
0
  }
3027
0
  memcpy(buffer, buf, len);
3028
0
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3029
0
      if (len + 10 > max) {
3030
0
          xmlChar *tmp;
3031
0
                int newSize;
3032
3033
0
                newSize = xmlGrowCapacity(max, 1, 1, XML_MAX_ITEMS);
3034
0
                if (newSize < 0) {
3035
0
        xmlErrMemory(ctxt);
3036
0
        xmlFree(buffer);
3037
0
        return(NULL);
3038
0
                }
3039
0
    tmp = xmlRealloc(buffer, newSize);
3040
0
    if (tmp == NULL) {
3041
0
        xmlErrMemory(ctxt);
3042
0
        xmlFree(buffer);
3043
0
        return(NULL);
3044
0
    }
3045
0
    buffer = tmp;
3046
0
    max = newSize;
3047
0
      }
3048
0
      buffer[len++] = c;
3049
0
      c = *cur++;
3050
0
  }
3051
0
  buffer[len] = 0;
3052
0
    }
3053
3054
0
    if ((c == ':') && (*cur == 0)) {
3055
0
        if (buffer != NULL)
3056
0
      xmlFree(buffer);
3057
0
  return(xmlStrdup(name));
3058
0
    }
3059
3060
0
    if (buffer == NULL) {
3061
0
  ret = xmlStrndup(buf, len);
3062
0
        if (ret == NULL) {
3063
0
      xmlErrMemory(ctxt);
3064
0
      return(NULL);
3065
0
        }
3066
0
    } else {
3067
0
  ret = buffer;
3068
0
  buffer = NULL;
3069
0
  max = XML_MAX_NAMELEN;
3070
0
    }
3071
3072
3073
0
    if (c == ':') {
3074
0
  c = *cur;
3075
0
        prefix = ret;
3076
0
  if (c == 0) {
3077
0
      ret = xmlStrndup(BAD_CAST "", 0);
3078
0
            if (ret == NULL) {
3079
0
                xmlFree(prefix);
3080
0
                return(NULL);
3081
0
            }
3082
0
            *prefixOut = prefix;
3083
0
            return(ret);
3084
0
  }
3085
0
  len = 0;
3086
3087
  /*
3088
   * Check that the first character is proper to start
3089
   * a new name
3090
   */
3091
0
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3092
0
        ((c >= 0x41) && (c <= 0x5A)) ||
3093
0
        (c == '_') || (c == ':'))) {
3094
0
      int l;
3095
0
      int first = CUR_SCHAR(cur, l);
3096
3097
0
      if (!IS_LETTER(first) && (first != '_')) {
3098
0
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3099
0
          "Name %s is not XML Namespace compliant\n",
3100
0
          name);
3101
0
      }
3102
0
  }
3103
0
  cur++;
3104
3105
0
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3106
0
      buf[len++] = c;
3107
0
      c = *cur++;
3108
0
  }
3109
0
  if (len >= max) {
3110
      /*
3111
       * Okay someone managed to make a huge name, so he's ready to pay
3112
       * for the processing speed.
3113
       */
3114
0
      max = len * 2;
3115
3116
0
      buffer = xmlMalloc(max);
3117
0
      if (buffer == NULL) {
3118
0
          xmlErrMemory(ctxt);
3119
0
                xmlFree(prefix);
3120
0
    return(NULL);
3121
0
      }
3122
0
      memcpy(buffer, buf, len);
3123
0
      while (c != 0) { /* tested bigname2.xml */
3124
0
    if (len + 10 > max) {
3125
0
        xmlChar *tmp;
3126
0
                    int newSize;
3127
3128
0
                    newSize = xmlGrowCapacity(max, 1, 1, XML_MAX_ITEMS);
3129
0
                    if (newSize < 0) {
3130
0
                        xmlErrMemory(ctxt);
3131
0
                        xmlFree(buffer);
3132
0
                        return(NULL);
3133
0
                    }
3134
0
        tmp = xmlRealloc(buffer, newSize);
3135
0
        if (tmp == NULL) {
3136
0
      xmlErrMemory(ctxt);
3137
0
                        xmlFree(prefix);
3138
0
      xmlFree(buffer);
3139
0
      return(NULL);
3140
0
        }
3141
0
        buffer = tmp;
3142
0
                    max = newSize;
3143
0
    }
3144
0
    buffer[len++] = c;
3145
0
    c = *cur++;
3146
0
      }
3147
0
      buffer[len] = 0;
3148
0
  }
3149
3150
0
  if (buffer == NULL) {
3151
0
      ret = xmlStrndup(buf, len);
3152
0
            if (ret == NULL) {
3153
0
                xmlFree(prefix);
3154
0
                return(NULL);
3155
0
            }
3156
0
  } else {
3157
0
      ret = buffer;
3158
0
  }
3159
3160
0
        *prefixOut = prefix;
3161
0
    }
3162
3163
0
    return(ret);
3164
0
}
3165
3166
/************************************************************************
3167
 *                  *
3168
 *      The parser itself       *
3169
 *  Relates to http://www.w3.org/TR/REC-xml       *
3170
 *                  *
3171
 ************************************************************************/
3172
3173
/************************************************************************
3174
 *                  *
3175
 *  Routines to parse Name, NCName and NmToken      *
3176
 *                  *
3177
 ************************************************************************/
3178
3179
/*
3180
 * The two following functions are related to the change of accepted
3181
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3182
 * They correspond to the modified production [4] and the new production [4a]
3183
 * changes in that revision. Also note that the macros used for the
3184
 * productions Letter, Digit, CombiningChar and Extender are not needed
3185
 * anymore.
3186
 * We still keep compatibility to pre-revision5 parsing semantic if the
3187
 * new XML_PARSE_OLD10 option is given to the parser.
3188
 */
3189
static int
3190
1.32M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3191
1.32M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3192
        /*
3193
   * Use the new checks of production [4] [4a] amd [5] of the
3194
   * Update 5 of XML-1.0
3195
   */
3196
1.32M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3197
1.32M
      (((c >= 'a') && (c <= 'z')) ||
3198
630k
       ((c >= 'A') && (c <= 'Z')) ||
3199
391k
       (c == '_') || (c == ':') ||
3200
227k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3201
225k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3202
224k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3203
221k
       ((c >= 0x370) && (c <= 0x37D)) ||
3204
221k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3205
220k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3206
220k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3207
220k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3208
219k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3209
212k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3210
212k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3211
113k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3212
1.21M
      return(1);
3213
1.32M
    } else {
3214
0
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3215
0
      return(1);
3216
0
    }
3217
112k
    return(0);
3218
1.32M
}
3219
3220
static int
3221
28.8M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3222
28.8M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3223
        /*
3224
   * Use the new checks of production [4] [4a] amd [5] of the
3225
   * Update 5 of XML-1.0
3226
   */
3227
28.8M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3228
28.5M
      (((c >= 'a') && (c <= 'z')) ||
3229
16.1M
       ((c >= 'A') && (c <= 'Z')) ||
3230
12.4M
       ((c >= '0') && (c <= '9')) || /* !start */
3231
11.0M
       (c == '_') || (c == ':') ||
3232
10.3M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3233
9.95M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3234
9.88M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3235
9.88M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3236
9.72M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3237
9.72M
       ((c >= 0x370) && (c <= 0x37D)) ||
3238
9.71M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3239
9.65M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3240
9.65M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3241
9.65M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3242
9.65M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3243
9.64M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3244
9.54M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3245
9.54M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3246
185k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3247
28.3M
       return(1);
3248
28.8M
    } else {
3249
0
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3250
0
            (c == '.') || (c == '-') ||
3251
0
      (c == '_') || (c == ':') ||
3252
0
      (IS_COMBINING(c)) ||
3253
0
      (IS_EXTENDER(c)))
3254
0
      return(1);
3255
0
    }
3256
504k
    return(0);
3257
28.8M
}
3258
3259
static const xmlChar *
3260
298k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3261
298k
    const xmlChar *ret;
3262
298k
    int len = 0, l;
3263
298k
    int c;
3264
298k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3265
298k
                    XML_MAX_TEXT_LENGTH :
3266
298k
                    XML_MAX_NAME_LENGTH;
3267
3268
    /*
3269
     * Handler for more complex cases
3270
     */
3271
298k
    c = xmlCurrentChar(ctxt, &l);
3272
298k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3273
        /*
3274
   * Use the new checks of production [4] [4a] amd [5] of the
3275
   * Update 5 of XML-1.0
3276
   */
3277
298k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3278
293k
      (!(((c >= 'a') && (c <= 'z')) ||
3279
254k
         ((c >= 'A') && (c <= 'Z')) ||
3280
250k
         (c == '_') || (c == ':') ||
3281
242k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3282
241k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3283
234k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3284
228k
         ((c >= 0x370) && (c <= 0x37D)) ||
3285
221k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3286
216k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3287
213k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3288
201k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3289
196k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3290
187k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3291
185k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3292
177k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3293
177k
      return(NULL);
3294
177k
  }
3295
120k
  len += l;
3296
120k
  NEXTL(l);
3297
120k
  c = xmlCurrentChar(ctxt, &l);
3298
731k
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3299
721k
         (((c >= 'a') && (c <= 'z')) ||
3300
531k
          ((c >= 'A') && (c <= 'Z')) ||
3301
499k
          ((c >= '0') && (c <= '9')) || /* !start */
3302
458k
          (c == '_') || (c == ':') ||
3303
441k
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3304
424k
          ((c >= 0xC0) && (c <= 0xD6)) ||
3305
421k
          ((c >= 0xD8) && (c <= 0xF6)) ||
3306
413k
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3307
403k
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3308
402k
          ((c >= 0x370) && (c <= 0x37D)) ||
3309
401k
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3310
383k
          ((c >= 0x200C) && (c <= 0x200D)) ||
3311
381k
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3312
381k
          ((c >= 0x2070) && (c <= 0x218F)) ||
3313
369k
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3314
357k
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3315
261k
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3316
260k
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3317
113k
          ((c >= 0x10000) && (c <= 0xEFFFF))
3318
721k
    )) {
3319
610k
            if (len <= INT_MAX - l)
3320
610k
          len += l;
3321
610k
      NEXTL(l);
3322
610k
      c = xmlCurrentChar(ctxt, &l);
3323
610k
  }
3324
120k
    } else {
3325
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3326
0
      (!IS_LETTER(c) && (c != '_') &&
3327
0
       (c != ':'))) {
3328
0
      return(NULL);
3329
0
  }
3330
0
  len += l;
3331
0
  NEXTL(l);
3332
0
  c = xmlCurrentChar(ctxt, &l);
3333
3334
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3335
0
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3336
0
    (c == '.') || (c == '-') ||
3337
0
    (c == '_') || (c == ':') ||
3338
0
    (IS_COMBINING(c)) ||
3339
0
    (IS_EXTENDER(c)))) {
3340
0
            if (len <= INT_MAX - l)
3341
0
          len += l;
3342
0
      NEXTL(l);
3343
0
      c = xmlCurrentChar(ctxt, &l);
3344
0
  }
3345
0
    }
3346
120k
    if (len > maxLength) {
3347
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3348
0
        return(NULL);
3349
0
    }
3350
120k
    if (ctxt->input->cur - ctxt->input->base < len) {
3351
        /*
3352
         * There were a couple of bugs where PERefs lead to to a change
3353
         * of the buffer. Check the buffer size to avoid passing an invalid
3354
         * pointer to xmlDictLookup.
3355
         */
3356
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3357
0
                    "unexpected change of input buffer");
3358
0
        return (NULL);
3359
0
    }
3360
120k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3361
454
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3362
120k
    else
3363
120k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3364
120k
    if (ret == NULL)
3365
0
        xmlErrMemory(ctxt);
3366
120k
    return(ret);
3367
120k
}
3368
3369
/**
3370
 * xmlParseName:
3371
 * @ctxt:  an XML parser context
3372
 *
3373
 * DEPRECATED: Internal function, don't use.
3374
 *
3375
 * parse an XML name.
3376
 *
3377
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3378
 *                  CombiningChar | Extender
3379
 *
3380
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3381
 *
3382
 * [6] Names ::= Name (#x20 Name)*
3383
 *
3384
 * Returns the Name parsed or NULL
3385
 */
3386
3387
const xmlChar *
3388
944k
xmlParseName(xmlParserCtxtPtr ctxt) {
3389
944k
    const xmlChar *in;
3390
944k
    const xmlChar *ret;
3391
944k
    size_t count = 0;
3392
944k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3393
941k
                       XML_MAX_TEXT_LENGTH :
3394
944k
                       XML_MAX_NAME_LENGTH;
3395
3396
944k
    GROW;
3397
3398
    /*
3399
     * Accelerator for simple ASCII names
3400
     */
3401
944k
    in = ctxt->input->cur;
3402
944k
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3403
314k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3404
697k
  (*in == '_') || (*in == ':')) {
3405
697k
  in++;
3406
3.30M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3407
1.23M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3408
1.01M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3409
836k
         (*in == '_') || (*in == '-') ||
3410
749k
         (*in == ':') || (*in == '.'))
3411
2.60M
      in++;
3412
697k
  if ((*in > 0) && (*in < 0x80)) {
3413
645k
      count = in - ctxt->input->cur;
3414
645k
            if (count > maxLength) {
3415
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3416
0
                return(NULL);
3417
0
            }
3418
645k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3419
645k
      ctxt->input->cur = in;
3420
645k
      ctxt->input->col += count;
3421
645k
      if (ret == NULL)
3422
0
          xmlErrMemory(ctxt);
3423
645k
      return(ret);
3424
645k
  }
3425
697k
    }
3426
    /* accelerator for special cases */
3427
298k
    return(xmlParseNameComplex(ctxt));
3428
944k
}
3429
3430
static xmlHashedString
3431
1.35M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3432
1.35M
    xmlHashedString ret;
3433
1.35M
    int len = 0, l;
3434
1.35M
    int c;
3435
1.35M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3436
1.30M
                    XML_MAX_TEXT_LENGTH :
3437
1.35M
                    XML_MAX_NAME_LENGTH;
3438
1.35M
    size_t startPosition = 0;
3439
3440
1.35M
    ret.name = NULL;
3441
1.35M
    ret.hashValue = 0;
3442
3443
    /*
3444
     * Handler for more complex cases
3445
     */
3446
1.35M
    startPosition = CUR_PTR - BASE_PTR;
3447
1.35M
    c = xmlCurrentChar(ctxt, &l);
3448
1.35M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3449
1.28M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3450
337k
  return(ret);
3451
337k
    }
3452
3453
25.3M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3454
24.4M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3455
24.3M
        if (len <= INT_MAX - l)
3456
24.3M
      len += l;
3457
24.3M
  NEXTL(l);
3458
24.3M
  c = xmlCurrentChar(ctxt, &l);
3459
24.3M
    }
3460
1.01M
    if (len > maxLength) {
3461
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3462
0
        return(ret);
3463
0
    }
3464
1.01M
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3465
1.01M
    if (ret.name == NULL)
3466
0
        xmlErrMemory(ctxt);
3467
1.01M
    return(ret);
3468
1.01M
}
3469
3470
/**
3471
 * xmlParseNCName:
3472
 * @ctxt:  an XML parser context
3473
 * @len:  length of the string parsed
3474
 *
3475
 * parse an XML name.
3476
 *
3477
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3478
 *                      CombiningChar | Extender
3479
 *
3480
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3481
 *
3482
 * Returns the Name parsed or NULL
3483
 */
3484
3485
static xmlHashedString
3486
73.4M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3487
73.4M
    const xmlChar *in, *e;
3488
73.4M
    xmlHashedString ret;
3489
73.4M
    size_t count = 0;
3490
73.4M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3491
70.1M
                       XML_MAX_TEXT_LENGTH :
3492
73.4M
                       XML_MAX_NAME_LENGTH;
3493
3494
73.4M
    ret.name = NULL;
3495
3496
    /*
3497
     * Accelerator for simple ASCII names
3498
     */
3499
73.4M
    in = ctxt->input->cur;
3500
73.4M
    e = ctxt->input->end;
3501
73.4M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3502
4.94M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3503
72.9M
   (*in == '_')) && (in < e)) {
3504
72.9M
  in++;
3505
396M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3506
130M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3507
90.1M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3508
79.8M
          (*in == '_') || (*in == '-') ||
3509
323M
          (*in == '.')) && (in < e))
3510
323M
      in++;
3511
72.9M
  if (in >= e)
3512
12.7k
      goto complex;
3513
72.9M
  if ((*in > 0) && (*in < 0x80)) {
3514
72.0M
      count = in - ctxt->input->cur;
3515
72.0M
            if (count > maxLength) {
3516
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3517
0
                return(ret);
3518
0
            }
3519
72.0M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3520
72.0M
      ctxt->input->cur = in;
3521
72.0M
      ctxt->input->col += count;
3522
72.0M
      if (ret.name == NULL) {
3523
0
          xmlErrMemory(ctxt);
3524
0
      }
3525
72.0M
      return(ret);
3526
72.0M
  }
3527
72.9M
    }
3528
1.35M
complex:
3529
1.35M
    return(xmlParseNCNameComplex(ctxt));
3530
73.4M
}
3531
3532
/**
3533
 * xmlParseNameAndCompare:
3534
 * @ctxt:  an XML parser context
3535
 *
3536
 * parse an XML name and compares for match
3537
 * (specialized for endtag parsing)
3538
 *
3539
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3540
 * and the name for mismatch
3541
 */
3542
3543
static const xmlChar *
3544
2.75M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3545
2.75M
    register const xmlChar *cmp = other;
3546
2.75M
    register const xmlChar *in;
3547
2.75M
    const xmlChar *ret;
3548
3549
2.75M
    GROW;
3550
3551
2.75M
    in = ctxt->input->cur;
3552
8.98M
    while (*in != 0 && *in == *cmp) {
3553
6.23M
  ++in;
3554
6.23M
  ++cmp;
3555
6.23M
    }
3556
2.75M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3557
  /* success */
3558
2.74M
  ctxt->input->col += in - ctxt->input->cur;
3559
2.74M
  ctxt->input->cur = in;
3560
2.74M
  return (const xmlChar*) 1;
3561
2.74M
    }
3562
    /* failure (or end of input buffer), check with full function */
3563
4.84k
    ret = xmlParseName (ctxt);
3564
    /* strings coming from the dictionary direct compare possible */
3565
4.84k
    if (ret == other) {
3566
45
  return (const xmlChar*) 1;
3567
45
    }
3568
4.80k
    return ret;
3569
4.84k
}
3570
3571
/**
3572
 * xmlParseStringName:
3573
 * @ctxt:  an XML parser context
3574
 * @str:  a pointer to the string pointer (IN/OUT)
3575
 *
3576
 * parse an XML name.
3577
 *
3578
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3579
 *                  CombiningChar | Extender
3580
 *
3581
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3582
 *
3583
 * [6] Names ::= Name (#x20 Name)*
3584
 *
3585
 * Returns the Name parsed or NULL. The @str pointer
3586
 * is updated to the current location in the string.
3587
 */
3588
3589
static xmlChar *
3590
42.1k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3591
42.1k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3592
42.1k
    xmlChar *ret;
3593
42.1k
    const xmlChar *cur = *str;
3594
42.1k
    int len = 0, l;
3595
42.1k
    int c;
3596
42.1k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3597
42.1k
                    XML_MAX_TEXT_LENGTH :
3598
42.1k
                    XML_MAX_NAME_LENGTH;
3599
3600
42.1k
    c = CUR_SCHAR(cur, l);
3601
42.1k
    if (!xmlIsNameStartChar(ctxt, c)) {
3602
5.93k
  return(NULL);
3603
5.93k
    }
3604
3605
36.2k
    COPY_BUF(buf, len, c);
3606
36.2k
    cur += l;
3607
36.2k
    c = CUR_SCHAR(cur, l);
3608
204k
    while (xmlIsNameChar(ctxt, c)) {
3609
170k
  COPY_BUF(buf, len, c);
3610
170k
  cur += l;
3611
170k
  c = CUR_SCHAR(cur, l);
3612
170k
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3613
      /*
3614
       * Okay someone managed to make a huge name, so he's ready to pay
3615
       * for the processing speed.
3616
       */
3617
2.05k
      xmlChar *buffer;
3618
2.05k
      int max = len * 2;
3619
3620
2.05k
      buffer = xmlMalloc(max);
3621
2.05k
      if (buffer == NULL) {
3622
0
          xmlErrMemory(ctxt);
3623
0
    return(NULL);
3624
0
      }
3625
2.05k
      memcpy(buffer, buf, len);
3626
94.2k
      while (xmlIsNameChar(ctxt, c)) {
3627
92.2k
    if (len + 10 > max) {
3628
1.11k
        xmlChar *tmp;
3629
1.11k
                    int newSize;
3630
3631
1.11k
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3632
1.11k
                    if (newSize < 0) {
3633
0
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3634
0
                        xmlFree(buffer);
3635
0
                        return(NULL);
3636
0
                    }
3637
1.11k
        tmp = xmlRealloc(buffer, newSize);
3638
1.11k
        if (tmp == NULL) {
3639
0
      xmlErrMemory(ctxt);
3640
0
      xmlFree(buffer);
3641
0
      return(NULL);
3642
0
        }
3643
1.11k
        buffer = tmp;
3644
1.11k
                    max = newSize;
3645
1.11k
    }
3646
92.2k
    COPY_BUF(buffer, len, c);
3647
92.2k
    cur += l;
3648
92.2k
    c = CUR_SCHAR(cur, l);
3649
92.2k
      }
3650
2.05k
      buffer[len] = 0;
3651
2.05k
      *str = cur;
3652
2.05k
      return(buffer);
3653
2.05k
  }
3654
170k
    }
3655
34.1k
    if (len > maxLength) {
3656
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3657
0
        return(NULL);
3658
0
    }
3659
34.1k
    *str = cur;
3660
34.1k
    ret = xmlStrndup(buf, len);
3661
34.1k
    if (ret == NULL)
3662
0
        xmlErrMemory(ctxt);
3663
34.1k
    return(ret);
3664
34.1k
}
3665
3666
/**
3667
 * xmlParseNmtoken:
3668
 * @ctxt:  an XML parser context
3669
 *
3670
 * DEPRECATED: Internal function, don't use.
3671
 *
3672
 * parse an XML Nmtoken.
3673
 *
3674
 * [7] Nmtoken ::= (NameChar)+
3675
 *
3676
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3677
 *
3678
 * Returns the Nmtoken parsed or NULL
3679
 */
3680
3681
xmlChar *
3682
427k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3683
427k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3684
427k
    xmlChar *ret;
3685
427k
    int len = 0, l;
3686
427k
    int c;
3687
427k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3688
420k
                    XML_MAX_TEXT_LENGTH :
3689
427k
                    XML_MAX_NAME_LENGTH;
3690
3691
427k
    c = xmlCurrentChar(ctxt, &l);
3692
3693
3.91M
    while (xmlIsNameChar(ctxt, c)) {
3694
3.49M
  COPY_BUF(buf, len, c);
3695
3.49M
  NEXTL(l);
3696
3.49M
  c = xmlCurrentChar(ctxt, &l);
3697
3.49M
  if (len >= XML_MAX_NAMELEN) {
3698
      /*
3699
       * Okay someone managed to make a huge token, so he's ready to pay
3700
       * for the processing speed.
3701
       */
3702
3.70k
      xmlChar *buffer;
3703
3.70k
      int max = len * 2;
3704
3705
3.70k
      buffer = xmlMalloc(max);
3706
3.70k
      if (buffer == NULL) {
3707
0
          xmlErrMemory(ctxt);
3708
0
    return(NULL);
3709
0
      }
3710
3.70k
      memcpy(buffer, buf, len);
3711
239k
      while (xmlIsNameChar(ctxt, c)) {
3712
235k
    if (len + 10 > max) {
3713
1.17k
        xmlChar *tmp;
3714
1.17k
                    int newSize;
3715
3716
1.17k
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3717
1.17k
                    if (newSize < 0) {
3718
0
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3719
0
                        xmlFree(buffer);
3720
0
                        return(NULL);
3721
0
                    }
3722
1.17k
        tmp = xmlRealloc(buffer, newSize);
3723
1.17k
        if (tmp == NULL) {
3724
0
      xmlErrMemory(ctxt);
3725
0
      xmlFree(buffer);
3726
0
      return(NULL);
3727
0
        }
3728
1.17k
        buffer = tmp;
3729
1.17k
                    max = newSize;
3730
1.17k
    }
3731
235k
    COPY_BUF(buffer, len, c);
3732
235k
    NEXTL(l);
3733
235k
    c = xmlCurrentChar(ctxt, &l);
3734
235k
      }
3735
3.70k
      buffer[len] = 0;
3736
3.70k
      return(buffer);
3737
3.70k
  }
3738
3.49M
    }
3739
423k
    if (len == 0)
3740
65.7k
        return(NULL);
3741
357k
    if (len > maxLength) {
3742
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3743
0
        return(NULL);
3744
0
    }
3745
357k
    ret = xmlStrndup(buf, len);
3746
357k
    if (ret == NULL)
3747
0
        xmlErrMemory(ctxt);
3748
357k
    return(ret);
3749
357k
}
3750
3751
/**
3752
 * xmlExpandPEsInEntityValue:
3753
 * @ctxt:  parser context
3754
 * @buf:  string buffer
3755
 * @str:  entity value
3756
 * @length:  size of entity value
3757
 * @depth:  nesting depth
3758
 *
3759
 * Validate an entity value and expand parameter entities.
3760
 */
3761
static void
3762
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3763
29.8k
                          const xmlChar *str, int length, int depth) {
3764
29.8k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3765
29.8k
    const xmlChar *end, *chunk;
3766
29.8k
    int c, l;
3767
3768
29.8k
    if (str == NULL)
3769
0
        return;
3770
3771
29.8k
    depth += 1;
3772
29.8k
    if (depth > maxDepth) {
3773
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3774
0
                       "Maximum entity nesting depth exceeded");
3775
0
  return;
3776
0
    }
3777
3778
29.8k
    end = str + length;
3779
29.8k
    chunk = str;
3780
3781
1.99M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3782
1.98M
        c = *str;
3783
3784
1.98M
        if (c >= 0x80) {
3785
309k
            l = xmlUTF8MultibyteLen(ctxt, str,
3786
309k
                    "invalid character in entity value\n");
3787
309k
            if (l == 0) {
3788
142k
                if (chunk < str)
3789
18.2k
                    xmlSBufAddString(buf, chunk, str - chunk);
3790
142k
                xmlSBufAddReplChar(buf);
3791
142k
                str += 1;
3792
142k
                chunk = str;
3793
166k
            } else {
3794
166k
                str += l;
3795
166k
            }
3796
1.67M
        } else if (c == '&') {
3797
72.2k
            if (str[1] == '#') {
3798
31.5k
                if (chunk < str)
3799
18.8k
                    xmlSBufAddString(buf, chunk, str - chunk);
3800
3801
31.5k
                c = xmlParseStringCharRef(ctxt, &str);
3802
31.5k
                if (c == 0)
3803
5.66k
                    return;
3804
3805
25.9k
                xmlSBufAddChar(buf, c);
3806
3807
25.9k
                chunk = str;
3808
40.6k
            } else {
3809
40.6k
                xmlChar *name;
3810
3811
                /*
3812
                 * General entity references are checked for
3813
                 * syntactic validity.
3814
                 */
3815
40.6k
                str++;
3816
40.6k
                name = xmlParseStringName(ctxt, &str);
3817
3818
40.6k
                if ((name == NULL) || (*str++ != ';')) {
3819
11.6k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3820
11.6k
                            "EntityValue: '&' forbidden except for entities "
3821
11.6k
                            "references\n");
3822
11.6k
                    xmlFree(name);
3823
11.6k
                    return;
3824
11.6k
                }
3825
3826
29.0k
                xmlFree(name);
3827
29.0k
            }
3828
1.60M
        } else if (c == '%') {
3829
1.51k
            xmlEntityPtr ent;
3830
3831
1.51k
            if (chunk < str)
3832
1.38k
                xmlSBufAddString(buf, chunk, str - chunk);
3833
3834
1.51k
            ent = xmlParseStringPEReference(ctxt, &str);
3835
1.51k
            if (ent == NULL)
3836
1.51k
                return;
3837
3838
0
            if (!PARSER_EXTERNAL(ctxt)) {
3839
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3840
0
                return;
3841
0
            }
3842
3843
0
            if (ent->content == NULL) {
3844
                /*
3845
                 * Note: external parsed entities will not be loaded,
3846
                 * it is not required for a non-validating parser to
3847
                 * complete external PEReferences coming from the
3848
                 * internal subset
3849
                 */
3850
0
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3851
0
                    ((ctxt->replaceEntities) ||
3852
0
                     (ctxt->validate))) {
3853
0
                    xmlLoadEntityContent(ctxt, ent);
3854
0
                } else {
3855
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3856
0
                                  "not validating will not read content for "
3857
0
                                  "PE entity %s\n", ent->name, NULL);
3858
0
                }
3859
0
            }
3860
3861
            /*
3862
             * TODO: Skip if ent->content is still NULL.
3863
             */
3864
3865
0
            if (xmlParserEntityCheck(ctxt, ent->length))
3866
0
                return;
3867
3868
0
            if (ent->flags & XML_ENT_EXPANDING) {
3869
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3870
0
                xmlHaltParser(ctxt);
3871
0
                return;
3872
0
            }
3873
3874
0
            ent->flags |= XML_ENT_EXPANDING;
3875
0
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3876
0
                                      depth);
3877
0
            ent->flags &= ~XML_ENT_EXPANDING;
3878
3879
0
            chunk = str;
3880
1.60M
        } else {
3881
            /* Normal ASCII char */
3882
1.60M
            if (!IS_BYTE_CHAR(c)) {
3883
90.0k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3884
90.0k
                        "invalid character in entity value\n");
3885
90.0k
                if (chunk < str)
3886
4.66k
                    xmlSBufAddString(buf, chunk, str - chunk);
3887
90.0k
                xmlSBufAddReplChar(buf);
3888
90.0k
                str += 1;
3889
90.0k
                chunk = str;
3890
1.51M
            } else {
3891
1.51M
                str += 1;
3892
1.51M
            }
3893
1.60M
        }
3894
1.98M
    }
3895
3896
11.0k
    if (chunk < str)
3897
7.50k
        xmlSBufAddString(buf, chunk, str - chunk);
3898
11.0k
}
3899
3900
/**
3901
 * xmlParseEntityValue:
3902
 * @ctxt:  an XML parser context
3903
 * @orig:  if non-NULL store a copy of the original entity value
3904
 *
3905
 * DEPRECATED: Internal function, don't use.
3906
 *
3907
 * parse a value for ENTITY declarations
3908
 *
3909
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3910
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3911
 *
3912
 * Returns the EntityValue parsed with references substituted or NULL
3913
 */
3914
xmlChar *
3915
30.1k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3916
30.1k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3917
30.1k
                         XML_MAX_HUGE_LENGTH :
3918
30.1k
                         XML_MAX_TEXT_LENGTH;
3919
30.1k
    xmlSBuf buf;
3920
30.1k
    const xmlChar *start;
3921
30.1k
    int quote, length;
3922
3923
30.1k
    xmlSBufInit(&buf, maxLength);
3924
3925
30.1k
    GROW;
3926
3927
30.1k
    quote = CUR;
3928
30.1k
    if ((quote != '"') && (quote != '\'')) {
3929
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3930
0
  return(NULL);
3931
0
    }
3932
30.1k
    CUR_PTR++;
3933
3934
30.1k
    length = 0;
3935
3936
    /*
3937
     * Copy raw content of the entity into a buffer
3938
     */
3939
4.98M
    while (1) {
3940
4.98M
        int c;
3941
3942
4.98M
        if (PARSER_STOPPED(ctxt))
3943
0
            goto error;
3944
3945
4.98M
        if (CUR_PTR >= ctxt->input->end) {
3946
208
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3947
208
            goto error;
3948
208
        }
3949
3950
4.98M
        c = CUR;
3951
3952
4.98M
        if (c == 0) {
3953
76
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3954
76
                    "invalid character in entity value\n");
3955
76
            goto error;
3956
76
        }
3957
4.98M
        if (c == quote)
3958
29.8k
            break;
3959
4.95M
        NEXTL(1);
3960
4.95M
        length += 1;
3961
3962
        /*
3963
         * TODO: Check growth threshold
3964
         */
3965
4.95M
        if (ctxt->input->end - CUR_PTR < 10)
3966
2.75k
            GROW;
3967
4.95M
    }
3968
3969
29.8k
    start = CUR_PTR - length;
3970
3971
29.8k
    if (orig != NULL) {
3972
29.8k
        *orig = xmlStrndup(start, length);
3973
29.8k
        if (*orig == NULL)
3974
0
            xmlErrMemory(ctxt);
3975
29.8k
    }
3976
3977
29.8k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3978
3979
29.8k
    NEXTL(1);
3980
3981
29.8k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3982
3983
284
error:
3984
284
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3985
284
    return(NULL);
3986
30.1k
}
3987
3988
/**
3989
 * xmlCheckEntityInAttValue:
3990
 * @ctxt:  parser context
3991
 * @pent:  entity
3992
 * @depth:  nesting depth
3993
 *
3994
 * Check an entity reference in an attribute value for validity
3995
 * without expanding it.
3996
 */
3997
static void
3998
0
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3999
0
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4000
0
    const xmlChar *str;
4001
0
    unsigned long expandedSize = pent->length;
4002
0
    int c, flags;
4003
4004
0
    depth += 1;
4005
0
    if (depth > maxDepth) {
4006
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4007
0
                       "Maximum entity nesting depth exceeded");
4008
0
  return;
4009
0
    }
4010
4011
0
    if (pent->flags & XML_ENT_EXPANDING) {
4012
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4013
0
        xmlHaltParser(ctxt);
4014
0
        return;
4015
0
    }
4016
4017
    /*
4018
     * If we're parsing a default attribute value in DTD content,
4019
     * the entity might reference other entities which weren't
4020
     * defined yet, so the check isn't reliable.
4021
     */
4022
0
    if (ctxt->inSubset == 0)
4023
0
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4024
0
    else
4025
0
        flags = XML_ENT_VALIDATED;
4026
4027
0
    str = pent->content;
4028
0
    if (str == NULL)
4029
0
        goto done;
4030
4031
    /*
4032
     * Note that entity values are already validated. We only check
4033
     * for illegal less-than signs and compute the expanded size
4034
     * of the entity. No special handling for multi-byte characters
4035
     * is needed.
4036
     */
4037
0
    while (!PARSER_STOPPED(ctxt)) {
4038
0
        c = *str;
4039
4040
0
  if (c != '&') {
4041
0
            if (c == 0)
4042
0
                break;
4043
4044
0
            if (c == '<')
4045
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4046
0
                        "'<' in entity '%s' is not allowed in attributes "
4047
0
                        "values\n", pent->name);
4048
4049
0
            str += 1;
4050
0
        } else if (str[1] == '#') {
4051
0
            int val;
4052
4053
0
      val = xmlParseStringCharRef(ctxt, &str);
4054
0
      if (val == 0) {
4055
0
                pent->content[0] = 0;
4056
0
                break;
4057
0
            }
4058
0
  } else {
4059
0
            xmlChar *name;
4060
0
            xmlEntityPtr ent;
4061
4062
0
      name = xmlParseStringEntityRef(ctxt, &str);
4063
0
      if (name == NULL) {
4064
0
                pent->content[0] = 0;
4065
0
                break;
4066
0
            }
4067
4068
0
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4069
0
            xmlFree(name);
4070
4071
0
            if ((ent != NULL) &&
4072
0
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4073
0
                if ((ent->flags & flags) != flags) {
4074
0
                    pent->flags |= XML_ENT_EXPANDING;
4075
0
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
4076
0
                    pent->flags &= ~XML_ENT_EXPANDING;
4077
0
                }
4078
4079
0
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4080
0
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4081
0
            }
4082
0
        }
4083
0
    }
4084
4085
0
done:
4086
0
    if (ctxt->inSubset == 0)
4087
0
        pent->expandedSize = expandedSize;
4088
4089
0
    pent->flags |= flags;
4090
0
}
4091
4092
/**
4093
 * xmlExpandEntityInAttValue:
4094
 * @ctxt:  parser context
4095
 * @buf:  string buffer
4096
 * @str:  entity or attribute value
4097
 * @pent:  entity for entity value, NULL for attribute values
4098
 * @normalize:  whether to collapse whitespace
4099
 * @inSpace:  whitespace state
4100
 * @depth:  nesting depth
4101
 * @check:  whether to check for amplification
4102
 *
4103
 * Expand general entity references in an entity or attribute value.
4104
 * Perform attribute value normalization.
4105
 */
4106
static void
4107
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4108
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
4109
0
                          int *inSpace, int depth, int check) {
4110
0
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4111
0
    int c, chunkSize;
4112
4113
0
    if (str == NULL)
4114
0
        return;
4115
4116
0
    depth += 1;
4117
0
    if (depth > maxDepth) {
4118
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4119
0
                       "Maximum entity nesting depth exceeded");
4120
0
  return;
4121
0
    }
4122
4123
0
    if (pent != NULL) {
4124
0
        if (pent->flags & XML_ENT_EXPANDING) {
4125
0
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4126
0
            xmlHaltParser(ctxt);
4127
0
            return;
4128
0
        }
4129
4130
0
        if (check) {
4131
0
            if (xmlParserEntityCheck(ctxt, pent->length))
4132
0
                return;
4133
0
        }
4134
0
    }
4135
4136
0
    chunkSize = 0;
4137
4138
    /*
4139
     * Note that entity values are already validated. No special
4140
     * handling for multi-byte characters is needed.
4141
     */
4142
0
    while (!PARSER_STOPPED(ctxt)) {
4143
0
        c = *str;
4144
4145
0
  if (c != '&') {
4146
0
            if (c == 0)
4147
0
                break;
4148
4149
            /*
4150
             * If this function is called without an entity, it is used to
4151
             * expand entities in an attribute content where less-than was
4152
             * already unescaped and is allowed.
4153
             */
4154
0
            if ((pent != NULL) && (c == '<')) {
4155
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4156
0
                        "'<' in entity '%s' is not allowed in attributes "
4157
0
                        "values\n", pent->name);
4158
0
                break;
4159
0
            }
4160
4161
0
            if (c <= 0x20) {
4162
0
                if ((normalize) && (*inSpace)) {
4163
                    /* Skip char */
4164
0
                    if (chunkSize > 0) {
4165
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4166
0
                        chunkSize = 0;
4167
0
                    }
4168
0
                } else if (c < 0x20) {
4169
0
                    if (chunkSize > 0) {
4170
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4171
0
                        chunkSize = 0;
4172
0
                    }
4173
4174
0
                    xmlSBufAddCString(buf, " ", 1);
4175
0
                } else {
4176
0
                    chunkSize += 1;
4177
0
                }
4178
4179
0
                *inSpace = 1;
4180
0
            } else {
4181
0
                chunkSize += 1;
4182
0
                *inSpace = 0;
4183
0
            }
4184
4185
0
            str += 1;
4186
0
        } else if (str[1] == '#') {
4187
0
            int val;
4188
4189
0
            if (chunkSize > 0) {
4190
0
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4191
0
                chunkSize = 0;
4192
0
            }
4193
4194
0
      val = xmlParseStringCharRef(ctxt, &str);
4195
0
      if (val == 0) {
4196
0
                if (pent != NULL)
4197
0
                    pent->content[0] = 0;
4198
0
                break;
4199
0
            }
4200
4201
0
            if (val == ' ') {
4202
0
                if ((!normalize) || (!*inSpace))
4203
0
                    xmlSBufAddCString(buf, " ", 1);
4204
0
                *inSpace = 1;
4205
0
            } else {
4206
0
                xmlSBufAddChar(buf, val);
4207
0
                *inSpace = 0;
4208
0
            }
4209
0
  } else {
4210
0
            xmlChar *name;
4211
0
            xmlEntityPtr ent;
4212
4213
0
            if (chunkSize > 0) {
4214
0
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4215
0
                chunkSize = 0;
4216
0
            }
4217
4218
0
      name = xmlParseStringEntityRef(ctxt, &str);
4219
0
            if (name == NULL) {
4220
0
                if (pent != NULL)
4221
0
                    pent->content[0] = 0;
4222
0
                break;
4223
0
            }
4224
4225
0
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4226
0
            xmlFree(name);
4227
4228
0
      if ((ent != NULL) &&
4229
0
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4230
0
    if (ent->content == NULL) {
4231
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4232
0
          "predefined entity has no content\n");
4233
0
                    break;
4234
0
                }
4235
4236
0
                xmlSBufAddString(buf, ent->content, ent->length);
4237
4238
0
                *inSpace = 0;
4239
0
      } else if ((ent != NULL) && (ent->content != NULL)) {
4240
0
                if (pent != NULL)
4241
0
                    pent->flags |= XML_ENT_EXPANDING;
4242
0
    xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4243
0
                                          normalize, inSpace, depth, check);
4244
0
                if (pent != NULL)
4245
0
                    pent->flags &= ~XML_ENT_EXPANDING;
4246
0
      }
4247
0
        }
4248
0
    }
4249
4250
0
    if (chunkSize > 0)
4251
0
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4252
0
}
4253
4254
/**
4255
 * xmlExpandEntitiesInAttValue:
4256
 * @ctxt:  parser context
4257
 * @str:  entity or attribute value
4258
 * @normalize:  whether to collapse whitespace
4259
 *
4260
 * Expand general entity references in an entity or attribute value.
4261
 * Perform attribute value normalization.
4262
 *
4263
 * Returns the expanded attribute value.
4264
 */
4265
xmlChar *
4266
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4267
0
                            int normalize) {
4268
0
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4269
0
                         XML_MAX_HUGE_LENGTH :
4270
0
                         XML_MAX_TEXT_LENGTH;
4271
0
    xmlSBuf buf;
4272
0
    int inSpace = 1;
4273
4274
0
    xmlSBufInit(&buf, maxLength);
4275
4276
0
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4277
0
                              ctxt->inputNr, /* check */ 0);
4278
4279
0
    if ((normalize) && (inSpace) && (buf.size > 0))
4280
0
        buf.size--;
4281
4282
0
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4283
0
}
4284
4285
/**
4286
 * xmlParseAttValueInternal:
4287
 * @ctxt:  an XML parser context
4288
 * @len:  attribute len result
4289
 * @alloc:  whether the attribute was reallocated as a new string
4290
 * @normalize:  if 1 then further non-CDATA normalization must be done
4291
 *
4292
 * parse a value for an attribute.
4293
 * NOTE: if no normalization is needed, the routine will return pointers
4294
 *       directly from the data buffer.
4295
 *
4296
 * 3.3.3 Attribute-Value Normalization:
4297
 * Before the value of an attribute is passed to the application or
4298
 * checked for validity, the XML processor must normalize it as follows:
4299
 * - a character reference is processed by appending the referenced
4300
 *   character to the attribute value
4301
 * - an entity reference is processed by recursively processing the
4302
 *   replacement text of the entity
4303
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4304
 *   appending #x20 to the normalized value, except that only a single
4305
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4306
 *   parsed entity or the literal entity value of an internal parsed entity
4307
 * - other characters are processed by appending them to the normalized value
4308
 * If the declared value is not CDATA, then the XML processor must further
4309
 * process the normalized attribute value by discarding any leading and
4310
 * trailing space (#x20) characters, and by replacing sequences of space
4311
 * (#x20) characters by a single space (#x20) character.
4312
 * All attributes for which no declaration has been read should be treated
4313
 * by a non-validating parser as if declared CDATA.
4314
 *
4315
 * Returns the AttValue parsed or NULL. The value has to be freed by the
4316
 *     caller if it was copied; this can be detected by val[*attlen] == 0.
4317
 */
4318
static xmlChar *
4319
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4320
22.1M
                         int normalize, int isNamespace) {
4321
22.1M
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4322
20.9M
                         XML_MAX_HUGE_LENGTH :
4323
22.1M
                         XML_MAX_TEXT_LENGTH;
4324
22.1M
    xmlSBuf buf;
4325
22.1M
    xmlChar *ret;
4326
22.1M
    int c, l, quote, flags, chunkSize;
4327
22.1M
    int inSpace = 1;
4328
22.1M
    int replaceEntities;
4329
4330
    /* Always expand namespace URIs */
4331
22.1M
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4332
4333
22.1M
    xmlSBufInit(&buf, maxLength);
4334
4335
22.1M
    GROW;
4336
4337
22.1M
    quote = CUR;
4338
22.1M
    if ((quote != '"') && (quote != '\'')) {
4339
2.70k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4340
2.70k
  return(NULL);
4341
2.70k
    }
4342
22.1M
    NEXTL(1);
4343
4344
22.1M
    if (ctxt->inSubset == 0)
4345
22.1M
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4346
29.0k
    else
4347
29.0k
        flags = XML_ENT_VALIDATED;
4348
4349
22.1M
    inSpace = 1;
4350
22.1M
    chunkSize = 0;
4351
4352
235M
    while (1) {
4353
235M
        if (PARSER_STOPPED(ctxt))
4354
0
            goto error;
4355
4356
235M
        if (CUR_PTR >= ctxt->input->end) {
4357
4.25k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4358
4.25k
                           "AttValue: ' expected\n");
4359
4.25k
            goto error;
4360
4.25k
        }
4361
4362
        /*
4363
         * TODO: Check growth threshold
4364
         */
4365
235M
        if (ctxt->input->end - CUR_PTR < 10)
4366
46.3k
            GROW;
4367
4368
235M
        c = CUR;
4369
4370
235M
        if (c >= 0x80) {
4371
5.16M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4372
5.16M
                    "invalid character in attribute value\n");
4373
5.16M
            if (l == 0) {
4374
1.82M
                if (chunkSize > 0) {
4375
203k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4376
203k
                    chunkSize = 0;
4377
203k
                }
4378
1.82M
                xmlSBufAddReplChar(&buf);
4379
1.82M
                NEXTL(1);
4380
3.33M
            } else {
4381
3.33M
                chunkSize += l;
4382
3.33M
                NEXTL(l);
4383
3.33M
            }
4384
4385
5.16M
            inSpace = 0;
4386
230M
        } else if (c != '&') {
4387
230M
            if (c > 0x20) {
4388
226M
                if (c == quote)
4389
22.1M
                    break;
4390
4391
204M
                if (c == '<')
4392
169k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4393
4394
204M
                chunkSize += 1;
4395
204M
                inSpace = 0;
4396
204M
            } else if (!IS_BYTE_CHAR(c)) {
4397
943k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4398
943k
                        "invalid character in attribute value\n");
4399
943k
                if (chunkSize > 0) {
4400
47.8k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4401
47.8k
                    chunkSize = 0;
4402
47.8k
                }
4403
943k
                xmlSBufAddReplChar(&buf);
4404
943k
                inSpace = 0;
4405
2.34M
            } else {
4406
                /* Whitespace */
4407
2.34M
                if ((normalize) && (inSpace)) {
4408
                    /* Skip char */
4409
24.2k
                    if (chunkSize > 0) {
4410
2.10k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4411
2.10k
                        chunkSize = 0;
4412
2.10k
                    }
4413
2.31M
                } else if (c < 0x20) {
4414
                    /* Convert to space */
4415
535k
                    if (chunkSize > 0) {
4416
134k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4417
134k
                        chunkSize = 0;
4418
134k
                    }
4419
4420
535k
                    xmlSBufAddCString(&buf, " ", 1);
4421
1.78M
                } else {
4422
1.78M
                    chunkSize += 1;
4423
1.78M
                }
4424
4425
2.34M
                inSpace = 1;
4426
4427
2.34M
                if ((c == 0xD) && (NXT(1) == 0xA))
4428
28.3k
                    CUR_PTR++;
4429
2.34M
            }
4430
4431
207M
            NEXTL(1);
4432
207M
        } else if (NXT(1) == '#') {
4433
59.9k
            int val;
4434
4435
59.9k
            if (chunkSize > 0) {
4436
28.7k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4437
28.7k
                chunkSize = 0;
4438
28.7k
            }
4439
4440
59.9k
            val = xmlParseCharRef(ctxt);
4441
59.9k
            if (val == 0)
4442
521
                goto error;
4443
4444
59.3k
            if ((val == '&') && (!replaceEntities)) {
4445
                /*
4446
                 * The reparsing will be done in xmlNodeParseContent()
4447
                 * called from SAX2.c
4448
                 */
4449
0
                xmlSBufAddCString(&buf, "&#38;", 5);
4450
0
                inSpace = 0;
4451
59.3k
            } else if (val == ' ') {
4452
34.3k
                if ((!normalize) || (!inSpace))
4453
29.5k
                    xmlSBufAddCString(&buf, " ", 1);
4454
34.3k
                inSpace = 1;
4455
34.3k
            } else {
4456
25.0k
                xmlSBufAddChar(&buf, val);
4457
25.0k
                inSpace = 0;
4458
25.0k
            }
4459
451k
        } else {
4460
451k
            const xmlChar *name;
4461
451k
            xmlEntityPtr ent;
4462
4463
451k
            if (chunkSize > 0) {
4464
229k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4465
229k
                chunkSize = 0;
4466
229k
            }
4467
4468
451k
            name = xmlParseEntityRefInternal(ctxt);
4469
451k
            if (name == NULL) {
4470
                /*
4471
                 * Probably a literal '&' which wasn't escaped.
4472
                 * TODO: Handle gracefully in recovery mode.
4473
                 */
4474
299k
                continue;
4475
299k
            }
4476
4477
152k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4478
152k
            if (ent == NULL)
4479
17.9k
                continue;
4480
4481
134k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4482
134k
                if ((ent->content[0] == '&') && (!replaceEntities))
4483
0
                    xmlSBufAddCString(&buf, "&#38;", 5);
4484
134k
                else
4485
134k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4486
134k
                inSpace = 0;
4487
18.4E
            } else if (replaceEntities) {
4488
0
                xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4489
0
                                          normalize, &inSpace, ctxt->inputNr,
4490
0
                                          /* check */ 1);
4491
18.4E
            } else {
4492
18.4E
                if ((ent->flags & flags) != flags)
4493
0
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4494
4495
18.4E
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4496
0
                    ent->content[0] = 0;
4497
0
                    goto error;
4498
0
                }
4499
4500
                /*
4501
                 * Just output the reference
4502
                 */
4503
18.4E
                xmlSBufAddCString(&buf, "&", 1);
4504
18.4E
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4505
18.4E
                xmlSBufAddCString(&buf, ";", 1);
4506
4507
18.4E
                inSpace = 0;
4508
18.4E
            }
4509
134k
  }
4510
235M
    }
4511
4512
22.1M
    if ((buf.mem == NULL) && (alloc != NULL)) {
4513
21.9M
        ret = (xmlChar *) CUR_PTR - chunkSize;
4514
4515
21.9M
        if (attlen != NULL)
4516
21.9M
            *attlen = chunkSize;
4517
21.9M
        if ((normalize) && (inSpace) && (chunkSize > 0))
4518
344
            *attlen -= 1;
4519
21.9M
        *alloc = 0;
4520
4521
        /* Report potential error */
4522
21.9M
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4523
21.9M
    } else {
4524
201k
        if (chunkSize > 0)
4525
140k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4526
4527
201k
        if ((normalize) && (inSpace) && (buf.size > 0))
4528
3.35k
            buf.size--;
4529
4530
201k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4531
4532
202k
        if (ret != NULL) {
4533
202k
            if (attlen != NULL)
4534
173k
                *attlen = buf.size;
4535
202k
            if (alloc != NULL)
4536
173k
                *alloc = 1;
4537
202k
        }
4538
201k
    }
4539
4540
22.1M
    NEXTL(1);
4541
4542
22.1M
    return(ret);
4543
4544
4.77k
error:
4545
4.77k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4546
4.77k
    return(NULL);
4547
22.1M
}
4548
4549
/**
4550
 * xmlParseAttValue:
4551
 * @ctxt:  an XML parser context
4552
 *
4553
 * DEPRECATED: Internal function, don't use.
4554
 *
4555
 * parse a value for an attribute
4556
 * Note: the parser won't do substitution of entities here, this
4557
 * will be handled later in xmlStringGetNodeList
4558
 *
4559
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4560
 *                   "'" ([^<&'] | Reference)* "'"
4561
 *
4562
 * 3.3.3 Attribute-Value Normalization:
4563
 * Before the value of an attribute is passed to the application or
4564
 * checked for validity, the XML processor must normalize it as follows:
4565
 * - a character reference is processed by appending the referenced
4566
 *   character to the attribute value
4567
 * - an entity reference is processed by recursively processing the
4568
 *   replacement text of the entity
4569
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4570
 *   appending #x20 to the normalized value, except that only a single
4571
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4572
 *   parsed entity or the literal entity value of an internal parsed entity
4573
 * - other characters are processed by appending them to the normalized value
4574
 * If the declared value is not CDATA, then the XML processor must further
4575
 * process the normalized attribute value by discarding any leading and
4576
 * trailing space (#x20) characters, and by replacing sequences of space
4577
 * (#x20) characters by a single space (#x20) character.
4578
 * All attributes for which no declaration has been read should be treated
4579
 * by a non-validating parser as if declared CDATA.
4580
 *
4581
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4582
 */
4583
4584
4585
xmlChar *
4586
29.4k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4587
29.4k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4588
29.4k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4589
29.4k
}
4590
4591
/**
4592
 * xmlParseSystemLiteral:
4593
 * @ctxt:  an XML parser context
4594
 *
4595
 * DEPRECATED: Internal function, don't use.
4596
 *
4597
 * parse an XML Literal
4598
 *
4599
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4600
 *
4601
 * Returns the SystemLiteral parsed or NULL
4602
 */
4603
4604
xmlChar *
4605
5.87k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4606
5.87k
    xmlChar *buf = NULL;
4607
5.87k
    int len = 0;
4608
5.87k
    int size = XML_PARSER_BUFFER_SIZE;
4609
5.87k
    int cur, l;
4610
5.87k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4611
5.87k
                    XML_MAX_TEXT_LENGTH :
4612
5.87k
                    XML_MAX_NAME_LENGTH;
4613
5.87k
    xmlChar stop;
4614
4615
5.87k
    if (RAW == '"') {
4616
4.02k
        NEXT;
4617
4.02k
  stop = '"';
4618
4.02k
    } else if (RAW == '\'') {
4619
1.21k
        NEXT;
4620
1.21k
  stop = '\'';
4621
1.21k
    } else {
4622
630
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4623
630
  return(NULL);
4624
630
    }
4625
4626
5.24k
    buf = xmlMalloc(size);
4627
5.24k
    if (buf == NULL) {
4628
0
        xmlErrMemory(ctxt);
4629
0
  return(NULL);
4630
0
    }
4631
5.24k
    cur = xmlCurrentCharRecover(ctxt, &l);
4632
309k
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4633
304k
  if (len + 5 >= size) {
4634
1.30k
      xmlChar *tmp;
4635
1.30k
            int newSize;
4636
4637
1.30k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4638
1.30k
            if (newSize < 0) {
4639
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4640
0
                xmlFree(buf);
4641
0
                return(NULL);
4642
0
            }
4643
1.30k
      tmp = xmlRealloc(buf, newSize);
4644
1.30k
      if (tmp == NULL) {
4645
0
          xmlFree(buf);
4646
0
    xmlErrMemory(ctxt);
4647
0
    return(NULL);
4648
0
      }
4649
1.30k
      buf = tmp;
4650
1.30k
            size = newSize;
4651
1.30k
  }
4652
304k
  COPY_BUF(buf, len, cur);
4653
304k
  NEXTL(l);
4654
304k
  cur = xmlCurrentCharRecover(ctxt, &l);
4655
304k
    }
4656
5.24k
    buf[len] = 0;
4657
5.24k
    if (!IS_CHAR(cur)) {
4658
133
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4659
5.10k
    } else {
4660
5.10k
  NEXT;
4661
5.10k
    }
4662
5.24k
    return(buf);
4663
5.24k
}
4664
4665
/**
4666
 * xmlParsePubidLiteral:
4667
 * @ctxt:  an XML parser context
4668
 *
4669
 * DEPRECATED: Internal function, don't use.
4670
 *
4671
 * parse an XML public literal
4672
 *
4673
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4674
 *
4675
 * Returns the PubidLiteral parsed or NULL.
4676
 */
4677
4678
xmlChar *
4679
3.19k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4680
3.19k
    xmlChar *buf = NULL;
4681
3.19k
    int len = 0;
4682
3.19k
    int size = XML_PARSER_BUFFER_SIZE;
4683
3.19k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4684
3.19k
                    XML_MAX_TEXT_LENGTH :
4685
3.19k
                    XML_MAX_NAME_LENGTH;
4686
3.19k
    xmlChar cur;
4687
3.19k
    xmlChar stop;
4688
4689
3.19k
    if (RAW == '"') {
4690
977
        NEXT;
4691
977
  stop = '"';
4692
2.21k
    } else if (RAW == '\'') {
4693
1.68k
        NEXT;
4694
1.68k
  stop = '\'';
4695
1.68k
    } else {
4696
534
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4697
534
  return(NULL);
4698
534
    }
4699
2.65k
    buf = xmlMalloc(size);
4700
2.65k
    if (buf == NULL) {
4701
0
  xmlErrMemory(ctxt);
4702
0
  return(NULL);
4703
0
    }
4704
2.65k
    cur = CUR;
4705
154k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4706
151k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4707
151k
  if (len + 1 >= size) {
4708
90
      xmlChar *tmp;
4709
90
            int newSize;
4710
4711
90
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4712
90
            if (newSize < 0) {
4713
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4714
0
                xmlFree(buf);
4715
0
                return(NULL);
4716
0
            }
4717
90
      tmp = xmlRealloc(buf, newSize);
4718
90
      if (tmp == NULL) {
4719
0
    xmlErrMemory(ctxt);
4720
0
    xmlFree(buf);
4721
0
    return(NULL);
4722
0
      }
4723
90
      buf = tmp;
4724
90
            size = newSize;
4725
90
  }
4726
151k
  buf[len++] = cur;
4727
151k
  NEXT;
4728
151k
  cur = CUR;
4729
151k
    }
4730
2.65k
    buf[len] = 0;
4731
2.65k
    if (cur != stop) {
4732
96
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4733
2.56k
    } else {
4734
2.56k
  NEXTL(1);
4735
2.56k
    }
4736
2.65k
    return(buf);
4737
2.65k
}
4738
4739
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4740
4741
/*
4742
 * Lookup table used for the test in the inner loop of the char data
 * parsing (indexed by the current input byte).
 *
 * An entry is nonzero (it holds its own index) for TAB (0x09) and for
 * every byte in 0x20-0x7F except '&' (0x26), '<' (0x3C) and ']' (0x5D).
 * All other entries are 0x00, including LF (0x0A), CR (0x0D), the
 * remaining control characters and every byte >= 0x80.
4743
 */
4744
static const unsigned char test_char_data[256] = {
4745
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4746
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4747
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4748
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4749
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4750
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4751
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4752
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4753
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4754
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4755
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4756
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4757
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4758
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4759
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4760
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4761
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4762
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4763
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4764
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4765
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4766
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4767
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4768
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4769
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4770
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4771
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4772
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4773
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4774
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4775
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4776
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4777
};
4778
4779
static void
4780
xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size,
4781
11.0M
              int isBlank) {
4782
11.0M
    int checkBlanks;
4783
4784
11.0M
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
4785
60.5k
        return;
4786
4787
10.9M
    checkBlanks = (!ctxt->keepBlanks) ||
4788
10.9M
                  (ctxt->sax->ignorableWhitespace != ctxt->sax->characters);
4789
4790
    /*
4791
     * Calling areBlanks with only parts of a text node
4792
     * is fundamentally broken, making the NOBLANKS option
4793
     * essentially unusable.
4794
     */
4795
10.9M
    if ((checkBlanks) &&
4796
10.6M
        (areBlanks(ctxt, buf, size, isBlank))) {
4797
0
        if ((ctxt->sax->ignorableWhitespace != NULL) &&
4798
0
            (ctxt->keepBlanks))
4799
0
            ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
4800
10.9M
    } else {
4801
10.9M
        if (ctxt->sax->characters != NULL)
4802
10.9M
            ctxt->sax->characters(ctxt->userData, buf, size);
4803
4804
        /*
4805
         * The old code used to update this value for "complex" data
4806
         * even if checkBlanks was false. This was probably a bug.
4807
         */
4808
10.9M
        if ((checkBlanks) && (*ctxt->space == -1))
4809
6.30M
            *ctxt->space = -2;
4810
10.9M
    }
4811
10.9M
}
4812
4813
/**
4814
 * xmlParseCharDataInternal:
4815
 * @ctxt:  an XML parser context
4816
 * @partial:  buffer may contain partial UTF-8 sequences
4817
 *
4818
 * Parse character data. Always makes progress if the first char isn't
4819
 * '<' or '&'.
4820
 *
4821
 * The right angle bracket (>) may be represented using the string "&gt;",
4822
 * and must, for compatibility, be escaped using "&gt;" or a character
4823
 * reference when it appears in the string "]]>" in content, when that
4824
 * string is not marking the end of a CDATA section.
4825
 *
4826
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4827
 *
 * This function is the fast path: it scans bytes directly in the input
 * buffer and passes the scanned ranges to xmlCharacters() without copying.
 * When the loop condition at the bottom no longer holds, the saved
 * line/column are restored and xmlParseCharDataComplex() takes over.
 */
4828
static void
4829
10.3M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4830
10.3M
    const xmlChar *in;
4831
10.3M
    int nbchar = 0;
4832
10.3M
    int line = ctxt->input->line;
4833
10.3M
    int col = ctxt->input->col;
4834
10.3M
    int ccol;
4835
4836
10.3M
    GROW;
4837
    /*
4838
     * Accelerated common case where input don't need to be
4839
     * modified before passing it to the handler.
4840
     */
4841
10.3M
    in = ctxt->input->cur;
4842
10.7M
    do {
4843
13.9M
get_more_space:
4844
32.6M
        /* Skip a run of spaces and line feeds, keeping line/col in sync. */
        while (*in == 0x20) { in++; ctxt->input->col++; }
4845
13.9M
        if (*in == 0xA) {
4846
3.28M
            do {
4847
3.28M
                ctxt->input->line++; ctxt->input->col = 1;
4848
3.28M
                in++;
4849
3.28M
            } while (*in == 0xA);
4850
3.16M
            goto get_more_space;
4851
3.16M
        }
4852
10.7M
        /*
         * Only 0x20 and 0xA were seen since input->cur: deliver that run
         * with isBlank set to 1 and stop in front of the '<'.
         */
        if (*in == '<') {
4853
3.26M
            nbchar = in - ctxt->input->cur;
4854
3.26M
            if (nbchar > 0) {
4855
3.26M
                const xmlChar *tmp = ctxt->input->cur;
4856
3.26M
                ctxt->input->cur = in;
4857
4858
3.26M
                xmlCharacters(ctxt, tmp, nbchar, 1);
4859
3.26M
            }
4860
3.26M
            return;
4861
3.26M
        }
4862
4863
8.27M
get_more:
4864
8.27M
        ccol = ctxt->input->col;
4865
105M
        /* Advance over every byte whose test_char_data entry is nonzero. */
        while (test_char_data[*in]) {
4866
97.6M
            in++;
4867
97.6M
            ccol++;
4868
97.6M
        }
4869
8.27M
        ctxt->input->col = ccol;
4870
8.27M
        if (*in == 0xA) {
4871
807k
            do {
4872
807k
                ctxt->input->line++; ctxt->input->col = 1;
4873
807k
                in++;
4874
807k
            } while (*in == 0xA);
4875
644k
            goto get_more;
4876
644k
        }
4877
7.63M
        if (*in == ']') {
4878
146k
            /*
             * "]]>" raises XML_ERR_MISPLACED_CDATA_END; input->cur is
             * moved just past the first ']' before returning.
             */
            if ((in[1] == ']') && (in[2] == '>')) {
4879
9
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4880
9
                ctxt->input->cur = in + 1;
4881
9
                return;
4882
9
            }
4883
146k
            /*
             * Otherwise the ']' is ordinary data, unless partial is set
             * and fewer than 2 bytes lie between in and input->end.
             */
            if ((!partial) || (ctxt->input->end - in >= 2)) {
4884
146k
                in++;
4885
146k
                ctxt->input->col++;
4886
146k
                goto get_more;
4887
146k
            }
4888
146k
        }
4889
7.48M
        /* Deliver everything scanned since input->cur (isBlank is 0). */
        nbchar = in - ctxt->input->cur;
4890
7.48M
        if (nbchar > 0) {
4891
6.78M
            const xmlChar *tmp = ctxt->input->cur;
4892
6.78M
            ctxt->input->cur = in;
4893
4894
6.78M
            xmlCharacters(ctxt, tmp, nbchar, 0);
4895
4896
6.78M
            /* Remember the position reached after this delivered chunk. */
            line = ctxt->input->line;
4897
6.78M
            col = ctxt->input->col;
4898
6.78M
        }
4899
7.48M
        ctxt->input->cur = in;
4900
7.48M
        /*
         * CR LF: input->cur is moved onto the LF, so the CR is left out of
         * the next delivered range, and scanning resumes after the LF.
         */
        if (*in == 0xD) {
4901
488k
            in++;
4902
488k
            if (*in == 0xA) {
4903
461k
                ctxt->input->cur = in;
4904
461k
                in++;
4905
461k
                ctxt->input->line++; ctxt->input->col = 1;
4906
461k
                continue; /* while */
4907
461k
            }
4908
27.5k
            /* Lone CR: undo the one-byte look-ahead. */
            in--;
4909
27.5k
        }
4910
7.02M
        if (*in == '<') {
4911
6.03M
            return;
4912
6.03M
        }
4913
995k
        if (*in == '&') {
4914
205k
            return;
4915
205k
        }
4916
790k
        if ((partial) && (*in == ']') && (ctxt->input->end - in < 2)) {
4917
1
            return;
4918
1
        }
4919
790k
        /*
         * in is reloaded from input->cur after SHRINK/GROW (presumably
         * because those macros may move the input buffer -- confirm
         * against their definitions).
         */
        SHRINK;
4920
790k
        GROW;
4921
790k
        in = ctxt->input->cur;
4922
1.25M
    /* Stay on the fast path while the next byte is 0x20-0x7F, TAB or LF. */
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4923
1.11M
             (*in == 0x09) || (*in == 0x0a));
4924
809k
    /*
     * Restore the line/column saved at entry or after the last delivered
     * chunk, then let the general-purpose routine handle what follows.
     */
    ctxt->input->line = line;
4925
809k
    ctxt->input->col = col;
4926
809k
    xmlParseCharDataComplex(ctxt, partial);
4927
809k
}
4928
4929
/**
4930
 * xmlParseCharDataComplex:
4931
 * @ctxt:  an XML parser context
4932
 * @cdata:  int indicating whether we are within a CDATA section
4933
 *
4934
 * Always makes progress if the first char isn't '<' or '&'.
4935
 *
4936
 * parse a CharData section.this is the fallback function
4937
 * of xmlParseCharData() when the parsing requires handling
4938
 * of non-ASCII characters.
4939
 */
4940
static void
4941
809k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4942
809k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4943
809k
    int nbchar = 0;
4944
809k
    int cur, l;
4945
4946
809k
    cur = xmlCurrentCharRecover(ctxt, &l);
4947
59.9M
    while ((cur != '<') && /* checked */
4948
59.2M
           (cur != '&') &&
4949
59.1M
           ((!partial) || (cur != ']') ||
4950
18.3k
            (ctxt->input->end - ctxt->input->cur >= 2)) &&
4951
59.1M
     (IS_CHAR(cur))) {
4952
59.1M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4953
310
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4954
310
  }
4955
59.1M
  COPY_BUF(buf, nbchar, cur);
4956
  /* move current position before possible calling of ctxt->sax->characters */
4957
59.1M
  NEXTL(l);
4958
59.1M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4959
169k
      buf[nbchar] = 0;
4960
4961
169k
            xmlCharacters(ctxt, buf, nbchar, 0);
4962
169k
      nbchar = 0;
4963
169k
            SHRINK;
4964
169k
  }
4965
59.1M
  cur = xmlCurrentCharRecover(ctxt, &l);
4966
59.1M
    }
4967
809k
    if (nbchar != 0) {
4968
802k
        buf[nbchar] = 0;
4969
4970
802k
        xmlCharacters(ctxt, buf, nbchar, 0);
4971
802k
    }
4972
    /*
4973
     * cur == 0 can mean
4974
     *
4975
     * - End of buffer.
4976
     * - An actual 0 character.
4977
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4978
     */
4979
809k
    if (ctxt->input->cur < ctxt->input->end) {
4980
802k
        if ((cur == 0) && (CUR != 0)) {
4981
233
            if (partial == 0) {
4982
228
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4983
228
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4984
228
                NEXTL(1);
4985
228
            }
4986
802k
        } else if ((cur != '<') && (cur != '&') && (cur != ']')) {
4987
            /* Generate the error and skip the offending character */
4988
2.49k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4989
2.49k
                              "PCDATA invalid Char value %d\n", cur);
4990
2.49k
            NEXTL(l);
4991
2.49k
        }
4992
802k
    }
4993
809k
}
4994
4995
/**
4996
 * xmlParseCharData:
4997
 * @ctxt:  an XML parser context
4998
 * @cdata:  unused
4999
 *
5000
 * DEPRECATED: Internal function, don't use.
5001
 */
5002
void
5003
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
5004
0
    xmlParseCharDataInternal(ctxt, 0);
5005
0
}
5006
5007
/**
5008
 * xmlParseExternalID:
5009
 * @ctxt:  an XML parser context
5010
 * @publicID:  a xmlChar** receiving PubidLiteral
5011
 * @strict: indicate whether we should restrict parsing to only
5012
 *          production [75], see NOTE below
5013
 *
5014
 * DEPRECATED: Internal function, don't use.
5015
 *
5016
 * Parse an External ID or a Public ID
5017
 *
5018
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
5019
 *       'PUBLIC' S PubidLiteral S SystemLiteral
5020
 *
5021
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5022
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
5023
 *
5024
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5025
 *
5026
 * Returns the function returns SystemLiteral and in the second
5027
 *                case publicID receives PubidLiteral, is strict is off
5028
 *                it is possible to return NULL and have publicID set.
5029
 */
5030
5031
xmlChar *
5032
13.8k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5033
13.8k
    xmlChar *URI = NULL;
5034
5035
13.8k
    *publicID = NULL;
5036
13.8k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5037
3.87k
        SKIP(6);
5038
3.87k
  if (SKIP_BLANKS == 0) {
5039
124
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5040
124
                     "Space required after 'SYSTEM'\n");
5041
124
  }
5042
3.87k
  URI = xmlParseSystemLiteral(ctxt);
5043
3.87k
  if (URI == NULL) {
5044
130
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5045
130
        }
5046
9.98k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5047
3.19k
        SKIP(6);
5048
3.19k
  if (SKIP_BLANKS == 0) {
5049
14
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5050
14
        "Space required after 'PUBLIC'\n");
5051
14
  }
5052
3.19k
  *publicID = xmlParsePubidLiteral(ctxt);
5053
3.19k
  if (*publicID == NULL) {
5054
534
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5055
534
  }
5056
3.19k
  if (strict) {
5057
      /*
5058
       * We don't handle [83] so "S SystemLiteral" is required.
5059
       */
5060
1.63k
      if (SKIP_BLANKS == 0) {
5061
501
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5062
501
      "Space required after the Public Identifier\n");
5063
501
      }
5064
1.63k
  } else {
5065
      /*
5066
       * We handle [83] so we return immediately, if
5067
       * "S SystemLiteral" is not detected. We skip blanks if no
5068
             * system literal was found, but this is harmless since we must
5069
             * be at the end of a NotationDecl.
5070
       */
5071
1.55k
      if (SKIP_BLANKS == 0) return(NULL);
5072
849
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
5073
849
  }
5074
1.99k
  URI = xmlParseSystemLiteral(ctxt);
5075
1.99k
  if (URI == NULL) {
5076
500
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5077
500
        }
5078
1.99k
    }
5079
12.6k
    return(URI);
5080
13.8k
}
5081
5082
/**
5083
 * xmlParseCommentComplex:
5084
 * @ctxt:  an XML parser context
5085
 * @buf:  the already parsed part of the buffer
5086
 * @len:  number of bytes in the buffer
5087
 * @size:  allocated size of the buffer
5088
 *
5089
 * Skip an XML (SGML) comment <!-- .... -->
5090
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5091
 *  must not occur within comments. "
5092
 * This is the slow routine in case the accelerator for ascii didn't work
5093
 *
5094
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5095
 */
5096
static void
5097
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5098
28.3k
                       size_t len, size_t size) {
5099
28.3k
    int q, ql;
5100
28.3k
    int r, rl;
5101
28.3k
    int cur, l;
5102
28.3k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5103
28.3k
                    XML_MAX_HUGE_LENGTH :
5104
28.3k
                    XML_MAX_TEXT_LENGTH;
5105
5106
28.3k
    if (buf == NULL) {
5107
7.24k
        len = 0;
5108
7.24k
  size = XML_PARSER_BUFFER_SIZE;
5109
7.24k
  buf = xmlMalloc(size);
5110
7.24k
  if (buf == NULL) {
5111
0
      xmlErrMemory(ctxt);
5112
0
      return;
5113
0
  }
5114
7.24k
    }
5115
28.3k
    q = xmlCurrentCharRecover(ctxt, &ql);
5116
28.3k
    if (q == 0)
5117
248
        goto not_terminated;
5118
28.0k
    if (!IS_CHAR(q)) {
5119
43
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5120
43
                          "xmlParseComment: invalid xmlChar value %d\n",
5121
43
                    q);
5122
43
  xmlFree (buf);
5123
43
  return;
5124
43
    }
5125
28.0k
    NEXTL(ql);
5126
28.0k
    r = xmlCurrentCharRecover(ctxt, &rl);
5127
28.0k
    if (r == 0)
5128
19
        goto not_terminated;
5129
28.0k
    if (!IS_CHAR(r)) {
5130
21
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5131
21
                          "xmlParseComment: invalid xmlChar value %d\n",
5132
21
                    r);
5133
21
  xmlFree (buf);
5134
21
  return;
5135
21
    }
5136
27.9k
    NEXTL(rl);
5137
27.9k
    cur = xmlCurrentCharRecover(ctxt, &l);
5138
27.9k
    if (cur == 0)
5139
19
        goto not_terminated;
5140
2.14M
    while (IS_CHAR(cur) && /* checked */
5141
2.14M
           ((cur != '>') ||
5142
2.11M
      (r != '-') || (q != '-'))) {
5143
2.11M
  if ((r == '-') && (q == '-')) {
5144
10.2k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5145
10.2k
  }
5146
2.11M
  if (len + 5 >= size) {
5147
12.5k
      xmlChar *tmp;
5148
12.5k
            int newSize;
5149
5150
12.5k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5151
12.5k
            if (newSize < 0) {
5152
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5153
0
                             "Comment too big found", NULL);
5154
0
                xmlFree (buf);
5155
0
                return;
5156
0
            }
5157
12.5k
      tmp = xmlRealloc(buf, newSize);
5158
12.5k
      if (tmp == NULL) {
5159
0
    xmlErrMemory(ctxt);
5160
0
    xmlFree(buf);
5161
0
    return;
5162
0
      }
5163
12.5k
      buf = tmp;
5164
12.5k
            size = newSize;
5165
12.5k
  }
5166
2.11M
  COPY_BUF(buf, len, q);
5167
5168
2.11M
  q = r;
5169
2.11M
  ql = rl;
5170
2.11M
  r = cur;
5171
2.11M
  rl = l;
5172
5173
2.11M
  NEXTL(l);
5174
2.11M
  cur = xmlCurrentCharRecover(ctxt, &l);
5175
5176
2.11M
    }
5177
27.9k
    buf[len] = 0;
5178
27.9k
    if (cur == 0) {
5179
430
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5180
430
                       "Comment not terminated \n<!--%.50s\n", buf);
5181
27.5k
    } else if (!IS_CHAR(cur)) {
5182
103
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5183
103
                          "xmlParseComment: invalid xmlChar value %d\n",
5184
103
                    cur);
5185
27.4k
    } else {
5186
27.4k
        NEXT;
5187
27.4k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5188
0
      (!ctxt->disableSAX))
5189
0
      ctxt->sax->comment(ctxt->userData, buf);
5190
27.4k
    }
5191
27.9k
    xmlFree(buf);
5192
27.9k
    return;
5193
286
not_terminated:
5194
286
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5195
286
       "Comment not terminated\n", NULL);
5196
286
    xmlFree(buf);
5197
286
}
5198
5199
/**
5200
 * xmlParseComment:
5201
 * @ctxt:  an XML parser context
5202
 *
5203
 * DEPRECATED: Internal function, don't use.
5204
 *
5205
 * Parse an XML (SGML) comment. Always consumes '<!'.
5206
 *
5207
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5208
 *  must not occur within comments. "
5209
 *
5210
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5211
 */
5212
void
5213
68.7k
xmlParseComment(xmlParserCtxtPtr ctxt) {
5214
68.7k
    xmlChar *buf = NULL;
5215
68.7k
    size_t size = XML_PARSER_BUFFER_SIZE;
5216
68.7k
    size_t len = 0;
5217
68.7k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5218
68.7k
                       XML_MAX_HUGE_LENGTH :
5219
68.7k
                       XML_MAX_TEXT_LENGTH;
5220
68.7k
    const xmlChar *in;
5221
68.7k
    size_t nbchar = 0;
5222
68.7k
    int ccol;
5223
5224
    /*
5225
     * Check that there is a comment right here.
5226
     */
5227
68.7k
    if ((RAW != '<') || (NXT(1) != '!'))
5228
0
        return;
5229
68.7k
    SKIP(2);
5230
68.7k
    if ((RAW != '-') || (NXT(1) != '-'))
5231
19
        return;
5232
68.6k
    SKIP(2);
5233
68.6k
    GROW;
5234
5235
    /*
5236
     * Accelerated common case where input don't need to be
5237
     * modified before passing it to the handler.
5238
     */
5239
68.6k
    in = ctxt->input->cur;
5240
68.6k
    do {
5241
68.6k
  if (*in == 0xA) {
5242
8.56k
      do {
5243
8.56k
    ctxt->input->line++; ctxt->input->col = 1;
5244
8.56k
    in++;
5245
8.56k
      } while (*in == 0xA);
5246
3.64k
  }
5247
128k
get_more:
5248
128k
        ccol = ctxt->input->col;
5249
1.35M
  while (((*in > '-') && (*in <= 0x7F)) ||
5250
292k
         ((*in >= 0x20) && (*in < '-')) ||
5251
1.22M
         (*in == 0x09)) {
5252
1.22M
        in++;
5253
1.22M
        ccol++;
5254
1.22M
  }
5255
128k
  ctxt->input->col = ccol;
5256
128k
  if (*in == 0xA) {
5257
18.2k
      do {
5258
18.2k
    ctxt->input->line++; ctxt->input->col = 1;
5259
18.2k
    in++;
5260
18.2k
      } while (*in == 0xA);
5261
11.0k
      goto get_more;
5262
11.0k
  }
5263
117k
  nbchar = in - ctxt->input->cur;
5264
  /*
5265
   * save current set of data
5266
   */
5267
117k
  if (nbchar > 0) {
5268
97.5k
            if (nbchar > maxLength - len) {
5269
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5270
0
                                  "Comment too big found", NULL);
5271
0
                xmlFree(buf);
5272
0
                return;
5273
0
            }
5274
97.5k
            if (buf == NULL) {
5275
57.7k
                if ((*in == '-') && (in[1] == '-'))
5276
23.5k
                    size = nbchar + 1;
5277
34.1k
                else
5278
34.1k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5279
57.7k
                buf = xmlMalloc(size);
5280
57.7k
                if (buf == NULL) {
5281
0
                    xmlErrMemory(ctxt);
5282
0
                    return;
5283
0
                }
5284
57.7k
                len = 0;
5285
57.7k
            } else if (len + nbchar + 1 >= size) {
5286
2.16k
                xmlChar *new_buf;
5287
2.16k
                size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5288
2.16k
                new_buf = xmlRealloc(buf, size);
5289
2.16k
                if (new_buf == NULL) {
5290
0
                    xmlErrMemory(ctxt);
5291
0
                    xmlFree(buf);
5292
0
                    return;
5293
0
                }
5294
2.16k
                buf = new_buf;
5295
2.16k
            }
5296
97.5k
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5297
97.5k
            len += nbchar;
5298
97.5k
            buf[len] = 0;
5299
97.5k
  }
5300
117k
  ctxt->input->cur = in;
5301
117k
  if (*in == 0xA) {
5302
0
      in++;
5303
0
      ctxt->input->line++; ctxt->input->col = 1;
5304
0
  }
5305
117k
  if (*in == 0xD) {
5306
11.2k
      in++;
5307
11.2k
      if (*in == 0xA) {
5308
5.25k
    ctxt->input->cur = in;
5309
5.25k
    in++;
5310
5.25k
    ctxt->input->line++; ctxt->input->col = 1;
5311
5.25k
    goto get_more;
5312
5.25k
      }
5313
6.03k
      in--;
5314
6.03k
  }
5315
112k
  SHRINK;
5316
112k
  GROW;
5317
112k
  in = ctxt->input->cur;
5318
112k
  if (*in == '-') {
5319
83.9k
      if (in[1] == '-') {
5320
52.8k
          if (in[2] == '>') {
5321
40.3k
        SKIP(3);
5322
40.3k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5323
0
            (!ctxt->disableSAX)) {
5324
0
      if (buf != NULL)
5325
0
          ctxt->sax->comment(ctxt->userData, buf);
5326
0
      else
5327
0
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5328
0
        }
5329
40.3k
        if (buf != NULL)
5330
36.6k
            xmlFree(buf);
5331
40.3k
        return;
5332
40.3k
    }
5333
12.5k
    if (buf != NULL) {
5334
9.64k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5335
9.64k
                          "Double hyphen within comment: "
5336
9.64k
                                      "<!--%.50s\n",
5337
9.64k
              buf);
5338
9.64k
    } else
5339
2.87k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5340
2.87k
                          "Double hyphen within comment\n", NULL);
5341
12.5k
    in++;
5342
12.5k
    ctxt->input->col++;
5343
12.5k
      }
5344
43.6k
      in++;
5345
43.6k
      ctxt->input->col++;
5346
43.6k
      goto get_more;
5347
83.9k
  }
5348
112k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5349
28.3k
    xmlParseCommentComplex(ctxt, buf, len, size);
5350
28.3k
}
5351
5352
5353
/**
5354
 * xmlParsePITarget:
5355
 * @ctxt:  an XML parser context
5356
 *
5357
 * DEPRECATED: Internal function, don't use.
5358
 *
5359
 * parse the name of a PI
5360
 *
5361
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5362
 *
5363
 * Returns the PITarget name or NULL
5364
 */
5365
5366
const xmlChar *
5367
67.6k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5368
67.6k
    const xmlChar *name;
5369
5370
67.6k
    name = xmlParseName(ctxt);
5371
67.6k
    if ((name != NULL) &&
5372
66.7k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5373
31.1k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5374
27.3k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5375
20.3k
  int i;
5376
20.3k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5377
17.7k
      (name[2] == 'l') && (name[3] == 0)) {
5378
1.15k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5379
1.15k
     "XML declaration allowed only at the start of the document\n");
5380
1.15k
      return(name);
5381
19.1k
  } else if (name[3] == 0) {
5382
1.14k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5383
1.14k
      return(name);
5384
1.14k
  }
5385
52.0k
  for (i = 0;;i++) {
5386
52.0k
      if (xmlW3CPIs[i] == NULL) break;
5387
35.0k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5388
1.05k
          return(name);
5389
35.0k
  }
5390
16.9k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5391
16.9k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5392
16.9k
          NULL, NULL);
5393
16.9k
    }
5394
64.3k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5395
5.71k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5396
5.71k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5397
5.71k
    }
5398
64.3k
    return(name);
5399
67.6k
}
5400
5401
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The accepted syntax is: optional blanks, the word "catalog", optional
 * blanks, '=', optional blanks, a quoted value, optional blanks, end of
 * string. Any deviation produces an XML_WAR_CATALOG_PI warning, except a
 * missing '=', which returns without a warning.
 */
static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *uri = NULL;
    const xmlChar *p = catalog;
    const xmlChar *valueStart;
    xmlChar quote;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    for (; IS_BLANK_CH(*p); p++)
        ;
    /* NOTE(review): silent return, unlike the other syntax errors below. */
    if (*p != '=')
        return;
    p++;

    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* Locate the matching closing quote. */
    valueStart = p;
    for (; *p != 0; p++) {
        if (*p == quote)
            break;
    }
    if (*p == 0)
        goto error;
    uri = xmlStrndup(valueStart, p - valueStart);
    p++;

    /* Only blanks may follow the quoted value. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (uri != NULL) {
        /*
         * Unfortunately, the catalog API doesn't report OOM errors.
         * xmlGetLastError isn't very helpful since we don't know
         * where the last error came from. We'd have to reset it
         * before this call and restore it afterwards.
         */
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
5468
5469
/**
5470
 * xmlParsePI:
5471
 * @ctxt:  an XML parser context
5472
 *
5473
 * DEPRECATED: Internal function, don't use.
5474
 *
5475
 * parse an XML Processing Instruction.
5476
 *
5477
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5478
 *
5479
 * The processing is transferred to SAX once parsed.
5480
 */
5481
5482
void
5483
67.6k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5484
67.6k
    xmlChar *buf = NULL;
5485
67.6k
    size_t len = 0;
5486
67.6k
    size_t size = XML_PARSER_BUFFER_SIZE;
5487
67.6k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5488
67.5k
                       XML_MAX_HUGE_LENGTH :
5489
67.6k
                       XML_MAX_TEXT_LENGTH;
5490
67.6k
    int cur, l;
5491
67.6k
    const xmlChar *target;
5492
5493
67.6k
    if ((RAW == '<') && (NXT(1) == '?')) {
5494
  /*
5495
   * this is a Processing Instruction.
5496
   */
5497
67.6k
  SKIP(2);
5498
5499
  /*
5500
   * Parse the target name and check for special support like
5501
   * namespace.
5502
   */
5503
67.6k
        target = xmlParsePITarget(ctxt);
5504
67.6k
  if (target != NULL) {
5505
66.7k
      if ((RAW == '?') && (NXT(1) == '>')) {
5506
10.9k
    SKIP(2);
5507
5508
    /*
5509
     * SAX: PI detected.
5510
     */
5511
10.9k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5512
6.96k
        (ctxt->sax->processingInstruction != NULL))
5513
6.96k
        ctxt->sax->processingInstruction(ctxt->userData,
5514
6.96k
                                         target, NULL);
5515
10.9k
    return;
5516
10.9k
      }
5517
55.8k
      buf = xmlMalloc(size);
5518
55.8k
      if (buf == NULL) {
5519
0
    xmlErrMemory(ctxt);
5520
0
    return;
5521
0
      }
5522
55.8k
      if (SKIP_BLANKS == 0) {
5523
7.06k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5524
7.06k
        "ParsePI: PI %s space expected\n", target);
5525
7.06k
      }
5526
55.8k
      cur = xmlCurrentCharRecover(ctxt, &l);
5527
5.61M
      while (IS_CHAR(cur) && /* checked */
5528
5.61M
       ((cur != '?') || (NXT(1) != '>'))) {
5529
5.56M
    if (len + 5 >= size) {
5530
33.4k
        xmlChar *tmp;
5531
33.4k
                    int newSize;
5532
5533
33.4k
                    newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5534
33.4k
                    if (newSize < 0) {
5535
0
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5536
0
                                          "PI %s too big found", target);
5537
0
                        xmlFree(buf);
5538
0
                        return;
5539
0
                    }
5540
33.4k
        tmp = xmlRealloc(buf, newSize);
5541
33.4k
        if (tmp == NULL) {
5542
0
      xmlErrMemory(ctxt);
5543
0
      xmlFree(buf);
5544
0
      return;
5545
0
        }
5546
33.4k
        buf = tmp;
5547
33.4k
                    size = newSize;
5548
33.4k
    }
5549
5.56M
    COPY_BUF(buf, len, cur);
5550
5.56M
    NEXTL(l);
5551
5.56M
    cur = xmlCurrentCharRecover(ctxt, &l);
5552
5.56M
      }
5553
55.8k
      buf[len] = 0;
5554
55.8k
      if (cur != '?') {
5555
1.87k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5556
1.87k
          "ParsePI: PI %s never end ...\n", target);
5557
53.9k
      } else {
5558
53.9k
    SKIP(2);
5559
5560
53.9k
#ifdef LIBXML_CATALOG_ENABLED
5561
53.9k
    if ((ctxt->inSubset == 0) &&
5562
45.5k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5563
12.4k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5564
5565
12.4k
        if ((ctxt->options & XML_PARSE_CATALOG_PI) &&
5566
0
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5567
0
       (allow == XML_CATA_ALLOW_ALL)))
5568
0
      xmlParseCatalogPI(ctxt, buf);
5569
12.4k
    }
5570
53.9k
#endif
5571
5572
    /*
5573
     * SAX: PI detected.
5574
     */
5575
53.9k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5576
46.4k
        (ctxt->sax->processingInstruction != NULL))
5577
46.4k
        ctxt->sax->processingInstruction(ctxt->userData,
5578
46.4k
                                         target, buf);
5579
53.9k
      }
5580
55.8k
      xmlFree(buf);
5581
55.8k
  } else {
5582
890
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5583
890
  }
5584
67.6k
    }
5585
67.6k
}
5586
5587
/**
5588
 * xmlParseNotationDecl:
5589
 * @ctxt:  an XML parser context
5590
 *
5591
 * DEPRECATED: Internal function, don't use.
5592
 *
5593
 * Parse a notation declaration. Always consumes '<!'.
5594
 *
5595
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5596
 *
5597
 * Hence there is actually 3 choices:
5598
 *     'PUBLIC' S PubidLiteral
5599
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5600
 * and 'SYSTEM' S SystemLiteral
5601
 *
5602
 * See the NOTE on xmlParseExternalID().
5603
 */
5604
5605
void
5606
2.44k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5607
2.44k
    const xmlChar *name;
5608
2.44k
    xmlChar *Pubid;
5609
2.44k
    xmlChar *Systemid;
5610
5611
2.44k
    if ((CUR != '<') || (NXT(1) != '!'))
5612
0
        return;
5613
2.44k
    SKIP(2);
5614
5615
2.44k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5616
2.38k
  int inputid = ctxt->input->id;
5617
2.38k
  SKIP(8);
5618
2.38k
  if (SKIP_BLANKS_PE == 0) {
5619
36
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5620
36
         "Space required after '<!NOTATION'\n");
5621
36
      return;
5622
36
  }
5623
5624
2.34k
        name = xmlParseName(ctxt);
5625
2.34k
  if (name == NULL) {
5626
42
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5627
42
      return;
5628
42
  }
5629
2.30k
  if (xmlStrchr(name, ':') != NULL) {
5630
7
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5631
7
         "colons are forbidden from notation names '%s'\n",
5632
7
         name, NULL, NULL);
5633
7
  }
5634
2.30k
  if (SKIP_BLANKS_PE == 0) {
5635
19
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5636
19
         "Space required after the NOTATION name'\n");
5637
19
      return;
5638
19
  }
5639
5640
  /*
5641
   * Parse the IDs.
5642
   */
5643
2.28k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5644
2.28k
  SKIP_BLANKS_PE;
5645
5646
2.28k
  if (RAW == '>') {
5647
2.08k
      if (inputid != ctxt->input->id) {
5648
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5649
0
                         "Notation declaration doesn't start and stop"
5650
0
                               " in the same entity\n");
5651
0
      }
5652
2.08k
      NEXT;
5653
2.08k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5654
1.57k
    (ctxt->sax->notationDecl != NULL))
5655
0
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5656
2.08k
  } else {
5657
203
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5658
203
  }
5659
2.28k
  if (Systemid != NULL) xmlFree(Systemid);
5660
2.28k
  if (Pubid != NULL) xmlFree(Pubid);
5661
2.28k
    }
5662
2.44k
}
5663
5664
/**
5665
 * xmlParseEntityDecl:
5666
 * @ctxt:  an XML parser context
5667
 *
5668
 * DEPRECATED: Internal function, don't use.
5669
 *
5670
 * Parse an entity declaration. Always consumes '<!'.
5671
 *
5672
 * [70] EntityDecl ::= GEDecl | PEDecl
5673
 *
5674
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5675
 *
5676
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5677
 *
5678
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5679
 *
5680
 * [74] PEDef ::= EntityValue | ExternalID
5681
 *
5682
 * [76] NDataDecl ::= S 'NDATA' S Name
5683
 *
5684
 * [ VC: Notation Declared ]
5685
 * The Name must match the declared name of a notation.
5686
 */
5687
5688
void
5689
35.2k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5690
35.2k
    const xmlChar *name = NULL;
5691
35.2k
    xmlChar *value = NULL;
5692
35.2k
    xmlChar *URI = NULL, *literal = NULL;
5693
35.2k
    const xmlChar *ndata = NULL;
5694
35.2k
    int isParameter = 0;
5695
35.2k
    xmlChar *orig = NULL;
5696
5697
35.2k
    if ((CUR != '<') || (NXT(1) != '!'))
5698
0
        return;
5699
35.2k
    SKIP(2);
5700
5701
    /* GROW; done in the caller */
5702
35.2k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5703
35.0k
  int inputid = ctxt->input->id;
5704
35.0k
  SKIP(6);
5705
35.0k
  if (SKIP_BLANKS_PE == 0) {
5706
5.12k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5707
5.12k
         "Space required after '<!ENTITY'\n");
5708
5.12k
  }
5709
5710
35.0k
  if (RAW == '%') {
5711
6.33k
      NEXT;
5712
6.33k
      if (SKIP_BLANKS_PE == 0) {
5713
2.84k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5714
2.84k
             "Space required after '%%'\n");
5715
2.84k
      }
5716
6.33k
      isParameter = 1;
5717
6.33k
  }
5718
5719
35.0k
        name = xmlParseName(ctxt);
5720
35.0k
  if (name == NULL) {
5721
118
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5722
118
                     "xmlParseEntityDecl: no name\n");
5723
118
            return;
5724
118
  }
5725
34.9k
  if (xmlStrchr(name, ':') != NULL) {
5726
1.09k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5727
1.09k
         "colons are forbidden from entities names '%s'\n",
5728
1.09k
         name, NULL, NULL);
5729
1.09k
  }
5730
34.9k
  if (SKIP_BLANKS_PE == 0) {
5731
13.7k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5732
13.7k
         "Space required after the entity name\n");
5733
13.7k
  }
5734
5735
  /*
5736
   * handle the various case of definitions...
5737
   */
5738
34.9k
  if (isParameter) {
5739
6.31k
      if ((RAW == '"') || (RAW == '\'')) {
5740
5.55k
          value = xmlParseEntityValue(ctxt, &orig);
5741
5.55k
    if (value) {
5742
5.52k
        if ((ctxt->sax != NULL) &&
5743
5.52k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5744
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5745
0
                        XML_INTERNAL_PARAMETER_ENTITY,
5746
0
            NULL, NULL, value);
5747
5.52k
    }
5748
5.55k
      } else {
5749
766
          URI = xmlParseExternalID(ctxt, &literal, 1);
5750
766
    if ((URI == NULL) && (literal == NULL)) {
5751
41
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5752
41
    }
5753
766
    if (URI) {
5754
713
                    if (xmlStrchr(URI, '#')) {
5755
3
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5756
710
                    } else {
5757
710
                        if ((ctxt->sax != NULL) &&
5758
710
                            (!ctxt->disableSAX) &&
5759
359
                            (ctxt->sax->entityDecl != NULL))
5760
0
                            ctxt->sax->entityDecl(ctxt->userData, name,
5761
0
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5762
0
                                        literal, URI, NULL);
5763
710
                    }
5764
713
    }
5765
766
      }
5766
28.6k
  } else {
5767
28.6k
      if ((RAW == '"') || (RAW == '\'')) {
5768
24.6k
          value = xmlParseEntityValue(ctxt, &orig);
5769
24.6k
    if ((ctxt->sax != NULL) &&
5770
24.6k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5771
0
        ctxt->sax->entityDecl(ctxt->userData, name,
5772
0
        XML_INTERNAL_GENERAL_ENTITY,
5773
0
        NULL, NULL, value);
5774
    /*
5775
     * For expat compatibility in SAX mode.
5776
     */
5777
24.6k
    if ((ctxt->myDoc == NULL) ||
5778
24.6k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5779
24.6k
        if (ctxt->myDoc == NULL) {
5780
1.02k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5781
1.02k
      if (ctxt->myDoc == NULL) {
5782
0
          xmlErrMemory(ctxt);
5783
0
          goto done;
5784
0
      }
5785
1.02k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5786
1.02k
        }
5787
24.6k
        if (ctxt->myDoc->intSubset == NULL) {
5788
1.02k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5789
1.02k
              BAD_CAST "fake", NULL, NULL);
5790
1.02k
                        if (ctxt->myDoc->intSubset == NULL) {
5791
0
                            xmlErrMemory(ctxt);
5792
0
                            goto done;
5793
0
                        }
5794
1.02k
                    }
5795
5796
24.6k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5797
24.6k
                    NULL, NULL, value);
5798
24.6k
    }
5799
24.6k
      } else {
5800
4.01k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5801
4.01k
    if ((URI == NULL) && (literal == NULL)) {
5802
791
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5803
791
    }
5804
4.01k
    if (URI) {
5805
3.18k
                    if (xmlStrchr(URI, '#')) {
5806
67
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5807
67
                    }
5808
3.18k
    }
5809
4.01k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5810
293
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5811
293
           "Space required before 'NDATA'\n");
5812
293
    }
5813
4.01k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5814
1.37k
        SKIP(5);
5815
1.37k
        if (SKIP_BLANKS_PE == 0) {
5816
4
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5817
4
               "Space required after 'NDATA'\n");
5818
4
        }
5819
1.37k
        ndata = xmlParseName(ctxt);
5820
1.37k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5821
781
            (ctxt->sax->unparsedEntityDecl != NULL))
5822
0
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5823
0
            literal, URI, ndata);
5824
2.63k
    } else {
5825
2.63k
        if ((ctxt->sax != NULL) &&
5826
2.63k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5827
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5828
0
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5829
0
            literal, URI, NULL);
5830
        /*
5831
         * For expat compatibility in SAX mode.
5832
         * assuming the entity replacement was asked for
5833
         */
5834
2.63k
        if ((ctxt->replaceEntities != 0) &&
5835
2.63k
      ((ctxt->myDoc == NULL) ||
5836
2.63k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5837
2.63k
      if (ctxt->myDoc == NULL) {
5838
243
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5839
243
          if (ctxt->myDoc == NULL) {
5840
0
              xmlErrMemory(ctxt);
5841
0
        goto done;
5842
0
          }
5843
243
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5844
243
      }
5845
5846
2.63k
      if (ctxt->myDoc->intSubset == NULL) {
5847
243
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5848
243
            BAD_CAST "fake", NULL, NULL);
5849
243
                            if (ctxt->myDoc->intSubset == NULL) {
5850
0
                                xmlErrMemory(ctxt);
5851
0
                                goto done;
5852
0
                            }
5853
243
                        }
5854
2.63k
      xmlSAX2EntityDecl(ctxt, name,
5855
2.63k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5856
2.63k
                  literal, URI, NULL);
5857
2.63k
        }
5858
2.63k
    }
5859
4.01k
      }
5860
28.6k
  }
5861
34.9k
  SKIP_BLANKS_PE;
5862
34.9k
  if (RAW != '>') {
5863
942
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5864
942
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5865
942
      xmlHaltParser(ctxt);
5866
33.9k
  } else {
5867
33.9k
      if (inputid != ctxt->input->id) {
5868
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5869
0
                         "Entity declaration doesn't start and stop in"
5870
0
                               " the same entity\n");
5871
0
      }
5872
33.9k
      NEXT;
5873
33.9k
  }
5874
34.9k
  if (orig != NULL) {
5875
      /*
5876
       * Ugly mechanism to save the raw entity value.
5877
       */
5878
29.8k
      xmlEntityPtr cur = NULL;
5879
5880
29.8k
      if (isParameter) {
5881
5.52k
          if ((ctxt->sax != NULL) &&
5882
5.52k
        (ctxt->sax->getParameterEntity != NULL))
5883
0
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5884
24.3k
      } else {
5885
24.3k
          if ((ctxt->sax != NULL) &&
5886
24.3k
        (ctxt->sax->getEntity != NULL))
5887
24.3k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5888
24.3k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5889
0
        cur = xmlSAX2GetEntity(ctxt, name);
5890
0
    }
5891
24.3k
      }
5892
29.8k
            if ((cur != NULL) && (cur->orig == NULL)) {
5893
0
    cur->orig = orig;
5894
0
                orig = NULL;
5895
0
      }
5896
29.8k
  }
5897
5898
34.9k
done:
5899
34.9k
  if (value != NULL) xmlFree(value);
5900
34.9k
  if (URI != NULL) xmlFree(URI);
5901
34.9k
  if (literal != NULL) xmlFree(literal);
5902
34.9k
        if (orig != NULL) xmlFree(orig);
5903
34.9k
    }
5904
35.2k
}
5905
5906
/**
5907
 * xmlParseDefaultDecl:
5908
 * @ctxt:  an XML parser context
5909
 * @value:  Receive a possible fixed default value for the attribute
5910
 *
5911
 * DEPRECATED: Internal function, don't use.
5912
 *
5913
 * Parse an attribute default declaration
5914
 *
5915
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5916
 *
5917
 * [ VC: Required Attribute ]
5918
 * if the default declaration is the keyword #REQUIRED, then the
5919
 * attribute must be specified for all elements of the type in the
5920
 * attribute-list declaration.
5921
 *
5922
 * [ VC: Attribute Default Legal ]
5923
 * The declared default value must meet the lexical constraints of
5924
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5925
 *
5926
 * [ VC: Fixed Attribute Default ]
5927
 * if an attribute has a default value declared with the #FIXED
5928
 * keyword, instances of that attribute must match the default value.
5929
 *
5930
 * [ WFC: No < in Attribute Values ]
5931
 * handled in xmlParseAttValue()
5932
 *
5933
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5934
 *          or XML_ATTRIBUTE_FIXED.
5935
 */
5936
5937
int
5938
36.3k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5939
36.3k
    int val;
5940
36.3k
    xmlChar *ret;
5941
5942
36.3k
    *value = NULL;
5943
36.3k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5944
2.72k
  SKIP(9);
5945
2.72k
  return(XML_ATTRIBUTE_REQUIRED);
5946
2.72k
    }
5947
33.6k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5948
4.19k
  SKIP(8);
5949
4.19k
  return(XML_ATTRIBUTE_IMPLIED);
5950
4.19k
    }
5951
29.4k
    val = XML_ATTRIBUTE_NONE;
5952
29.4k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5953
1.04k
  SKIP(6);
5954
1.04k
  val = XML_ATTRIBUTE_FIXED;
5955
1.04k
  if (SKIP_BLANKS_PE == 0) {
5956
33
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5957
33
         "Space required after '#FIXED'\n");
5958
33
  }
5959
1.04k
    }
5960
29.4k
    ret = xmlParseAttValue(ctxt);
5961
29.4k
    if (ret == NULL) {
5962
354
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5963
354
           "Attribute default value declaration error\n");
5964
354
    } else
5965
29.0k
        *value = ret;
5966
29.4k
    return(val);
5967
33.6k
}
5968
5969
/**
5970
 * xmlParseNotationType:
5971
 * @ctxt:  an XML parser context
5972
 *
5973
 * DEPRECATED: Internal function, don't use.
5974
 *
5975
 * parse an Notation attribute type.
5976
 *
5977
 * Note: the leading 'NOTATION' S part has already being parsed...
5978
 *
5979
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5980
 *
5981
 * [ VC: Notation Attributes ]
5982
 * Values of this type must match one of the notation names included
5983
 * in the declaration; all notation names in the declaration must be declared.
5984
 *
5985
 * Returns: the notation attribute tree built while parsing
5986
 */
5987
5988
xmlEnumerationPtr
5989
648
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5990
648
    const xmlChar *name;
5991
648
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5992
5993
648
    if (RAW != '(') {
5994
8
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5995
8
  return(NULL);
5996
8
    }
5997
1.47k
    do {
5998
1.47k
        NEXT;
5999
1.47k
  SKIP_BLANKS_PE;
6000
1.47k
        name = xmlParseName(ctxt);
6001
1.47k
  if (name == NULL) {
6002
30
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6003
30
         "Name expected in NOTATION declaration\n");
6004
30
            xmlFreeEnumeration(ret);
6005
30
      return(NULL);
6006
30
  }
6007
1.44k
        tmp = NULL;
6008
1.44k
#ifdef LIBXML_VALID_ENABLED
6009
1.44k
        if (ctxt->validate) {
6010
0
            tmp = ret;
6011
0
            while (tmp != NULL) {
6012
0
                if (xmlStrEqual(name, tmp->name)) {
6013
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6014
0
              "standalone: attribute notation value token %s duplicated\n",
6015
0
                                     name, NULL);
6016
0
                    if (!xmlDictOwns(ctxt->dict, name))
6017
0
                        xmlFree((xmlChar *) name);
6018
0
                    break;
6019
0
                }
6020
0
                tmp = tmp->next;
6021
0
            }
6022
0
        }
6023
1.44k
#endif /* LIBXML_VALID_ENABLED */
6024
1.44k
  if (tmp == NULL) {
6025
1.44k
      cur = xmlCreateEnumeration(name);
6026
1.44k
      if (cur == NULL) {
6027
0
                xmlErrMemory(ctxt);
6028
0
                xmlFreeEnumeration(ret);
6029
0
                return(NULL);
6030
0
            }
6031
1.44k
      if (last == NULL) ret = last = cur;
6032
823
      else {
6033
823
    last->next = cur;
6034
823
    last = cur;
6035
823
      }
6036
1.44k
  }
6037
1.44k
  SKIP_BLANKS_PE;
6038
1.44k
    } while (RAW == '|');
6039
610
    if (RAW != ')') {
6040
38
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
6041
38
        xmlFreeEnumeration(ret);
6042
38
  return(NULL);
6043
38
    }
6044
572
    NEXT;
6045
572
    return(ret);
6046
610
}
6047
6048
/**
6049
 * xmlParseEnumerationType:
6050
 * @ctxt:  an XML parser context
6051
 *
6052
 * DEPRECATED: Internal function, don't use.
6053
 *
6054
 * parse an Enumeration attribute type.
6055
 *
6056
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
6057
 *
6058
 * [ VC: Enumeration ]
6059
 * Values of this type must match one of the Nmtoken tokens in
6060
 * the declaration
6061
 *
6062
 * Returns: the enumeration attribute tree built while parsing
6063
 */
6064
6065
xmlEnumerationPtr
6066
6.61k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
6067
6.61k
    xmlChar *name;
6068
6.61k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6069
6070
6.61k
    if (RAW != '(') {
6071
225
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
6072
225
  return(NULL);
6073
225
    }
6074
8.18k
    do {
6075
8.18k
        NEXT;
6076
8.18k
  SKIP_BLANKS_PE;
6077
8.18k
        name = xmlParseNmtoken(ctxt);
6078
8.18k
  if (name == NULL) {
6079
58
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
6080
58
      return(ret);
6081
58
  }
6082
8.12k
        tmp = NULL;
6083
8.12k
#ifdef LIBXML_VALID_ENABLED
6084
8.12k
        if (ctxt->validate) {
6085
0
            tmp = ret;
6086
0
            while (tmp != NULL) {
6087
0
                if (xmlStrEqual(name, tmp->name)) {
6088
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6089
0
              "standalone: attribute enumeration value token %s duplicated\n",
6090
0
                                     name, NULL);
6091
0
                    if (!xmlDictOwns(ctxt->dict, name))
6092
0
                        xmlFree(name);
6093
0
                    break;
6094
0
                }
6095
0
                tmp = tmp->next;
6096
0
            }
6097
0
        }
6098
8.12k
#endif /* LIBXML_VALID_ENABLED */
6099
8.12k
  if (tmp == NULL) {
6100
8.12k
      cur = xmlCreateEnumeration(name);
6101
8.12k
      if (!xmlDictOwns(ctxt->dict, name))
6102
8.12k
    xmlFree(name);
6103
8.12k
      if (cur == NULL) {
6104
0
                xmlErrMemory(ctxt);
6105
0
                xmlFreeEnumeration(ret);
6106
0
                return(NULL);
6107
0
            }
6108
8.12k
      if (last == NULL) ret = last = cur;
6109
1.74k
      else {
6110
1.74k
    last->next = cur;
6111
1.74k
    last = cur;
6112
1.74k
      }
6113
8.12k
  }
6114
8.12k
  SKIP_BLANKS_PE;
6115
8.12k
    } while (RAW == '|');
6116
6.33k
    if (RAW != ')') {
6117
67
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
6118
67
  return(ret);
6119
67
    }
6120
6.26k
    NEXT;
6121
6.26k
    return(ret);
6122
6.33k
}
6123
6124
/**
6125
 * xmlParseEnumeratedType:
6126
 * @ctxt:  an XML parser context
6127
 * @tree:  the enumeration tree built while parsing
6128
 *
6129
 * DEPRECATED: Internal function, don't use.
6130
 *
6131
 * parse an Enumerated attribute type.
6132
 *
6133
 * [57] EnumeratedType ::= NotationType | Enumeration
6134
 *
6135
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6136
 *
6137
 *
6138
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6139
 */
6140
6141
int
6142
7.26k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6143
7.26k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6144
653
  SKIP(8);
6145
653
  if (SKIP_BLANKS_PE == 0) {
6146
5
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6147
5
         "Space required after 'NOTATION'\n");
6148
5
      return(0);
6149
5
  }
6150
648
  *tree = xmlParseNotationType(ctxt);
6151
648
  if (*tree == NULL) return(0);
6152
572
  return(XML_ATTRIBUTE_NOTATION);
6153
648
    }
6154
6.61k
    *tree = xmlParseEnumerationType(ctxt);
6155
6.61k
    if (*tree == NULL) return(0);
6156
6.38k
    return(XML_ATTRIBUTE_ENUMERATION);
6157
6.61k
}
6158
6159
/**
6160
 * xmlParseAttributeType:
6161
 * @ctxt:  an XML parser context
6162
 * @tree:  the enumeration tree built while parsing
6163
 *
6164
 * DEPRECATED: Internal function, don't use.
6165
 *
6166
 * parse the Attribute list def for an element
6167
 *
6168
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6169
 *
6170
 * [55] StringType ::= 'CDATA'
6171
 *
6172
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6173
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6174
 *
6175
 * Validity constraints for attribute values syntax are checked in
6176
 * xmlValidateAttributeValue()
6177
 *
6178
 * [ VC: ID ]
6179
 * Values of type ID must match the Name production. A name must not
6180
 * appear more than once in an XML document as a value of this type;
6181
 * i.e., ID values must uniquely identify the elements which bear them.
6182
 *
6183
 * [ VC: One ID per Element Type ]
6184
 * No element type may have more than one ID attribute specified.
6185
 *
6186
 * [ VC: ID Attribute Default ]
6187
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6188
 *
6189
 * [ VC: IDREF ]
6190
 * Values of type IDREF must match the Name production, and values
6191
 * of type IDREFS must match Names; each IDREF Name must match the value
6192
 * of an ID attribute on some element in the XML document; i.e. IDREF
6193
 * values must match the value of some ID attribute.
6194
 *
6195
 * [ VC: Entity Name ]
6196
 * Values of type ENTITY must match the Name production, values
6197
 * of type ENTITIES must match Names; each Entity Name must match the
6198
 * name of an unparsed entity declared in the DTD.
6199
 *
6200
 * [ VC: Name Token ]
6201
 * Values of type NMTOKEN must match the Nmtoken production; values
6202
 * of type NMTOKENS must match Nmtokens.
6203
 *
6204
 * Returns the attribute type
6205
 */
6206
int
6207
36.8k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6208
36.8k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6209
3.24k
  SKIP(5);
6210
3.24k
  return(XML_ATTRIBUTE_CDATA);
6211
33.5k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6212
709
  SKIP(6);
6213
709
  return(XML_ATTRIBUTE_IDREFS);
6214
32.8k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6215
633
  SKIP(5);
6216
633
  return(XML_ATTRIBUTE_IDREF);
6217
32.2k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6218
20.2k
        SKIP(2);
6219
20.2k
  return(XML_ATTRIBUTE_ID);
6220
20.2k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6221
459
  SKIP(6);
6222
459
  return(XML_ATTRIBUTE_ENTITY);
6223
11.5k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6224
3.42k
  SKIP(8);
6225
3.42k
  return(XML_ATTRIBUTE_ENTITIES);
6226
8.16k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6227
343
  SKIP(8);
6228
343
  return(XML_ATTRIBUTE_NMTOKENS);
6229
7.82k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6230
557
  SKIP(7);
6231
557
  return(XML_ATTRIBUTE_NMTOKEN);
6232
557
     }
6233
7.26k
     return(xmlParseEnumeratedType(ctxt, tree));
6234
36.8k
}
6235
6236
/**
6237
 * xmlParseAttributeListDecl:
6238
 * @ctxt:  an XML parser context
6239
 *
6240
 * DEPRECATED: Internal function, don't use.
6241
 *
6242
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6243
 *
6244
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6245
 *
6246
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6247
 *
6248
 */
6249
void
6250
9.54k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6251
9.54k
    const xmlChar *elemName;
6252
9.54k
    const xmlChar *attrName;
6253
9.54k
    xmlEnumerationPtr tree;
6254
6255
9.54k
    if ((CUR != '<') || (NXT(1) != '!'))
6256
0
        return;
6257
9.54k
    SKIP(2);
6258
6259
9.54k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6260
9.47k
  int inputid = ctxt->input->id;
6261
6262
9.47k
  SKIP(7);
6263
9.47k
  if (SKIP_BLANKS_PE == 0) {
6264
116
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6265
116
                     "Space required after '<!ATTLIST'\n");
6266
116
  }
6267
9.47k
        elemName = xmlParseName(ctxt);
6268
9.47k
  if (elemName == NULL) {
6269
52
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6270
52
         "ATTLIST: no name for Element\n");
6271
52
      return;
6272
52
  }
6273
9.42k
  SKIP_BLANKS_PE;
6274
9.42k
  GROW;
6275
45.3k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6276
37.2k
      int type;
6277
37.2k
      int def;
6278
37.2k
      xmlChar *defaultValue = NULL;
6279
6280
37.2k
      GROW;
6281
37.2k
            tree = NULL;
6282
37.2k
      attrName = xmlParseName(ctxt);
6283
37.2k
      if (attrName == NULL) {
6284
122
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6285
122
             "ATTLIST: no name for Attribute\n");
6286
122
    break;
6287
122
      }
6288
37.0k
      GROW;
6289
37.0k
      if (SKIP_BLANKS_PE == 0) {
6290
245
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6291
245
            "Space required after the attribute name\n");
6292
245
    break;
6293
245
      }
6294
6295
36.8k
      type = xmlParseAttributeType(ctxt, &tree);
6296
36.8k
      if (type <= 0) {
6297
314
          break;
6298
314
      }
6299
6300
36.5k
      GROW;
6301
36.5k
      if (SKIP_BLANKS_PE == 0) {
6302
182
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6303
182
             "Space required after the attribute type\n");
6304
182
          if (tree != NULL)
6305
128
        xmlFreeEnumeration(tree);
6306
182
    break;
6307
182
      }
6308
6309
36.3k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6310
36.3k
      if (def <= 0) {
6311
0
                if (defaultValue != NULL)
6312
0
        xmlFree(defaultValue);
6313
0
          if (tree != NULL)
6314
0
        xmlFreeEnumeration(tree);
6315
0
          break;
6316
0
      }
6317
36.3k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6318
26.2k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6319
6320
36.3k
      GROW;
6321
36.3k
            if (RAW != '>') {
6322
30.3k
    if (SKIP_BLANKS_PE == 0) {
6323
459
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6324
459
      "Space required after the attribute default value\n");
6325
459
        if (defaultValue != NULL)
6326
99
      xmlFree(defaultValue);
6327
459
        if (tree != NULL)
6328
51
      xmlFreeEnumeration(tree);
6329
459
        break;
6330
459
    }
6331
30.3k
      }
6332
35.8k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6333
32.0k
    (ctxt->sax->attributeDecl != NULL))
6334
0
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6335
0
                          type, def, defaultValue, tree);
6336
35.8k
      else if (tree != NULL)
6337
6.77k
    xmlFreeEnumeration(tree);
6338
6339
35.8k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6340
28.9k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6341
28.9k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6342
28.9k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6343
28.9k
      }
6344
35.8k
      if (ctxt->sax2) {
6345
35.8k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6346
35.8k
      }
6347
35.8k
      if (defaultValue != NULL)
6348
28.9k
          xmlFree(defaultValue);
6349
35.8k
      GROW;
6350
35.8k
  }
6351
9.42k
  if (RAW == '>') {
6352
8.15k
      if (inputid != ctxt->input->id) {
6353
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6354
0
                               "Attribute list declaration doesn't start and"
6355
0
                               " stop in the same entity\n");
6356
0
      }
6357
8.15k
      NEXT;
6358
8.15k
  }
6359
9.42k
    }
6360
9.54k
}
6361
6362
/**
6363
 * xmlParseElementMixedContentDecl:
6364
 * @ctxt:  an XML parser context
6365
 * @inputchk:  the input used for the current entity, needed for boundary checks
6366
 *
6367
 * DEPRECATED: Internal function, don't use.
6368
 *
6369
 * parse the declaration for a Mixed Element content
6370
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6371
 *
6372
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6373
 *                '(' S? '#PCDATA' S? ')'
6374
 *
6375
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6376
 *
6377
 * [ VC: No Duplicate Types ]
6378
 * The same name must not appear more than once in a single
6379
 * mixed-content declaration.
6380
 *
6381
 * returns: the xmlElementContentPtr tree describing the element choices, or NULL on error
6382
 */
6383
xmlElementContentPtr
6384
5.82k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6385
5.82k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6386
5.82k
    const xmlChar *elem = NULL;
6387
6388
5.82k
    GROW;
6389
5.82k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6390
5.82k
  SKIP(7);
6391
5.82k
  SKIP_BLANKS_PE;
6392
5.82k
  if (RAW == ')') {
6393
2.38k
      if (ctxt->input->id != inputchk) {
6394
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6395
0
                               "Element content declaration doesn't start and"
6396
0
                               " stop in the same entity\n");
6397
0
      }
6398
2.38k
      NEXT;
6399
2.38k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6400
2.38k
      if (ret == NULL)
6401
0
                goto mem_error;
6402
2.38k
      if (RAW == '*') {
6403
500
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6404
500
    NEXT;
6405
500
      }
6406
2.38k
      return(ret);
6407
2.38k
  }
6408
3.43k
  if ((RAW == '(') || (RAW == '|')) {
6409
3.29k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6410
3.29k
      if (ret == NULL)
6411
0
                goto mem_error;
6412
3.29k
  }
6413
10.3k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6414
7.02k
      NEXT;
6415
7.02k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6416
7.02k
            if (n == NULL)
6417
0
                goto mem_error;
6418
7.02k
      if (elem == NULL) {
6419
3.29k
    n->c1 = cur;
6420
3.29k
    if (cur != NULL)
6421
3.29k
        cur->parent = n;
6422
3.29k
    ret = cur = n;
6423
3.73k
      } else {
6424
3.73k
          cur->c2 = n;
6425
3.73k
    n->parent = cur;
6426
3.73k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6427
3.73k
                if (n->c1 == NULL)
6428
0
                    goto mem_error;
6429
3.73k
    n->c1->parent = n;
6430
3.73k
    cur = n;
6431
3.73k
      }
6432
7.02k
      SKIP_BLANKS_PE;
6433
7.02k
      elem = xmlParseName(ctxt);
6434
7.02k
      if (elem == NULL) {
6435
84
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6436
84
      "xmlParseElementMixedContentDecl : Name expected\n");
6437
84
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6438
84
    return(NULL);
6439
84
      }
6440
6.94k
      SKIP_BLANKS_PE;
6441
6.94k
      GROW;
6442
6.94k
  }
6443
3.35k
  if ((RAW == ')') && (NXT(1) == '*')) {
6444
2.22k
      if (elem != NULL) {
6445
2.22k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6446
2.22k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6447
2.22k
    if (cur->c2 == NULL)
6448
0
                    goto mem_error;
6449
2.22k
    cur->c2->parent = cur;
6450
2.22k
            }
6451
2.22k
            if (ret != NULL)
6452
2.22k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6453
2.22k
      if (ctxt->input->id != inputchk) {
6454
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6455
0
                               "Element content declaration doesn't start and"
6456
0
                               " stop in the same entity\n");
6457
0
      }
6458
2.22k
      SKIP(2);
6459
2.22k
  } else {
6460
1.12k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6461
1.12k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6462
1.12k
      return(NULL);
6463
1.12k
  }
6464
6465
3.35k
    } else {
6466
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6467
0
    }
6468
2.22k
    return(ret);
6469
6470
0
mem_error:
6471
0
    xmlErrMemory(ctxt);
6472
0
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6473
0
    return(NULL);
6474
5.82k
}
6475
6476
/**
6477
 * xmlParseElementChildrenContentDeclPriv:
6478
 * @ctxt:  an XML parser context
6479
 * @inputchk:  the input used for the current entity, needed for boundary checks
6480
 * @depth: the level of recursion
6481
 *
6482
 * parse the declaration for an element children (element-only) content model
6483
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6484
 *
6485
 *
6486
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6487
 *
6488
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6489
 *
6490
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6491
 *
6492
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6493
 *
6494
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6495
 * TODO Parameter-entity replacement text must be properly nested
6496
 *  with parenthesized groups. That is to say, if either of the
6497
 *  opening or closing parentheses in a choice, seq, or Mixed
6498
 *  construct is contained in the replacement text for a parameter
6499
 *  entity, both must be contained in the same replacement text. For
6500
 *  interoperability, if a parameter-entity reference appears in a
6501
 *  choice, seq, or Mixed construct, its replacement text should not
6502
 *  be empty, and neither the first nor last non-blank character of
6503
 *  the replacement text should be a connector (| or ,).
6504
 *
6505
 * Returns the tree of xmlElementContentPtr describing the element
6506
 *          hierarchy.
6507
 */
6508
static xmlElementContentPtr
6509
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6510
92.7k
                                       int depth) {
6511
92.7k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6512
92.7k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6513
92.7k
    const xmlChar *elem;
6514
92.7k
    xmlChar type = 0;
6515
6516
92.7k
    if (depth > maxDepth) {
6517
4
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6518
4
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6519
4
                "use XML_PARSE_HUGE\n", depth);
6520
4
  return(NULL);
6521
4
    }
6522
92.7k
    SKIP_BLANKS_PE;
6523
92.7k
    GROW;
6524
92.7k
    if (RAW == '(') {
6525
81.3k
  int inputid = ctxt->input->id;
6526
6527
        /* Recurse on first child */
6528
81.3k
  NEXT;
6529
81.3k
  SKIP_BLANKS_PE;
6530
81.3k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6531
81.3k
                                                           depth + 1);
6532
81.3k
        if (cur == NULL)
6533
63.6k
            return(NULL);
6534
17.6k
  SKIP_BLANKS_PE;
6535
17.6k
  GROW;
6536
17.6k
    } else {
6537
11.4k
  elem = xmlParseName(ctxt);
6538
11.4k
  if (elem == NULL) {
6539
155
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6540
155
      return(NULL);
6541
155
  }
6542
11.2k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6543
11.2k
  if (cur == NULL) {
6544
0
      xmlErrMemory(ctxt);
6545
0
      return(NULL);
6546
0
  }
6547
11.2k
  GROW;
6548
11.2k
  if (RAW == '?') {
6549
896
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6550
896
      NEXT;
6551
10.3k
  } else if (RAW == '*') {
6552
613
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6553
613
      NEXT;
6554
9.76k
  } else if (RAW == '+') {
6555
1.62k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6556
1.62k
      NEXT;
6557
8.13k
  } else {
6558
8.13k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6559
8.13k
  }
6560
11.2k
  GROW;
6561
11.2k
    }
6562
28.9k
    SKIP_BLANKS_PE;
6563
60.1k
    while ((RAW != ')') && (PARSER_STOPPED(ctxt) == 0)) {
6564
        /*
6565
   * Each iteration parses one separator and one content particle (cp).
6566
   */
6567
36.8k
        if (RAW == ',') {
6568
23.6k
      if (type == 0) type = CUR;
6569
6570
      /*
6571
       * Detect "Name | Name , Name" error
6572
       */
6573
20.2k
      else if (type != CUR) {
6574
4
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6575
4
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6576
4
                      type);
6577
4
    if ((last != NULL) && (last != ret))
6578
4
        xmlFreeDocElementContent(ctxt->myDoc, last);
6579
4
    if (ret != NULL)
6580
4
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6581
4
    return(NULL);
6582
4
      }
6583
23.6k
      NEXT;
6584
6585
23.6k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6586
23.6k
      if (op == NULL) {
6587
0
                xmlErrMemory(ctxt);
6588
0
    if ((last != NULL) && (last != ret))
6589
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6590
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6591
0
    return(NULL);
6592
0
      }
6593
23.6k
      if (last == NULL) {
6594
3.43k
    op->c1 = ret;
6595
3.43k
    if (ret != NULL)
6596
3.43k
        ret->parent = op;
6597
3.43k
    ret = cur = op;
6598
20.2k
      } else {
6599
20.2k
          cur->c2 = op;
6600
20.2k
    if (op != NULL)
6601
20.2k
        op->parent = cur;
6602
20.2k
    op->c1 = last;
6603
20.2k
    if (last != NULL)
6604
20.2k
        last->parent = op;
6605
20.2k
    cur =op;
6606
20.2k
    last = NULL;
6607
20.2k
      }
6608
23.6k
  } else if (RAW == '|') {
6609
12.1k
      if (type == 0) type = CUR;
6610
6611
      /*
6612
       * Detect "Name , Name | Name" error
6613
       */
6614
5.27k
      else if (type != CUR) {
6615
3
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6616
3
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6617
3
          type);
6618
3
    if ((last != NULL) && (last != ret))
6619
3
        xmlFreeDocElementContent(ctxt->myDoc, last);
6620
3
    if (ret != NULL)
6621
3
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6622
3
    return(NULL);
6623
3
      }
6624
12.1k
      NEXT;
6625
6626
12.1k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6627
12.1k
      if (op == NULL) {
6628
0
                xmlErrMemory(ctxt);
6629
0
    if ((last != NULL) && (last != ret))
6630
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6631
0
    if (ret != NULL)
6632
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6633
0
    return(NULL);
6634
0
      }
6635
12.1k
      if (last == NULL) {
6636
6.84k
    op->c1 = ret;
6637
6.84k
    if (ret != NULL)
6638
6.84k
        ret->parent = op;
6639
6.84k
    ret = cur = op;
6640
6.84k
      } else {
6641
5.27k
          cur->c2 = op;
6642
5.27k
    if (op != NULL)
6643
5.27k
        op->parent = cur;
6644
5.27k
    op->c1 = last;
6645
5.27k
    if (last != NULL)
6646
5.27k
        last->parent = op;
6647
5.27k
    cur =op;
6648
5.27k
    last = NULL;
6649
5.27k
      }
6650
12.1k
  } else {
6651
1.07k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6652
1.07k
      if ((last != NULL) && (last != ret))
6653
870
          xmlFreeDocElementContent(ctxt->myDoc, last);
6654
1.07k
      if (ret != NULL)
6655
1.07k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6656
1.07k
      return(NULL);
6657
1.07k
  }
6658
35.8k
  GROW;
6659
35.8k
  SKIP_BLANKS_PE;
6660
35.8k
  GROW;
6661
35.8k
  if (RAW == '(') {
6662
7.35k
      int inputid = ctxt->input->id;
6663
      /* Recurse on second child */
6664
7.35k
      NEXT;
6665
7.35k
      SKIP_BLANKS_PE;
6666
7.35k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6667
7.35k
                                                          depth + 1);
6668
7.35k
            if (last == NULL) {
6669
4.57k
    if (ret != NULL)
6670
4.57k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6671
4.57k
    return(NULL);
6672
4.57k
            }
6673
2.78k
      SKIP_BLANKS_PE;
6674
28.4k
  } else {
6675
28.4k
      elem = xmlParseName(ctxt);
6676
28.4k
      if (elem == NULL) {
6677
52
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6678
52
    if (ret != NULL)
6679
52
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6680
52
    return(NULL);
6681
52
      }
6682
28.4k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6683
28.4k
      if (last == NULL) {
6684
0
                xmlErrMemory(ctxt);
6685
0
    if (ret != NULL)
6686
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6687
0
    return(NULL);
6688
0
      }
6689
28.4k
      if (RAW == '?') {
6690
162
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6691
162
    NEXT;
6692
28.2k
      } else if (RAW == '*') {
6693
508
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6694
508
    NEXT;
6695
27.7k
      } else if (RAW == '+') {
6696
610
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6697
610
    NEXT;
6698
27.1k
      } else {
6699
27.1k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6700
27.1k
      }
6701
28.4k
  }
6702
31.1k
  SKIP_BLANKS_PE;
6703
31.1k
  GROW;
6704
31.1k
    }
6705
23.2k
    if ((cur != NULL) && (last != NULL)) {
6706
4.77k
        cur->c2 = last;
6707
4.77k
  if (last != NULL)
6708
4.77k
      last->parent = cur;
6709
4.77k
    }
6710
23.2k
    if (ctxt->input->id != inputchk) {
6711
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6712
0
                       "Element content declaration doesn't start and stop in"
6713
0
                       " the same entity\n");
6714
0
    }
6715
23.2k
    NEXT;
6716
23.2k
    if (RAW == '?') {
6717
2.53k
  if (ret != NULL) {
6718
2.53k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6719
2.39k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6720
678
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6721
1.86k
      else
6722
1.86k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6723
2.53k
  }
6724
2.53k
  NEXT;
6725
20.7k
    } else if (RAW == '*') {
6726
2.46k
  if (ret != NULL) {
6727
2.46k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6728
2.46k
      cur = ret;
6729
      /*
6730
       * Some normalization:
6731
       * (a | b* | c?)* == (a | b | c)*
6732
       */
6733
4.70k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6734
2.24k
    if ((cur->c1 != NULL) &&
6735
2.24k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6736
2.23k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6737
803
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6738
2.24k
    if ((cur->c2 != NULL) &&
6739
2.24k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6740
2.24k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6741
108
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6742
2.24k
    cur = cur->c2;
6743
2.24k
      }
6744
2.46k
  }
6745
2.46k
  NEXT;
6746
18.2k
    } else if (RAW == '+') {
6747
4.81k
  if (ret != NULL) {
6748
4.81k
      int found = 0;
6749
6750
4.81k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6751
3.78k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6752
2.20k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6753
2.61k
      else
6754
2.61k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6755
      /*
6756
       * Some normalization:
6757
       * (a | b*)+ == (a | b)*
6758
       * (a | b?)+ == (a | b)*
6759
       */
6760
8.13k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6761
3.32k
    if ((cur->c1 != NULL) &&
6762
3.32k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6763
3.22k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6764
869
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6765
869
        found = 1;
6766
869
    }
6767
3.32k
    if ((cur->c2 != NULL) &&
6768
3.32k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6769
3.19k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6770
220
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6771
220
        found = 1;
6772
220
    }
6773
3.32k
    cur = cur->c2;
6774
3.32k
      }
6775
4.81k
      if (found)
6776
931
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6777
4.81k
  }
6778
4.81k
  NEXT;
6779
4.81k
    }
6780
23.2k
    return(ret);
6781
28.9k
}
6782
6783
/**
6784
 * xmlParseElementChildrenContentDecl:
6785
 * @ctxt:  an XML parser context
6786
 * @inputchk:  the input used for the current entity, needed for boundary checks
6787
 *
6788
 * DEPRECATED: Internal function, don't use.
6789
 *
6790
 * parse the declaration for an element children (element-only) content model
6791
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6792
 *
6793
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6794
 *
6795
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6796
 *
6797
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6798
 *
6799
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6800
 *
6801
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6802
 * TODO Parameter-entity replacement text must be properly nested
6803
 *  with parenthesized groups. That is to say, if either of the
6804
 *  opening or closing parentheses in a choice, seq, or Mixed
6805
 *  construct is contained in the replacement text for a parameter
6806
 *  entity, both must be contained in the same replacement text. For
6807
 *  interoperability, if a parameter-entity reference appears in a
6808
 *  choice, seq, or Mixed construct, its replacement text should not
6809
 *  be empty, and neither the first nor last non-blank character of
6810
 *  the replacement text should be a connector (| or ,).
6811
 *
6812
 * Returns the tree of xmlElementContentPtr describing the element
6813
 *          hierarchy.
6814
 */
6815
xmlElementContentPtr
6816
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6817
    /* stub left for API/ABI compat */
6818
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6819
0
}
6820
6821
/**
6822
 * xmlParseElementContentDecl:
6823
 * @ctxt:  an XML parser context
6824
 * @name:  the name of the element being defined.
6825
 * @result:  the Element Content pointer will be stored here if any
6826
 *
6827
 * DEPRECATED: Internal function, don't use.
6828
 *
6829
 * parse the declaration for an Element content either Mixed or Children,
6830
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6831
 *
6832
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6833
 *
6834
 * returns: the type of element content XML_ELEMENT_TYPE_xxx, or -1 if '(' is missing
6835
 */
6836
6837
int
6838
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6839
9.89k
                           xmlElementContentPtr *result) {
6840
6841
9.89k
    xmlElementContentPtr tree = NULL;
6842
9.89k
    int inputid = ctxt->input->id;
6843
9.89k
    int res;
6844
6845
9.89k
    *result = NULL;
6846
6847
9.89k
    if (RAW != '(') {
6848
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6849
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6850
0
  return(-1);
6851
0
    }
6852
9.89k
    NEXT;
6853
9.89k
    GROW;
6854
9.89k
    SKIP_BLANKS_PE;
6855
9.89k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6856
5.82k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6857
5.82k
  res = XML_ELEMENT_TYPE_MIXED;
6858
5.82k
    } else {
6859
4.07k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6860
4.07k
  res = XML_ELEMENT_TYPE_ELEMENT;
6861
4.07k
    }
6862
9.89k
    SKIP_BLANKS_PE;
6863
9.89k
    *result = tree;
6864
9.89k
    return(res);
6865
9.89k
}
6866
6867
/**
6868
 * xmlParseElementDecl:
6869
 * @ctxt:  an XML parser context
6870
 *
6871
 * DEPRECATED: Internal function, don't use.
6872
 *
6873
 * Parse an element declaration. Always consumes '<!'.
6874
 *
6875
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6876
 *
6877
 * [ VC: Unique Element Type Declaration ]
6878
 * No element type may be declared more than once
6879
 *
6880
 * Returns the type of the element, or -1 in case of error
6881
 */
6882
int
6883
11.5k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6884
11.5k
    const xmlChar *name;
6885
11.5k
    int ret = -1;
6886
11.5k
    xmlElementContentPtr content  = NULL;
6887
6888
11.5k
    if ((CUR != '<') || (NXT(1) != '!'))
6889
0
        return(ret);
6890
11.5k
    SKIP(2);
6891
6892
    /* GROW; done in the caller */
6893
11.5k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6894
11.5k
  int inputid = ctxt->input->id;
6895
6896
11.5k
  SKIP(7);
6897
11.5k
  if (SKIP_BLANKS_PE == 0) {
6898
17
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6899
17
               "Space required after 'ELEMENT'\n");
6900
17
      return(-1);
6901
17
  }
6902
11.4k
        name = xmlParseName(ctxt);
6903
11.4k
  if (name == NULL) {
6904
11
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6905
11
         "xmlParseElementDecl: no name for Element\n");
6906
11
      return(-1);
6907
11
  }
6908
11.4k
  if (SKIP_BLANKS_PE == 0) {
6909
133
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6910
133
         "Space required after the element name\n");
6911
133
  }
6912
11.4k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6913
564
      SKIP(5);
6914
      /*
6915
       * Element must always be empty.
6916
       */
6917
564
      ret = XML_ELEMENT_TYPE_EMPTY;
6918
10.9k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6919
601
             (NXT(2) == 'Y')) {
6920
596
      SKIP(3);
6921
      /*
6922
       * Element is a generic container.
6923
       */
6924
596
      ret = XML_ELEMENT_TYPE_ANY;
6925
10.3k
  } else if (RAW == '(') {
6926
9.89k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6927
9.89k
  } else {
6928
      /*
6929
       * [ WFC: PEs in Internal Subset ] error handling.
6930
       */
6931
427
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6932
427
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6933
427
      return(-1);
6934
427
  }
6935
6936
11.0k
  SKIP_BLANKS_PE;
6937
6938
11.0k
  if (RAW != '>') {
6939
961
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6940
961
      if (content != NULL) {
6941
60
    xmlFreeDocElementContent(ctxt->myDoc, content);
6942
60
      }
6943
10.0k
  } else {
6944
10.0k
      if (inputid != ctxt->input->id) {
6945
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6946
0
                               "Element declaration doesn't start and stop in"
6947
0
                               " the same entity\n");
6948
0
      }
6949
6950
10.0k
      NEXT;
6951
10.0k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6952
4.13k
    (ctxt->sax->elementDecl != NULL)) {
6953
0
    if (content != NULL)
6954
0
        content->parent = NULL;
6955
0
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6956
0
                           content);
6957
0
    if ((content != NULL) && (content->parent == NULL)) {
6958
        /*
6959
         * This is a trick: if the callback reaches xmlAddElementDecl, the
6960
         * tree is plugged in directly instead of being copied. A parent that
6961
         * is still NULL after the call is treated as "tree not adopted", so
6962
         * free it here. This avoids duplicating interfaces or changing the API/ABI.
6963
         */
6964
0
        xmlFreeDocElementContent(ctxt->myDoc, content);
6965
0
    }
6966
10.0k
      } else if (content != NULL) {
6967
7.33k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6968
7.33k
      }
6969
10.0k
  }
6970
11.0k
    }
6971
11.1k
    return(ret);
6972
11.5k
}
6973
6974
/**
6975
 * xmlParseConditionalSections:
6976
 * @ctxt:  an XML parser context
6977
 *
6978
 * Parse a conditional section. Always consumes '<!['.
6979
 *
6980
 * [61] conditionalSect ::= includeSect | ignoreSect
6981
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6982
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6983
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6984
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6985
 */
6986
6987
static void
6988
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6989
0
    int *inputIds = NULL;
6990
0
    size_t inputIdsSize = 0;
6991
0
    size_t depth = 0;
6992
6993
0
    while (PARSER_STOPPED(ctxt) == 0) {
6994
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6995
0
            int id = ctxt->input->id;
6996
6997
0
            SKIP(3);
6998
0
            SKIP_BLANKS_PE;
6999
7000
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
7001
0
                SKIP(7);
7002
0
                SKIP_BLANKS_PE;
7003
0
                if (RAW != '[') {
7004
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7005
0
                    xmlHaltParser(ctxt);
7006
0
                    goto error;
7007
0
                }
7008
0
                if (ctxt->input->id != id) {
7009
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7010
0
                                   "All markup of the conditional section is"
7011
0
                                   " not in the same entity\n");
7012
0
                }
7013
0
                NEXT;
7014
7015
0
                if (inputIdsSize <= depth) {
7016
0
                    int *tmp;
7017
0
                    int newSize;
7018
7019
0
                    newSize = xmlGrowCapacity(inputIdsSize, sizeof(tmp[0]),
7020
0
                                              4, 1000);
7021
0
                    if (newSize < 0) {
7022
0
                        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
7023
0
                                       "Maximum conditional section nesting"
7024
0
                                       " depth exceeded\n");
7025
0
                        goto error;
7026
0
                    }
7027
0
                    tmp = xmlRealloc(inputIds, newSize * sizeof(tmp[0]));
7028
0
                    if (tmp == NULL) {
7029
0
                        xmlErrMemory(ctxt);
7030
0
                        goto error;
7031
0
                    }
7032
0
                    inputIds = tmp;
7033
0
                    inputIdsSize = newSize;
7034
0
                }
7035
0
                inputIds[depth] = id;
7036
0
                depth++;
7037
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
7038
0
                size_t ignoreDepth = 0;
7039
7040
0
                SKIP(6);
7041
0
                SKIP_BLANKS_PE;
7042
0
                if (RAW != '[') {
7043
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7044
0
                    xmlHaltParser(ctxt);
7045
0
                    goto error;
7046
0
                }
7047
0
                if (ctxt->input->id != id) {
7048
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7049
0
                                   "All markup of the conditional section is"
7050
0
                                   " not in the same entity\n");
7051
0
                }
7052
0
                NEXT;
7053
7054
0
                while (PARSER_STOPPED(ctxt) == 0) {
7055
0
                    if (RAW == 0) {
7056
0
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
7057
0
                        goto error;
7058
0
                    }
7059
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7060
0
                        SKIP(3);
7061
0
                        ignoreDepth++;
7062
                        /* Check for integer overflow */
7063
0
                        if (ignoreDepth == 0) {
7064
0
                            xmlErrMemory(ctxt);
7065
0
                            goto error;
7066
0
                        }
7067
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
7068
0
                               (NXT(2) == '>')) {
7069
0
                        SKIP(3);
7070
0
                        if (ignoreDepth == 0)
7071
0
                            break;
7072
0
                        ignoreDepth--;
7073
0
                    } else {
7074
0
                        NEXT;
7075
0
                    }
7076
0
                }
7077
7078
0
                if (ctxt->input->id != id) {
7079
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7080
0
                                   "All markup of the conditional section is"
7081
0
                                   " not in the same entity\n");
7082
0
                }
7083
0
            } else {
7084
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
7085
0
                xmlHaltParser(ctxt);
7086
0
                goto error;
7087
0
            }
7088
0
        } else if ((depth > 0) &&
7089
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
7090
0
            depth--;
7091
0
            if (ctxt->input->id != inputIds[depth]) {
7092
0
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7093
0
                               "All markup of the conditional section is not"
7094
0
                               " in the same entity\n");
7095
0
            }
7096
0
            SKIP(3);
7097
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7098
0
            xmlParseMarkupDecl(ctxt);
7099
0
        } else {
7100
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7101
0
            xmlHaltParser(ctxt);
7102
0
            goto error;
7103
0
        }
7104
7105
0
        if (depth == 0)
7106
0
            break;
7107
7108
0
        SKIP_BLANKS_PE;
7109
0
        SHRINK;
7110
0
        GROW;
7111
0
    }
7112
7113
0
error:
7114
0
    xmlFree(inputIds);
7115
0
}
7116
7117
/**
7118
 * xmlParseMarkupDecl:
7119
 * @ctxt:  an XML parser context
7120
 *
7121
 * DEPRECATED: Internal function, don't use.
7122
 *
7123
 * Parse markup declarations. Always consumes '<!' or '<?'.
7124
 *
7125
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7126
 *                     NotationDecl | PI | Comment
7127
 *
7128
 * [ VC: Proper Declaration/PE Nesting ]
7129
 * Parameter-entity replacement text must be properly nested with
7130
 * markup declarations. That is to say, if either the first character
7131
 * or the last character of a markup declaration (markupdecl above) is
7132
 * contained in the replacement text for a parameter-entity reference,
7133
 * both must be contained in the same replacement text.
7134
 *
7135
 * [ WFC: PEs in Internal Subset ]
7136
 * In the internal DTD subset, parameter-entity references can occur
7137
 * only where markup declarations can occur, not within markup declarations.
7138
 * (This does not apply to references that occur in external parameter
7139
 * entities or to the external subset.)
7140
 */
7141
void
7142
86.6k
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7143
86.6k
    GROW;
7144
86.6k
    if (CUR == '<') {
7145
86.6k
        if (NXT(1) == '!') {
7146
72.5k
      switch (NXT(2)) {
7147
46.8k
          case 'E':
7148
46.8k
        if (NXT(3) == 'L')
7149
11.5k
      xmlParseElementDecl(ctxt);
7150
35.2k
        else if (NXT(3) == 'N')
7151
35.2k
      xmlParseEntityDecl(ctxt);
7152
43
                    else
7153
43
                        SKIP(2);
7154
46.8k
        break;
7155
9.54k
          case 'A':
7156
9.54k
        xmlParseAttributeListDecl(ctxt);
7157
9.54k
        break;
7158
2.44k
          case 'N':
7159
2.44k
        xmlParseNotationDecl(ctxt);
7160
2.44k
        break;
7161
13.5k
          case '-':
7162
13.5k
        xmlParseComment(ctxt);
7163
13.5k
        break;
7164
121
    default:
7165
121
                    xmlFatalErr(ctxt,
7166
121
                                ctxt->inSubset == 2 ?
7167
0
                                    XML_ERR_EXT_SUBSET_NOT_FINISHED :
7168
121
                                    XML_ERR_INT_SUBSET_NOT_FINISHED,
7169
121
                                NULL);
7170
121
                    SKIP(2);
7171
121
        break;
7172
72.5k
      }
7173
72.5k
  } else if (NXT(1) == '?') {
7174
14.1k
      xmlParsePI(ctxt);
7175
14.1k
  }
7176
86.6k
    }
7177
86.6k
}
7178
7179
/**
7180
 * xmlParseTextDecl:
7181
 * @ctxt:  an XML parser context
7182
 *
7183
 * DEPRECATED: Internal function, don't use.
7184
 *
7185
 * parse an XML declaration header for external entities
7186
 *
7187
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7188
 */
7189
7190
void
7191
0
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7192
0
    xmlChar *version;
7193
7194
    /*
7195
     * We know that '<?xml' is here.
7196
     */
7197
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7198
0
  SKIP(5);
7199
0
    } else {
7200
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7201
0
  return;
7202
0
    }
7203
7204
0
    if (SKIP_BLANKS == 0) {
7205
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7206
0
           "Space needed after '<?xml'\n");
7207
0
    }
7208
7209
    /*
7210
     * We may have the VersionInfo here.
7211
     */
7212
0
    version = xmlParseVersionInfo(ctxt);
7213
0
    if (version == NULL) {
7214
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7215
0
        if (version == NULL) {
7216
0
            xmlErrMemory(ctxt);
7217
0
            return;
7218
0
        }
7219
0
    } else {
7220
0
  if (SKIP_BLANKS == 0) {
7221
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7222
0
               "Space needed here\n");
7223
0
  }
7224
0
    }
7225
0
    ctxt->input->version = version;
7226
7227
    /*
7228
     * We must have the encoding declaration
7229
     */
7230
0
    xmlParseEncodingDecl(ctxt);
7231
7232
0
    SKIP_BLANKS;
7233
0
    if ((RAW == '?') && (NXT(1) == '>')) {
7234
0
        SKIP(2);
7235
0
    } else if (RAW == '>') {
7236
        /* Deprecated old WD ... */
7237
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7238
0
  NEXT;
7239
0
    } else {
7240
0
        int c;
7241
7242
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7243
0
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7244
0
            NEXT;
7245
0
            if (c == '>')
7246
0
                break;
7247
0
        }
7248
0
    }
7249
0
}
7250
7251
/**
7252
 * xmlParseExternalSubset:
7253
 * @ctxt:  an XML parser context
7254
 * @ExternalID: the external identifier
7255
 * @SystemID: the system identifier (or URL)
7256
 *
7257
 * DEPRECATED: Internal function, don't use.
7258
 *
7259
 * parse Markup declarations from an external subset
7260
 *
7261
 * [30] extSubset ::= textDecl? extSubsetDecl
7262
 *
7263
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7264
 */
7265
void
7266
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7267
0
                       const xmlChar *SystemID) {
7268
0
    int oldInputNr;
7269
7270
0
    xmlCtxtInitializeLate(ctxt);
7271
7272
0
    xmlDetectEncoding(ctxt);
7273
7274
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7275
0
  xmlParseTextDecl(ctxt);
7276
0
    }
7277
0
    if (ctxt->myDoc == NULL) {
7278
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7279
0
  if (ctxt->myDoc == NULL) {
7280
0
      xmlErrMemory(ctxt);
7281
0
      return;
7282
0
  }
7283
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7284
0
    }
7285
0
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) &&
7286
0
        (xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID) == NULL)) {
7287
0
        xmlErrMemory(ctxt);
7288
0
    }
7289
7290
0
    ctxt->inSubset = 2;
7291
0
    oldInputNr = ctxt->inputNr;
7292
7293
0
    SKIP_BLANKS_PE;
7294
0
    while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) &&
7295
0
           (!PARSER_STOPPED(ctxt))) {
7296
0
  GROW;
7297
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7298
0
            xmlParseConditionalSections(ctxt);
7299
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7300
0
            xmlParseMarkupDecl(ctxt);
7301
0
        } else {
7302
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7303
0
            xmlHaltParser(ctxt);
7304
0
            return;
7305
0
        }
7306
0
        SKIP_BLANKS_PE;
7307
0
        SHRINK;
7308
0
    }
7309
7310
0
    while (ctxt->inputNr > oldInputNr)
7311
0
        xmlPopPE(ctxt);
7312
7313
0
    xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
7314
0
}
7315
7316
/**
7317
 * xmlParseReference:
7318
 * @ctxt:  an XML parser context
7319
 *
7320
 * DEPRECATED: Internal function, don't use.
7321
 *
7322
 * parse and handle entity references in content, depending on the SAX
7323
 * interface, this may end-up in a call to character() if this is a
7324
 * CharRef, a predefined entity, if there is no reference() callback.
7325
 * or if the parser was asked to switch to that mode.
7326
 *
7327
 * Always consumes '&'.
7328
 *
7329
 * [67] Reference ::= EntityRef | CharRef
7330
 */
7331
void
7332
333k
xmlParseReference(xmlParserCtxtPtr ctxt) {
7333
333k
    xmlEntityPtr ent = NULL;
7334
333k
    const xmlChar *name;
7335
333k
    xmlChar *val;
7336
7337
333k
    if (RAW != '&')
7338
0
        return;
7339
7340
    /*
7341
     * Simple case of a CharRef
7342
     */
7343
333k
    if (NXT(1) == '#') {
7344
70.7k
  int i = 0;
7345
70.7k
  xmlChar out[16];
7346
70.7k
  int value = xmlParseCharRef(ctxt);
7347
7348
70.7k
  if (value == 0)
7349
190
      return;
7350
7351
        /*
7352
         * Just encode the value in UTF-8
7353
         */
7354
70.5k
        COPY_BUF(out, i, value);
7355
70.5k
        out[i] = 0;
7356
70.5k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7357
70.5k
            (!ctxt->disableSAX))
7358
70.5k
            ctxt->sax->characters(ctxt->userData, out, i);
7359
70.5k
  return;
7360
70.7k
    }
7361
7362
    /*
7363
     * We are seeing an entity reference
7364
     */
7365
262k
    name = xmlParseEntityRefInternal(ctxt);
7366
262k
    if (name == NULL)
7367
205
        return;
7368
262k
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7369
262k
    if (ent == NULL) {
7370
        /*
7371
         * Create a reference for undeclared entities.
7372
         */
7373
741
        if ((ctxt->replaceEntities == 0) &&
7374
0
            (ctxt->sax != NULL) &&
7375
0
            (ctxt->disableSAX == 0) &&
7376
0
            (ctxt->sax->reference != NULL)) {
7377
0
            ctxt->sax->reference(ctxt->userData, name);
7378
0
        }
7379
741
        return;
7380
741
    }
7381
261k
    if (!ctxt->wellFormed)
7382
0
  return;
7383
7384
    /* special case of predefined entities */
7385
261k
    if ((ent->name == NULL) ||
7386
261k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7387
261k
  val = ent->content;
7388
261k
  if (val == NULL) return;
7389
  /*
7390
   * inline the entity.
7391
   */
7392
261k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7393
261k
      (!ctxt->disableSAX))
7394
261k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7395
261k
  return;
7396
261k
    }
7397
7398
    /*
7399
     * Some users try to parse entities on their own and used to set
7400
     * the renamed "checked" member. Fix the flags to cover this
7401
     * case.
7402
     */
7403
0
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7404
0
        ent->flags |= XML_ENT_PARSED;
7405
7406
    /*
7407
     * The first reference to the entity trigger a parsing phase
7408
     * where the ent->children is filled with the result from
7409
     * the parsing.
7410
     * Note: external parsed entities will not be loaded, it is not
7411
     * required for a non-validating parser, unless the parsing option
7412
     * of validating, or substituting entities were given. Doing so is
7413
     * far more secure as the parser will only process data coming from
7414
     * the document entity by default.
7415
     *
7416
     * FIXME: This doesn't work correctly since entities can be
7417
     * expanded with different namespace declarations in scope.
7418
     * For example:
7419
     *
7420
     * <!DOCTYPE doc [
7421
     *   <!ENTITY ent "<ns:elem/>">
7422
     * ]>
7423
     * <doc>
7424
     *   <decl1 xmlns:ns="urn:ns1">
7425
     *     &ent;
7426
     *   </decl1>
7427
     *   <decl2 xmlns:ns="urn:ns2">
7428
     *     &ent;
7429
     *   </decl2>
7430
     * </doc>
7431
     *
7432
     * Proposed fix:
7433
     *
7434
     * - Ignore current namespace declarations when parsing the
7435
     *   entity. If a prefix can't be resolved, don't report an error
7436
     *   but mark it as unresolved.
7437
     * - Try to resolve these prefixes when expanding the entity.
7438
     *   This will require a specialized version of xmlStaticCopyNode
7439
     *   which can also make use of the namespace hash table to avoid
7440
     *   quadratic behavior.
7441
     *
7442
     * Alternatively, we could simply reparse the entity on each
7443
     * expansion like we already do with custom SAX callbacks.
7444
     * External entity content should be cached in this case.
7445
     */
7446
0
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7447
0
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7448
0
         ((ctxt->replaceEntities) ||
7449
0
          (ctxt->validate)))) {
7450
0
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7451
0
            xmlCtxtParseEntity(ctxt, ent);
7452
0
        } else if (ent->children == NULL) {
7453
            /*
7454
             * Probably running in SAX mode and the callbacks don't
7455
             * build the entity content. Parse the entity again.
7456
             *
7457
             * This will also be triggered in normal tree builder mode
7458
             * if an entity happens to be empty, causing unnecessary
7459
             * reloads. It's hard to come up with a reliable check in
7460
             * which mode we're running.
7461
             */
7462
0
            xmlCtxtParseEntity(ctxt, ent);
7463
0
        }
7464
0
    }
7465
7466
    /*
7467
     * We also check for amplification if entities aren't substituted.
7468
     * They might be expanded later.
7469
     */
7470
0
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7471
0
        return;
7472
7473
0
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7474
0
        return;
7475
7476
0
    if (ctxt->replaceEntities == 0) {
7477
  /*
7478
   * Create a reference
7479
   */
7480
0
        if (ctxt->sax->reference != NULL)
7481
0
      ctxt->sax->reference(ctxt->userData, ent->name);
7482
0
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7483
0
        xmlNodePtr copy, cur;
7484
7485
        /*
7486
         * Seems we are generating the DOM content, copy the tree
7487
   */
7488
0
        cur = ent->children;
7489
7490
        /*
7491
         * Handle first text node with SAX to coalesce text efficiently
7492
         */
7493
0
        if ((cur->type == XML_TEXT_NODE) ||
7494
0
            (cur->type == XML_CDATA_SECTION_NODE)) {
7495
0
            int len = xmlStrlen(cur->content);
7496
7497
0
            if ((cur->type == XML_TEXT_NODE) ||
7498
0
                (ctxt->options & XML_PARSE_NOCDATA)) {
7499
0
                if (ctxt->sax->characters != NULL)
7500
0
                    ctxt->sax->characters(ctxt, cur->content, len);
7501
0
            } else {
7502
0
                if (ctxt->sax->cdataBlock != NULL)
7503
0
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7504
0
            }
7505
7506
0
            cur = cur->next;
7507
0
        }
7508
7509
0
        while (cur != NULL) {
7510
0
            xmlNodePtr last;
7511
7512
            /*
7513
             * Handle last text node with SAX to coalesce text efficiently
7514
             */
7515
0
            if ((cur->next == NULL) &&
7516
0
                ((cur->type == XML_TEXT_NODE) ||
7517
0
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7518
0
                int len = xmlStrlen(cur->content);
7519
7520
0
                if ((cur->type == XML_TEXT_NODE) ||
7521
0
                    (ctxt->options & XML_PARSE_NOCDATA)) {
7522
0
                    if (ctxt->sax->characters != NULL)
7523
0
                        ctxt->sax->characters(ctxt, cur->content, len);
7524
0
                } else {
7525
0
                    if (ctxt->sax->cdataBlock != NULL)
7526
0
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7527
0
                }
7528
7529
0
                break;
7530
0
            }
7531
7532
            /*
7533
             * Reset coalesce buffer stats only for non-text nodes.
7534
             */
7535
0
            ctxt->nodemem = 0;
7536
0
            ctxt->nodelen = 0;
7537
7538
0
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7539
7540
0
            if (copy == NULL) {
7541
0
                xmlErrMemory(ctxt);
7542
0
                break;
7543
0
            }
7544
7545
0
            if (ctxt->parseMode == XML_PARSE_READER) {
7546
                /* Needed for reader */
7547
0
                copy->extra = cur->extra;
7548
                /* Maybe needed for reader */
7549
0
                copy->_private = cur->_private;
7550
0
            }
7551
7552
0
            copy->parent = ctxt->node;
7553
0
            last = ctxt->node->last;
7554
0
            if (last == NULL) {
7555
0
                ctxt->node->children = copy;
7556
0
            } else {
7557
0
                last->next = copy;
7558
0
                copy->prev = last;
7559
0
            }
7560
0
            ctxt->node->last = copy;
7561
7562
0
            cur = cur->next;
7563
0
        }
7564
0
    }
7565
0
}
7566
7567
static void
7568
24.3k
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7569
    /*
7570
     * [ WFC: Entity Declared ]
7571
     * In a document without any DTD, a document with only an
7572
     * internal DTD subset which contains no parameter entity
7573
     * references, or a document with "standalone='yes'", the
7574
     * Name given in the entity reference must match that in an
7575
     * entity declaration, except that well-formed documents
7576
     * need not declare any of the following entities: amp, lt,
7577
     * gt, apos, quot.
7578
     * The declaration of a parameter entity must precede any
7579
     * reference to it.
7580
     * Similarly, the declaration of a general entity must
7581
     * precede any reference to it which appears in a default
7582
     * value in an attribute-list declaration. Note that if
7583
     * entities are declared in the external subset or in
7584
     * external parameter entities, a non-validating processor
7585
     * is not obligated to read and process their declarations;
7586
     * for such documents, the rule that an entity must be
7587
     * declared is a well-formedness constraint only if
7588
     * standalone='yes'.
7589
     */
7590
24.3k
    if ((ctxt->standalone == 1) ||
7591
24.0k
        ((ctxt->hasExternalSubset == 0) &&
7592
23.4k
         (ctxt->hasPErefs == 0))) {
7593
17.7k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7594
17.7k
                          "Entity '%s' not defined\n", name);
7595
17.7k
    } else if (ctxt->validate) {
7596
        /*
7597
         * [ VC: Entity Declared ]
7598
         * In a document with an external subset or external
7599
         * parameter entities with "standalone='no'", ...
7600
         * ... The declaration of a parameter entity must
7601
         * precede any reference to it...
7602
         */
7603
0
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7604
0
                         "Entity '%s' not defined\n", name, NULL);
7605
6.66k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7606
6.66k
               ((ctxt->replaceEntities) &&
7607
6.66k
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7608
        /*
7609
         * Also raise a non-fatal error
7610
         *
7611
         * - if the external subset is loaded and all entity declarations
7612
         *   should be available, or
7613
         * - entity substition was requested without restricting
7614
         *   external entity access.
7615
         */
7616
6.66k
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7617
6.66k
                     "Entity '%s' not defined\n", name);
7618
6.66k
    } else {
7619
0
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7620
0
                      "Entity '%s' not defined\n", name, NULL);
7621
0
    }
7622
7623
24.3k
    ctxt->valid = 0;
7624
24.3k
}
7625
7626
static xmlEntityPtr
7627
415k
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7628
415k
    xmlEntityPtr ent = NULL;
7629
7630
    /*
7631
     * Predefined entities override any extra definition
7632
     */
7633
415k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7634
415k
        ent = xmlGetPredefinedEntity(name);
7635
415k
        if (ent != NULL)
7636
396k
            return(ent);
7637
415k
    }
7638
7639
    /*
7640
     * Ask first SAX for entity resolution, otherwise try the
7641
     * entities which may have stored in the parser context.
7642
     */
7643
18.6k
    if (ctxt->sax != NULL) {
7644
18.6k
  if (ctxt->sax->getEntity != NULL)
7645
18.6k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7646
18.6k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7647
1.10k
      (ctxt->options & XML_PARSE_OLDSAX))
7648
0
      ent = xmlGetPredefinedEntity(name);
7649
18.6k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7650
1.10k
      (ctxt->userData==ctxt)) {
7651
0
      ent = xmlSAX2GetEntity(ctxt, name);
7652
0
  }
7653
18.6k
    }
7654
7655
18.6k
    if (ent == NULL) {
7656
18.6k
        xmlHandleUndeclaredEntity(ctxt, name);
7657
18.6k
    }
7658
7659
    /*
7660
     * [ WFC: Parsed Entity ]
7661
     * An entity reference must not contain the name of an
7662
     * unparsed entity
7663
     */
7664
18.4E
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7665
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7666
0
     "Entity reference to unparsed entity %s\n", name);
7667
0
        ent = NULL;
7668
0
    }
7669
7670
    /*
7671
     * [ WFC: No External Entity References ]
7672
     * Attribute values cannot contain direct or indirect
7673
     * entity references to external entities.
7674
     */
7675
18.4E
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7676
0
        if (inAttr) {
7677
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7678
0
                 "Attribute references external entity '%s'\n", name);
7679
0
            ent = NULL;
7680
0
        }
7681
0
    }
7682
7683
18.6k
    return(ent);
7684
415k
}
7685
7686
/**
7687
 * xmlParseEntityRefInternal:
7688
 * @ctxt:  an XML parser context
7689
 * @inAttr:  whether we are in an attribute value
7690
 *
7691
 * Parse an entity reference. Always consumes '&'.
7692
 *
7693
 * [68] EntityRef ::= '&' Name ';'
7694
 *
7695
 * Returns the name, or NULL in case of error.
7696
 */
7697
static const xmlChar *
7698
714k
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7699
714k
    const xmlChar *name;
7700
7701
714k
    GROW;
7702
7703
714k
    if (RAW != '&')
7704
0
        return(NULL);
7705
714k
    NEXT;
7706
714k
    name = xmlParseName(ctxt);
7707
714k
    if (name == NULL) {
7708
174k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7709
174k
           "xmlParseEntityRef: no name\n");
7710
174k
        return(NULL);
7711
174k
    }
7712
539k
    if (RAW != ';') {
7713
124k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7714
124k
  return(NULL);
7715
124k
    }
7716
415k
    NEXT;
7717
7718
415k
    return(name);
7719
539k
}
7720
7721
/**
7722
 * xmlParseEntityRef:
7723
 * @ctxt:  an XML parser context
7724
 *
7725
 * DEPRECATED: Internal function, don't use.
7726
 *
7727
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7728
 */
7729
xmlEntityPtr
7730
0
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7731
0
    const xmlChar *name;
7732
7733
0
    if (ctxt == NULL)
7734
0
        return(NULL);
7735
7736
0
    name = xmlParseEntityRefInternal(ctxt);
7737
0
    if (name == NULL)
7738
0
        return(NULL);
7739
7740
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7741
0
}
7742
7743
/**
7744
 * xmlParseStringEntityRef:
7745
 * @ctxt:  an XML parser context
7746
 * @str:  a pointer to an index in the string
7747
 *
7748
 * parse ENTITY references declarations, but this version parses it from
7749
 * a string value.
7750
 *
7751
 * [68] EntityRef ::= '&' Name ';'
7752
 *
7753
 * [ WFC: Entity Declared ]
7754
 * In a document without any DTD, a document with only an internal DTD
7755
 * subset which contains no parameter entity references, or a document
7756
 * with "standalone='yes'", the Name given in the entity reference
7757
 * must match that in an entity declaration, except that well-formed
7758
 * documents need not declare any of the following entities: amp, lt,
7759
 * gt, apos, quot.  The declaration of a parameter entity must precede
7760
 * any reference to it.  Similarly, the declaration of a general entity
7761
 * must precede any reference to it which appears in a default value in an
7762
 * attribute-list declaration. Note that if entities are declared in the
7763
 * external subset or in external parameter entities, a non-validating
7764
 * processor is not obligated to read and process their declarations;
7765
 * for such documents, the rule that an entity must be declared is a
7766
 * well-formedness constraint only if standalone='yes'.
7767
 *
7768
 * [ WFC: Parsed Entity ]
7769
 * An entity reference must not contain the name of an unparsed entity
7770
 *
7771
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7772
 * is updated to the current location in the string.
7773
 */
7774
static xmlChar *
7775
0
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7776
0
    xmlChar *name;
7777
0
    const xmlChar *ptr;
7778
0
    xmlChar cur;
7779
7780
0
    if ((str == NULL) || (*str == NULL))
7781
0
        return(NULL);
7782
0
    ptr = *str;
7783
0
    cur = *ptr;
7784
0
    if (cur != '&')
7785
0
  return(NULL);
7786
7787
0
    ptr++;
7788
0
    name = xmlParseStringName(ctxt, &ptr);
7789
0
    if (name == NULL) {
7790
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7791
0
           "xmlParseStringEntityRef: no name\n");
7792
0
  *str = ptr;
7793
0
  return(NULL);
7794
0
    }
7795
0
    if (*ptr != ';') {
7796
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7797
0
        xmlFree(name);
7798
0
  *str = ptr;
7799
0
  return(NULL);
7800
0
    }
7801
0
    ptr++;
7802
7803
0
    *str = ptr;
7804
0
    return(name);
7805
0
}
7806
7807
/**
7808
 * xmlParsePEReference:
7809
 * @ctxt:  an XML parser context
7810
 *
7811
 * DEPRECATED: Internal function, don't use.
7812
 *
7813
 * Parse a parameter entity reference. Always consumes '%'.
7814
 *
7815
 * The entity content is handled directly by pushing it's content as
7816
 * a new input stream.
7817
 *
7818
 * [69] PEReference ::= '%' Name ';'
7819
 *
7820
 * [ WFC: No Recursion ]
7821
 * A parsed entity must not contain a recursive
7822
 * reference to itself, either directly or indirectly.
7823
 *
7824
 * [ WFC: Entity Declared ]
7825
 * In a document without any DTD, a document with only an internal DTD
7826
 * subset which contains no parameter entity references, or a document
7827
 * with "standalone='yes'", ...  ... The declaration of a parameter
7828
 * entity must precede any reference to it...
7829
 *
7830
 * [ VC: Entity Declared ]
7831
 * In a document with an external subset or external parameter entities
7832
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7833
 * must precede any reference to it...
7834
 *
7835
 * [ WFC: In DTD ]
7836
 * Parameter-entity references may only appear in the DTD.
7837
 * NOTE: misleading but this is handled.
7838
 */
7839
void
7840
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7841
6.46k
{
7842
6.46k
    const xmlChar *name;
7843
6.46k
    xmlEntityPtr entity = NULL;
7844
6.46k
    xmlParserInputPtr input;
7845
7846
6.46k
    if (RAW != '%')
7847
0
        return;
7848
6.46k
    NEXT;
7849
6.46k
    name = xmlParseName(ctxt);
7850
6.46k
    if (name == NULL) {
7851
1.17k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7852
1.17k
  return;
7853
1.17k
    }
7854
5.29k
    if (RAW != ';') {
7855
50
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7856
50
        return;
7857
50
    }
7858
7859
5.24k
    NEXT;
7860
7861
    /* Must be set before xmlHandleUndeclaredEntity */
7862
5.24k
    ctxt->hasPErefs = 1;
7863
7864
    /*
7865
     * Request the entity from SAX
7866
     */
7867
5.24k
    if ((ctxt->sax != NULL) &&
7868
5.24k
  (ctxt->sax->getParameterEntity != NULL))
7869
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7870
7871
5.24k
    if (entity == NULL) {
7872
5.24k
        xmlHandleUndeclaredEntity(ctxt, name);
7873
5.24k
    } else {
7874
  /*
7875
   * Internal checking in case the entity quest barfed
7876
   */
7877
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7878
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7879
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7880
0
      "Internal: %%%s; is not a parameter entity\n",
7881
0
        name, NULL);
7882
0
  } else {
7883
0
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7884
0
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7885
0
     ((ctxt->loadsubset == 0) &&
7886
0
      (ctxt->replaceEntities == 0) &&
7887
0
      (ctxt->validate == 0))))
7888
0
    return;
7889
7890
0
            if (entity->flags & XML_ENT_EXPANDING) {
7891
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7892
0
                xmlHaltParser(ctxt);
7893
0
                return;
7894
0
            }
7895
7896
0
      input = xmlNewEntityInputStream(ctxt, entity);
7897
0
      if (xmlCtxtPushInput(ctxt, input) < 0) {
7898
0
                xmlFreeInputStream(input);
7899
0
    return;
7900
0
            }
7901
7902
0
            entity->flags |= XML_ENT_EXPANDING;
7903
7904
0
            GROW;
7905
7906
0
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7907
0
                xmlDetectEncoding(ctxt);
7908
7909
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7910
0
                    (IS_BLANK_CH(NXT(5)))) {
7911
0
                    xmlParseTextDecl(ctxt);
7912
0
                }
7913
0
            }
7914
0
  }
7915
0
    }
7916
5.24k
}
7917
7918
/**
7919
 * xmlLoadEntityContent:
7920
 * @ctxt:  an XML parser context
7921
 * @entity: an unloaded system entity
7922
 *
7923
 * Load the content of an entity.
7924
 *
7925
 * Returns 0 in case of success and -1 in case of failure
7926
 */
7927
static int
7928
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7929
0
    xmlParserInputPtr oldinput, input = NULL;
7930
0
    xmlParserInputPtr *oldinputTab;
7931
0
    const xmlChar *oldencoding;
7932
0
    xmlChar *content = NULL;
7933
0
    xmlResourceType rtype;
7934
0
    size_t length, i;
7935
0
    int oldinputNr, oldinputMax;
7936
0
    int ret = -1;
7937
0
    int res;
7938
7939
0
    if ((ctxt == NULL) || (entity == NULL) ||
7940
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7941
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7942
0
  (entity->content != NULL)) {
7943
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7944
0
              "xmlLoadEntityContent parameter error");
7945
0
        return(-1);
7946
0
    }
7947
7948
0
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7949
0
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7950
0
    else
7951
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7952
7953
0
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7954
0
                            (char *) entity->ExternalID, rtype);
7955
0
    if (input == NULL)
7956
0
        return(-1);
7957
7958
0
    oldinput = ctxt->input;
7959
0
    oldinputNr = ctxt->inputNr;
7960
0
    oldinputMax = ctxt->inputMax;
7961
0
    oldinputTab = ctxt->inputTab;
7962
0
    oldencoding = ctxt->encoding;
7963
7964
0
    ctxt->input = NULL;
7965
0
    ctxt->inputNr = 0;
7966
0
    ctxt->inputMax = 1;
7967
0
    ctxt->encoding = NULL;
7968
0
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7969
0
    if (ctxt->inputTab == NULL) {
7970
0
        xmlErrMemory(ctxt);
7971
0
        xmlFreeInputStream(input);
7972
0
        goto error;
7973
0
    }
7974
7975
0
    xmlBufResetInput(input->buf->buffer, input);
7976
7977
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
7978
0
        xmlFreeInputStream(input);
7979
0
        goto error;
7980
0
    }
7981
7982
0
    xmlDetectEncoding(ctxt);
7983
7984
    /*
7985
     * Parse a possible text declaration first
7986
     */
7987
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7988
0
  xmlParseTextDecl(ctxt);
7989
        /*
7990
         * An XML-1.0 document can't reference an entity not XML-1.0
7991
         */
7992
0
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7993
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7994
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7995
0
                           "Version mismatch between document and entity\n");
7996
0
        }
7997
0
    }
7998
7999
0
    length = input->cur - input->base;
8000
0
    xmlBufShrink(input->buf->buffer, length);
8001
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
8002
8003
0
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
8004
0
        ;
8005
8006
0
    xmlBufResetInput(input->buf->buffer, input);
8007
8008
0
    if (res < 0) {
8009
0
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
8010
0
        goto error;
8011
0
    }
8012
8013
0
    length = xmlBufUse(input->buf->buffer);
8014
0
    if (length > INT_MAX) {
8015
0
        xmlErrMemory(ctxt);
8016
0
        goto error;
8017
0
    }
8018
8019
0
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
8020
0
    if (content == NULL) {
8021
0
        xmlErrMemory(ctxt);
8022
0
        goto error;
8023
0
    }
8024
8025
0
    for (i = 0; i < length; ) {
8026
0
        int clen = length - i;
8027
0
        int c = xmlGetUTF8Char(content + i, &clen);
8028
8029
0
        if ((c < 0) || (!IS_CHAR(c))) {
8030
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8031
0
                              "xmlLoadEntityContent: invalid char value %d\n",
8032
0
                              content[i]);
8033
0
            goto error;
8034
0
        }
8035
0
        i += clen;
8036
0
    }
8037
8038
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
8039
0
    entity->content = content;
8040
0
    entity->length = length;
8041
0
    content = NULL;
8042
0
    ret = 0;
8043
8044
0
error:
8045
0
    while (ctxt->inputNr > 0)
8046
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
8047
0
    xmlFree(ctxt->inputTab);
8048
0
    xmlFree((xmlChar *) ctxt->encoding);
8049
8050
0
    ctxt->input = oldinput;
8051
0
    ctxt->inputNr = oldinputNr;
8052
0
    ctxt->inputMax = oldinputMax;
8053
0
    ctxt->inputTab = oldinputTab;
8054
0
    ctxt->encoding = oldencoding;
8055
8056
0
    xmlFree(content);
8057
8058
0
    return(ret);
8059
0
}
8060
8061
/**
8062
 * xmlParseStringPEReference:
8063
 * @ctxt:  an XML parser context
8064
 * @str:  a pointer to an index in the string
8065
 *
8066
 * parse PEReference declarations
8067
 *
8068
 * [69] PEReference ::= '%' Name ';'
8069
 *
8070
 * [ WFC: No Recursion ]
8071
 * A parsed entity must not contain a recursive
8072
 * reference to itself, either directly or indirectly.
8073
 *
8074
 * [ WFC: Entity Declared ]
8075
 * In a document without any DTD, a document with only an internal DTD
8076
 * subset which contains no parameter entity references, or a document
8077
 * with "standalone='yes'", ...  ... The declaration of a parameter
8078
 * entity must precede any reference to it...
8079
 *
8080
 * [ VC: Entity Declared ]
8081
 * In a document with an external subset or external parameter entities
8082
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8083
 * must precede any reference to it...
8084
 *
8085
 * [ WFC: In DTD ]
8086
 * Parameter-entity references may only appear in the DTD.
8087
 * NOTE: misleading but this is handled.
8088
 *
8089
 * Returns the string of the entity content.
8090
 *         str is updated to the current value of the index
8091
 */
8092
static xmlEntityPtr
8093
1.51k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8094
1.51k
    const xmlChar *ptr;
8095
1.51k
    xmlChar cur;
8096
1.51k
    xmlChar *name;
8097
1.51k
    xmlEntityPtr entity = NULL;
8098
8099
1.51k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8100
1.51k
    ptr = *str;
8101
1.51k
    cur = *ptr;
8102
1.51k
    if (cur != '%')
8103
0
        return(NULL);
8104
1.51k
    ptr++;
8105
1.51k
    name = xmlParseStringName(ctxt, &ptr);
8106
1.51k
    if (name == NULL) {
8107
710
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8108
710
           "xmlParseStringPEReference: no name\n");
8109
710
  *str = ptr;
8110
710
  return(NULL);
8111
710
    }
8112
804
    cur = *ptr;
8113
804
    if (cur != ';') {
8114
332
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8115
332
  xmlFree(name);
8116
332
  *str = ptr;
8117
332
  return(NULL);
8118
332
    }
8119
472
    ptr++;
8120
8121
    /* Must be set before xmlHandleUndeclaredEntity */
8122
472
    ctxt->hasPErefs = 1;
8123
8124
    /*
8125
     * Request the entity from SAX
8126
     */
8127
472
    if ((ctxt->sax != NULL) &&
8128
472
  (ctxt->sax->getParameterEntity != NULL))
8129
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8130
8131
472
    if (entity == NULL) {
8132
472
        xmlHandleUndeclaredEntity(ctxt, name);
8133
472
    } else {
8134
  /*
8135
   * Internal checking in case the entity quest barfed
8136
   */
8137
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8138
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8139
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8140
0
        "%%%s; is not a parameter entity\n",
8141
0
        name, NULL);
8142
0
  }
8143
0
    }
8144
8145
472
    xmlFree(name);
8146
472
    *str = ptr;
8147
472
    return(entity);
8148
804
}
8149
8150
/**
8151
 * xmlParseDocTypeDecl:
8152
 * @ctxt:  an XML parser context
8153
 *
8154
 * DEPRECATED: Internal function, don't use.
8155
 *
8156
 * parse a DOCTYPE declaration
8157
 *
8158
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8159
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8160
 *
8161
 * [ VC: Root Element Type ]
8162
 * The Name in the document type declaration must match the element
8163
 * type of the root element.
8164
 */
8165
8166
void
8167
6.79k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8168
6.79k
    const xmlChar *name = NULL;
8169
6.79k
    xmlChar *ExternalID = NULL;
8170
6.79k
    xmlChar *URI = NULL;
8171
8172
    /*
8173
     * We know that '<!DOCTYPE' has been detected.
8174
     */
8175
6.79k
    SKIP(9);
8176
8177
6.79k
    if (SKIP_BLANKS == 0) {
8178
143
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8179
143
                       "Space required after 'DOCTYPE'\n");
8180
143
    }
8181
8182
    /*
8183
     * Parse the DOCTYPE name.
8184
     */
8185
6.79k
    name = xmlParseName(ctxt);
8186
6.79k
    if (name == NULL) {
8187
16
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8188
16
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8189
16
    }
8190
6.79k
    ctxt->intSubName = name;
8191
8192
6.79k
    SKIP_BLANKS;
8193
8194
    /*
8195
     * Check for SystemID and ExternalID
8196
     */
8197
6.79k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8198
8199
6.79k
    if ((URI != NULL) || (ExternalID != NULL)) {
8200
338
        ctxt->hasExternalSubset = 1;
8201
338
    }
8202
6.79k
    ctxt->extSubURI = URI;
8203
6.79k
    ctxt->extSubSystem = ExternalID;
8204
8205
6.79k
    SKIP_BLANKS;
8206
8207
    /*
8208
     * Create and update the internal subset.
8209
     */
8210
6.79k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8211
0
  (!ctxt->disableSAX))
8212
0
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8213
8214
6.79k
    if ((RAW != '[') && (RAW != '>')) {
8215
197
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8216
197
    }
8217
6.79k
}
8218
8219
/**
8220
 * xmlParseInternalSubset:
8221
 * @ctxt:  an XML parser context
8222
 *
8223
 * parse the internal subset declaration
8224
 *
8225
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8226
 */
8227
8228
static void
8229
6.27k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8230
    /*
8231
     * Is there any DTD definition ?
8232
     */
8233
6.27k
    if (RAW == '[') {
8234
6.27k
        int oldInputNr = ctxt->inputNr;
8235
8236
6.27k
        NEXT;
8237
  /*
8238
   * Parse the succession of Markup declarations and
8239
   * PEReferences.
8240
   * Subsequence (markupdecl | PEReference | S)*
8241
   */
8242
6.27k
  SKIP_BLANKS;
8243
99.3k
  while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) &&
8244
97.0k
               (PARSER_STOPPED(ctxt) == 0)) {
8245
8246
            /*
8247
             * Conditional sections are allowed from external entities included
8248
             * by PE References in the internal subset.
8249
             */
8250
96.1k
            if ((PARSER_EXTERNAL(ctxt)) &&
8251
0
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8252
0
                xmlParseConditionalSections(ctxt);
8253
96.1k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8254
86.6k
          xmlParseMarkupDecl(ctxt);
8255
86.6k
            } else if (RAW == '%') {
8256
6.46k
          xmlParsePEReference(ctxt);
8257
6.46k
            } else {
8258
3.05k
    xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8259
3.05k
                break;
8260
3.05k
            }
8261
93.0k
      SKIP_BLANKS_PE;
8262
93.0k
            SHRINK;
8263
93.0k
            GROW;
8264
93.0k
  }
8265
8266
6.27k
        while (ctxt->inputNr > oldInputNr)
8267
0
            xmlPopPE(ctxt);
8268
8269
6.27k
  if (RAW == ']') {
8270
2.29k
      NEXT;
8271
2.29k
      SKIP_BLANKS;
8272
2.29k
  }
8273
6.27k
    }
8274
8275
    /*
8276
     * We should be at the end of the DOCTYPE declaration.
8277
     */
8278
6.27k
    if ((ctxt->wellFormed) && (RAW != '>')) {
8279
15
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8280
15
  return;
8281
15
    }
8282
6.26k
    NEXT;
8283
6.26k
}
8284
8285
#ifdef LIBXML_SAX1_ENABLED
8286
/**
8287
 * xmlParseAttribute:
8288
 * @ctxt:  an XML parser context
8289
 * @value:  a xmlChar ** used to store the value of the attribute
8290
 *
8291
 * DEPRECATED: Internal function, don't use.
8292
 *
8293
 * parse an attribute
8294
 *
8295
 * [41] Attribute ::= Name Eq AttValue
8296
 *
8297
 * [ WFC: No External Entity References ]
8298
 * Attribute values cannot contain direct or indirect entity references
8299
 * to external entities.
8300
 *
8301
 * [ WFC: No < in Attribute Values ]
8302
 * The replacement text of any entity referred to directly or indirectly in
8303
 * an attribute value (other than "&lt;") must not contain a <.
8304
 *
8305
 * [ VC: Attribute Value Type ]
8306
 * The attribute must have been declared; the value must be of the type
8307
 * declared for it.
8308
 *
8309
 * [25] Eq ::= S? '=' S?
8310
 *
8311
 * With namespace:
8312
 *
8313
 * [NS 11] Attribute ::= QName Eq AttValue
8314
 *
8315
 * Also the case QName == xmlns:??? is handled independently as a namespace
8316
 * definition.
8317
 *
8318
 * Returns the attribute name, and the value in *value.
8319
 */
8320
8321
const xmlChar *
8322
0
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8323
0
    const xmlChar *name;
8324
0
    xmlChar *val;
8325
8326
0
    *value = NULL;
8327
0
    GROW;
8328
0
    name = xmlParseName(ctxt);
8329
0
    if (name == NULL) {
8330
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8331
0
                 "error parsing attribute name\n");
8332
0
        return(NULL);
8333
0
    }
8334
8335
    /*
8336
     * read the value
8337
     */
8338
0
    SKIP_BLANKS;
8339
0
    if (RAW == '=') {
8340
0
        NEXT;
8341
0
  SKIP_BLANKS;
8342
0
  val = xmlParseAttValue(ctxt);
8343
0
    } else {
8344
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8345
0
         "Specification mandates value for attribute %s\n", name);
8346
0
  return(name);
8347
0
    }
8348
8349
    /*
8350
     * Check that xml:lang conforms to the specification
8351
     * No more registered as an error, just generate a warning now
8352
     * since this was deprecated in XML second edition
8353
     */
8354
0
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8355
0
  if (!xmlCheckLanguageID(val)) {
8356
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8357
0
              "Malformed value for xml:lang : %s\n",
8358
0
        val, NULL);
8359
0
  }
8360
0
    }
8361
8362
    /*
8363
     * Check that xml:space conforms to the specification
8364
     */
8365
0
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8366
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8367
0
      *(ctxt->space) = 0;
8368
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8369
0
      *(ctxt->space) = 1;
8370
0
  else {
8371
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8372
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8373
0
                                 val, NULL);
8374
0
  }
8375
0
    }
8376
8377
0
    *value = val;
8378
0
    return(name);
8379
0
}
8380
8381
/**
8382
 * xmlParseStartTag:
8383
 * @ctxt:  an XML parser context
8384
 *
8385
 * DEPRECATED: Internal function, don't use.
8386
 *
8387
 * Parse a start tag. Always consumes '<'.
8388
 *
8389
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8390
 *
8391
 * [ WFC: Unique Att Spec ]
8392
 * No attribute name may appear more than once in the same start-tag or
8393
 * empty-element tag.
8394
 *
8395
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8396
 *
8397
 * [ WFC: Unique Att Spec ]
8398
 * No attribute name may appear more than once in the same start-tag or
8399
 * empty-element tag.
8400
 *
8401
 * With namespace:
8402
 *
8403
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8404
 *
8405
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8406
 *
8407
 * Returns the element name parsed
8408
 */
8409
8410
const xmlChar *
8411
0
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8412
0
    const xmlChar *name;
8413
0
    const xmlChar *attname;
8414
0
    xmlChar *attvalue;
8415
0
    const xmlChar **atts = ctxt->atts;
8416
0
    int nbatts = 0;
8417
0
    int maxatts = ctxt->maxatts;
8418
0
    int i;
8419
8420
0
    if (RAW != '<') return(NULL);
8421
0
    NEXT1;
8422
8423
0
    name = xmlParseName(ctxt);
8424
0
    if (name == NULL) {
8425
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8426
0
       "xmlParseStartTag: invalid element name\n");
8427
0
        return(NULL);
8428
0
    }
8429
8430
    /*
8431
     * Now parse the attributes, it ends up with the ending
8432
     *
8433
     * (S Attribute)* S?
8434
     */
8435
0
    SKIP_BLANKS;
8436
0
    GROW;
8437
8438
0
    while (((RAW != '>') &&
8439
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8440
0
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8441
0
  attname = xmlParseAttribute(ctxt, &attvalue);
8442
0
        if (attname == NULL)
8443
0
      break;
8444
0
        if (attvalue != NULL) {
8445
      /*
8446
       * [ WFC: Unique Att Spec ]
8447
       * No attribute name may appear more than once in the same
8448
       * start-tag or empty-element tag.
8449
       */
8450
0
      for (i = 0; i < nbatts;i += 2) {
8451
0
          if (xmlStrEqual(atts[i], attname)) {
8452
0
        xmlErrAttributeDup(ctxt, NULL, attname);
8453
0
        goto failed;
8454
0
    }
8455
0
      }
8456
      /*
8457
       * Add the pair to atts
8458
       */
8459
0
      if (nbatts + 4 > maxatts) {
8460
0
          const xmlChar **n;
8461
0
                int newSize;
8462
8463
0
                newSize = xmlGrowCapacity(maxatts, sizeof(n[0]) * 2,
8464
0
                                          11, XML_MAX_ATTRS);
8465
0
                if (newSize < 0) {
8466
0
        xmlErrMemory(ctxt);
8467
0
        goto failed;
8468
0
    }
8469
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
8470
0
                if (newSize < 2)
8471
0
                    newSize = 2;
8472
0
#endif
8473
0
          n = xmlRealloc(atts, newSize * sizeof(n[0]) * 2);
8474
0
    if (n == NULL) {
8475
0
        xmlErrMemory(ctxt);
8476
0
        goto failed;
8477
0
    }
8478
0
    atts = n;
8479
0
                maxatts = newSize * 2;
8480
0
    ctxt->atts = atts;
8481
0
    ctxt->maxatts = maxatts;
8482
0
      }
8483
8484
0
      atts[nbatts++] = attname;
8485
0
      atts[nbatts++] = attvalue;
8486
0
      atts[nbatts] = NULL;
8487
0
      atts[nbatts + 1] = NULL;
8488
8489
0
            attvalue = NULL;
8490
0
  }
8491
8492
0
failed:
8493
8494
0
        if (attvalue != NULL)
8495
0
            xmlFree(attvalue);
8496
8497
0
  GROW
8498
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8499
0
      break;
8500
0
  if (SKIP_BLANKS == 0) {
8501
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8502
0
         "attributes construct error\n");
8503
0
  }
8504
0
  SHRINK;
8505
0
        GROW;
8506
0
    }
8507
8508
    /*
8509
     * SAX: Start of Element !
8510
     */
8511
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8512
0
  (!ctxt->disableSAX)) {
8513
0
  if (nbatts > 0)
8514
0
      ctxt->sax->startElement(ctxt->userData, name, atts);
8515
0
  else
8516
0
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8517
0
    }
8518
8519
0
    if (atts != NULL) {
8520
        /* Free only the content strings */
8521
0
        for (i = 1;i < nbatts;i+=2)
8522
0
      if (atts[i] != NULL)
8523
0
         xmlFree((xmlChar *) atts[i]);
8524
0
    }
8525
0
    return(name);
8526
0
}
8527
8528
/**
8529
 * xmlParseEndTag1:
8530
 * @ctxt:  an XML parser context
8531
 * @line:  line of the start tag
8532
 * @nsNr:  number of namespaces on the start tag
8533
 *
8534
 * Parse an end tag. Always consumes '</'.
8535
 *
8536
 * [42] ETag ::= '</' Name S? '>'
8537
 *
8538
 * With namespace
8539
 *
8540
 * [NS 9] ETag ::= '</' QName S? '>'
8541
 */
8542
8543
static void
8544
0
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8545
0
    const xmlChar *name;
8546
8547
0
    GROW;
8548
0
    if ((RAW != '<') || (NXT(1) != '/')) {
8549
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8550
0
           "xmlParseEndTag: '</' not found\n");
8551
0
  return;
8552
0
    }
8553
0
    SKIP(2);
8554
8555
0
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8556
8557
    /*
8558
     * We should definitely be at the ending "S? '>'" part
8559
     */
8560
0
    GROW;
8561
0
    SKIP_BLANKS;
8562
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8563
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8564
0
    } else
8565
0
  NEXT1;
8566
8567
    /*
8568
     * [ WFC: Element Type Match ]
8569
     * The Name in an element's end-tag must match the element type in the
8570
     * start-tag.
8571
     *
8572
     */
8573
0
    if (name != (xmlChar*)1) {
8574
0
        if (name == NULL) name = BAD_CAST "unparsable";
8575
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8576
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
8577
0
                    ctxt->name, line, name);
8578
0
    }
8579
8580
    /*
8581
     * SAX: End of Tag
8582
     */
8583
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8584
0
  (!ctxt->disableSAX))
8585
0
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8586
8587
0
    namePop(ctxt);
8588
0
    spacePop(ctxt);
8589
0
}
8590
8591
/**
8592
 * xmlParseEndTag:
8593
 * @ctxt:  an XML parser context
8594
 *
8595
 * DEPRECATED: Internal function, don't use.
8596
 *
8597
 * parse an end of tag
8598
 *
8599
 * [42] ETag ::= '</' Name S? '>'
8600
 *
8601
 * With namespace
8602
 *
8603
 * [NS 9] ETag ::= '</' QName S? '>'
8604
 */
8605
8606
void
8607
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8608
0
    xmlParseEndTag1(ctxt, 0);
8609
0
}
8610
#endif /* LIBXML_SAX1_ENABLED */
8611
8612
/************************************************************************
8613
 *                  *
8614
 *          SAX 2 specific operations       *
8615
 *                  *
8616
 ************************************************************************/
8617
8618
/**
8619
 * xmlParseQNameHashed:
8620
 * @ctxt:  an XML parser context
8621
 * @prefix:  pointer to store the prefix part
8622
 *
8623
 * parse an XML Namespace QName
8624
 *
8625
 * [6]  QName  ::= (Prefix ':')? LocalPart
8626
 * [7]  Prefix  ::= NCName
8627
 * [8]  LocalPart  ::= NCName
8628
 *
8629
 * Returns the Name parsed or NULL
8630
 */
8631
8632
static xmlHashedString
8633
53.2M
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8634
53.2M
    xmlHashedString l, p;
8635
53.2M
    int start, isNCName = 0;
8636
8637
53.2M
    l.name = NULL;
8638
53.2M
    p.name = NULL;
8639
8640
53.2M
    GROW;
8641
53.2M
    start = CUR_PTR - BASE_PTR;
8642
8643
53.2M
    l = xmlParseNCName(ctxt);
8644
53.2M
    if (l.name != NULL) {
8645
53.0M
        isNCName = 1;
8646
53.0M
        if (CUR == ':') {
8647
20.2M
            NEXT;
8648
20.2M
            p = l;
8649
20.2M
            l = xmlParseNCName(ctxt);
8650
20.2M
        }
8651
53.0M
    }
8652
53.2M
    if ((l.name == NULL) || (CUR == ':')) {
8653
476k
        xmlChar *tmp;
8654
8655
476k
        l.name = NULL;
8656
476k
        p.name = NULL;
8657
476k
        if ((isNCName == 0) && (CUR != ':'))
8658
57.9k
            return(l);
8659
418k
        tmp = xmlParseNmtoken(ctxt);
8660
418k
        if (tmp != NULL)
8661
353k
            xmlFree(tmp);
8662
418k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8663
418k
                                CUR_PTR - (BASE_PTR + start));
8664
418k
        if (l.name == NULL) {
8665
0
            xmlErrMemory(ctxt);
8666
0
            return(l);
8667
0
        }
8668
418k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8669
418k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8670
418k
    }
8671
8672
53.1M
    *prefix = p;
8673
53.1M
    return(l);
8674
53.2M
}
8675
8676
/**
8677
 * xmlParseQName:
8678
 * @ctxt:  an XML parser context
8679
 * @prefix:  pointer to store the prefix part
8680
 *
8681
 * parse an XML Namespace QName
8682
 *
8683
 * [6]  QName  ::= (Prefix ':')? LocalPart
8684
 * [7]  Prefix  ::= NCName
8685
 * [8]  LocalPart  ::= NCName
8686
 *
8687
 * Returns the Name parsed or NULL
8688
 */
8689
8690
static const xmlChar *
8691
10.5k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8692
10.5k
    xmlHashedString n, p;
8693
8694
10.5k
    n = xmlParseQNameHashed(ctxt, &p);
8695
10.5k
    if (n.name == NULL)
8696
437
        return(NULL);
8697
10.1k
    *prefix = p.name;
8698
10.1k
    return(n.name);
8699
10.5k
}
8700
8701
/**
8702
 * xmlParseQNameAndCompare:
8703
 * @ctxt:  an XML parser context
8704
 * @name:  the localname
8705
 * @prefix:  the prefix, if any.
8706
 *
8707
 * parse an XML name and compares for match
8708
 * (specialized for endtag parsing)
8709
 *
8710
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8711
 * and the name for mismatch
8712
 */
8713
8714
static const xmlChar *
8715
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8716
5.48M
                        xmlChar const *prefix) {
8717
5.48M
    const xmlChar *cmp;
8718
5.48M
    const xmlChar *in;
8719
5.48M
    const xmlChar *ret;
8720
5.48M
    const xmlChar *prefix2;
8721
8722
5.48M
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8723
8724
5.48M
    GROW;
8725
5.48M
    in = ctxt->input->cur;
8726
8727
5.48M
    cmp = prefix;
8728
16.5M
    while (*in != 0 && *in == *cmp) {
8729
11.0M
  ++in;
8730
11.0M
  ++cmp;
8731
11.0M
    }
8732
5.48M
    if ((*cmp == 0) && (*in == ':')) {
8733
5.48M
        in++;
8734
5.48M
  cmp = name;
8735
37.1M
  while (*in != 0 && *in == *cmp) {
8736
31.6M
      ++in;
8737
31.6M
      ++cmp;
8738
31.6M
  }
8739
5.48M
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8740
      /* success */
8741
5.47M
            ctxt->input->col += in - ctxt->input->cur;
8742
5.47M
      ctxt->input->cur = in;
8743
5.47M
      return((const xmlChar*) 1);
8744
5.47M
  }
8745
5.48M
    }
8746
    /*
8747
     * all strings coms from the dictionary, equality can be done directly
8748
     */
8749
10.5k
    ret = xmlParseQName (ctxt, &prefix2);
8750
10.5k
    if (ret == NULL)
8751
437
        return(NULL);
8752
10.1k
    if ((ret == name) && (prefix == prefix2))
8753
180
  return((const xmlChar*) 1);
8754
9.95k
    return ret;
8755
10.1k
}
8756
8757
/**
8758
 * xmlParseAttribute2:
8759
 * @ctxt:  an XML parser context
8760
 * @pref:  the element prefix
8761
 * @elem:  the element name
8762
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
8763
 * @value:  a xmlChar ** used to store the value of the attribute
8764
 * @len:  an int * to save the length of the attribute
8765
 * @alloc:  an int * to indicate if the attribute was allocated
8766
 *
8767
 * parse an attribute in the new SAX2 framework.
8768
 *
8769
 * Returns the attribute name, and the value in *value, .
8770
 */
8771
8772
static xmlHashedString
8773
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8774
                   const xmlChar * pref, const xmlChar * elem,
8775
                   xmlHashedString * hprefix, xmlChar ** value,
8776
                   int *len, int *alloc)
8777
22.2M
{
8778
22.2M
    xmlHashedString hname;
8779
22.2M
    const xmlChar *prefix, *name;
8780
22.2M
    xmlChar *val = NULL, *internal_val = NULL;
8781
22.2M
    int normalize = 0;
8782
22.2M
    int isNamespace;
8783
8784
22.2M
    *value = NULL;
8785
22.2M
    GROW;
8786
22.2M
    hname = xmlParseQNameHashed(ctxt, hprefix);
8787
22.2M
    if (hname.name == NULL) {
8788
46.8k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8789
46.8k
                       "error parsing attribute name\n");
8790
46.8k
        return(hname);
8791
46.8k
    }
8792
22.1M
    name = hname.name;
8793
22.1M
    prefix = hprefix->name;
8794
8795
    /*
8796
     * get the type if needed
8797
     */
8798
22.1M
    if (ctxt->attsSpecial != NULL) {
8799
23.3k
        int type;
8800
8801
23.3k
        type = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
8802
23.3k
                                              prefix, name));
8803
23.3k
        if (type != 0)
8804
10.6k
            normalize = 1;
8805
23.3k
    }
8806
8807
    /*
8808
     * read the value
8809
     */
8810
22.1M
    SKIP_BLANKS;
8811
22.1M
    if (RAW == '=') {
8812
22.1M
        NEXT;
8813
22.1M
        SKIP_BLANKS;
8814
22.1M
        isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8815
21.8M
                       (prefix == ctxt->str_xmlns));
8816
22.1M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize,
8817
22.1M
                                       isNamespace);
8818
22.1M
        if (val == NULL)
8819
7.12k
            goto error;
8820
22.1M
    } else {
8821
15.3k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8822
15.3k
                          "Specification mandates value for attribute %s\n",
8823
15.3k
                          name);
8824
15.3k
        goto error;
8825
15.3k
    }
8826
8827
22.1M
    if (prefix == ctxt->str_xml) {
8828
        /*
8829
         * Check that xml:lang conforms to the specification
8830
         * No more registered as an error, just generate a warning now
8831
         * since this was deprecated in XML second edition
8832
         */
8833
100k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8834
0
            internal_val = xmlStrndup(val, *len);
8835
0
            if (internal_val == NULL)
8836
0
                goto mem_error;
8837
0
            if (!xmlCheckLanguageID(internal_val)) {
8838
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8839
0
                              "Malformed value for xml:lang : %s\n",
8840
0
                              internal_val, NULL);
8841
0
            }
8842
0
        }
8843
8844
        /*
8845
         * Check that xml:space conforms to the specification
8846
         */
8847
100k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8848
82.4k
            internal_val = xmlStrndup(val, *len);
8849
82.4k
            if (internal_val == NULL)
8850
0
                goto mem_error;
8851
82.4k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8852
51
                *(ctxt->space) = 0;
8853
82.3k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8854
81.2k
                *(ctxt->space) = 1;
8855
1.12k
            else {
8856
1.12k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8857
1.12k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8858
1.12k
                              internal_val, NULL);
8859
1.12k
            }
8860
82.4k
        }
8861
100k
        if (internal_val) {
8862
82.4k
            xmlFree(internal_val);
8863
82.4k
        }
8864
100k
    }
8865
8866
22.1M
    *value = val;
8867
22.1M
    return (hname);
8868
8869
0
mem_error:
8870
0
    xmlErrMemory(ctxt);
8871
22.1k
error:
8872
22.1k
    if ((val != NULL) && (*alloc != 0))
8873
0
        xmlFree(val);
8874
22.1k
    return(hname);
8875
0
}
8876
8877
/**
8878
 * xmlAttrHashInsert:
8879
 * @ctxt: parser context
8880
 * @size: size of the hash table
8881
 * @name: attribute name
8882
 * @uri: namespace uri
8883
 * @hashValue: combined hash value of name and uri
8884
 * @aindex: attribute index (this is a multiple of 5)
8885
 *
8886
 * Inserts a new attribute into the hash table.
8887
 *
8888
 * Returns INT_MAX if no existing attribute was found, the attribute
8889
 * index if an attribute was found, -1 if a memory allocation failed.
8890
 */
8891
static int
8892
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8893
16.3M
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8894
16.3M
    xmlAttrHashBucket *table = ctxt->attrHash;
8895
16.3M
    xmlAttrHashBucket *bucket;
8896
16.3M
    unsigned hindex;
8897
8898
16.3M
    hindex = hashValue & (size - 1);
8899
16.3M
    bucket = &table[hindex];
8900
8901
19.1M
    while (bucket->index >= 0) {
8902
2.82M
        const xmlChar **atts = &ctxt->atts[bucket->index];
8903
8904
2.82M
        if (name == atts[0]) {
8905
150k
            int nsIndex = XML_PTR_TO_INT(atts[2]);
8906
8907
150k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8908
150k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8909
2.02k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8910
42.9k
                return(bucket->index);
8911
150k
        }
8912
8913
2.78M
        hindex++;
8914
2.78M
        bucket++;
8915
2.78M
        if (hindex >= size) {
8916
985k
            hindex = 0;
8917
985k
            bucket = table;
8918
985k
        }
8919
2.78M
    }
8920
8921
16.3M
    bucket->index = aindex;
8922
8923
16.3M
    return(INT_MAX);
8924
16.3M
}
8925
8926
static int
8927
xmlAttrHashInsertQName(xmlParserCtxtPtr ctxt, unsigned size,
8928
                       const xmlChar *name, const xmlChar *prefix,
8929
23
                       unsigned hashValue, int aindex) {
8930
23
    xmlAttrHashBucket *table = ctxt->attrHash;
8931
23
    xmlAttrHashBucket *bucket;
8932
23
    unsigned hindex;
8933
8934
23
    hindex = hashValue & (size - 1);
8935
23
    bucket = &table[hindex];
8936
8937
27
    while (bucket->index >= 0) {
8938
17
        const xmlChar **atts = &ctxt->atts[bucket->index];
8939
8940
17
        if ((name == atts[0]) && (prefix == atts[1]))
8941
13
            return(bucket->index);
8942
8943
4
        hindex++;
8944
4
        bucket++;
8945
4
        if (hindex >= size) {
8946
0
            hindex = 0;
8947
0
            bucket = table;
8948
0
        }
8949
4
    }
8950
8951
10
    bucket->index = aindex;
8952
8953
10
    return(INT_MAX);
8954
23
}
8955
/**
8956
 * xmlParseStartTag2:
8957
 * @ctxt:  an XML parser context
8958
 *
8959
 * Parse a start tag. Always consumes '<'.
8960
 *
8961
 * This routine is called when running SAX2 parsing
8962
 *
8963
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8964
 *
8965
 * [ WFC: Unique Att Spec ]
8966
 * No attribute name may appear more than once in the same start-tag or
8967
 * empty-element tag.
8968
 *
8969
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8970
 *
8971
 * [ WFC: Unique Att Spec ]
8972
 * No attribute name may appear more than once in the same start-tag or
8973
 * empty-element tag.
8974
 *
8975
 * With namespace:
8976
 *
8977
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8978
 *
8979
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8980
 *
8981
 * Returns the element name parsed
8982
 */
8983
8984
static const xmlChar *
8985
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8986
31.0M
                  const xmlChar **URI, int *nbNsPtr) {
8987
31.0M
    xmlHashedString hlocalname;
8988
31.0M
    xmlHashedString hprefix;
8989
31.0M
    xmlHashedString hattname;
8990
31.0M
    xmlHashedString haprefix;
8991
31.0M
    const xmlChar *localname;
8992
31.0M
    const xmlChar *prefix;
8993
31.0M
    const xmlChar *attname;
8994
31.0M
    const xmlChar *aprefix;
8995
31.0M
    const xmlChar *uri;
8996
31.0M
    xmlChar *attvalue = NULL;
8997
31.0M
    const xmlChar **atts = ctxt->atts;
8998
31.0M
    unsigned attrHashSize = 0;
8999
31.0M
    int maxatts = ctxt->maxatts;
9000
31.0M
    int nratts, nbatts, nbdef;
9001
31.0M
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
9002
31.0M
    int alloc = 0;
9003
31.0M
    int numNsErr = 0;
9004
31.0M
    int numDupErr = 0;
9005
9006
31.0M
    if (RAW != '<') return(NULL);
9007
31.0M
    NEXT1;
9008
9009
31.0M
    nbatts = 0;
9010
31.0M
    nratts = 0;
9011
31.0M
    nbdef = 0;
9012
31.0M
    nbNs = 0;
9013
31.0M
    nbTotalDef = 0;
9014
31.0M
    attval = 0;
9015
9016
31.0M
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
9017
0
        xmlErrMemory(ctxt);
9018
0
        return(NULL);
9019
0
    }
9020
9021
31.0M
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
9022
31.0M
    if (hlocalname.name == NULL) {
9023
10.6k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9024
10.6k
           "StartTag: invalid element name\n");
9025
10.6k
        return(NULL);
9026
10.6k
    }
9027
31.0M
    localname = hlocalname.name;
9028
31.0M
    prefix = hprefix.name;
9029
9030
    /*
9031
     * Now parse the attributes, it ends up with the ending
9032
     *
9033
     * (S Attribute)* S?
9034
     */
9035
31.0M
    SKIP_BLANKS;
9036
31.0M
    GROW;
9037
9038
    /*
9039
     * The ctxt->atts array will be ultimately passed to the SAX callback
9040
     * containing five xmlChar pointers for each attribute:
9041
     *
9042
     * [0] attribute name
9043
     * [1] attribute prefix
9044
     * [2] namespace URI
9045
     * [3] attribute value
9046
     * [4] end of attribute value
9047
     *
9048
     * To save memory, we reuse this array temporarily and store integers
9049
     * in these pointer variables.
9050
     *
9051
     * [0] attribute name
9052
     * [1] attribute prefix
9053
     * [2] hash value of attribute prefix, and later namespace index
9054
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
9055
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
9056
     *
9057
     * The ctxt->attallocs array contains an additional unsigned int for
9058
     * each attribute, containing the hash value of the attribute name
9059
     * and the alloc flag in bit 31.
9060
     */
9061
9062
43.0M
    while (((RAW != '>') &&
9063
24.4M
     ((RAW != '/') || (NXT(1) != '>')) &&
9064
22.2M
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
9065
22.2M
  int len = -1;
9066
9067
22.2M
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
9068
22.2M
                                          &haprefix, &attvalue, &len,
9069
22.2M
                                          &alloc);
9070
22.2M
        if (hattname.name == NULL)
9071
46.8k
      break;
9072
22.1M
        if (attvalue == NULL)
9073
22.1k
            goto next_attr;
9074
22.1M
        attname = hattname.name;
9075
22.1M
        aprefix = haprefix.name;
9076
22.1M
  if (len < 0) len = xmlStrlen(attvalue);
9077
9078
22.1M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9079
292k
            xmlHashedString huri;
9080
292k
            xmlURIPtr parsedUri;
9081
9082
292k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9083
292k
            uri = huri.name;
9084
292k
            if (uri == NULL) {
9085
0
                xmlErrMemory(ctxt);
9086
0
                goto next_attr;
9087
0
            }
9088
292k
            if (*uri != 0) {
9089
289k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9090
0
                    xmlErrMemory(ctxt);
9091
0
                    goto next_attr;
9092
0
                }
9093
289k
                if (parsedUri == NULL) {
9094
91.2k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9095
91.2k
                             "xmlns: '%s' is not a valid URI\n",
9096
91.2k
                                       uri, NULL, NULL);
9097
198k
                } else {
9098
198k
                    if (parsedUri->scheme == NULL) {
9099
57.5k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9100
57.5k
                                  "xmlns: URI %s is not absolute\n",
9101
57.5k
                                  uri, NULL, NULL);
9102
57.5k
                    }
9103
198k
                    xmlFreeURI(parsedUri);
9104
198k
                }
9105
289k
                if (uri == ctxt->str_xml_ns) {
9106
76
                    if (attname != ctxt->str_xml) {
9107
76
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9108
76
                     "xml namespace URI cannot be the default namespace\n",
9109
76
                                 NULL, NULL, NULL);
9110
76
                    }
9111
76
                    goto next_attr;
9112
76
                }
9113
289k
                if ((len == 29) &&
9114
7.93k
                    (xmlStrEqual(uri,
9115
7.93k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9116
229
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9117
229
                         "reuse of the xmlns namespace name is forbidden\n",
9118
229
                             NULL, NULL, NULL);
9119
229
                    goto next_attr;
9120
229
                }
9121
289k
            }
9122
9123
292k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9124
203k
                nbNs++;
9125
21.8M
        } else if (aprefix == ctxt->str_xmlns) {
9126
853k
            xmlHashedString huri;
9127
853k
            xmlURIPtr parsedUri;
9128
9129
853k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9130
853k
            uri = huri.name;
9131
853k
            if (uri == NULL) {
9132
0
                xmlErrMemory(ctxt);
9133
0
                goto next_attr;
9134
0
            }
9135
9136
853k
            if (attname == ctxt->str_xml) {
9137
217
                if (uri != ctxt->str_xml_ns) {
9138
104
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9139
104
                             "xml namespace prefix mapped to wrong URI\n",
9140
104
                             NULL, NULL, NULL);
9141
104
                }
9142
                /*
9143
                 * Do not keep a namespace definition node
9144
                 */
9145
217
                goto next_attr;
9146
217
            }
9147
852k
            if (uri == ctxt->str_xml_ns) {
9148
15
                if (attname != ctxt->str_xml) {
9149
15
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9150
15
                             "xml namespace URI mapped to wrong prefix\n",
9151
15
                             NULL, NULL, NULL);
9152
15
                }
9153
15
                goto next_attr;
9154
15
            }
9155
852k
            if (attname == ctxt->str_xmlns) {
9156
223
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9157
223
                         "redefinition of the xmlns prefix is forbidden\n",
9158
223
                         NULL, NULL, NULL);
9159
223
                goto next_attr;
9160
223
            }
9161
852k
            if ((len == 29) &&
9162
16.9k
                (xmlStrEqual(uri,
9163
16.9k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9164
42
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9165
42
                         "reuse of the xmlns namespace name is forbidden\n",
9166
42
                         NULL, NULL, NULL);
9167
42
                goto next_attr;
9168
42
            }
9169
852k
            if ((uri == NULL) || (uri[0] == 0)) {
9170
369
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9171
369
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9172
369
                              attname, NULL, NULL);
9173
369
                goto next_attr;
9174
852k
            } else {
9175
852k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9176
0
                    xmlErrMemory(ctxt);
9177
0
                    goto next_attr;
9178
0
                }
9179
852k
                if (parsedUri == NULL) {
9180
87.6k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9181
87.6k
                         "xmlns:%s: '%s' is not a valid URI\n",
9182
87.6k
                                       attname, uri, NULL);
9183
764k
                } else {
9184
764k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9185
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9186
0
                                  "xmlns:%s: URI %s is not absolute\n",
9187
0
                                  attname, uri, NULL);
9188
0
                    }
9189
764k
                    xmlFreeURI(parsedUri);
9190
764k
                }
9191
852k
            }
9192
9193
852k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9194
797k
                nbNs++;
9195
20.9M
        } else {
9196
            /*
9197
             * Populate attributes array, see above for repurposing
9198
             * of xmlChar pointers.
9199
             */
9200
20.9M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9201
577k
                int res = xmlCtxtGrowAttrs(ctxt);
9202
9203
577k
                maxatts = ctxt->maxatts;
9204
577k
                atts = ctxt->atts;
9205
9206
577k
                if (res < 0)
9207
0
                    goto next_attr;
9208
577k
            }
9209
20.9M
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9210
20.9M
                                        ((unsigned) alloc << 31);
9211
20.9M
            atts[nbatts++] = attname;
9212
20.9M
            atts[nbatts++] = aprefix;
9213
20.9M
            atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9214
20.9M
            if (alloc) {
9215
83.6k
                atts[nbatts++] = attvalue;
9216
83.6k
                attvalue += len;
9217
83.6k
                atts[nbatts++] = attvalue;
9218
20.9M
            } else {
9219
                /*
9220
                 * attvalue points into the input buffer which can be
9221
                 * reallocated. Store differences to input->base instead.
9222
                 * The pointers will be reconstructed later.
9223
                 */
9224
20.9M
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9225
20.9M
                attvalue += len;
9226
20.9M
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9227
20.9M
            }
9228
            /*
9229
             * tag if some deallocation is needed
9230
             */
9231
20.9M
            if (alloc != 0) attval = 1;
9232
20.9M
            attvalue = NULL; /* moved into atts */
9233
20.9M
        }
9234
9235
22.1M
next_attr:
9236
22.1M
        if ((attvalue != NULL) && (alloc != 0)) {
9237
89.8k
            xmlFree(attvalue);
9238
89.8k
            attvalue = NULL;
9239
89.8k
        }
9240
9241
22.1M
  GROW
9242
22.1M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9243
10.1M
      break;
9244
12.0M
  if (SKIP_BLANKS == 0) {
9245
29.7k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9246
29.7k
         "attributes construct error\n");
9247
29.7k
      break;
9248
29.7k
  }
9249
12.0M
        GROW;
9250
12.0M
    }
9251
9252
    /*
9253
     * Namespaces from default attributes
9254
     */
9255
31.0M
    if (ctxt->attsDefault != NULL) {
9256
119k
        xmlDefAttrsPtr defaults;
9257
9258
119k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9259
119k
  if (defaults != NULL) {
9260
794k
      for (i = 0; i < defaults->nbAttrs; i++) {
9261
708k
                xmlDefAttr *attr = &defaults->attrs[i];
9262
9263
708k
          attname = attr->name.name;
9264
708k
    aprefix = attr->prefix.name;
9265
9266
708k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9267
23.5k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9268
9269
23.5k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9270
19.4k
                        nbNs++;
9271
685k
    } else if (aprefix == ctxt->str_xmlns) {
9272
306k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9273
9274
306k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9275
306k
                                      NULL, 1) > 0)
9276
306k
                        nbNs++;
9277
378k
    } else {
9278
378k
                    if (nratts + nbTotalDef >= XML_MAX_ATTRS) {
9279
0
                        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
9280
0
                                    "Maximum number of attributes exceeded");
9281
0
                        break;
9282
0
                    }
9283
378k
                    nbTotalDef += 1;
9284
378k
                }
9285
708k
      }
9286
86.0k
  }
9287
119k
    }
9288
9289
    /*
9290
     * Resolve attribute namespaces
9291
     */
9292
51.9M
    for (i = 0; i < nbatts; i += 5) {
9293
20.9M
        attname = atts[i];
9294
20.9M
        aprefix = atts[i+1];
9295
9296
        /*
9297
  * The default namespace does not apply to attribute names.
9298
  */
9299
20.9M
  if (aprefix == NULL) {
9300
14.0M
            nsIndex = NS_INDEX_EMPTY;
9301
14.0M
        } else if (aprefix == ctxt->str_xml) {
9302
100k
            nsIndex = NS_INDEX_XML;
9303
6.82M
        } else {
9304
6.82M
            haprefix.name = aprefix;
9305
6.82M
            haprefix.hashValue = (size_t) atts[i+2];
9306
6.82M
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9307
9308
6.82M
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9309
357k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9310
357k
        "Namespace prefix %s for %s on %s is not defined\n",
9311
357k
        aprefix, attname, localname);
9312
357k
                nsIndex = NS_INDEX_EMPTY;
9313
357k
            }
9314
6.82M
        }
9315
9316
20.9M
        atts[i+2] = XML_INT_TO_PTR(nsIndex);
9317
20.9M
    }
9318
9319
    /*
9320
     * Maximum number of attributes including default attributes.
9321
     */
9322
31.0M
    maxAtts = nratts + nbTotalDef;
9323
9324
    /*
9325
     * Verify that attribute names are unique.
9326
     */
9327
31.0M
    if (maxAtts > 1) {
9328
5.52M
        attrHashSize = 4;
9329
8.71M
        while (attrHashSize / 2 < (unsigned) maxAtts)
9330
3.18M
            attrHashSize *= 2;
9331
9332
5.52M
        if (attrHashSize > ctxt->attrHashMax) {
9333
235k
            xmlAttrHashBucket *tmp;
9334
9335
235k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9336
235k
            if (tmp == NULL) {
9337
0
                xmlErrMemory(ctxt);
9338
0
                goto done;
9339
0
            }
9340
9341
235k
            ctxt->attrHash = tmp;
9342
235k
            ctxt->attrHashMax = attrHashSize;
9343
235k
        }
9344
9345
5.52M
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9346
9347
21.8M
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9348
16.3M
            const xmlChar *nsuri;
9349
16.3M
            unsigned hashValue, nameHashValue, uriHashValue;
9350
16.3M
            int res;
9351
9352
16.3M
            attname = atts[i];
9353
16.3M
            aprefix = atts[i+1];
9354
16.3M
            nsIndex = XML_PTR_TO_INT(atts[i+2]);
9355
            /* Hash values always have bit 31 set, see dict.c */
9356
16.3M
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9357
9358
16.3M
            if (nsIndex == NS_INDEX_EMPTY) {
9359
                /*
9360
                 * Prefix with empty namespace means an undeclared
9361
                 * prefix which was already reported above.
9362
                 */
9363
11.4M
                if (aprefix != NULL)
9364
295k
                    continue;
9365
11.1M
                nsuri = NULL;
9366
11.1M
                uriHashValue = URI_HASH_EMPTY;
9367
11.1M
            } else if (nsIndex == NS_INDEX_XML) {
9368
2.20k
                nsuri = ctxt->str_xml_ns;
9369
2.20k
                uriHashValue = URI_HASH_XML;
9370
4.91M
            } else {
9371
4.91M
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9372
4.91M
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9373
4.91M
            }
9374
9375
16.0M
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9376
16.0M
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9377
16.0M
                                    hashValue, i);
9378
16.0M
            if (res < 0)
9379
0
                continue;
9380
9381
            /*
9382
             * [ WFC: Unique Att Spec ]
9383
             * No attribute name may appear more than once in the same
9384
             * start-tag or empty-element tag.
9385
             * As extended by the Namespace in XML REC.
9386
             */
9387
16.0M
            if (res < INT_MAX) {
9388
35.5k
                if (aprefix == atts[res+1]) {
9389
35.4k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9390
35.4k
                    numDupErr += 1;
9391
35.4k
                } else {
9392
135
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9393
135
                             "Namespaced Attribute %s in '%s' redefined\n",
9394
135
                             attname, nsuri, NULL);
9395
135
                    numNsErr += 1;
9396
135
                }
9397
35.5k
            }
9398
16.0M
        }
9399
5.52M
    }
9400
9401
    /*
9402
     * Default attributes
9403
     */
9404
31.0M
    if (ctxt->attsDefault != NULL) {
9405
119k
        xmlDefAttrsPtr defaults;
9406
9407
119k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9408
119k
  if (defaults != NULL) {
9409
794k
      for (i = 0; i < defaults->nbAttrs; i++) {
9410
708k
                xmlDefAttr *attr = &defaults->attrs[i];
9411
708k
                const xmlChar *nsuri = NULL;
9412
708k
                unsigned hashValue, uriHashValue = 0;
9413
708k
                int res;
9414
9415
708k
          attname = attr->name.name;
9416
708k
    aprefix = attr->prefix.name;
9417
9418
708k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9419
23.5k
                    continue;
9420
685k
    if (aprefix == ctxt->str_xmlns)
9421
306k
                    continue;
9422
9423
378k
                if (aprefix == NULL) {
9424
217k
                    nsIndex = NS_INDEX_EMPTY;
9425
217k
                    nsuri = NULL;
9426
217k
                    uriHashValue = URI_HASH_EMPTY;
9427
217k
                } else if (aprefix == ctxt->str_xml) {
9428
37.7k
                    nsIndex = NS_INDEX_XML;
9429
37.7k
                    nsuri = ctxt->str_xml_ns;
9430
37.7k
                    uriHashValue = URI_HASH_XML;
9431
123k
                } else {
9432
123k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9433
123k
                    if ((nsIndex == INT_MAX) ||
9434
122k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9435
122k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9436
122k
                                 "Namespace prefix %s for %s on %s is not "
9437
122k
                                 "defined\n",
9438
122k
                                 aprefix, attname, localname);
9439
122k
                        nsIndex = NS_INDEX_EMPTY;
9440
122k
                        nsuri = NULL;
9441
122k
                        uriHashValue = URI_HASH_EMPTY;
9442
122k
                    } else {
9443
1.03k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9444
1.03k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9445
1.03k
                    }
9446
123k
                }
9447
9448
                /*
9449
                 * Check whether the attribute exists
9450
                 */
9451
378k
                if (maxAtts > 1) {
9452
359k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9453
359k
                                                   uriHashValue);
9454
359k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9455
359k
                                            hashValue, nbatts);
9456
359k
                    if (res < 0)
9457
0
                        continue;
9458
359k
                    if (res < INT_MAX) {
9459
7.36k
                        if (aprefix == atts[res+1])
9460
761
                            continue;
9461
6.60k
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9462
6.60k
                                 "Namespaced Attribute %s in '%s' redefined\n",
9463
6.60k
                                 attname, nsuri, NULL);
9464
6.60k
                    }
9465
359k
                }
9466
9467
378k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9468
9469
378k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9470
5.87k
                    res = xmlCtxtGrowAttrs(ctxt);
9471
9472
5.87k
                    maxatts = ctxt->maxatts;
9473
5.87k
                    atts = ctxt->atts;
9474
9475
5.87k
                    if (res < 0) {
9476
0
                        localname = NULL;
9477
0
                        goto done;
9478
0
                    }
9479
5.87k
                }
9480
9481
378k
                atts[nbatts++] = attname;
9482
378k
                atts[nbatts++] = aprefix;
9483
378k
                atts[nbatts++] = XML_INT_TO_PTR(nsIndex);
9484
378k
                atts[nbatts++] = attr->value.name;
9485
378k
                atts[nbatts++] = attr->valueEnd;
9486
378k
                if ((ctxt->standalone == 1) && (attr->external != 0)) {
9487
0
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9488
0
                            "standalone: attribute %s on %s defaulted "
9489
0
                            "from external subset\n",
9490
0
                            attname, localname);
9491
0
                }
9492
378k
                nbdef++;
9493
378k
      }
9494
86.0k
  }
9495
119k
    }
9496
9497
    /*
9498
     * Using a single hash table for nsUri/localName pairs cannot
9499
     * detect duplicate QNames reliably. The following example will
9500
     * only result in two namespace errors.
9501
     *
9502
     * <doc xmlns:a="a" xmlns:b="a">
9503
     *   <elem a:a="" b:a="" b:a=""/>
9504
     * </doc>
9505
     *
9506
     * If we saw more than one namespace error but no duplicate QNames
9507
     * were found, we have to scan for duplicate QNames.
9508
     */
9509
31.0M
    if ((numDupErr == 0) && (numNsErr > 1)) {
9510
5
        memset(ctxt->attrHash, -1,
9511
5
               attrHashSize * sizeof(ctxt->attrHash[0]));
9512
9513
35
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9514
30
            unsigned hashValue, nameHashValue, prefixHashValue;
9515
30
            int res;
9516
9517
30
            aprefix = atts[i+1];
9518
30
            if (aprefix == NULL)
9519
7
                continue;
9520
9521
23
            attname = atts[i];
9522
            /* Hash values always have bit 31 set, see dict.c */
9523
23
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9524
23
            prefixHashValue = xmlDictComputeHash(ctxt->dict, aprefix);
9525
9526
23
            hashValue = xmlDictCombineHash(nameHashValue, prefixHashValue);
9527
23
            res = xmlAttrHashInsertQName(ctxt, attrHashSize, attname,
9528
23
                                         aprefix, hashValue, i);
9529
23
            if (res < INT_MAX)
9530
13
                xmlErrAttributeDup(ctxt, aprefix, attname);
9531
23
        }
9532
5
    }
9533
9534
    /*
9535
     * Reconstruct attribute pointers
9536
     */
9537
52.3M
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9538
        /* namespace URI */
9539
21.3M
        nsIndex = XML_PTR_TO_INT(atts[i+2]);
9540
21.3M
        if (nsIndex == INT_MAX)
9541
14.7M
            atts[i+2] = NULL;
9542
6.60M
        else if (nsIndex == INT_MAX - 1)
9543
138k
            atts[i+2] = ctxt->str_xml_ns;
9544
6.46M
        else
9545
6.46M
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9546
9547
21.3M
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9548
20.9M
            atts[i+3] = BASE_PTR + XML_PTR_TO_INT(atts[i+3]);  /* value */
9549
20.9M
            atts[i+4] = BASE_PTR + XML_PTR_TO_INT(atts[i+4]);  /* valuend */
9550
20.9M
        }
9551
21.3M
    }
9552
9553
31.0M
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9554
31.0M
    if ((prefix != NULL) && (uri == NULL)) {
9555
489k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9556
489k
           "Namespace prefix %s on %s is not defined\n",
9557
489k
     prefix, localname, NULL);
9558
489k
    }
9559
31.0M
    *pref = prefix;
9560
31.0M
    *URI = uri;
9561
9562
    /*
9563
     * SAX callback
9564
     */
9565
31.0M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9566
31.0M
  (!ctxt->disableSAX)) {
9567
30.8M
  if (nbNs > 0)
9568
443k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9569
443k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9570
443k
        nbatts / 5, nbdef, atts);
9571
30.4M
  else
9572
30.4M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9573
30.4M
                          0, NULL, nbatts / 5, nbdef, atts);
9574
30.8M
    }
9575
9576
31.0M
done:
9577
    /*
9578
     * Free allocated attribute values
9579
     */
9580
31.0M
    if (attval != 0) {
9581
324k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9582
250k
      if (ctxt->attallocs[j] & 0x80000000)
9583
83.6k
          xmlFree((xmlChar *) atts[i+3]);
9584
74.4k
    }
9585
9586
31.0M
    *nbNsPtr = nbNs;
9587
31.0M
    return(localname);
9588
31.0M
}
9589
9590
/**
9591
 * xmlParseEndTag2:
9592
 * @ctxt:  an XML parser context
9593
 * @line:  line of the start tag
9594
 * @nsNr:  number of namespaces on the start tag
9595
 *
9596
 * Parse an end tag. Always consumes '</'.
9597
 *
9598
 * [42] ETag ::= '</' Name S? '>'
9599
 *
9600
 * With namespace
9601
 *
9602
 * [NS 9] ETag ::= '</' QName S? '>'
9603
 */
9604
9605
static void
9606
8.24M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9607
8.24M
    const xmlChar *name;
9608
9609
8.24M
    GROW;
9610
8.24M
    if ((RAW != '<') || (NXT(1) != '/')) {
9611
2
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9612
2
  return;
9613
2
    }
9614
8.24M
    SKIP(2);
9615
9616
8.24M
    if (tag->prefix == NULL)
9617
2.75M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9618
5.48M
    else
9619
5.48M
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9620
9621
    /*
9622
     * We should definitely be at the ending "S? '>'" part
9623
     */
9624
8.24M
    GROW;
9625
8.24M
    SKIP_BLANKS;
9626
8.24M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9627
7.24k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9628
7.24k
    } else
9629
8.23M
  NEXT1;
9630
9631
    /*
9632
     * [ WFC: Element Type Match ]
9633
     * The Name in an element's end-tag must match the element type in the
9634
     * start-tag.
9635
     *
9636
     */
9637
8.24M
    if (name != (xmlChar*)1) {
9638
14.3k
        if (name == NULL) name = BAD_CAST "unparsable";
9639
14.3k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9640
14.3k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9641
14.3k
                    ctxt->name, tag->line, name);
9642
14.3k
    }
9643
9644
    /*
9645
     * SAX: End of Tag
9646
     */
9647
8.24M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9648
8.24M
  (!ctxt->disableSAX))
9649
8.22M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9650
8.22M
                                tag->URI);
9651
9652
8.24M
    spacePop(ctxt);
9653
8.24M
    if (tag->nsNr != 0)
9654
175k
  xmlParserNsPop(ctxt, tag->nsNr);
9655
8.24M
}
9656
9657
/**
9658
 * xmlParseCDSect:
9659
 * @ctxt:  an XML parser context
9660
 *
9661
 * DEPRECATED: Internal function, don't use.
9662
 *
9663
 * Parse escaped pure raw content. Always consumes '<!['.
9664
 *
9665
 * [18] CDSect ::= CDStart CData CDEnd
9666
 *
9667
 * [19] CDStart ::= '<![CDATA['
9668
 *
9669
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9670
 *
9671
 * [21] CDEnd ::= ']]>'
9672
 */
9673
void
9674
10.7k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9675
10.7k
    xmlChar *buf = NULL;
9676
10.7k
    int len = 0;
9677
10.7k
    int size = XML_PARSER_BUFFER_SIZE;
9678
10.7k
    int r, rl;
9679
10.7k
    int s, sl;
9680
10.7k
    int cur, l;
9681
10.7k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9682
10.7k
                    XML_MAX_HUGE_LENGTH :
9683
10.7k
                    XML_MAX_TEXT_LENGTH;
9684
9685
10.7k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9686
0
        return;
9687
10.7k
    SKIP(3);
9688
9689
10.7k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9690
0
        return;
9691
10.7k
    SKIP(6);
9692
9693
10.7k
    r = xmlCurrentCharRecover(ctxt, &rl);
9694
10.7k
    if (!IS_CHAR(r)) {
9695
16
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9696
16
        goto out;
9697
16
    }
9698
10.7k
    NEXTL(rl);
9699
10.7k
    s = xmlCurrentCharRecover(ctxt, &sl);
9700
10.7k
    if (!IS_CHAR(s)) {
9701
17
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9702
17
        goto out;
9703
17
    }
9704
10.7k
    NEXTL(sl);
9705
10.7k
    cur = xmlCurrentCharRecover(ctxt, &l);
9706
10.7k
    buf = xmlMalloc(size);
9707
10.7k
    if (buf == NULL) {
9708
0
  xmlErrMemory(ctxt);
9709
0
        goto out;
9710
0
    }
9711
41.5M
    while (IS_CHAR(cur) &&
9712
41.5M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9713
41.5M
  if (len + 5 >= size) {
9714
54.1k
      xmlChar *tmp;
9715
54.1k
            int newSize;
9716
9717
54.1k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9718
54.1k
            if (newSize < 0) {
9719
0
                xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9720
0
                               "CData section too big found\n");
9721
0
                goto out;
9722
0
            }
9723
54.1k
      tmp = xmlRealloc(buf, newSize);
9724
54.1k
      if (tmp == NULL) {
9725
0
    xmlErrMemory(ctxt);
9726
0
                goto out;
9727
0
      }
9728
54.1k
      buf = tmp;
9729
54.1k
      size = newSize;
9730
54.1k
  }
9731
41.5M
  COPY_BUF(buf, len, r);
9732
41.5M
  r = s;
9733
41.5M
  rl = sl;
9734
41.5M
  s = cur;
9735
41.5M
  sl = l;
9736
41.5M
  NEXTL(l);
9737
41.5M
  cur = xmlCurrentCharRecover(ctxt, &l);
9738
41.5M
    }
9739
10.7k
    buf[len] = 0;
9740
10.7k
    if (cur != '>') {
9741
300
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9742
300
                       "CData section not finished\n%.50s\n", buf);
9743
300
        goto out;
9744
300
    }
9745
10.4k
    NEXTL(l);
9746
9747
    /*
9748
     * OK the buffer is to be consumed as cdata.
9749
     */
9750
10.4k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9751
10.3k
        if ((ctxt->sax->cdataBlock != NULL) &&
9752
0
            ((ctxt->options & XML_PARSE_NOCDATA) == 0)) {
9753
0
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9754
10.3k
        } else if (ctxt->sax->characters != NULL) {
9755
10.3k
            ctxt->sax->characters(ctxt->userData, buf, len);
9756
10.3k
        }
9757
10.3k
    }
9758
9759
10.7k
out:
9760
10.7k
    xmlFree(buf);
9761
10.7k
}
9762
9763
/**
9764
 * xmlParseContentInternal:
9765
 * @ctxt:  an XML parser context
9766
 *
9767
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9768
 * unexpected EOF to the caller.
9769
 */
9770
9771
static void
9772
5.56k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9773
5.56k
    int oldNameNr = ctxt->nameNr;
9774
5.56k
    int oldSpaceNr = ctxt->spaceNr;
9775
5.56k
    int oldNodeNr = ctxt->nodeNr;
9776
9777
5.56k
    GROW;
9778
1.74M
    while ((ctxt->input->cur < ctxt->input->end) &&
9779
1.74M
     (PARSER_STOPPED(ctxt) == 0)) {
9780
1.74M
  const xmlChar *cur = ctxt->input->cur;
9781
9782
  /*
9783
   * First case : a Processing Instruction.
9784
   */
9785
1.74M
  if ((*cur == '<') && (cur[1] == '?')) {
9786
20
      xmlParsePI(ctxt);
9787
20
  }
9788
9789
  /*
9790
   * Second case : a CDSection
9791
   */
9792
  /* 2.6.0 test was *cur not RAW */
9793
1.74M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9794
0
      xmlParseCDSect(ctxt);
9795
0
  }
9796
9797
  /*
9798
   * Third case :  a comment
9799
   */
9800
1.74M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9801
0
     (NXT(2) == '-') && (NXT(3) == '-')) {
9802
0
      xmlParseComment(ctxt);
9803
0
  }
9804
9805
  /*
9806
   * Fourth case :  a sub-element.
9807
   */
9808
1.74M
  else if (*cur == '<') {
9809
1.41M
            if (NXT(1) == '/') {
9810
408k
                if (ctxt->nameNr <= oldNameNr)
9811
5.38k
                    break;
9812
402k
          xmlParseElementEnd(ctxt);
9813
1.00M
            } else {
9814
1.00M
          xmlParseElementStart(ctxt);
9815
1.00M
            }
9816
1.41M
  }
9817
9818
  /*
9819
   * Fifth case : a reference. If if has not been resolved,
9820
   *    parsing returns it's Name, create the node
9821
   */
9822
9823
328k
  else if (*cur == '&') {
9824
14
      xmlParseReference(ctxt);
9825
14
  }
9826
9827
  /*
9828
   * Last case, text. Note that References are handled directly.
9829
   */
9830
328k
  else {
9831
328k
      xmlParseCharDataInternal(ctxt, 0);
9832
328k
  }
9833
9834
1.74M
  SHRINK;
9835
1.74M
  GROW;
9836
1.74M
    }
9837
9838
5.56k
    if ((ctxt->nameNr > oldNameNr) &&
9839
178
        (ctxt->input->cur >= ctxt->input->end) &&
9840
176
        (ctxt->wellFormed)) {
9841
0
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9842
0
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9843
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9844
0
                "Premature end of data in tag %s line %d\n",
9845
0
                name, line, NULL);
9846
0
    }
9847
9848
    /*
9849
     * Clean up in error case
9850
     */
9851
9852
5.64k
    while (ctxt->nodeNr > oldNodeNr)
9853
79
        nodePop(ctxt);
9854
9855
16.6k
    while (ctxt->nameNr > oldNameNr) {
9856
11.1k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9857
9858
11.1k
        if (tag->nsNr != 0)
9859
8
            xmlParserNsPop(ctxt, tag->nsNr);
9860
9861
11.1k
        namePop(ctxt);
9862
11.1k
    }
9863
9864
16.6k
    while (ctxt->spaceNr > oldSpaceNr)
9865
11.1k
        spacePop(ctxt);
9866
5.56k
}
9867
9868
/**
9869
 * xmlParseContent:
9870
 * @ctxt:  an XML parser context
9871
 *
9872
 * Parse XML element content. This is useful if you're only interested
9873
 * in custom SAX callbacks. If you want a node list, use
9874
 * xmlCtxtParseContent.
9875
 */
9876
void
9877
0
xmlParseContent(xmlParserCtxtPtr ctxt) {
9878
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9879
0
        return;
9880
9881
0
    xmlCtxtInitializeLate(ctxt);
9882
9883
0
    xmlParseContentInternal(ctxt);
9884
9885
0
    xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
9886
0
}
9887
9888
/**
9889
 * xmlParseElement:
9890
 * @ctxt:  an XML parser context
9891
 *
9892
 * DEPRECATED: Internal function, don't use.
9893
 *
9894
 * parse an XML element
9895
 *
9896
 * [39] element ::= EmptyElemTag | STag content ETag
9897
 *
9898
 * [ WFC: Element Type Match ]
9899
 * The Name in an element's end-tag must match the element type in the
9900
 * start-tag.
9901
 *
9902
 */
9903
9904
void
9905
5.75k
xmlParseElement(xmlParserCtxtPtr ctxt) {
9906
5.75k
    if (xmlParseElementStart(ctxt) != 0)
9907
187
        return;
9908
9909
5.56k
    xmlParseContentInternal(ctxt);
9910
9911
5.56k
    if (ctxt->input->cur >= ctxt->input->end) {
9912
181
        if (ctxt->wellFormed) {
9913
0
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9914
0
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9915
0
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9916
0
                    "Premature end of data in tag %s line %d\n",
9917
0
                    name, line, NULL);
9918
0
        }
9919
181
        return;
9920
181
    }
9921
9922
5.38k
    xmlParseElementEnd(ctxt);
9923
5.38k
}
9924
9925
/**
9926
 * xmlParseElementStart:
9927
 * @ctxt:  an XML parser context
9928
 *
9929
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9930
 * opening tag was parsed, 1 if an empty element was parsed.
9931
 *
9932
 * Always consumes '<'.
9933
 */
9934
static int
9935
1.01M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9936
1.01M
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9937
1.01M
    const xmlChar *name;
9938
1.01M
    const xmlChar *prefix = NULL;
9939
1.01M
    const xmlChar *URI = NULL;
9940
1.01M
    xmlParserNodeInfo node_info;
9941
1.01M
    int line;
9942
1.01M
    xmlNodePtr cur;
9943
1.01M
    int nbNs = 0;
9944
9945
1.01M
    if (ctxt->nameNr > maxDepth) {
9946
2
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9947
2
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9948
2
                ctxt->nameNr);
9949
2
  xmlHaltParser(ctxt);
9950
2
  return(-1);
9951
2
    }
9952
9953
    /* Capture start position */
9954
1.01M
    if (ctxt->record_info) {
9955
0
        node_info.begin_pos = ctxt->input->consumed +
9956
0
                          (CUR_PTR - ctxt->input->base);
9957
0
  node_info.begin_line = ctxt->input->line;
9958
0
    }
9959
9960
1.01M
    if (ctxt->spaceNr == 0)
9961
5.75k
  spacePush(ctxt, -1);
9962
1.00M
    else if (*ctxt->space == -2)
9963
0
  spacePush(ctxt, -1);
9964
1.00M
    else
9965
1.00M
  spacePush(ctxt, *ctxt->space);
9966
9967
1.01M
    line = ctxt->input->line;
9968
1.01M
#ifdef LIBXML_SAX1_ENABLED
9969
1.01M
    if (ctxt->sax2)
9970
1.01M
#endif /* LIBXML_SAX1_ENABLED */
9971
1.01M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9972
0
#ifdef LIBXML_SAX1_ENABLED
9973
0
    else
9974
0
  name = xmlParseStartTag(ctxt);
9975
1.01M
#endif /* LIBXML_SAX1_ENABLED */
9976
1.01M
    if (name == NULL) {
9977
6.72k
  spacePop(ctxt);
9978
6.72k
        return(-1);
9979
6.72k
    }
9980
1.00M
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9981
1.00M
    cur = ctxt->node;
9982
9983
1.00M
#ifdef LIBXML_VALID_ENABLED
9984
    /*
9985
     * [ VC: Root Element Type ]
9986
     * The Name in the document type declaration must match the element
9987
     * type of the root element.
9988
     */
9989
1.00M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9990
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9991
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9992
1.00M
#endif /* LIBXML_VALID_ENABLED */
9993
9994
    /*
9995
     * Check for an Empty Element.
9996
     */
9997
1.00M
    if ((RAW == '/') && (NXT(1) == '>')) {
9998
542k
        SKIP(2);
9999
542k
  if (ctxt->sax2) {
10000
542k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10001
542k
    (!ctxt->disableSAX))
10002
530k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10003
542k
#ifdef LIBXML_SAX1_ENABLED
10004
542k
  } else {
10005
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10006
0
    (!ctxt->disableSAX))
10007
0
    ctxt->sax->endElement(ctxt->userData, name);
10008
0
#endif /* LIBXML_SAX1_ENABLED */
10009
0
  }
10010
542k
  namePop(ctxt);
10011
542k
  spacePop(ctxt);
10012
542k
  if (nbNs > 0)
10013
1.12k
      xmlParserNsPop(ctxt, nbNs);
10014
542k
  if (cur != NULL && ctxt->record_info) {
10015
0
            node_info.node = cur;
10016
0
            node_info.end_pos = ctxt->input->consumed +
10017
0
                                (CUR_PTR - ctxt->input->base);
10018
0
            node_info.end_line = ctxt->input->line;
10019
0
            xmlParserAddNodeInfo(ctxt, &node_info);
10020
0
  }
10021
542k
  return(1);
10022
542k
    }
10023
466k
    if (RAW == '>') {
10024
419k
        NEXT1;
10025
419k
        if (cur != NULL && ctxt->record_info) {
10026
0
            node_info.node = cur;
10027
0
            node_info.end_pos = 0;
10028
0
            node_info.end_line = 0;
10029
0
            xmlParserAddNodeInfo(ctxt, &node_info);
10030
0
        }
10031
419k
    } else {
10032
46.5k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10033
46.5k
         "Couldn't find end of Start Tag %s line %d\n",
10034
46.5k
                    name, line, NULL);
10035
10036
  /*
10037
   * end of parsing of this node.
10038
   */
10039
46.5k
  nodePop(ctxt);
10040
46.5k
  namePop(ctxt);
10041
46.5k
  spacePop(ctxt);
10042
46.5k
  if (nbNs > 0)
10043
100
      xmlParserNsPop(ctxt, nbNs);
10044
46.5k
  return(-1);
10045
46.5k
    }
10046
10047
419k
    return(0);
10048
466k
}
10049
10050
/**
10051
 * xmlParseElementEnd:
10052
 * @ctxt:  an XML parser context
10053
 *
10054
 * Parse the end of an XML element. Always consumes '</'.
10055
 */
10056
static void
10057
408k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10058
408k
    xmlNodePtr cur = ctxt->node;
10059
10060
408k
    if (ctxt->nameNr <= 0) {
10061
0
        if ((RAW == '<') && (NXT(1) == '/'))
10062
0
            SKIP(2);
10063
0
        return;
10064
0
    }
10065
10066
    /*
10067
     * parse the end of tag: '</' should be here.
10068
     */
10069
408k
    if (ctxt->sax2) {
10070
408k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10071
408k
  namePop(ctxt);
10072
408k
    }
10073
0
#ifdef LIBXML_SAX1_ENABLED
10074
0
    else
10075
0
  xmlParseEndTag1(ctxt, 0);
10076
408k
#endif /* LIBXML_SAX1_ENABLED */
10077
10078
    /*
10079
     * Capture end position
10080
     */
10081
408k
    if (cur != NULL && ctxt->record_info) {
10082
0
        xmlParserNodeInfoPtr node_info;
10083
10084
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10085
0
        if (node_info != NULL) {
10086
0
            node_info->end_pos = ctxt->input->consumed +
10087
0
                                 (CUR_PTR - ctxt->input->base);
10088
0
            node_info->end_line = ctxt->input->line;
10089
0
        }
10090
0
    }
10091
408k
}
10092
10093
/**
10094
 * xmlParseVersionNum:
10095
 * @ctxt:  an XML parser context
10096
 *
10097
 * DEPRECATED: Internal function, don't use.
10098
 *
10099
 * parse the XML version value.
10100
 *
10101
 * [26] VersionNum ::= '1.' [0-9]+
10102
 *
10103
 * In practice allow [0-9].[0-9]+ at that level
10104
 *
10105
 * Returns the string giving the XML version number, or NULL
10106
 */
10107
xmlChar *
10108
163k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10109
163k
    xmlChar *buf = NULL;
10110
163k
    int len = 0;
10111
163k
    int size = 10;
10112
163k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10113
158k
                    XML_MAX_TEXT_LENGTH :
10114
163k
                    XML_MAX_NAME_LENGTH;
10115
163k
    xmlChar cur;
10116
10117
163k
    buf = xmlMalloc(size);
10118
163k
    if (buf == NULL) {
10119
0
  xmlErrMemory(ctxt);
10120
0
  return(NULL);
10121
0
    }
10122
163k
    cur = CUR;
10123
163k
    if (!((cur >= '0') && (cur <= '9'))) {
10124
94
  xmlFree(buf);
10125
94
  return(NULL);
10126
94
    }
10127
163k
    buf[len++] = cur;
10128
163k
    NEXT;
10129
163k
    cur=CUR;
10130
163k
    if (cur != '.') {
10131
33
  xmlFree(buf);
10132
33
  return(NULL);
10133
33
    }
10134
163k
    buf[len++] = cur;
10135
163k
    NEXT;
10136
163k
    cur=CUR;
10137
332k
    while ((cur >= '0') && (cur <= '9')) {
10138
169k
  if (len + 1 >= size) {
10139
291
      xmlChar *tmp;
10140
291
            int newSize;
10141
10142
291
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10143
291
            if (newSize < 0) {
10144
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "VersionNum");
10145
0
                xmlFree(buf);
10146
0
                return(NULL);
10147
0
            }
10148
291
      tmp = xmlRealloc(buf, newSize);
10149
291
      if (tmp == NULL) {
10150
0
    xmlErrMemory(ctxt);
10151
0
          xmlFree(buf);
10152
0
    return(NULL);
10153
0
      }
10154
291
      buf = tmp;
10155
291
            size = newSize;
10156
291
  }
10157
169k
  buf[len++] = cur;
10158
169k
  NEXT;
10159
169k
  cur=CUR;
10160
169k
    }
10161
163k
    buf[len] = 0;
10162
163k
    return(buf);
10163
163k
}
10164
10165
/**
10166
 * xmlParseVersionInfo:
10167
 * @ctxt:  an XML parser context
10168
 *
10169
 * DEPRECATED: Internal function, don't use.
10170
 *
10171
 * parse the XML version.
10172
 *
10173
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10174
 *
10175
 * [25] Eq ::= S? '=' S?
10176
 *
10177
 * Returns the version string, e.g. "1.0"
10178
 */
10179
10180
xmlChar *
10181
164k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10182
164k
    xmlChar *version = NULL;
10183
10184
164k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10185
163k
  SKIP(7);
10186
163k
  SKIP_BLANKS;
10187
163k
  if (RAW != '=') {
10188
35
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10189
35
      return(NULL);
10190
35
        }
10191
163k
  NEXT;
10192
163k
  SKIP_BLANKS;
10193
163k
  if (RAW == '"') {
10194
163k
      NEXT;
10195
163k
      version = xmlParseVersionNum(ctxt);
10196
163k
      if (RAW != '"') {
10197
139
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10198
139
      } else
10199
163k
          NEXT;
10200
163k
  } else if (RAW == '\''){
10201
380
      NEXT;
10202
380
      version = xmlParseVersionNum(ctxt);
10203
380
      if (RAW != '\'') {
10204
7
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10205
7
      } else
10206
373
          NEXT;
10207
380
  } else {
10208
45
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10209
45
  }
10210
163k
    }
10211
164k
    return(version);
10212
164k
}
10213
10214
/**
10215
 * xmlParseEncName:
10216
 * @ctxt:  an XML parser context
10217
 *
10218
 * DEPRECATED: Internal function, don't use.
10219
 *
10220
 * parse the XML encoding name
10221
 *
10222
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10223
 *
10224
 * Returns the encoding name value or NULL
10225
 */
10226
xmlChar *
10227
141k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10228
141k
    xmlChar *buf = NULL;
10229
141k
    int len = 0;
10230
141k
    int size = 10;
10231
141k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10232
135k
                    XML_MAX_TEXT_LENGTH :
10233
141k
                    XML_MAX_NAME_LENGTH;
10234
141k
    xmlChar cur;
10235
10236
141k
    cur = CUR;
10237
141k
    if (((cur >= 'a') && (cur <= 'z')) ||
10238
141k
        ((cur >= 'A') && (cur <= 'Z'))) {
10239
141k
  buf = xmlMalloc(size);
10240
141k
  if (buf == NULL) {
10241
0
      xmlErrMemory(ctxt);
10242
0
      return(NULL);
10243
0
  }
10244
10245
141k
  buf[len++] = cur;
10246
141k
  NEXT;
10247
141k
  cur = CUR;
10248
715k
  while (((cur >= 'a') && (cur <= 'z')) ||
10249
707k
         ((cur >= 'A') && (cur <= 'Z')) ||
10250
429k
         ((cur >= '0') && (cur <= '9')) ||
10251
284k
         (cur == '.') || (cur == '_') ||
10252
574k
         (cur == '-')) {
10253
574k
      if (len + 1 >= size) {
10254
1.09k
          xmlChar *tmp;
10255
1.09k
                int newSize;
10256
10257
1.09k
                newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10258
1.09k
                if (newSize < 0) {
10259
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10260
0
                    xmlFree(buf);
10261
0
                    return(NULL);
10262
0
                }
10263
1.09k
    tmp = xmlRealloc(buf, newSize);
10264
1.09k
    if (tmp == NULL) {
10265
0
        xmlErrMemory(ctxt);
10266
0
        xmlFree(buf);
10267
0
        return(NULL);
10268
0
    }
10269
1.09k
    buf = tmp;
10270
1.09k
                size = newSize;
10271
1.09k
      }
10272
574k
      buf[len++] = cur;
10273
574k
      NEXT;
10274
574k
      cur = CUR;
10275
574k
        }
10276
141k
  buf[len] = 0;
10277
141k
    } else {
10278
32
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10279
32
    }
10280
141k
    return(buf);
10281
141k
}
10282
10283
/**
10284
 * xmlParseEncodingDecl:
10285
 * @ctxt:  an XML parser context
10286
 *
10287
 * DEPRECATED: Internal function, don't use.
10288
 *
10289
 * parse the XML encoding declaration
10290
 *
10291
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10292
 *
10293
 * this setups the conversion filters.
10294
 *
10295
 * Returns the encoding value or NULL
10296
 */
10297
10298
const xmlChar *
10299
143k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10300
143k
    xmlChar *encoding = NULL;
10301
10302
143k
    SKIP_BLANKS;
10303
143k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10304
2.23k
        return(NULL);
10305
10306
141k
    SKIP(8);
10307
141k
    SKIP_BLANKS;
10308
141k
    if (RAW != '=') {
10309
32
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10310
32
        return(NULL);
10311
32
    }
10312
141k
    NEXT;
10313
141k
    SKIP_BLANKS;
10314
141k
    if (RAW == '"') {
10315
141k
        NEXT;
10316
141k
        encoding = xmlParseEncName(ctxt);
10317
141k
        if (RAW != '"') {
10318
145
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10319
145
            xmlFree((xmlChar *) encoding);
10320
145
            return(NULL);
10321
145
        } else
10322
140k
            NEXT;
10323
141k
    } else if (RAW == '\''){
10324
37
        NEXT;
10325
37
        encoding = xmlParseEncName(ctxt);
10326
37
        if (RAW != '\'') {
10327
16
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10328
16
            xmlFree((xmlChar *) encoding);
10329
16
            return(NULL);
10330
16
        } else
10331
21
            NEXT;
10332
37
    } else {
10333
18
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10334
18
    }
10335
10336
140k
    if (encoding == NULL)
10337
26
        return(NULL);
10338
10339
140k
    xmlSetDeclaredEncoding(ctxt, encoding);
10340
10341
140k
    return(ctxt->encoding);
10342
140k
}
10343
10344
/**
10345
 * xmlParseSDDecl:
10346
 * @ctxt:  an XML parser context
10347
 *
10348
 * DEPRECATED: Internal function, don't use.
10349
 *
10350
 * parse the XML standalone declaration
10351
 *
10352
 * [32] SDDecl ::= S 'standalone' Eq
10353
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10354
 *
10355
 * [ VC: Standalone Document Declaration ]
10356
 * TODO The standalone document declaration must have the value "no"
10357
 * if any external markup declarations contain declarations of:
10358
 *  - attributes with default values, if elements to which these
10359
 *    attributes apply appear in the document without specifications
10360
 *    of values for these attributes, or
10361
 *  - entities (other than amp, lt, gt, apos, quot), if references
10362
 *    to those entities appear in the document, or
10363
 *  - attributes with values subject to normalization, where the
10364
 *    attribute appears in the document with a value which will change
10365
 *    as a result of normalization, or
10366
 *  - element types with element content, if white space occurs directly
10367
 *    within any instance of those types.
10368
 *
10369
 * Returns:
10370
 *   1 if standalone="yes"
10371
 *   0 if standalone="no"
10372
 *  -2 if standalone attribute is missing or invalid
10373
 *    (A standalone value of -2 means that the XML declaration was found,
10374
 *     but no value was specified for the standalone attribute).
10375
 */
10376
10377
int
10378
101k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10379
101k
    int standalone = -2;
10380
10381
101k
    SKIP_BLANKS;
10382
101k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10383
99.1k
  SKIP(10);
10384
99.1k
        SKIP_BLANKS;
10385
99.1k
  if (RAW != '=') {
10386
11
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10387
11
      return(standalone);
10388
11
        }
10389
99.1k
  NEXT;
10390
99.1k
  SKIP_BLANKS;
10391
99.1k
        if (RAW == '\''){
10392
28
      NEXT;
10393
28
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10394
16
          standalone = 0;
10395
16
                SKIP(2);
10396
16
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10397
6
                 (NXT(2) == 's')) {
10398
3
          standalone = 1;
10399
3
    SKIP(3);
10400
9
            } else {
10401
9
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10402
9
      }
10403
28
      if (RAW != '\'') {
10404
12
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10405
12
      } else
10406
16
          NEXT;
10407
99.1k
  } else if (RAW == '"'){
10408
99.1k
      NEXT;
10409
99.1k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10410
453
          standalone = 0;
10411
453
    SKIP(2);
10412
98.6k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10413
98.6k
                 (NXT(2) == 's')) {
10414
98.6k
          standalone = 1;
10415
98.6k
                SKIP(3);
10416
98.6k
            } else {
10417
32
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10418
32
      }
10419
99.1k
      if (RAW != '"') {
10420
48
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10421
48
      } else
10422
99.0k
          NEXT;
10423
99.1k
  } else {
10424
9
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10425
9
        }
10426
99.1k
    }
10427
101k
    return(standalone);
10428
101k
}
10429
10430
/**
10431
 * xmlParseXMLDecl:
10432
 * @ctxt:  an XML parser context
10433
 *
10434
 * DEPRECATED: Internal function, don't use.
10435
 *
10436
 * parse an XML declaration header
10437
 *
10438
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10439
 */
10440
10441
void
10442
164k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10443
164k
    xmlChar *version;
10444
10445
    /*
10446
     * This value for standalone indicates that the document has an
10447
     * XML declaration but it does not have a standalone attribute.
10448
     * It will be overwritten later if a standalone attribute is found.
10449
     */
10450
10451
164k
    ctxt->standalone = -2;
10452
10453
    /*
10454
     * We know that '<?xml' is here.
10455
     */
10456
164k
    SKIP(5);
10457
10458
164k
    if (!IS_BLANK_CH(RAW)) {
10459
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10460
0
                 "Blank needed after '<?xml'\n");
10461
0
    }
10462
164k
    SKIP_BLANKS;
10463
10464
    /*
10465
     * We must have the VersionInfo here.
10466
     */
10467
164k
    version = xmlParseVersionInfo(ctxt);
10468
164k
    if (version == NULL) {
10469
947
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10470
163k
    } else {
10471
163k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10472
      /*
10473
       * Changed here for XML-1.0 5th edition
10474
       */
10475
4.77k
      if (ctxt->options & XML_PARSE_OLD10) {
10476
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10477
0
                "Unsupported version '%s'\n",
10478
0
                version);
10479
4.77k
      } else {
10480
4.77k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10481
4.61k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10482
4.61k
                      "Unsupported version '%s'\n",
10483
4.61k
          version, NULL);
10484
4.61k
    } else {
10485
166
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10486
166
              "Unsupported version '%s'\n",
10487
166
              version);
10488
166
    }
10489
4.77k
      }
10490
4.77k
  }
10491
163k
  if (ctxt->version != NULL)
10492
0
      xmlFree((void *) ctxt->version);
10493
163k
  ctxt->version = version;
10494
163k
    }
10495
10496
    /*
10497
     * We may have the encoding declaration
10498
     */
10499
164k
    if (!IS_BLANK_CH(RAW)) {
10500
21.9k
        if ((RAW == '?') && (NXT(1) == '>')) {
10501
20.9k
      SKIP(2);
10502
20.9k
      return;
10503
20.9k
  }
10504
1.00k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10505
1.00k
    }
10506
143k
    xmlParseEncodingDecl(ctxt);
10507
10508
    /*
10509
     * We may have the standalone status.
10510
     */
10511
143k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10512
42.3k
        if ((RAW == '?') && (NXT(1) == '>')) {
10513
42.3k
      SKIP(2);
10514
42.3k
      return;
10515
42.3k
  }
10516
36
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10517
36
    }
10518
10519
    /*
10520
     * We can grow the input buffer freely at that point
10521
     */
10522
101k
    GROW;
10523
10524
101k
    SKIP_BLANKS;
10525
101k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10526
10527
101k
    SKIP_BLANKS;
10528
101k
    if ((RAW == '?') && (NXT(1) == '>')) {
10529
99.5k
        SKIP(2);
10530
99.5k
    } else if (RAW == '>') {
10531
        /* Deprecated old WD ... */
10532
52
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10533
52
  NEXT;
10534
1.47k
    } else {
10535
1.47k
        int c;
10536
10537
1.47k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10538
743k
        while ((PARSER_STOPPED(ctxt) == 0) &&
10539
743k
               ((c = CUR) != 0)) {
10540
742k
            NEXT;
10541
742k
            if (c == '>')
10542
1.02k
                break;
10543
742k
        }
10544
1.47k
    }
10545
101k
}
10546
10547
/**
10548
 * xmlCtxtGetVersion:
10549
 * @ctxt:  parser context
10550
 *
10551
 * Available since 2.14.0.
10552
 *
10553
 * Returns the version from the XML declaration.
10554
 */
10555
const xmlChar *
10556
0
xmlCtxtGetVersion(xmlParserCtxtPtr ctxt) {
10557
0
    if (ctxt == NULL)
10558
0
        return(NULL);
10559
10560
0
    return(ctxt->version);
10561
0
}
10562
10563
/**
10564
 * xmlCtxtGetStandalone:
10565
 * @ctxt:  parser context
10566
 *
10567
 * Available since 2.14.0.
10568
 *
10569
 * Returns the value from the standalone document declaration.
10570
 */
10571
int
10572
0
xmlCtxtGetStandalone(xmlParserCtxtPtr ctxt) {
10573
0
    if (ctxt == NULL)
10574
0
        return(0);
10575
10576
0
    return(ctxt->standalone);
10577
0
}
10578
10579
/**
10580
 * xmlParseMisc:
10581
 * @ctxt:  an XML parser context
10582
 *
10583
 * DEPRECATED: Internal function, don't use.
10584
 *
10585
 * parse an XML Misc* optional field.
10586
 *
10587
 * [27] Misc ::= Comment | PI |  S
10588
 */
10589
10590
void
10591
11.5k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10592
11.5k
    while (PARSER_STOPPED(ctxt) == 0) {
10593
11.5k
        SKIP_BLANKS;
10594
11.5k
        GROW;
10595
11.5k
        if ((RAW == '<') && (NXT(1) == '?')) {
10596
61
      xmlParsePI(ctxt);
10597
11.5k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10598
0
      xmlParseComment(ctxt);
10599
11.5k
        } else {
10600
11.5k
            break;
10601
11.5k
        }
10602
11.5k
    }
10603
11.5k
}
10604
10605
static void
10606
145k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10607
145k
    xmlDocPtr doc;
10608
10609
    /*
10610
     * SAX: end of the document processing.
10611
     */
10612
145k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10613
5.75k
        ctxt->sax->endDocument(ctxt->userData);
10614
10615
145k
    doc = ctxt->myDoc;
10616
145k
    if (doc != NULL) {
10617
5.75k
        if (ctxt->wellFormed) {
10618
5.48k
            doc->properties |= XML_DOC_WELLFORMED;
10619
5.48k
            if (ctxt->valid)
10620
5.48k
                doc->properties |= XML_DOC_DTDVALID;
10621
5.48k
            if (ctxt->nsWellFormed)
10622
5.48k
                doc->properties |= XML_DOC_NSVALID;
10623
5.48k
        }
10624
10625
5.75k
        if (ctxt->options & XML_PARSE_OLD10)
10626
0
            doc->properties |= XML_DOC_OLD10;
10627
10628
        /*
10629
         * Remove locally kept entity definitions if the tree was not built
10630
         */
10631
5.75k
  if (xmlStrEqual(doc->version, SAX_COMPAT_MODE)) {
10632
7
            xmlFreeDoc(doc);
10633
7
            ctxt->myDoc = NULL;
10634
7
        }
10635
5.75k
    }
10636
145k
}
10637
10638
/**
10639
 * xmlParseDocument:
10640
 * @ctxt:  an XML parser context
10641
 *
10642
 * Parse an XML document and invoke the SAX handlers. This is useful
10643
 * if you're only interested in custom SAX callbacks. If you want a
10644
 * document tree, use xmlCtxtParseDocument.
10645
 *
10646
 * Returns 0, -1 in case of error.
10647
 */
10648
10649
int
10650
5.80k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10651
5.80k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10652
0
        return(-1);
10653
10654
5.80k
    GROW;
10655
10656
    /*
10657
     * SAX: detecting the level.
10658
     */
10659
5.80k
    xmlCtxtInitializeLate(ctxt);
10660
10661
5.80k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10662
5.80k
        ctxt->sax->setDocumentLocator(ctxt->userData,
10663
5.80k
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10664
5.80k
    }
10665
10666
5.80k
    xmlDetectEncoding(ctxt);
10667
10668
5.80k
    if (CUR == 0) {
10669
45
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10670
45
  return(-1);
10671
45
    }
10672
10673
5.75k
    GROW;
10674
5.75k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10675
10676
  /*
10677
   * Note that we will switch encoding on the fly.
10678
   */
10679
5.49k
  xmlParseXMLDecl(ctxt);
10680
5.49k
  SKIP_BLANKS;
10681
5.49k
    } else {
10682
262
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10683
262
        if (ctxt->version == NULL) {
10684
0
            xmlErrMemory(ctxt);
10685
0
            return(-1);
10686
0
        }
10687
262
    }
10688
5.75k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10689
5.75k
        ctxt->sax->startDocument(ctxt->userData);
10690
5.75k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10691
5.75k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10692
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10693
0
    }
10694
10695
    /*
10696
     * The Misc part of the Prolog
10697
     */
10698
5.75k
    xmlParseMisc(ctxt);
10699
10700
    /*
10701
     * Then possibly doc type declaration(s) and more Misc
10702
     * (doctypedecl Misc*)?
10703
     */
10704
5.75k
    GROW;
10705
5.75k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10706
10707
0
  ctxt->inSubset = 1;
10708
0
  xmlParseDocTypeDecl(ctxt);
10709
0
  if (RAW == '[') {
10710
0
      xmlParseInternalSubset(ctxt);
10711
0
  } else if (RAW == '>') {
10712
0
            NEXT;
10713
0
        }
10714
10715
  /*
10716
   * Create and update the external subset.
10717
   */
10718
0
  ctxt->inSubset = 2;
10719
0
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10720
0
      (!ctxt->disableSAX))
10721
0
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10722
0
                                ctxt->extSubSystem, ctxt->extSubURI);
10723
0
  ctxt->inSubset = 0;
10724
10725
0
        xmlCleanSpecialAttr(ctxt);
10726
10727
0
  xmlParseMisc(ctxt);
10728
0
    }
10729
10730
    /*
10731
     * Time to start parsing the tree itself
10732
     */
10733
5.75k
    GROW;
10734
5.75k
    if (RAW != '<') {
10735
2
        if (ctxt->wellFormed)
10736
0
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10737
0
                           "Start tag expected, '<' not found\n");
10738
5.75k
    } else {
10739
5.75k
  xmlParseElement(ctxt);
10740
10741
  /*
10742
   * The Misc part at the end
10743
   */
10744
5.75k
  xmlParseMisc(ctxt);
10745
10746
5.75k
        xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
10747
5.75k
    }
10748
10749
5.75k
    ctxt->instate = XML_PARSER_EOF;
10750
5.75k
    xmlFinishDocument(ctxt);
10751
10752
5.75k
    if (! ctxt->wellFormed) {
10753
274
  ctxt->valid = 0;
10754
274
  return(-1);
10755
274
    }
10756
10757
5.48k
    return(0);
10758
5.75k
}
10759
10760
/**
10761
 * xmlParseExtParsedEnt:
10762
 * @ctxt:  an XML parser context
10763
 *
10764
 * DEPRECATED: Internal function, don't use.
10765
 *
10766
 * parse a general parsed entity
10767
 * An external general parsed entity is well-formed if it matches the
10768
 * production labeled extParsedEnt.
10769
 *
10770
 * [78] extParsedEnt ::= TextDecl? content
10771
 *
10772
 * Returns 0, -1 in case of error. the parser context is augmented
10773
 *                as a result of the parsing.
10774
 */
10775
10776
int
10777
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10778
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10779
0
        return(-1);
10780
10781
0
    xmlCtxtInitializeLate(ctxt);
10782
10783
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10784
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10785
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10786
0
    }
10787
10788
0
    xmlDetectEncoding(ctxt);
10789
10790
0
    if (CUR == 0) {
10791
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10792
0
    }
10793
10794
    /*
10795
     * Check for the XMLDecl in the Prolog.
10796
     */
10797
0
    GROW;
10798
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10799
10800
  /*
10801
   * Note that we will switch encoding on the fly.
10802
   */
10803
0
  xmlParseXMLDecl(ctxt);
10804
0
  SKIP_BLANKS;
10805
0
    } else {
10806
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10807
0
    }
10808
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10809
0
        ctxt->sax->startDocument(ctxt->userData);
10810
10811
    /*
10812
     * Doing validity checking on chunk doesn't make sense
10813
     */
10814
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10815
0
    ctxt->validate = 0;
10816
0
    ctxt->depth = 0;
10817
10818
0
    xmlParseContentInternal(ctxt);
10819
10820
0
    if (ctxt->input->cur < ctxt->input->end)
10821
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10822
10823
    /*
10824
     * SAX: end of the document processing.
10825
     */
10826
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10827
0
        ctxt->sax->endDocument(ctxt->userData);
10828
10829
0
    if (! ctxt->wellFormed) return(-1);
10830
0
    return(0);
10831
0
}
10832
10833
#ifdef LIBXML_PUSH_ENABLED
10834
/************************************************************************
10835
 *                  *
10836
 *    Progressive parsing interfaces        *
10837
 *                  *
10838
 ************************************************************************/
10839
10840
/**
10841
 * xmlParseLookupChar:
10842
 * @ctxt:  an XML parser context
10843
 * @c:  character
10844
 *
10845
 * Check whether the input buffer contains a character.
10846
 */
10847
static int
10848
3.89M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10849
3.89M
    const xmlChar *cur;
10850
10851
3.89M
    if (ctxt->checkIndex == 0) {
10852
3.89M
        cur = ctxt->input->cur + 1;
10853
3.89M
    } else {
10854
524
        cur = ctxt->input->cur + ctxt->checkIndex;
10855
524
    }
10856
10857
3.89M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10858
684
        size_t index = ctxt->input->end - ctxt->input->cur;
10859
10860
684
        if (index > LONG_MAX) {
10861
0
            ctxt->checkIndex = 0;
10862
0
            return(1);
10863
0
        }
10864
684
        ctxt->checkIndex = index;
10865
684
        return(0);
10866
3.89M
    } else {
10867
3.89M
        ctxt->checkIndex = 0;
10868
3.89M
        return(1);
10869
3.89M
    }
10870
3.89M
}
10871
10872
/**
10873
 * xmlParseLookupString:
10874
 * @ctxt:  an XML parser context
10875
 * @startDelta: delta to apply at the start
10876
 * @str:  string
10877
 * @strLen:  length of string
10878
 *
10879
 * Check whether the input buffer contains a string.
10880
 */
10881
static const xmlChar *
10882
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10883
88.9k
                     const char *str, size_t strLen) {
10884
88.9k
    const xmlChar *cur, *term;
10885
10886
88.9k
    if (ctxt->checkIndex == 0) {
10887
88.2k
        cur = ctxt->input->cur + startDelta;
10888
88.2k
    } else {
10889
691
        cur = ctxt->input->cur + ctxt->checkIndex;
10890
691
    }
10891
10892
88.9k
    term = BAD_CAST strstr((const char *) cur, str);
10893
88.9k
    if (term == NULL) {
10894
995
        const xmlChar *end = ctxt->input->end;
10895
995
        size_t index;
10896
10897
        /* Rescan (strLen - 1) characters. */
10898
995
        if ((size_t) (end - cur) < strLen)
10899
29
            end = cur;
10900
966
        else
10901
966
            end -= strLen - 1;
10902
995
        index = end - ctxt->input->cur;
10903
995
        if (index > LONG_MAX) {
10904
0
            ctxt->checkIndex = 0;
10905
0
            return(ctxt->input->end - strLen);
10906
0
        }
10907
995
        ctxt->checkIndex = index;
10908
87.9k
    } else {
10909
87.9k
        ctxt->checkIndex = 0;
10910
87.9k
    }
10911
10912
88.9k
    return(term);
10913
88.9k
}
10914
10915
/**
10916
 * xmlParseLookupCharData:
10917
 * @ctxt:  an XML parser context
10918
 *
10919
 * Check whether the input buffer contains terminated char data.
10920
 */
10921
static int
10922
65.2k
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10923
65.2k
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10924
65.2k
    const xmlChar *end = ctxt->input->end;
10925
65.2k
    size_t index;
10926
10927
802k
    while (cur < end) {
10928
800k
        if ((*cur == '<') || (*cur == '&')) {
10929
63.3k
            ctxt->checkIndex = 0;
10930
63.3k
            return(1);
10931
63.3k
        }
10932
737k
        cur++;
10933
737k
    }
10934
10935
1.94k
    index = cur - ctxt->input->cur;
10936
1.94k
    if (index > LONG_MAX) {
10937
0
        ctxt->checkIndex = 0;
10938
0
        return(1);
10939
0
    }
10940
1.94k
    ctxt->checkIndex = index;
10941
1.94k
    return(0);
10942
1.94k
}
10943
10944
/**
10945
 * xmlParseLookupGt:
10946
 * @ctxt:  an XML parser context
10947
 *
10948
 * Check whether there's enough data in the input buffer to finish parsing
10949
 * a start tag. This has to take quotes into account.
10950
 */
10951
static int
10952
17.1M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10953
17.1M
    const xmlChar *cur;
10954
17.1M
    const xmlChar *end = ctxt->input->end;
10955
17.1M
    int state = ctxt->endCheckState;
10956
17.1M
    size_t index;
10957
10958
17.1M
    if (ctxt->checkIndex == 0)
10959
17.1M
        cur = ctxt->input->cur + 1;
10960
6.49k
    else
10961
6.49k
        cur = ctxt->input->cur + ctxt->checkIndex;
10962
10963
427M
    while (cur < end) {
10964
427M
        if (state) {
10965
115M
            if (*cur == state)
10966
10.6M
                state = 0;
10967
312M
        } else if (*cur == '\'' || *cur == '"') {
10968
10.6M
            state = *cur;
10969
301M
        } else if (*cur == '>') {
10970
17.0M
            ctxt->checkIndex = 0;
10971
17.0M
            ctxt->endCheckState = 0;
10972
17.0M
            return(1);
10973
17.0M
        }
10974
409M
        cur++;
10975
409M
    }
10976
10977
10.7k
    index = cur - ctxt->input->cur;
10978
10.7k
    if (index > LONG_MAX) {
10979
0
        ctxt->checkIndex = 0;
10980
0
        ctxt->endCheckState = 0;
10981
0
        return(1);
10982
0
    }
10983
10.7k
    ctxt->checkIndex = index;
10984
10.7k
    ctxt->endCheckState = state;
10985
10.7k
    return(0);
10986
10.7k
}
10987
10988
/**
10989
 * xmlParseLookupInternalSubset:
10990
 * @ctxt:  an XML parser context
10991
 *
10992
 * Check whether there's enough data in the input buffer to finish parsing
10993
 * the internal subset.
10994
 */
10995
static int
10996
670
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10997
    /*
10998
     * Sorry, but progressive parsing of the internal subset is not
10999
     * supported. We first check that the full content of the internal
11000
     * subset is available and parsing is launched only at that point.
11001
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11002
     * not in a ']]>' sequence which are conditional sections.
11003
     */
11004
670
    const xmlChar *cur, *start;
11005
670
    const xmlChar *end = ctxt->input->end;
11006
670
    int state = ctxt->endCheckState;
11007
670
    size_t index;
11008
11009
670
    if (ctxt->checkIndex == 0) {
11010
535
        cur = ctxt->input->cur + 1;
11011
535
    } else {
11012
135
        cur = ctxt->input->cur + ctxt->checkIndex;
11013
135
    }
11014
670
    start = cur;
11015
11016
12.5M
    while (cur < end) {
11017
12.5M
        if (state == '-') {
11018
1.56M
            if ((*cur == '-') &&
11019
98.1k
                (cur[1] == '-') &&
11020
49.2k
                (cur[2] == '>')) {
11021
21.4k
                state = 0;
11022
21.4k
                cur += 3;
11023
21.4k
                start = cur;
11024
21.4k
                continue;
11025
21.4k
            }
11026
1.56M
        }
11027
10.9M
        else if (state == ']') {
11028
9.53k
            if (*cur == '>') {
11029
108
                ctxt->checkIndex = 0;
11030
108
                ctxt->endCheckState = 0;
11031
108
                return(1);
11032
108
            }
11033
9.42k
            if (IS_BLANK_CH(*cur)) {
11034
1.94k
                state = ' ';
11035
7.48k
            } else if (*cur != ']') {
11036
3.25k
                state = 0;
11037
3.25k
                start = cur;
11038
3.25k
                continue;
11039
3.25k
            }
11040
9.42k
        }
11041
10.9M
        else if (state == ' ') {
11042
23.0k
            if (*cur == '>') {
11043
5
                ctxt->checkIndex = 0;
11044
5
                ctxt->endCheckState = 0;
11045
5
                return(1);
11046
5
            }
11047
23.0k
            if (!IS_BLANK_CH(*cur)) {
11048
1.93k
                state = 0;
11049
1.93k
                start = cur;
11050
1.93k
                continue;
11051
1.93k
            }
11052
23.0k
        }
11053
10.9M
        else if (state != 0) {
11054
7.18M
            if (*cur == state) {
11055
64.5k
                state = 0;
11056
64.5k
                start = cur + 1;
11057
64.5k
            }
11058
7.18M
        }
11059
3.74M
        else if (*cur == '<') {
11060
117k
            if ((cur[1] == '!') &&
11061
51.5k
                (cur[2] == '-') &&
11062
21.6k
                (cur[3] == '-')) {
11063
21.5k
                state = '-';
11064
21.5k
                cur += 4;
11065
                /* Don't treat <!--> as comment */
11066
21.5k
                start = cur;
11067
21.5k
                continue;
11068
21.5k
            }
11069
117k
        }
11070
3.62M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11071
70.0k
            state = *cur;
11072
70.0k
        }
11073
11074
12.4M
        cur++;
11075
12.4M
    }
11076
11077
    /*
11078
     * Rescan the three last characters to detect "<!--" and "-->"
11079
     * split across chunks.
11080
     */
11081
557
    if ((state == 0) || (state == '-')) {
11082
257
        if (cur - start < 3)
11083
18
            cur = start;
11084
239
        else
11085
239
            cur -= 3;
11086
257
    }
11087
557
    index = cur - ctxt->input->cur;
11088
557
    if (index > LONG_MAX) {
11089
0
        ctxt->checkIndex = 0;
11090
0
        ctxt->endCheckState = 0;
11091
0
        return(1);
11092
0
    }
11093
557
    ctxt->checkIndex = index;
11094
557
    ctxt->endCheckState = state;
11095
557
    return(0);
11096
557
}
11097
11098
/**
11099
 * xmlParseTryOrFinish:
11100
 * @ctxt:  an XML parser context
11101
 * @terminate:  last chunk indicator
11102
 *
11103
 * Try to progress on parsing
11104
 *
11105
 * Returns zero if no parsing was possible
11106
 */
11107
static int
11108
231k
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11109
231k
    int ret = 0;
11110
231k
    size_t avail;
11111
231k
    xmlChar cur, next;
11112
11113
231k
    if (ctxt->input == NULL)
11114
0
        return(0);
11115
11116
231k
    if ((ctxt->input != NULL) &&
11117
231k
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11118
17.6k
        xmlParserShrink(ctxt);
11119
17.6k
    }
11120
11121
86.7M
    while (ctxt->disableSAX == 0) {
11122
86.6M
        avail = ctxt->input->end - ctxt->input->cur;
11123
86.6M
        if (avail < 1)
11124
133k
      goto done;
11125
86.5M
        switch (ctxt->instate) {
11126
917
            case XML_PARSER_EOF:
11127
          /*
11128
     * Document parsing is done !
11129
     */
11130
917
          goto done;
11131
210k
            case XML_PARSER_START:
11132
                /*
11133
                 * Very first chars read from the document flow.
11134
                 */
11135
210k
                if ((!terminate) && (avail < 4))
11136
0
                    goto done;
11137
11138
                /*
11139
                 * We need more bytes to detect EBCDIC code pages.
11140
                 * See xmlDetectEBCDIC.
11141
                 */
11142
210k
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
11143
6
                    (!terminate) && (avail < 200))
11144
0
                    goto done;
11145
11146
210k
                xmlDetectEncoding(ctxt);
11147
210k
                ctxt->instate = XML_PARSER_XML_DECL;
11148
210k
    break;
11149
11150
210k
            case XML_PARSER_XML_DECL:
11151
210k
    if ((!terminate) && (avail < 2))
11152
0
        goto done;
11153
210k
    cur = ctxt->input->cur[0];
11154
210k
    next = ctxt->input->cur[1];
11155
210k
          if ((cur == '<') && (next == '?')) {
11156
        /* PI or XML decl */
11157
175k
        if ((!terminate) &&
11158
18.7k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11159
134
      goto done;
11160
175k
        if ((ctxt->input->cur[2] == 'x') &&
11161
171k
      (ctxt->input->cur[3] == 'm') &&
11162
170k
      (ctxt->input->cur[4] == 'l') &&
11163
169k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11164
158k
      ret += 5;
11165
158k
      xmlParseXMLDecl(ctxt);
11166
158k
        } else {
11167
16.4k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11168
16.4k
                        if (ctxt->version == NULL) {
11169
0
                            xmlErrMemory(ctxt);
11170
0
                            break;
11171
0
                        }
11172
16.4k
        }
11173
175k
    } else {
11174
35.2k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11175
35.2k
        if (ctxt->version == NULL) {
11176
0
            xmlErrMemory(ctxt);
11177
0
      break;
11178
0
        }
11179
35.2k
    }
11180
210k
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
11181
0
                    ctxt->sax->setDocumentLocator(ctxt->userData,
11182
0
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
11183
0
                }
11184
210k
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11185
0
                    (!ctxt->disableSAX))
11186
0
                    ctxt->sax->startDocument(ctxt->userData);
11187
210k
                ctxt->instate = XML_PARSER_MISC;
11188
210k
    break;
11189
30.0M
            case XML_PARSER_START_TAG: {
11190
30.0M
          const xmlChar *name;
11191
30.0M
    const xmlChar *prefix = NULL;
11192
30.0M
    const xmlChar *URI = NULL;
11193
30.0M
                int line = ctxt->input->line;
11194
30.0M
    int nbNs = 0;
11195
11196
30.0M
    if ((!terminate) && (avail < 2))
11197
3
        goto done;
11198
30.0M
    cur = ctxt->input->cur[0];
11199
30.0M
          if (cur != '<') {
11200
375
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11201
375
                                   "Start tag expected, '<' not found");
11202
375
                    ctxt->instate = XML_PARSER_EOF;
11203
375
                    xmlFinishDocument(ctxt);
11204
375
        goto done;
11205
375
    }
11206
30.0M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11207
9.57k
                    goto done;
11208
30.0M
    if (ctxt->spaceNr == 0)
11209
0
        spacePush(ctxt, -1);
11210
30.0M
    else if (*ctxt->space == -2)
11211
7.31M
        spacePush(ctxt, -1);
11212
22.6M
    else
11213
22.6M
        spacePush(ctxt, *ctxt->space);
11214
30.0M
#ifdef LIBXML_SAX1_ENABLED
11215
30.0M
    if (ctxt->sax2)
11216
30.0M
#endif /* LIBXML_SAX1_ENABLED */
11217
30.0M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
11218
1.07k
#ifdef LIBXML_SAX1_ENABLED
11219
1.07k
    else
11220
1.07k
        name = xmlParseStartTag(ctxt);
11221
30.0M
#endif /* LIBXML_SAX1_ENABLED */
11222
30.0M
    if (name == NULL) {
11223
3.94k
        spacePop(ctxt);
11224
3.94k
                    ctxt->instate = XML_PARSER_EOF;
11225
3.94k
                    xmlFinishDocument(ctxt);
11226
3.94k
        goto done;
11227
3.94k
    }
11228
29.9M
#ifdef LIBXML_VALID_ENABLED
11229
    /*
11230
     * [ VC: Root Element Type ]
11231
     * The Name in the document type declaration must match
11232
     * the element type of the root element.
11233
     */
11234
29.9M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11235
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11236
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11237
29.9M
#endif /* LIBXML_VALID_ENABLED */
11238
11239
    /*
11240
     * Check for an Empty Element.
11241
     */
11242
29.9M
    if ((RAW == '/') && (NXT(1) == '>')) {
11243
8.15M
        SKIP(2);
11244
11245
8.15M
        if (ctxt->sax2) {
11246
8.15M
      if ((ctxt->sax != NULL) &&
11247
8.15M
          (ctxt->sax->endElementNs != NULL) &&
11248
8.15M
          (!ctxt->disableSAX))
11249
8.14M
          ctxt->sax->endElementNs(ctxt->userData, name,
11250
8.14M
                                  prefix, URI);
11251
8.15M
      if (nbNs > 0)
11252
135k
          xmlParserNsPop(ctxt, nbNs);
11253
8.15M
#ifdef LIBXML_SAX1_ENABLED
11254
18.4E
        } else {
11255
18.4E
      if ((ctxt->sax != NULL) &&
11256
0
          (ctxt->sax->endElement != NULL) &&
11257
0
          (!ctxt->disableSAX))
11258
0
          ctxt->sax->endElement(ctxt->userData, name);
11259
18.4E
#endif /* LIBXML_SAX1_ENABLED */
11260
18.4E
        }
11261
8.15M
        spacePop(ctxt);
11262
21.8M
    } else if (RAW == '>') {
11263
21.8M
        NEXT;
11264
21.8M
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11265
21.8M
    } else {
11266
47.4k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11267
47.4k
           "Couldn't find end of Start Tag %s\n",
11268
47.4k
           name);
11269
47.4k
        nodePop(ctxt);
11270
47.4k
        spacePop(ctxt);
11271
47.4k
                    if (nbNs > 0)
11272
4.72k
                        xmlParserNsPop(ctxt, nbNs);
11273
47.4k
    }
11274
11275
29.9M
                if (ctxt->nameNr == 0)
11276
9.57k
                    ctxt->instate = XML_PARSER_EPILOG;
11277
29.9M
                else
11278
29.9M
                    ctxt->instate = XML_PARSER_CONTENT;
11279
29.9M
                break;
11280
30.0M
      }
11281
48.0M
            case XML_PARSER_CONTENT: {
11282
48.0M
    cur = ctxt->input->cur[0];
11283
11284
48.0M
    if (cur == '<') {
11285
37.7M
                    if ((!terminate) && (avail < 2))
11286
484
                        goto done;
11287
37.7M
        next = ctxt->input->cur[1];
11288
11289
37.7M
                    if (next == '/') {
11290
7.83M
                        ctxt->instate = XML_PARSER_END_TAG;
11291
7.83M
                        break;
11292
29.8M
                    } else if (next == '?') {
11293
11.8k
                        if ((!terminate) &&
11294
7.34k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11295
87
                            goto done;
11296
11.7k
                        xmlParsePI(ctxt);
11297
11.7k
                        ctxt->instate = XML_PARSER_CONTENT;
11298
11.7k
                        break;
11299
29.8M
                    } else if (next == '!') {
11300
63.3k
                        if ((!terminate) && (avail < 3))
11301
7
                            goto done;
11302
63.3k
                        next = ctxt->input->cur[2];
11303
11304
63.3k
                        if (next == '-') {
11305
51.8k
                            if ((!terminate) && (avail < 4))
11306
7
                                goto done;
11307
51.7k
                            if (ctxt->input->cur[3] == '-') {
11308
51.7k
                                if ((!terminate) &&
11309
37.2k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11310
86
                                    goto done;
11311
51.6k
                                xmlParseComment(ctxt);
11312
51.6k
                                ctxt->instate = XML_PARSER_CONTENT;
11313
51.6k
                                break;
11314
51.7k
                            }
11315
51.7k
                        } else if (next == '[') {
11316
11.3k
                            if ((!terminate) && (avail < 9))
11317
5
                                goto done;
11318
11.3k
                            if ((ctxt->input->cur[2] == '[') &&
11319
11.3k
                                (ctxt->input->cur[3] == 'C') &&
11320
11.3k
                                (ctxt->input->cur[4] == 'D') &&
11321
11.3k
                                (ctxt->input->cur[5] == 'A') &&
11322
11.3k
                                (ctxt->input->cur[6] == 'T') &&
11323
11.3k
                                (ctxt->input->cur[7] == 'A') &&
11324
11.3k
                                (ctxt->input->cur[8] == '[')) {
11325
11.3k
                                if ((!terminate) &&
11326
4.92k
                                    (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11327
588
                                    goto done;
11328
10.7k
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11329
10.7k
                                xmlParseCDSect(ctxt);
11330
10.7k
                                ctxt->instate = XML_PARSER_CONTENT;
11331
10.7k
                                break;
11332
11.3k
                            }
11333
11.3k
                        }
11334
63.3k
                    }
11335
37.7M
    } else if (cur == '&') {
11336
333k
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11337
60
      goto done;
11338
333k
        xmlParseReference(ctxt);
11339
333k
                    break;
11340
9.98M
    } else {
11341
        /* TODO Avoid the extra copy, handle directly !!! */
11342
        /*
11343
         * Goal of the following test is:
11344
         *  - minimize calls to the SAX 'character' callback
11345
         *    when they are mergeable
11346
         *  - handle an problem for isBlank when we only parse
11347
         *    a sequence of blank chars and the next one is
11348
         *    not available to check against '<' presence.
11349
         *  - tries to homogenize the differences in SAX
11350
         *    callbacks between the push and pull versions
11351
         *    of the parser.
11352
         */
11353
9.98M
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11354
391k
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11355
1.94k
          goto done;
11356
391k
                    }
11357
9.98M
                    ctxt->checkIndex = 0;
11358
9.98M
        xmlParseCharDataInternal(ctxt, !terminate);
11359
9.98M
                    break;
11360
9.98M
    }
11361
11362
29.7M
                ctxt->instate = XML_PARSER_START_TAG;
11363
29.7M
    break;
11364
48.0M
      }
11365
7.83M
            case XML_PARSER_END_TAG:
11366
7.83M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11367
624
        goto done;
11368
7.83M
    if (ctxt->sax2) {
11369
7.83M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11370
7.83M
        nameNsPop(ctxt);
11371
7.83M
    }
11372
17
#ifdef LIBXML_SAX1_ENABLED
11373
17
      else
11374
17
        xmlParseEndTag1(ctxt, 0);
11375
7.83M
#endif /* LIBXML_SAX1_ENABLED */
11376
7.83M
    if (ctxt->nameNr == 0) {
11377
126k
        ctxt->instate = XML_PARSER_EPILOG;
11378
7.70M
    } else {
11379
7.70M
        ctxt->instate = XML_PARSER_CONTENT;
11380
7.70M
    }
11381
7.83M
    break;
11382
249k
            case XML_PARSER_MISC:
11383
252k
            case XML_PARSER_PROLOG:
11384
263k
            case XML_PARSER_EPILOG:
11385
263k
    SKIP_BLANKS;
11386
263k
                avail = ctxt->input->end - ctxt->input->cur;
11387
263k
    if (avail < 1)
11388
8.25k
        goto done;
11389
255k
    if (ctxt->input->cur[0] == '<') {
11390
254k
                    if ((!terminate) && (avail < 2))
11391
9
                        goto done;
11392
254k
                    next = ctxt->input->cur[1];
11393
254k
                    if (next == '?') {
11394
41.7k
                        if ((!terminate) &&
11395
19.0k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11396
64
                            goto done;
11397
41.6k
                        xmlParsePI(ctxt);
11398
41.6k
                        break;
11399
212k
                    } else if (next == '!') {
11400
10.4k
                        if ((!terminate) && (avail < 3))
11401
5
                            goto done;
11402
11403
10.4k
                        if (ctxt->input->cur[2] == '-') {
11404
3.51k
                            if ((!terminate) && (avail < 4))
11405
1
                                goto done;
11406
3.51k
                            if (ctxt->input->cur[3] == '-') {
11407
3.48k
                                if ((!terminate) &&
11408
1.70k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11409
36
                                    goto done;
11410
3.45k
                                xmlParseComment(ctxt);
11411
3.45k
                                break;
11412
3.48k
                            }
11413
6.95k
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11414
6.93k
                            if ((!terminate) && (avail < 9))
11415
1
                                goto done;
11416
6.93k
                            if ((ctxt->input->cur[2] == 'D') &&
11417
6.92k
                                (ctxt->input->cur[3] == 'O') &&
11418
6.91k
                                (ctxt->input->cur[4] == 'C') &&
11419
6.90k
                                (ctxt->input->cur[5] == 'T') &&
11420
6.89k
                                (ctxt->input->cur[6] == 'Y') &&
11421
6.89k
                                (ctxt->input->cur[7] == 'P') &&
11422
6.87k
                                (ctxt->input->cur[8] == 'E')) {
11423
6.87k
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11424
74
                                    goto done;
11425
6.79k
                                ctxt->inSubset = 1;
11426
6.79k
                                xmlParseDocTypeDecl(ctxt);
11427
6.79k
                                if (RAW == '[') {
11428
6.36k
                                    ctxt->instate = XML_PARSER_DTD;
11429
6.36k
                                } else {
11430
437
                                    if (RAW == '>')
11431
240
                                        NEXT;
11432
                                    /*
11433
                                     * Create and update the external subset.
11434
                                     */
11435
437
                                    ctxt->inSubset = 2;
11436
437
                                    if ((ctxt->sax != NULL) &&
11437
437
                                        (!ctxt->disableSAX) &&
11438
234
                                        (ctxt->sax->externalSubset != NULL))
11439
0
                                        ctxt->sax->externalSubset(
11440
0
                                                ctxt->userData,
11441
0
                                                ctxt->intSubName,
11442
0
                                                ctxt->extSubSystem,
11443
0
                                                ctxt->extSubURI);
11444
437
                                    ctxt->inSubset = 0;
11445
437
                                    xmlCleanSpecialAttr(ctxt);
11446
437
                                    ctxt->instate = XML_PARSER_PROLOG;
11447
437
                                }
11448
6.79k
                                break;
11449
6.87k
                            }
11450
6.93k
                        }
11451
10.4k
                    }
11452
254k
                }
11453
11454
203k
                if (ctxt->instate == XML_PARSER_EPILOG) {
11455
941
                    if (ctxt->errNo == XML_ERR_OK)
11456
170
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11457
941
        ctxt->instate = XML_PARSER_EOF;
11458
941
                    xmlFinishDocument(ctxt);
11459
202k
                } else {
11460
202k
        ctxt->instate = XML_PARSER_START_TAG;
11461
202k
    }
11462
203k
    break;
11463
6.83k
            case XML_PARSER_DTD: {
11464
6.83k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11465
557
                    goto done;
11466
6.27k
    xmlParseInternalSubset(ctxt);
11467
6.27k
    ctxt->inSubset = 2;
11468
6.27k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11469
2.20k
        (ctxt->sax->externalSubset != NULL))
11470
0
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11471
0
          ctxt->extSubSystem, ctxt->extSubURI);
11472
6.27k
    ctxt->inSubset = 0;
11473
6.27k
    xmlCleanSpecialAttr(ctxt);
11474
6.27k
    ctxt->instate = XML_PARSER_PROLOG;
11475
6.27k
                break;
11476
6.83k
      }
11477
0
            default:
11478
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11479
0
      "PP: internal error\n");
11480
0
    ctxt->instate = XML_PARSER_EOF;
11481
0
    break;
11482
86.5M
  }
11483
86.5M
    }
11484
231k
done:
11485
231k
    return(ret);
11486
231k
}
11487
11488
/**
11489
 * xmlParseChunk:
11490
 * @ctxt:  an XML parser context
11491
 * @chunk:  chunk of memory
11492
 * @size:  size of chunk in bytes
11493
 * @terminate:  last chunk indicator
11494
 *
11495
 * Parse a chunk of memory in push parser mode.
11496
 *
11497
 * Assumes that the parser context was initialized with
11498
 * xmlCreatePushParserCtxt.
11499
 *
11500
 * The last chunk, which will often be empty, must be marked with
11501
 * the @terminate flag. With the default SAX callbacks, the resulting
11502
 * document will be available in ctxt->myDoc. This pointer will not
11503
 * be freed when calling xmlFreeParserCtxt and must be freed by the
11504
 * caller. If the document isn't well-formed, it will still be returned
11505
 * in ctxt->myDoc.
11506
 *
11507
 * As an exception, xmlCtxtResetPush will free the document in
11508
 * ctxt->myDoc. So ctxt->myDoc should be set to NULL after extracting
11509
 * the document.
11510
 *
11511
 * Returns an xmlParserErrors code (0 on success).
11512
 */
11513
int
11514
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11515
231k
              int terminate) {
11516
231k
    size_t curBase;
11517
231k
    size_t maxLength;
11518
231k
    size_t pos;
11519
231k
    int end_in_lf = 0;
11520
231k
    int res;
11521
11522
231k
    if ((ctxt == NULL) || (size < 0))
11523
0
        return(XML_ERR_ARGUMENT);
11524
231k
    if ((chunk == NULL) && (size > 0))
11525
0
        return(XML_ERR_ARGUMENT);
11526
231k
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11527
0
        return(XML_ERR_ARGUMENT);
11528
231k
    if (ctxt->disableSAX != 0)
11529
0
        return(ctxt->errNo);
11530
11531
231k
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11532
231k
    if (ctxt->instate == XML_PARSER_START)
11533
210k
        xmlCtxtInitializeLate(ctxt);
11534
231k
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11535
31.4k
        (chunk[size - 1] == '\r')) {
11536
122
  end_in_lf = 1;
11537
122
  size--;
11538
122
    }
11539
11540
    /*
11541
     * Also push an empty chunk to make sure that the raw buffer
11542
     * will be flushed if there is an encoder.
11543
     */
11544
231k
    pos = ctxt->input->cur - ctxt->input->base;
11545
231k
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11546
231k
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11547
231k
    if (res < 0) {
11548
5
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11549
5
        xmlHaltParser(ctxt);
11550
5
        return(ctxt->errNo);
11551
5
    }
11552
11553
231k
    xmlParseTryOrFinish(ctxt, terminate);
11554
11555
231k
    curBase = ctxt->input->cur - ctxt->input->base;
11556
231k
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11557
231k
                XML_MAX_HUGE_LENGTH :
11558
231k
                XML_MAX_LOOKUP_LIMIT;
11559
231k
    if (curBase > maxLength) {
11560
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11561
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11562
0
        xmlHaltParser(ctxt);
11563
0
    }
11564
11565
231k
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11566
74.6k
        return(ctxt->errNo);
11567
11568
156k
    if (end_in_lf == 1) {
11569
104
  pos = ctxt->input->cur - ctxt->input->base;
11570
104
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11571
104
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11572
104
        if (res < 0) {
11573
0
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11574
0
            xmlHaltParser(ctxt);
11575
0
            return(ctxt->errNo);
11576
0
        }
11577
104
    }
11578
156k
    if (terminate) {
11579
  /*
11580
   * Check for termination
11581
   */
11582
135k
        if ((ctxt->instate != XML_PARSER_EOF) &&
11583
134k
            (ctxt->instate != XML_PARSER_EPILOG)) {
11584
8.01k
            if (ctxt->nameNr > 0) {
11585
7.85k
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11586
7.85k
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11587
7.85k
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11588
7.85k
                        "Premature end of data in tag %s line %d\n",
11589
7.85k
                        name, line, NULL);
11590
7.85k
            } else if (ctxt->instate == XML_PARSER_START) {
11591
0
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11592
158
            } else {
11593
158
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11594
158
                               "Start tag expected, '<' not found\n");
11595
158
            }
11596
127k
        } else {
11597
127k
            xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
11598
127k
        }
11599
135k
  if (ctxt->instate != XML_PARSER_EOF) {
11600
134k
            ctxt->instate = XML_PARSER_EOF;
11601
134k
            xmlFinishDocument(ctxt);
11602
134k
  }
11603
135k
    }
11604
156k
    if (ctxt->wellFormed == 0)
11605
8.01k
  return((xmlParserErrors) ctxt->errNo);
11606
148k
    else
11607
148k
        return(0);
11608
156k
}
11609
11610
/************************************************************************
11611
 *                  *
11612
 *    I/O front end functions to the parser     *
11613
 *                  *
11614
 ************************************************************************/
11615
11616
/**
11617
 * xmlCreatePushParserCtxt:
11618
 * @sax:  a SAX handler (optional)
11619
 * @user_data:  user data for SAX callbacks (optional)
11620
 * @chunk:  initial chunk (optional, deprecated)
11621
 * @size:  size of initial chunk in bytes
11622
 * @filename:  file name or URI (optional)
11623
 *
11624
 * Create a parser context for using the XML parser in push mode.
11625
 * See xmlParseChunk.
11626
 *
11627
 * Passing an initial chunk is useless and deprecated.
11628
 *
11629
 * The push parser doesn't support recovery mode or the
11630
 * XML_PARSE_NOBLANKS option.
11631
 *
11632
 * @filename is used as base URI to fetch external entities and for
11633
 * error reports.
11634
 *
11635
 * Returns the new parser context or NULL if a memory allocation
11636
 * failed.
11637
 */
11638
11639
xmlParserCtxtPtr
11640
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11641
210k
                        const char *chunk, int size, const char *filename) {
11642
210k
    xmlParserCtxtPtr ctxt;
11643
210k
    xmlParserInputPtr input;
11644
11645
210k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11646
210k
    if (ctxt == NULL)
11647
0
  return(NULL);
11648
11649
210k
    ctxt->options &= ~XML_PARSE_NODICT;
11650
210k
    ctxt->dictNames = 1;
11651
11652
210k
    input = xmlNewPushInput(filename, chunk, size);
11653
210k
    if (input == NULL) {
11654
0
  xmlFreeParserCtxt(ctxt);
11655
0
  return(NULL);
11656
0
    }
11657
210k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11658
0
        xmlFreeInputStream(input);
11659
0
        xmlFreeParserCtxt(ctxt);
11660
0
        return(NULL);
11661
0
    }
11662
11663
210k
    return(ctxt);
11664
210k
}
11665
#endif /* LIBXML_PUSH_ENABLED */
11666
11667
/**
11668
 * xmlStopParser:
11669
 * @ctxt:  an XML parser context
11670
 *
11671
 * Blocks further parser processing
11672
 */
11673
void
11674
0
xmlStopParser(xmlParserCtxtPtr ctxt) {
11675
0
    if (ctxt == NULL)
11676
0
        return;
11677
0
    xmlHaltParser(ctxt);
11678
0
    if (ctxt->errNo != XML_ERR_NO_MEMORY)
11679
0
        ctxt->errNo = XML_ERR_USER_STOP;
11680
0
}
11681
11682
/**
11683
 * xmlCreateIOParserCtxt:
11684
 * @sax:  a SAX handler (optional)
11685
 * @user_data:  user data for SAX callbacks (optional)
11686
 * @ioread:  an I/O read function
11687
 * @ioclose:  an I/O close function (optional)
11688
 * @ioctx:  an I/O handler
11689
 * @enc:  the charset encoding if known (deprecated)
11690
 *
11691
 * Create a parser context for using the XML parser with an existing
11692
 * I/O stream
11693
 *
11694
 * Returns the new parser context or NULL
11695
 */
11696
xmlParserCtxtPtr
11697
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11698
                      xmlInputReadCallback ioread,
11699
                      xmlInputCloseCallback ioclose,
11700
0
                      void *ioctx, xmlCharEncoding enc) {
11701
0
    xmlParserCtxtPtr ctxt;
11702
0
    xmlParserInputPtr input;
11703
0
    const char *encoding;
11704
11705
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11706
0
    if (ctxt == NULL)
11707
0
  return(NULL);
11708
11709
0
    encoding = xmlGetCharEncodingName(enc);
11710
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11711
0
                                  encoding, 0);
11712
0
    if (input == NULL) {
11713
0
  xmlFreeParserCtxt(ctxt);
11714
0
        return (NULL);
11715
0
    }
11716
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11717
0
        xmlFreeInputStream(input);
11718
0
        xmlFreeParserCtxt(ctxt);
11719
0
        return(NULL);
11720
0
    }
11721
11722
0
    return(ctxt);
11723
0
}
11724
11725
#ifdef LIBXML_VALID_ENABLED
11726
/************************************************************************
11727
 *                  *
11728
 *    Front ends when parsing a DTD       *
11729
 *                  *
11730
 ************************************************************************/
11731
11732
/**
11733
 * xmlCtxtParseDtd:
11734
 * @ctxt:  a parser context
11735
 * @input:  a parser input
11736
 * @publicId:  public ID of the DTD (optional)
11737
 * @systemId:  system ID of the DTD (optional)
11738
 *
11739
 * Parse a DTD.
11740
 *
11741
 * Option XML_PARSE_DTDLOAD should be enabled in the parser context
11742
 * to make external entities work.
11743
 *
11744
 * Availabe since 2.14.0.
11745
 *
11746
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11747
 * @input will be freed by the function in any case.
11748
 */
11749
xmlDtdPtr
11750
xmlCtxtParseDtd(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11751
0
                const xmlChar *publicId, const xmlChar *systemId) {
11752
0
    xmlDtdPtr ret = NULL;
11753
11754
0
    if ((ctxt == NULL) || (input == NULL)) {
11755
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
11756
0
        xmlFreeInputStream(input);
11757
0
        return(NULL);
11758
0
    }
11759
11760
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11761
0
        xmlFreeInputStream(input);
11762
0
        return(NULL);
11763
0
    }
11764
11765
0
    if (publicId == NULL)
11766
0
        publicId = BAD_CAST "none";
11767
0
    if (systemId == NULL)
11768
0
        systemId = BAD_CAST "none";
11769
11770
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11771
0
    if (ctxt->myDoc == NULL) {
11772
0
        xmlErrMemory(ctxt);
11773
0
        goto error;
11774
0
    }
11775
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11776
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11777
0
                                       publicId, systemId);
11778
0
    if (ctxt->myDoc->extSubset == NULL) {
11779
0
        xmlErrMemory(ctxt);
11780
0
        xmlFreeDoc(ctxt->myDoc);
11781
0
        goto error;
11782
0
    }
11783
11784
0
    xmlParseExternalSubset(ctxt, publicId, systemId);
11785
11786
0
    if (ctxt->wellFormed) {
11787
0
        ret = ctxt->myDoc->extSubset;
11788
0
        ctxt->myDoc->extSubset = NULL;
11789
0
        if (ret != NULL) {
11790
0
            xmlNodePtr tmp;
11791
11792
0
            ret->doc = NULL;
11793
0
            tmp = ret->children;
11794
0
            while (tmp != NULL) {
11795
0
                tmp->doc = NULL;
11796
0
                tmp = tmp->next;
11797
0
            }
11798
0
        }
11799
0
    } else {
11800
0
        ret = NULL;
11801
0
    }
11802
0
    xmlFreeDoc(ctxt->myDoc);
11803
0
    ctxt->myDoc = NULL;
11804
11805
0
error:
11806
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
11807
11808
0
    return(ret);
11809
0
}
11810
11811
/**
11812
 * xmlIOParseDTD:
11813
 * @sax:  the SAX handler block or NULL
11814
 * @input:  an Input Buffer
11815
 * @enc:  the charset encoding if known
11816
 *
11817
 * DEPRECATED: Use xmlCtxtParseDtd.
11818
 *
11819
 * Load and parse a DTD
11820
 *
11821
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11822
 * @input will be freed by the function in any case.
11823
 */
11824
11825
xmlDtdPtr
11826
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11827
0
        xmlCharEncoding enc) {
11828
0
    xmlDtdPtr ret = NULL;
11829
0
    xmlParserCtxtPtr ctxt;
11830
0
    xmlParserInputPtr pinput = NULL;
11831
11832
0
    if (input == NULL)
11833
0
  return(NULL);
11834
11835
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11836
0
    if (ctxt == NULL) {
11837
0
        xmlFreeParserInputBuffer(input);
11838
0
  return(NULL);
11839
0
    }
11840
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11841
11842
    /*
11843
     * generate a parser input from the I/O handler
11844
     */
11845
11846
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11847
0
    if (pinput == NULL) {
11848
0
        xmlFreeParserInputBuffer(input);
11849
0
  xmlFreeParserCtxt(ctxt);
11850
0
  return(NULL);
11851
0
    }
11852
11853
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11854
0
        xmlSwitchEncoding(ctxt, enc);
11855
0
    }
11856
11857
0
    ret = xmlCtxtParseDtd(ctxt, pinput, NULL, NULL);
11858
11859
0
    xmlFreeParserCtxt(ctxt);
11860
0
    return(ret);
11861
0
}
11862
11863
/**
11864
 * xmlSAXParseDTD:
11865
 * @sax:  the SAX handler block
11866
 * @ExternalID:  a NAME* containing the External ID of the DTD
11867
 * @SystemID:  a NAME* containing the URL to the DTD
11868
 *
11869
 * DEPRECATED: Use xmlCtxtParseDtd.
11870
 *
11871
 * Load and parse an external subset.
11872
 *
11873
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11874
 */
11875
11876
xmlDtdPtr
11877
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11878
0
                          const xmlChar *SystemID) {
11879
0
    xmlDtdPtr ret = NULL;
11880
0
    xmlParserCtxtPtr ctxt;
11881
0
    xmlParserInputPtr input = NULL;
11882
0
    xmlChar* systemIdCanonic;
11883
11884
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11885
11886
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11887
0
    if (ctxt == NULL) {
11888
0
  return(NULL);
11889
0
    }
11890
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11891
11892
    /*
11893
     * Canonicalise the system ID
11894
     */
11895
0
    systemIdCanonic = xmlCanonicPath(SystemID);
11896
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11897
0
  xmlFreeParserCtxt(ctxt);
11898
0
  return(NULL);
11899
0
    }
11900
11901
    /*
11902
     * Ask the Entity resolver to load the damn thing
11903
     */
11904
11905
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11906
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11907
0
                                   systemIdCanonic);
11908
0
    if (input == NULL) {
11909
0
  xmlFreeParserCtxt(ctxt);
11910
0
  if (systemIdCanonic != NULL)
11911
0
      xmlFree(systemIdCanonic);
11912
0
  return(NULL);
11913
0
    }
11914
11915
0
    if (input->filename == NULL)
11916
0
  input->filename = (char *) systemIdCanonic;
11917
0
    else
11918
0
  xmlFree(systemIdCanonic);
11919
11920
0
    ret = xmlCtxtParseDtd(ctxt, input, ExternalID, SystemID);
11921
11922
0
    xmlFreeParserCtxt(ctxt);
11923
0
    return(ret);
11924
0
}
11925
11926
11927
/**
11928
 * xmlParseDTD:
11929
 * @ExternalID:  a NAME* containing the External ID of the DTD
11930
 * @SystemID:  a NAME* containing the URL to the DTD
11931
 *
11932
 * Load and parse an external subset.
11933
 *
11934
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11935
 */
11936
11937
xmlDtdPtr
11938
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11939
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11940
0
}
11941
#endif /* LIBXML_VALID_ENABLED */
11942
11943
/************************************************************************
11944
 *                  *
11945
 *    Front ends when parsing an Entity     *
11946
 *                  *
11947
 ************************************************************************/
11948
11949
static xmlNodePtr
11950
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11951
0
                            int hasTextDecl, int buildTree) {
11952
0
    xmlNodePtr root = NULL;
11953
0
    xmlNodePtr list = NULL;
11954
0
    xmlChar *rootName = BAD_CAST "#root";
11955
0
    int result;
11956
11957
0
    if (buildTree) {
11958
0
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11959
0
        if (root == NULL) {
11960
0
            xmlErrMemory(ctxt);
11961
0
            goto error;
11962
0
        }
11963
0
    }
11964
11965
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
11966
0
        goto error;
11967
11968
0
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11969
0
    spacePush(ctxt, -1);
11970
11971
0
    if (buildTree)
11972
0
        nodePush(ctxt, root);
11973
11974
0
    if (hasTextDecl) {
11975
0
        xmlDetectEncoding(ctxt);
11976
11977
        /*
11978
         * Parse a possible text declaration first
11979
         */
11980
0
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11981
0
            (IS_BLANK_CH(NXT(5)))) {
11982
0
            xmlParseTextDecl(ctxt);
11983
            /*
11984
             * An XML-1.0 document can't reference an entity not XML-1.0
11985
             */
11986
0
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11987
0
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11988
0
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11989
0
                               "Version mismatch between document and "
11990
0
                               "entity\n");
11991
0
            }
11992
0
        }
11993
0
    }
11994
11995
0
    xmlParseContentInternal(ctxt);
11996
11997
0
    if (ctxt->input->cur < ctxt->input->end)
11998
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11999
12000
0
    if ((ctxt->wellFormed) ||
12001
0
        ((ctxt->recovery) && (!xmlCtxtIsCatastrophicError(ctxt)))) {
12002
0
        if (root != NULL) {
12003
0
            xmlNodePtr cur;
12004
12005
            /*
12006
             * Unlink newly created node list.
12007
             */
12008
0
            list = root->children;
12009
0
            root->children = NULL;
12010
0
            root->last = NULL;
12011
0
            for (cur = list; cur != NULL; cur = cur->next)
12012
0
                cur->parent = NULL;
12013
0
        }
12014
0
    }
12015
12016
    /*
12017
     * Read the rest of the stream in case of errors. We want
12018
     * to account for the whole entity size.
12019
     */
12020
0
    do {
12021
0
        ctxt->input->cur = ctxt->input->end;
12022
0
        xmlParserShrink(ctxt);
12023
0
        result = xmlParserGrow(ctxt);
12024
0
    } while (result > 0);
12025
12026
0
    if (buildTree)
12027
0
        nodePop(ctxt);
12028
12029
0
    namePop(ctxt);
12030
0
    spacePop(ctxt);
12031
12032
0
    xmlCtxtPopInput(ctxt);
12033
12034
0
error:
12035
0
    xmlFreeNode(root);
12036
12037
0
    return(list);
12038
0
}
12039
12040
static void
12041
0
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
12042
0
    xmlParserInputPtr input;
12043
0
    xmlNodePtr list;
12044
0
    unsigned long consumed;
12045
0
    int isExternal;
12046
0
    int buildTree;
12047
0
    int oldMinNsIndex;
12048
0
    int oldNodelen, oldNodemem;
12049
12050
0
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
12051
0
    buildTree = (ctxt->node != NULL);
12052
12053
    /*
12054
     * Recursion check
12055
     */
12056
0
    if (ent->flags & XML_ENT_EXPANDING) {
12057
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
12058
0
        xmlHaltParser(ctxt);
12059
0
        goto error;
12060
0
    }
12061
12062
    /*
12063
     * Load entity
12064
     */
12065
0
    input = xmlNewEntityInputStream(ctxt, ent);
12066
0
    if (input == NULL)
12067
0
        goto error;
12068
12069
    /*
12070
     * When building a tree, we need to limit the scope of namespace
12071
     * declarations, so that entities don't reference xmlNs structs
12072
     * from the parent of a reference.
12073
     */
12074
0
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
12075
0
    if (buildTree)
12076
0
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
12077
12078
0
    oldNodelen = ctxt->nodelen;
12079
0
    oldNodemem = ctxt->nodemem;
12080
0
    ctxt->nodelen = 0;
12081
0
    ctxt->nodemem = 0;
12082
12083
    /*
12084
     * Parse content
12085
     *
12086
     * This initiates a recursive call chain:
12087
     *
12088
     * - xmlCtxtParseContentInternal
12089
     * - xmlParseContentInternal
12090
     * - xmlParseReference
12091
     * - xmlCtxtParseEntity
12092
     *
12093
     * The nesting depth is limited by the maximum number of inputs,
12094
     * see xmlCtxtPushInput.
12095
     *
12096
     * It's possible to make this non-recursive (minNsIndex must be
12097
     * stored in the input struct) at the expense of code readability.
12098
     */
12099
12100
0
    ent->flags |= XML_ENT_EXPANDING;
12101
12102
0
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
12103
12104
0
    ent->flags &= ~XML_ENT_EXPANDING;
12105
12106
0
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
12107
0
    ctxt->nodelen = oldNodelen;
12108
0
    ctxt->nodemem = oldNodemem;
12109
12110
    /*
12111
     * Entity size accounting
12112
     */
12113
0
    consumed = input->consumed;
12114
0
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
12115
12116
0
    if ((ent->flags & XML_ENT_CHECKED) == 0)
12117
0
        xmlSaturatedAdd(&ent->expandedSize, consumed);
12118
12119
0
    if ((ent->flags & XML_ENT_PARSED) == 0) {
12120
0
        if (isExternal)
12121
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
12122
12123
0
        ent->children = list;
12124
12125
0
        while (list != NULL) {
12126
0
            list->parent = (xmlNodePtr) ent;
12127
12128
            /*
12129
             * Downstream code like the nginx xslt module can set
12130
             * ctxt->myDoc->extSubset to a separate DTD, so the entity
12131
             * might have a different or a NULL document.
12132
             */
12133
0
            if (list->doc != ent->doc)
12134
0
                xmlSetTreeDoc(list, ent->doc);
12135
12136
0
            if (list->next == NULL)
12137
0
                ent->last = list;
12138
0
            list = list->next;
12139
0
        }
12140
0
    } else {
12141
0
        xmlFreeNodeList(list);
12142
0
    }
12143
12144
0
    xmlFreeInputStream(input);
12145
12146
0
error:
12147
0
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
12148
0
}
12149
12150
/**
12151
 * xmlParseCtxtExternalEntity:
12152
 * @ctxt:  the existing parsing context
12153
 * @URL:  the URL for the entity to load
12154
 * @ID:  the System ID for the entity to load
12155
 * @listOut:  the return value for the set of parsed nodes
12156
 *
12157
 * Parse an external general entity within an existing parsing context
12158
 * An external general parsed entity is well-formed if it matches the
12159
 * production labeled extParsedEnt.
12160
 *
12161
 * [78] extParsedEnt ::= TextDecl? content
12162
 *
12163
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12164
 *    the parser error code otherwise
12165
 */
12166
12167
int
12168
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt, const xmlChar *URL,
12169
0
                           const xmlChar *ID, xmlNodePtr *listOut) {
12170
0
    xmlParserInputPtr input;
12171
0
    xmlNodePtr list;
12172
12173
0
    if (listOut != NULL)
12174
0
        *listOut = NULL;
12175
12176
0
    if (ctxt == NULL)
12177
0
        return(XML_ERR_ARGUMENT);
12178
12179
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12180
0
                            XML_RESOURCE_GENERAL_ENTITY);
12181
0
    if (input == NULL)
12182
0
        return(ctxt->errNo);
12183
12184
0
    xmlCtxtInitializeLate(ctxt);
12185
12186
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
12187
0
    if (listOut != NULL)
12188
0
        *listOut = list;
12189
0
    else
12190
0
        xmlFreeNodeList(list);
12191
12192
0
    xmlFreeInputStream(input);
12193
0
    return(ctxt->errNo);
12194
0
}
12195
12196
#ifdef LIBXML_SAX1_ENABLED
12197
/**
12198
 * xmlParseExternalEntity:
12199
 * @doc:  the document the chunk pertains to
12200
 * @sax:  the SAX handler block (possibly NULL)
12201
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12202
 * @depth:  Used for loop detection, use 0
12203
 * @URL:  the URL for the entity to load
12204
 * @ID:  the System ID for the entity to load
12205
 * @list:  the return value for the set of parsed nodes
12206
 *
12207
 * DEPRECATED: Use xmlParseCtxtExternalEntity.
12208
 *
12209
 * Parse an external general entity
12210
 * An external general parsed entity is well-formed if it matches the
12211
 * production labeled extParsedEnt.
12212
 *
12213
 * [78] extParsedEnt ::= TextDecl? content
12214
 *
12215
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12216
 *    the parser error code otherwise
12217
 */
12218
12219
int
12220
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12221
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
12222
0
    xmlParserCtxtPtr ctxt;
12223
0
    int ret;
12224
12225
0
    if (list != NULL)
12226
0
        *list = NULL;
12227
12228
0
    if (doc == NULL)
12229
0
        return(XML_ERR_ARGUMENT);
12230
12231
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12232
0
    if (ctxt == NULL)
12233
0
        return(XML_ERR_NO_MEMORY);
12234
12235
0
    ctxt->depth = depth;
12236
0
    ctxt->myDoc = doc;
12237
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
12238
12239
0
    xmlFreeParserCtxt(ctxt);
12240
0
    return(ret);
12241
0
}
12242
12243
/**
12244
 * xmlParseBalancedChunkMemory:
12245
 * @doc:  the document the chunk pertains to (must not be NULL)
12246
 * @sax:  the SAX handler block (possibly NULL)
12247
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12248
 * @depth:  Used for loop detection, use 0
12249
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12250
 * @lst:  the return value for the set of parsed nodes
12251
 *
12252
 * Parse a well-balanced chunk of an XML document
12253
 * called by the parser
12254
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12255
 * the content production in the XML grammar:
12256
 *
12257
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12258
 *
12259
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12260
 *    the parser error code otherwise
12261
 */
12262
12263
int
12264
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12265
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12266
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12267
0
                                                depth, string, lst, 0 );
12268
0
}
12269
#endif /* LIBXML_SAX1_ENABLED */
12270
12271
/**
12272
 * xmlCtxtParseContent:
12273
 * @ctxt:  parser context
12274
 * @input:  parser input
12275
 * @node:  target node or document
12276
 * @hasTextDecl:  whether to parse text declaration
12277
 *
12278
 * Parse a well-balanced chunk of XML matching the 'content' production.
12279
 *
12280
 * Namespaces in scope of @node and entities of @node's document are
12281
 * recognized. When validating, the DTD of @node's document is used.
12282
 *
12283
 * Always consumes @input even in error case.
12284
 *
12285
 * Available since 2.14.0.
12286
 *
12287
 * Returns a node list or NULL in case of error.
12288
 */
12289
xmlNodePtr
12290
xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
12291
0
                    xmlNodePtr node, int hasTextDecl) {
12292
0
    xmlDocPtr doc;
12293
0
    xmlNodePtr cur, list = NULL;
12294
0
    int nsnr = 0;
12295
0
    xmlDictPtr oldDict;
12296
0
    int oldOptions, oldDictNames, oldLoadSubset;
12297
12298
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12299
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12300
0
        goto exit;
12301
0
    }
12302
12303
0
    doc = node->doc;
12304
0
    if (doc == NULL) {
12305
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12306
0
        goto exit;
12307
0
    }
12308
12309
0
    switch (node->type) {
12310
0
        case XML_ELEMENT_NODE:
12311
0
        case XML_DOCUMENT_NODE:
12312
0
        case XML_HTML_DOCUMENT_NODE:
12313
0
            break;
12314
12315
0
        case XML_ATTRIBUTE_NODE:
12316
0
        case XML_TEXT_NODE:
12317
0
        case XML_CDATA_SECTION_NODE:
12318
0
        case XML_ENTITY_REF_NODE:
12319
0
        case XML_PI_NODE:
12320
0
        case XML_COMMENT_NODE:
12321
0
            for (cur = node->parent; cur != NULL; cur = node->parent) {
12322
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12323
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12324
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12325
0
                    node = cur;
12326
0
                    break;
12327
0
                }
12328
0
            }
12329
0
            break;
12330
12331
0
        default:
12332
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12333
0
            goto exit;
12334
0
    }
12335
12336
0
#ifdef LIBXML_HTML_ENABLED
12337
0
    if (ctxt->html)
12338
0
        htmlCtxtReset(ctxt);
12339
0
    else
12340
0
#endif
12341
0
        xmlCtxtReset(ctxt);
12342
12343
0
    oldDict = ctxt->dict;
12344
0
    oldOptions = ctxt->options;
12345
0
    oldDictNames = ctxt->dictNames;
12346
0
    oldLoadSubset = ctxt->loadsubset;
12347
12348
    /*
12349
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12350
     */
12351
0
    if (doc->dict != NULL) {
12352
0
        ctxt->dict = doc->dict;
12353
0
    } else {
12354
0
        ctxt->options |= XML_PARSE_NODICT;
12355
0
        ctxt->dictNames = 0;
12356
0
    }
12357
12358
    /*
12359
     * Disable IDs
12360
     */
12361
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12362
12363
0
    ctxt->myDoc = doc;
12364
12365
0
#ifdef LIBXML_HTML_ENABLED
12366
0
    if (ctxt->html) {
12367
        /*
12368
         * When parsing in context, it makes no sense to add implied
12369
         * elements like html/body/etc...
12370
         */
12371
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12372
12373
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12374
0
    } else
12375
0
#endif
12376
0
    {
12377
0
        xmlCtxtInitializeLate(ctxt);
12378
12379
        /*
12380
         * initialize the SAX2 namespaces stack
12381
         */
12382
0
        cur = node;
12383
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12384
0
            xmlNsPtr ns = cur->nsDef;
12385
0
            xmlHashedString hprefix, huri;
12386
12387
0
            while (ns != NULL) {
12388
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12389
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12390
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12391
0
                    nsnr++;
12392
0
                ns = ns->next;
12393
0
            }
12394
0
            cur = cur->parent;
12395
0
        }
12396
12397
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12398
12399
0
        if (nsnr > 0)
12400
0
            xmlParserNsPop(ctxt, nsnr);
12401
0
    }
12402
12403
0
    ctxt->dict = oldDict;
12404
0
    ctxt->options = oldOptions;
12405
0
    ctxt->dictNames = oldDictNames;
12406
0
    ctxt->loadsubset = oldLoadSubset;
12407
0
    ctxt->myDoc = NULL;
12408
0
    ctxt->node = NULL;
12409
12410
0
exit:
12411
0
    xmlFreeInputStream(input);
12412
0
    return(list);
12413
0
}
12414
12415
/**
12416
 * xmlParseInNodeContext:
12417
 * @node:  the context node
12418
 * @data:  the input string
12419
 * @datalen:  the input string length in bytes
12420
 * @options:  a combination of xmlParserOption
12421
 * @listOut:  the return value for the set of parsed nodes
12422
 *
12423
 * Parse a well-balanced chunk of an XML document
12424
 * within the context (DTD, namespaces, etc ...) of the given node.
12425
 *
12426
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12427
 * the content production in the XML grammar:
12428
 *
12429
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12430
 *
12431
 * This function assumes the encoding of @node's document which is
12432
 * typically not what you want. A better alternative is
12433
 * xmlCtxtParseContent.
12434
 *
12435
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12436
 * error code otherwise
12437
 */
12438
xmlParserErrors
12439
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12440
0
                      int options, xmlNodePtr *listOut) {
12441
0
    xmlParserCtxtPtr ctxt;
12442
0
    xmlParserInputPtr input;
12443
0
    xmlDocPtr doc;
12444
0
    xmlNodePtr list;
12445
0
    xmlParserErrors ret;
12446
12447
0
    if (listOut == NULL)
12448
0
        return(XML_ERR_INTERNAL_ERROR);
12449
0
    *listOut = NULL;
12450
12451
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12452
0
        return(XML_ERR_INTERNAL_ERROR);
12453
12454
0
    doc = node->doc;
12455
0
    if (doc == NULL)
12456
0
        return(XML_ERR_INTERNAL_ERROR);
12457
12458
0
#ifdef LIBXML_HTML_ENABLED
12459
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12460
0
        ctxt = htmlNewParserCtxt();
12461
0
    }
12462
0
    else
12463
0
#endif
12464
0
        ctxt = xmlNewParserCtxt();
12465
12466
0
    if (ctxt == NULL)
12467
0
        return(XML_ERR_NO_MEMORY);
12468
12469
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12470
0
                                      (const char *) doc->encoding,
12471
0
                                      XML_INPUT_BUF_STATIC);
12472
0
    if (input == NULL) {
12473
0
        xmlFreeParserCtxt(ctxt);
12474
0
        return(XML_ERR_NO_MEMORY);
12475
0
    }
12476
12477
0
    xmlCtxtUseOptions(ctxt, options);
12478
12479
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12480
12481
0
    if (list == NULL) {
12482
0
        ret = ctxt->errNo;
12483
0
        if (ret == XML_ERR_ARGUMENT)
12484
0
            ret = XML_ERR_INTERNAL_ERROR;
12485
0
    } else {
12486
0
        ret = XML_ERR_OK;
12487
0
        *listOut = list;
12488
0
    }
12489
12490
0
    xmlFreeParserCtxt(ctxt);
12491
12492
0
    return(ret);
12493
0
}
12494
12495
#ifdef LIBXML_SAX1_ENABLED
12496
/**
12497
 * xmlParseBalancedChunkMemoryRecover:
12498
 * @doc:  the document the chunk pertains to (must not be NULL)
12499
 * @sax:  the SAX handler block (possibly NULL)
12500
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12501
 * @depth:  Used for loop detection, use 0
12502
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12503
 * @listOut:  the return value for the set of parsed nodes
12504
 * @recover: return nodes even if the data is broken (use 0)
12505
 *
12506
 * Parse a well-balanced chunk of an XML document
12507
 *
12508
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12509
 * the content production in the XML grammar:
12510
 *
12511
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12512
 *
12513
 * Returns 0 if the chunk is well balanced, or the parser error code
12514
 * otherwise.
12515
 *
12516
 * In case recover is set to 1, the nodelist will not be empty even if
12517
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12518
 * some extent.
12519
 */
12520
int
12521
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12522
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *listOut,
12523
0
     int recover) {
12524
0
    xmlParserCtxtPtr ctxt;
12525
0
    xmlParserInputPtr input;
12526
0
    xmlNodePtr list;
12527
0
    int ret;
12528
12529
0
    if (listOut != NULL)
12530
0
        *listOut = NULL;
12531
12532
0
    if (string == NULL)
12533
0
        return(XML_ERR_ARGUMENT);
12534
12535
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12536
0
    if (ctxt == NULL)
12537
0
        return(XML_ERR_NO_MEMORY);
12538
12539
0
    xmlCtxtInitializeLate(ctxt);
12540
12541
0
    ctxt->depth = depth;
12542
0
    ctxt->myDoc = doc;
12543
0
    if (recover) {
12544
0
        ctxt->options |= XML_PARSE_RECOVER;
12545
0
        ctxt->recovery = 1;
12546
0
    }
12547
12548
0
    input = xmlNewStringInputStream(ctxt, string);
12549
0
    if (input == NULL) {
12550
0
        ret = ctxt->errNo;
12551
0
        goto error;
12552
0
    }
12553
12554
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12555
0
    if (listOut != NULL)
12556
0
        *listOut = list;
12557
0
    else
12558
0
        xmlFreeNodeList(list);
12559
12560
0
    if (!ctxt->wellFormed)
12561
0
        ret = ctxt->errNo;
12562
0
    else
12563
0
        ret = XML_ERR_OK;
12564
12565
0
error:
12566
0
    xmlFreeInputStream(input);
12567
0
    xmlFreeParserCtxt(ctxt);
12568
0
    return(ret);
12569
0
}
12570
12571
/**
12572
 * xmlSAXParseEntity:
12573
 * @sax:  the SAX handler block
12574
 * @filename:  the filename
12575
 *
12576
 * DEPRECATED: Don't use.
12577
 *
12578
 * parse an XML external entity out of context and build a tree.
12579
 * It uses the given SAX function block to handle the parsing callbacks.
12580
 * If sax is NULL, fallback to the default DOM tree building routines.
12581
 *
12582
 * [78] extParsedEnt ::= TextDecl? content
12583
 *
12584
 * This corresponds to a "Well Balanced" chunk
12585
 *
12586
 * Returns the resulting document tree
12587
 */
12588
12589
xmlDocPtr
12590
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12591
0
    xmlDocPtr ret;
12592
0
    xmlParserCtxtPtr ctxt;
12593
12594
0
    ctxt = xmlCreateFileParserCtxt(filename);
12595
0
    if (ctxt == NULL) {
12596
0
  return(NULL);
12597
0
    }
12598
0
    if (sax != NULL) {
12599
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12600
0
            *ctxt->sax = *sax;
12601
0
        } else {
12602
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12603
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12604
0
        }
12605
0
        ctxt->userData = NULL;
12606
0
    }
12607
12608
0
    xmlParseExtParsedEnt(ctxt);
12609
12610
0
    if (ctxt->wellFormed) {
12611
0
  ret = ctxt->myDoc;
12612
0
    } else {
12613
0
        ret = NULL;
12614
0
        xmlFreeDoc(ctxt->myDoc);
12615
0
    }
12616
12617
0
    xmlFreeParserCtxt(ctxt);
12618
12619
0
    return(ret);
12620
0
}
12621
12622
/**
12623
 * xmlParseEntity:
12624
 * @filename:  the filename
12625
 *
12626
 * parse an XML external entity out of context and build a tree.
12627
 *
12628
 * [78] extParsedEnt ::= TextDecl? content
12629
 *
12630
 * This corresponds to a "Well Balanced" chunk
12631
 *
12632
 * Returns the resulting document tree
12633
 */
12634
12635
xmlDocPtr
12636
0
xmlParseEntity(const char *filename) {
12637
0
    return(xmlSAXParseEntity(NULL, filename));
12638
0
}
12639
#endif /* LIBXML_SAX1_ENABLED */
12640
12641
/**
12642
 * xmlCreateEntityParserCtxt:
12643
 * @URL:  the entity URL
12644
 * @ID:  the entity PUBLIC ID
12645
 * @base:  a possible base for the target URI
12646
 *
12647
 * DEPRECATED: Don't use.
12648
 *
12649
 * Create a parser context for an external entity
12650
 * Automatic support for ZLIB/Compress compressed document is provided
12651
 * by default if found at compile-time.
12652
 *
12653
 * Returns the new parser context or NULL
12654
 */
12655
xmlParserCtxtPtr
12656
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12657
0
                    const xmlChar *base) {
12658
0
    xmlParserCtxtPtr ctxt;
12659
0
    xmlParserInputPtr input;
12660
0
    xmlChar *uri = NULL;
12661
12662
0
    ctxt = xmlNewParserCtxt();
12663
0
    if (ctxt == NULL)
12664
0
  return(NULL);
12665
12666
0
    if (base != NULL) {
12667
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12668
0
            goto error;
12669
0
        if (uri != NULL)
12670
0
            URL = uri;
12671
0
    }
12672
12673
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12674
0
                            XML_RESOURCE_UNKNOWN);
12675
0
    if (input == NULL)
12676
0
        goto error;
12677
12678
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12679
0
        xmlFreeInputStream(input);
12680
0
        goto error;
12681
0
    }
12682
12683
0
    xmlFree(uri);
12684
0
    return(ctxt);
12685
12686
0
error:
12687
0
    xmlFree(uri);
12688
0
    xmlFreeParserCtxt(ctxt);
12689
0
    return(NULL);
12690
0
}
12691
12692
/************************************************************************
12693
 *                  *
12694
 *    Front ends when parsing from a file     *
12695
 *                  *
12696
 ************************************************************************/
12697
12698
/**
12699
 * xmlCreateURLParserCtxt:
12700
 * @filename:  the filename or URL
12701
 * @options:  a combination of xmlParserOption
12702
 *
12703
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12704
 *
12705
 * Create a parser context for a file or URL content.
12706
 * Automatic support for ZLIB/Compress compressed document is provided
12707
 * by default if found at compile-time and for file accesses
12708
 *
12709
 * Returns the new parser context or NULL
12710
 */
12711
xmlParserCtxtPtr
12712
xmlCreateURLParserCtxt(const char *filename, int options)
12713
0
{
12714
0
    xmlParserCtxtPtr ctxt;
12715
0
    xmlParserInputPtr input;
12716
12717
0
    ctxt = xmlNewParserCtxt();
12718
0
    if (ctxt == NULL)
12719
0
  return(NULL);
12720
12721
0
    options |= XML_PARSE_UNZIP;
12722
12723
0
    xmlCtxtUseOptions(ctxt, options);
12724
0
    ctxt->linenumbers = 1;
12725
12726
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12727
0
    if (input == NULL) {
12728
0
  xmlFreeParserCtxt(ctxt);
12729
0
  return(NULL);
12730
0
    }
12731
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12732
0
        xmlFreeInputStream(input);
12733
0
        xmlFreeParserCtxt(ctxt);
12734
0
        return(NULL);
12735
0
    }
12736
12737
0
    return(ctxt);
12738
0
}
12739
12740
/**
12741
 * xmlCreateFileParserCtxt:
12742
 * @filename:  the filename
12743
 *
12744
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12745
 *
12746
 * Create a parser context for a file content.
12747
 * Automatic support for ZLIB/Compress compressed document is provided
12748
 * by default if found at compile-time.
12749
 *
12750
 * Returns the new parser context or NULL
12751
 */
12752
xmlParserCtxtPtr
12753
xmlCreateFileParserCtxt(const char *filename)
12754
0
{
12755
0
    return(xmlCreateURLParserCtxt(filename, 0));
12756
0
}
12757
12758
#ifdef LIBXML_SAX1_ENABLED
12759
/**
12760
 * xmlSAXParseFileWithData:
12761
 * @sax:  the SAX handler block
12762
 * @filename:  the filename
12763
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12764
 *             documents
12765
 * @data:  the userdata
12766
 *
12767
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12768
 *
12769
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12770
 * compressed document is provided by default if found at compile-time.
12771
 * It use the given SAX function block to handle the parsing callback.
12772
 * If sax is NULL, fallback to the default DOM tree building routines.
12773
 *
12774
 * User data (void *) is stored within the parser context in the
12775
 * context's _private member, so it is available nearly everywhere in libxml
12776
 *
12777
 * Returns the resulting document tree
12778
 */
12779
12780
xmlDocPtr
12781
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12782
0
                        int recovery, void *data) {
12783
0
    xmlDocPtr ret = NULL;
12784
0
    xmlParserCtxtPtr ctxt;
12785
0
    xmlParserInputPtr input;
12786
12787
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12788
0
    if (ctxt == NULL)
12789
0
  return(NULL);
12790
12791
0
    if (data != NULL)
12792
0
  ctxt->_private = data;
12793
12794
0
    if (recovery) {
12795
0
        ctxt->options |= XML_PARSE_RECOVER;
12796
0
        ctxt->recovery = 1;
12797
0
    }
12798
12799
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12800
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12801
0
    else
12802
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12803
12804
0
    if (input != NULL)
12805
0
        ret = xmlCtxtParseDocument(ctxt, input);
12806
12807
0
    xmlFreeParserCtxt(ctxt);
12808
0
    return(ret);
12809
0
}
12810
12811
/**
12812
 * xmlSAXParseFile:
12813
 * @sax:  the SAX handler block
12814
 * @filename:  the filename
12815
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12816
 *             documents
12817
 *
12818
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12819
 *
12820
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12821
 * compressed document is provided by default if found at compile-time.
12822
 * It use the given SAX function block to handle the parsing callback.
12823
 * If sax is NULL, fallback to the default DOM tree building routines.
12824
 *
12825
 * Returns the resulting document tree
12826
 */
12827
12828
xmlDocPtr
12829
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12830
0
                          int recovery) {
12831
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12832
0
}
12833
12834
/**
12835
 * xmlRecoverDoc:
12836
 * @cur:  a pointer to an array of xmlChar
12837
 *
12838
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
12839
 *
12840
 * parse an XML in-memory document and build a tree.
12841
 * In case the document is not well-formed, an attempt to build a
12842
 * tree is tried anyway
12843
 *
12844
 * Returns the resulting document tree or NULL in case of failure
12845
 */
12846
12847
xmlDocPtr
12848
0
xmlRecoverDoc(const xmlChar *cur) {
12849
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12850
0
}
12851
12852
/**
12853
 * xmlParseFile:
12854
 * @filename:  the filename
12855
 *
12856
 * DEPRECATED: Use xmlReadFile.
12857
 *
12858
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12859
 * compressed document is provided by default if found at compile-time.
12860
 *
12861
 * Returns the resulting document tree if the file was wellformed,
12862
 * NULL otherwise.
12863
 */
12864
12865
xmlDocPtr
12866
0
xmlParseFile(const char *filename) {
12867
0
    return(xmlSAXParseFile(NULL, filename, 0));
12868
0
}
12869
12870
/**
12871
 * xmlRecoverFile:
12872
 * @filename:  the filename
12873
 *
12874
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
12875
 *
12876
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12877
 * compressed document is provided by default if found at compile-time.
12878
 * In the case the document is not Well Formed, it attempts to build
12879
 * a tree anyway
12880
 *
12881
 * Returns the resulting document tree or NULL in case of failure
12882
 */
12883
12884
xmlDocPtr
12885
0
xmlRecoverFile(const char *filename) {
12886
0
    return(xmlSAXParseFile(NULL, filename, 1));
12887
0
}
12888
12889
12890
/**
12891
 * xmlSetupParserForBuffer:
12892
 * @ctxt:  an XML parser context
12893
 * @buffer:  a xmlChar * buffer
12894
 * @filename:  a file name
12895
 *
12896
 * DEPRECATED: Don't use.
12897
 *
12898
 * Setup the parser context to parse a new buffer; Clears any prior
12899
 * contents from the parser context. The buffer parameter must not be
12900
 * NULL, but the filename parameter can be
12901
 */
12902
void
12903
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12904
                             const char* filename)
12905
0
{
12906
0
    xmlParserInputPtr input;
12907
12908
0
    if ((ctxt == NULL) || (buffer == NULL))
12909
0
        return;
12910
12911
0
    xmlClearParserCtxt(ctxt);
12912
12913
0
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12914
0
                                      NULL, 0);
12915
0
    if (input == NULL)
12916
0
        return;
12917
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
12918
0
        xmlFreeInputStream(input);
12919
0
}
12920
12921
/**
12922
 * xmlSAXUserParseFile:
12923
 * @sax:  a SAX handler
12924
 * @user_data:  The user data returned on SAX callbacks
12925
 * @filename:  a file name
12926
 *
12927
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12928
 *
12929
 * parse an XML file and call the given SAX handler routines.
12930
 * Automatic support for ZLIB/Compress compressed document is provided
12931
 *
12932
 * Returns 0 in case of success or a error number otherwise
12933
 */
12934
int
12935
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12936
0
                    const char *filename) {
12937
0
    int ret = 0;
12938
0
    xmlParserCtxtPtr ctxt;
12939
12940
0
    ctxt = xmlCreateFileParserCtxt(filename);
12941
0
    if (ctxt == NULL) return -1;
12942
0
    if (sax != NULL) {
12943
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12944
0
            *ctxt->sax = *sax;
12945
0
        } else {
12946
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12947
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12948
0
        }
12949
0
  ctxt->userData = user_data;
12950
0
    }
12951
12952
0
    xmlParseDocument(ctxt);
12953
12954
0
    if (ctxt->wellFormed)
12955
0
  ret = 0;
12956
0
    else {
12957
0
        if (ctxt->errNo != 0)
12958
0
      ret = ctxt->errNo;
12959
0
  else
12960
0
      ret = -1;
12961
0
    }
12962
0
    if (ctxt->myDoc != NULL) {
12963
0
        xmlFreeDoc(ctxt->myDoc);
12964
0
  ctxt->myDoc = NULL;
12965
0
    }
12966
0
    xmlFreeParserCtxt(ctxt);
12967
12968
0
    return ret;
12969
0
}
12970
#endif /* LIBXML_SAX1_ENABLED */
12971
12972
/************************************************************************
12973
 *                  *
12974
 *    Front ends when parsing from memory     *
12975
 *                  *
12976
 ************************************************************************/
12977
12978
/**
12979
 * xmlCreateMemoryParserCtxt:
12980
 * @buffer:  a pointer to a char array
12981
 * @size:  the size of the array
12982
 *
12983
 * Create a parser context for an XML in-memory document. The input buffer
12984
 * must not contain a terminating null byte.
12985
 *
12986
 * Returns the new parser context or NULL
12987
 */
12988
xmlParserCtxtPtr
12989
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12990
0
    xmlParserCtxtPtr ctxt;
12991
0
    xmlParserInputPtr input;
12992
12993
0
    if (size < 0)
12994
0
  return(NULL);
12995
12996
0
    ctxt = xmlNewParserCtxt();
12997
0
    if (ctxt == NULL)
12998
0
  return(NULL);
12999
13000
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
13001
0
    if (input == NULL) {
13002
0
  xmlFreeParserCtxt(ctxt);
13003
0
  return(NULL);
13004
0
    }
13005
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13006
0
        xmlFreeInputStream(input);
13007
0
        xmlFreeParserCtxt(ctxt);
13008
0
        return(NULL);
13009
0
    }
13010
13011
0
    return(ctxt);
13012
0
}
13013
13014
#ifdef LIBXML_SAX1_ENABLED
13015
/**
13016
 * xmlSAXParseMemoryWithData:
13017
 * @sax:  the SAX handler block
13018
 * @buffer:  a pointer to a char array
13019
 * @size:  the size of the array
13020
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13021
 *             documents
13022
 * @data:  the userdata
13023
 *
13024
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13025
 *
13026
 * parse an XML in-memory block and use the given SAX function block
13027
 * to handle the parsing callback. If sax is NULL, fallback to the default
13028
 * DOM tree building routines.
13029
 *
13030
 * User data (void *) is stored within the parser context in the
13031
 * context's _private member, so it is available nearly everywhere in libxml
13032
 *
13033
 * Returns the resulting document tree
13034
 */
13035
13036
xmlDocPtr
13037
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13038
0
                          int size, int recovery, void *data) {
13039
0
    xmlDocPtr ret = NULL;
13040
0
    xmlParserCtxtPtr ctxt;
13041
0
    xmlParserInputPtr input;
13042
13043
0
    if (size < 0)
13044
0
        return(NULL);
13045
13046
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
13047
0
    if (ctxt == NULL)
13048
0
        return(NULL);
13049
13050
0
    if (data != NULL)
13051
0
  ctxt->_private=data;
13052
13053
0
    if (recovery) {
13054
0
        ctxt->options |= XML_PARSE_RECOVER;
13055
0
        ctxt->recovery = 1;
13056
0
    }
13057
13058
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
13059
0
                                      XML_INPUT_BUF_STATIC);
13060
13061
0
    if (input != NULL)
13062
0
        ret = xmlCtxtParseDocument(ctxt, input);
13063
13064
0
    xmlFreeParserCtxt(ctxt);
13065
0
    return(ret);
13066
0
}
13067
13068
/**
13069
 * xmlSAXParseMemory:
13070
 * @sax:  the SAX handler block
13071
 * @buffer:  a pointer to a char array
13072
 * @size:  the size of the array
13073
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13074
 *             documents
13075
 *
13076
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13077
 *
13078
 * parse an XML in-memory block and use the given SAX function block
13079
 * to handle the parsing callback. If sax is NULL, fallback to the default
13080
 * DOM tree building routines.
13081
 *
13082
 * Returns the resulting document tree
13083
 */
13084
xmlDocPtr
13085
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13086
0
            int size, int recovery) {
13087
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13088
0
}
13089
13090
/**
13091
 * xmlParseMemory:
13092
 * @buffer:  a pointer to a char array
13093
 * @size:  the size of the array
13094
 *
13095
 * DEPRECATED: Use xmlReadMemory.
13096
 *
13097
 * parse an XML in-memory block and build a tree.
13098
 *
13099
 * Returns the resulting document tree
13100
 */
13101
13102
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13103
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
13104
0
}
13105
13106
/**
13107
 * xmlRecoverMemory:
13108
 * @buffer:  a pointer to a char array
13109
 * @size:  the size of the array
13110
 *
13111
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13112
 *
13113
 * parse an XML in-memory block and build a tree.
13114
 * In the case the document is not Well Formed, an attempt to
13115
 * build a tree is tried anyway
13116
 *
13117
 * Returns the resulting document tree or NULL in case of error
13118
 */
13119
13120
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13121
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
13122
0
}
13123
13124
/**
13125
 * xmlSAXUserParseMemory:
13126
 * @sax:  a SAX handler
13127
 * @user_data:  The user data returned on SAX callbacks
13128
 * @buffer:  an in-memory XML document input
13129
 * @size:  the length of the XML document in bytes
13130
 *
13131
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13132
 *
13133
 * parse an XML in-memory buffer and call the given SAX handler routines.
13134
 *
13135
 * Returns 0 in case of success or a error number otherwise
13136
 */
13137
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
        const char *buffer, int size) {
    xmlParserCtxtPtr parser;
    int status;

    parser = xmlCreateMemoryParserCtxt(buffer, size);
    if (parser == NULL) {
        return -1;
    }

    if (sax != NULL) {
        if (sax->initialized != XML_SAX2_MAGIC) {
            /*
             * Not flagged as SAX2: copy only the xmlSAXHandlerV1-sized
             * prefix and leave the remaining handler fields zeroed.
             */
            memset(parser->sax, 0, sizeof(*parser->sax));
            memcpy(parser->sax, sax, sizeof(xmlSAXHandlerV1));
        } else {
            *parser->sax = *sax;
        }
        parser->userData = user_data;
    }

    xmlParseDocument(parser);

    /* 0 when well-formed, else the recorded error code, or -1 if none. */
    if (parser->wellFormed) {
        status = 0;
    } else if (parser->errNo != 0) {
        status = parser->errNo;
    } else {
        status = -1;
    }

    /* Any tree that was built is discarded, not returned. */
    if (parser->myDoc != NULL) {
        xmlFreeDoc(parser->myDoc);
        parser->myDoc = NULL;
    }
    xmlFreeParserCtxt(parser);

    return status;
}
13172
#endif /* LIBXML_SAX1_ENABLED */
13173
13174
/**
13175
 * xmlCreateDocParserCtxt:
13176
 * @str:  a pointer to an array of xmlChar
13177
 *
13178
 * Creates a parser context for an XML in-memory document.
13179
 *
13180
 * Returns the new parser context or NULL
13181
 */
13182
xmlParserCtxtPtr
13183
0
xmlCreateDocParserCtxt(const xmlChar *str) {
13184
0
    xmlParserCtxtPtr ctxt;
13185
0
    xmlParserInputPtr input;
13186
13187
0
    ctxt = xmlNewParserCtxt();
13188
0
    if (ctxt == NULL)
13189
0
  return(NULL);
13190
13191
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
13192
0
    if (input == NULL) {
13193
0
  xmlFreeParserCtxt(ctxt);
13194
0
  return(NULL);
13195
0
    }
13196
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13197
0
        xmlFreeInputStream(input);
13198
0
        xmlFreeParserCtxt(ctxt);
13199
0
        return(NULL);
13200
0
    }
13201
13202
0
    return(ctxt);
13203
0
}
13204
13205
#ifdef LIBXML_SAX1_ENABLED
13206
/**
13207
 * xmlSAXParseDoc:
13208
 * @sax:  the SAX handler block
13209
 * @cur:  a pointer to an array of xmlChar
13210
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13211
 *             documents
13212
 *
13213
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13214
 *
13215
 * parse an XML in-memory document and build a tree.
13216
 * It use the given SAX function block to handle the parsing callback.
13217
 * If sax is NULL, fallback to the default DOM tree building routines.
13218
 *
13219
 * Returns the resulting document tree
13220
 */
13221
13222
xmlDocPtr
13223
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13224
0
    xmlDocPtr ret;
13225
0
    xmlParserCtxtPtr ctxt;
13226
0
    xmlSAXHandlerPtr oldsax = NULL;
13227
13228
0
    if (cur == NULL) return(NULL);
13229
13230
13231
0
    ctxt = xmlCreateDocParserCtxt(cur);
13232
0
    if (ctxt == NULL) return(NULL);
13233
0
    if (sax != NULL) {
13234
0
        oldsax = ctxt->sax;
13235
0
        ctxt->sax = sax;
13236
0
        ctxt->userData = NULL;
13237
0
    }
13238
13239
0
    xmlParseDocument(ctxt);
13240
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13241
0
    else {
13242
0
       ret = NULL;
13243
0
       xmlFreeDoc(ctxt->myDoc);
13244
0
       ctxt->myDoc = NULL;
13245
0
    }
13246
0
    if (sax != NULL)
13247
0
  ctxt->sax = oldsax;
13248
0
    xmlFreeParserCtxt(ctxt);
13249
13250
0
    return(ret);
13251
0
}
13252
13253
/**
13254
 * xmlParseDoc:
13255
 * @cur:  a pointer to an array of xmlChar
13256
 *
13257
 * DEPRECATED: Use xmlReadDoc.
13258
 *
13259
 * parse an XML in-memory document and build a tree.
13260
 *
13261
 * Returns the resulting document tree
13262
 */
13263
13264
xmlDocPtr
13265
0
xmlParseDoc(const xmlChar *cur) {
13266
0
    return(xmlSAXParseDoc(NULL, cur, 0));
13267
0
}
13268
#endif /* LIBXML_SAX1_ENABLED */
13269
13270
/************************************************************************
13271
 *                  *
13272
 *  New set (2.6.0) of simpler and more flexible APIs   *
13273
 *                  *
13274
 ************************************************************************/
13275
13276
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is used; if it is NULL, every non-NULL string is freed.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * a single statement and can safely be used as the branch of an
 * if/else. Invoke it followed by a semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
13287
13288
/**
13289
 * xmlCtxtReset:
13290
 * @ctxt: an XML parser context
13291
 *
13292
 * Reset a parser context
13293
 */
13294
void
13295
xmlCtxtReset(xmlParserCtxtPtr ctxt)
13296
5.80k
{
13297
5.80k
    xmlParserInputPtr input;
13298
5.80k
    xmlDictPtr dict;
13299
13300
5.80k
    if (ctxt == NULL)
13301
0
        return;
13302
13303
5.80k
    dict = ctxt->dict;
13304
13305
5.80k
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
13306
0
        xmlFreeInputStream(input);
13307
0
    }
13308
5.80k
    ctxt->inputNr = 0;
13309
5.80k
    ctxt->input = NULL;
13310
13311
5.80k
    ctxt->spaceNr = 0;
13312
5.80k
    if (ctxt->spaceTab != NULL) {
13313
5.80k
  ctxt->spaceTab[0] = -1;
13314
5.80k
  ctxt->space = &ctxt->spaceTab[0];
13315
5.80k
    } else {
13316
0
        ctxt->space = NULL;
13317
0
    }
13318
13319
13320
5.80k
    ctxt->nodeNr = 0;
13321
5.80k
    ctxt->node = NULL;
13322
13323
5.80k
    ctxt->nameNr = 0;
13324
5.80k
    ctxt->name = NULL;
13325
13326
5.80k
    ctxt->nsNr = 0;
13327
5.80k
    xmlParserNsReset(ctxt->nsdb);
13328
13329
5.80k
    DICT_FREE(ctxt->version);
13330
5.80k
    ctxt->version = NULL;
13331
5.80k
    DICT_FREE(ctxt->encoding);
13332
5.80k
    ctxt->encoding = NULL;
13333
5.80k
    DICT_FREE(ctxt->extSubURI);
13334
5.80k
    ctxt->extSubURI = NULL;
13335
5.80k
    DICT_FREE(ctxt->extSubSystem);
13336
5.80k
    ctxt->extSubSystem = NULL;
13337
13338
5.80k
    if (ctxt->directory != NULL) {
13339
0
        xmlFree(ctxt->directory);
13340
0
        ctxt->directory = NULL;
13341
0
    }
13342
13343
5.80k
    if (ctxt->myDoc != NULL)
13344
0
        xmlFreeDoc(ctxt->myDoc);
13345
5.80k
    ctxt->myDoc = NULL;
13346
13347
5.80k
    ctxt->standalone = -1;
13348
5.80k
    ctxt->hasExternalSubset = 0;
13349
5.80k
    ctxt->hasPErefs = 0;
13350
5.80k
    ctxt->html = 0;
13351
5.80k
    ctxt->instate = XML_PARSER_START;
13352
13353
5.80k
    ctxt->wellFormed = 1;
13354
5.80k
    ctxt->nsWellFormed = 1;
13355
5.80k
    ctxt->disableSAX = 0;
13356
5.80k
    ctxt->valid = 1;
13357
5.80k
    ctxt->record_info = 0;
13358
5.80k
    ctxt->checkIndex = 0;
13359
5.80k
    ctxt->endCheckState = 0;
13360
5.80k
    ctxt->inSubset = 0;
13361
5.80k
    ctxt->errNo = XML_ERR_OK;
13362
5.80k
    ctxt->depth = 0;
13363
5.80k
    ctxt->catalogs = NULL;
13364
5.80k
    ctxt->sizeentities = 0;
13365
5.80k
    ctxt->sizeentcopy = 0;
13366
5.80k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13367
13368
5.80k
    if (ctxt->attsDefault != NULL) {
13369
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13370
0
        ctxt->attsDefault = NULL;
13371
0
    }
13372
5.80k
    if (ctxt->attsSpecial != NULL) {
13373
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13374
0
        ctxt->attsSpecial = NULL;
13375
0
    }
13376
13377
5.80k
#ifdef LIBXML_CATALOG_ENABLED
13378
5.80k
    if (ctxt->catalogs != NULL)
13379
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13380
5.80k
#endif
13381
5.80k
    ctxt->nbErrors = 0;
13382
5.80k
    ctxt->nbWarnings = 0;
13383
5.80k
    if (ctxt->lastError.code != XML_ERR_OK)
13384
0
        xmlResetError(&ctxt->lastError);
13385
5.80k
}
13386
13387
/**
13388
 * xmlCtxtResetPush:
13389
 * @ctxt: an XML parser context
13390
 * @chunk:  a pointer to an array of chars
13391
 * @size:  number of chars in the array
13392
 * @filename:  an optional file name or URI
13393
 * @encoding:  the document encoding, or NULL
13394
 *
13395
 * Reset a push parser context
13396
 *
13397
 * Returns 0 in case of success and 1 in case of error
13398
 */
13399
int
13400
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13401
                 int size, const char *filename, const char *encoding)
13402
0
{
13403
0
    xmlParserInputPtr input;
13404
13405
0
    if (ctxt == NULL)
13406
0
        return(1);
13407
13408
0
    xmlCtxtReset(ctxt);
13409
13410
0
    input = xmlNewPushInput(filename, chunk, size);
13411
0
    if (input == NULL)
13412
0
        return(1);
13413
13414
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13415
0
        xmlFreeInputStream(input);
13416
0
        return(1);
13417
0
    }
13418
13419
0
    if (encoding != NULL)
13420
0
        xmlSwitchEncodingName(ctxt, encoding);
13421
13422
0
    return(0);
13423
0
}
13424
13425
static int
13426
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13427
216k
{
13428
216k
    int allMask;
13429
13430
216k
    if (ctxt == NULL)
13431
0
        return(-1);
13432
13433
    /*
13434
     * XInclude options aren't handled by the parser.
13435
     *
13436
     * XML_PARSE_XINCLUDE
13437
     * XML_PARSE_NOXINCNODE
13438
     * XML_PARSE_NOBASEFIX
13439
     */
13440
216k
    allMask = XML_PARSE_RECOVER |
13441
216k
              XML_PARSE_NOENT |
13442
216k
              XML_PARSE_DTDLOAD |
13443
216k
              XML_PARSE_DTDATTR |
13444
216k
              XML_PARSE_DTDVALID |
13445
216k
              XML_PARSE_NOERROR |
13446
216k
              XML_PARSE_NOWARNING |
13447
216k
              XML_PARSE_PEDANTIC |
13448
216k
              XML_PARSE_NOBLANKS |
13449
216k
#ifdef LIBXML_SAX1_ENABLED
13450
216k
              XML_PARSE_SAX1 |
13451
216k
#endif
13452
216k
              XML_PARSE_NONET |
13453
216k
              XML_PARSE_NODICT |
13454
216k
              XML_PARSE_NSCLEAN |
13455
216k
              XML_PARSE_NOCDATA |
13456
216k
              XML_PARSE_COMPACT |
13457
216k
              XML_PARSE_OLD10 |
13458
216k
              XML_PARSE_HUGE |
13459
216k
              XML_PARSE_OLDSAX |
13460
216k
              XML_PARSE_IGNORE_ENC |
13461
216k
              XML_PARSE_BIG_LINES |
13462
216k
              XML_PARSE_NO_XXE |
13463
216k
              XML_PARSE_UNZIP |
13464
216k
              XML_PARSE_NO_SYS_CATALOG |
13465
216k
              XML_PARSE_CATALOG_PI;
13466
13467
216k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13468
13469
    /*
13470
     * For some options, struct members are historically the source
13471
     * of truth. The values are initalized from global variables and
13472
     * old code could also modify them directly. Several older API
13473
     * functions that don't take an options argument rely on these
13474
     * deprecated mechanisms.
13475
     *
13476
     * Once public access to struct members and the globals are
13477
     * disabled, we can use the options bitmask as source of
13478
     * truth, making all these struct members obsolete.
13479
     *
13480
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13481
     * loading of the external subset.
13482
     */
13483
216k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13484
216k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13485
216k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13486
216k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13487
216k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13488
216k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13489
216k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13490
216k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13491
13492
216k
    if (options & XML_PARSE_HUGE) {
13493
210k
        if (ctxt->dict != NULL)
13494
210k
            xmlDictSetLimit(ctxt->dict, 0);
13495
210k
    }
13496
13497
216k
    ctxt->linenumbers = 1;
13498
13499
216k
    return(options & ~allMask);
13500
216k
}
13501
13502
/**
13503
 * xmlCtxtSetOptions:
13504
 * @ctxt: an XML parser context
13505
 * @options:  a bitmask of xmlParserOption values
13506
 *
13507
 * Applies the options to the parser context. Unset options are
13508
 * cleared.
13509
 *
13510
 * Available since 2.13.0. With older versions, you can use
13511
 * xmlCtxtUseOptions.
13512
 *
13513
 * XML_PARSE_RECOVER
13514
 *
13515
 * Enable "recovery" mode which allows non-wellformed documents.
13516
 * How this mode behaves exactly is unspecified and may change
13517
 * without further notice. Use of this feature is DISCOURAGED.
13518
 *
13519
 * Not supported by the push parser.
13520
 *
13521
 * XML_PARSE_NOENT
13522
 *
13523
 * Despite the confusing name, this option enables substitution
13524
 * of entities. The resulting tree won't contain any entity
13525
 * reference nodes.
13526
 *
13527
 * This option also enables loading of external entities (both
13528
 * general and parameter entities) which is dangerous. If you
13529
 * process untrusted data, it's recommended to set the
13530
 * XML_PARSE_NO_XXE option to disable loading of external
13531
 * entities.
13532
 *
13533
 * XML_PARSE_DTDLOAD
13534
 *
13535
 * Enables loading of an external DTD and the loading and
13536
 * substitution of external parameter entities. Has no effect
13537
 * if XML_PARSE_NO_XXE is set.
13538
 *
13539
 * XML_PARSE_DTDATTR
13540
 *
13541
 * Adds default attributes from the DTD to the result document.
13542
 *
13543
 * Implies XML_PARSE_DTDLOAD, but loading of external content
13544
 * can be disabled with XML_PARSE_NO_XXE.
13545
 *
13546
 * XML_PARSE_DTDVALID
13547
 *
13548
 * This option enables DTD validation which requires to load
13549
 * external DTDs and external entities (both general and
13550
 * parameter entities) unless XML_PARSE_NO_XXE was set.
13551
 *
13552
 * XML_PARSE_NO_XXE
13553
 *
13554
 * Disables loading of external DTDs or entities.
13555
 *
13556
 * Available since 2.13.0.
13557
 *
13558
 * XML_PARSE_NOERROR
13559
 *
13560
 * Disable error and warning reports to the error handlers.
13561
 * Errors are still accessible with xmlCtxtGetLastError.
13562
 *
13563
 * XML_PARSE_NOWARNING
13564
 *
13565
 * Disable warning reports.
13566
 *
13567
 * XML_PARSE_PEDANTIC
13568
 *
13569
 * Enable some pedantic warnings.
13570
 *
13571
 * XML_PARSE_NOBLANKS
13572
 *
13573
 * Remove some whitespace from the result document. Where to
13574
 * remove whitespace depends on DTD element declarations or a
13575
 * broken heuristic with unfixable bugs. Use of this option is
13576
 * DISCOURAGED.
13577
 *
13578
 * Not supported by the push parser.
13579
 *
13580
 * XML_PARSE_SAX1
13581
 *
13582
 * Always invoke the deprecated SAX1 startElement and endElement
13583
 * handlers. This option is DEPRECATED.
13584
 *
13585
 * XML_PARSE_NONET
13586
 *
13587
 * Disable network access with the builtin HTTP client.
13588
 *
13589
 * XML_PARSE_NODICT
13590
 *
13591
 * Create a document without interned strings, making all
13592
 * strings separate memory allocations.
13593
 *
13594
 * XML_PARSE_NSCLEAN
13595
 *
13596
 * Remove redundant namespace declarations from the result
13597
 * document.
13598
 *
13599
 * XML_PARSE_NOCDATA
13600
 *
13601
 * Output normal text nodes instead of CDATA nodes.
13602
 *
13603
 * XML_PARSE_COMPACT
13604
 *
13605
 * Store small strings directly in the node struct to save
13606
 * memory.
13607
 *
13608
 * XML_PARSE_OLD10
13609
 *
13610
 * Use old Name productions from before XML 1.0 Fifth Edition.
13611
 * This option is DEPRECATED.
13612
 *
13613
 * XML_PARSE_HUGE
13614
 *
13615
 * Relax some internal limits.
13616
 *
13617
 * Maximum size of text nodes, tags, comments, processing instructions,
13618
 * CDATA sections, entity values
13619
 *
13620
 * normal: 10M
13621
 * huge:    1B
13622
 *
13623
 * Maximum size of names, system literals, pubid literals
13624
 *
13625
 * normal: 50K
13626
 * huge:   10M
13627
 *
13628
 * Maximum nesting depth of elements
13629
 *
13630
 * normal:  256
13631
 * huge:   2048
13632
 *
13633
 * Maximum nesting depth of entities
13634
 *
13635
 * normal: 20
13636
 * huge:   40
13637
 *
13638
 * XML_PARSE_OLDSAX
13639
 *
13640
 * Enable an unspecified legacy mode for SAX parsers. This
13641
 * option is DEPRECATED.
13642
 *
13643
 * XML_PARSE_IGNORE_ENC
13644
 *
13645
 * Ignore the encoding in the XML declaration. This option is
13646
 * mostly unneeded these days. The only effect is to enforce
13647
 * UTF-8 decoding of ASCII-like data.
13648
 *
13649
 * XML_PARSE_BIG_LINES
13650
 *
13651
 * Enable reporting of line numbers larger than 65535.
13652
 *
13653
 * XML_PARSE_UNZIP
13654
 *
13655
 * Enable input decompression. Setting this option is discouraged
13656
 * to avoid zip bombs.
13657
 *
13658
 * Available since 2.14.0.
13659
 *
13660
 * XML_PARSE_NO_SYS_CATALOG
13661
 *
13662
 * Disables the global system XML catalog.
13663
 *
13664
 * Available since 2.14.0.
13665
 *
13666
 * XML_PARSE_CATALOG_PI
13667
 *
13668
 * Enable XML catalog processing instructions.
13669
 *
13670
 * Available since 2.14.0.
13671
 *
13672
 * Returns 0 in case of success, the set of unknown or unimplemented options
13673
 *         in case of error.
13674
 */
13675
int
13676
xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
13677
0
{
13678
0
#ifdef LIBXML_HTML_ENABLED
13679
0
    if ((ctxt != NULL) && (ctxt->html))
13680
0
        return(htmlCtxtSetOptions(ctxt, options));
13681
0
#endif
13682
13683
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13684
0
}
13685
13686
/**
13687
 * xmlCtxtGetOptions:
13688
 * @ctxt: an XML parser context
13689
 *
13690
 * Get the current options of the parser context.
13691
 *
13692
 * Available since 2.14.0.
13693
 *
13694
 * Returns the current options set in the parser context, or -1 if ctxt is NULL.
13695
 */
13696
int
13697
xmlCtxtGetOptions(xmlParserCtxtPtr ctxt)
13698
0
{
13699
0
    if (ctxt == NULL)
13700
0
        return(-1);
13701
13702
0
    return(ctxt->options);
13703
0
}
13704
13705
/**
13706
 * xmlCtxtUseOptions:
13707
 * @ctxt: an XML parser context
13708
 * @options:  a combination of xmlParserOption
13709
 *
13710
 * DEPRECATED: Use xmlCtxtSetOptions.
13711
 *
13712
 * Applies the options to the parser context. The following options
13713
 * are never cleared and can only be enabled:
13714
 *
13715
 * XML_PARSE_NOERROR
13716
 * XML_PARSE_NOWARNING
13717
 * XML_PARSE_NONET
13718
 * XML_PARSE_NSCLEAN
13719
 * XML_PARSE_NOCDATA
13720
 * XML_PARSE_COMPACT
13721
 * XML_PARSE_OLD10
13722
 * XML_PARSE_HUGE
13723
 * XML_PARSE_OLDSAX
13724
 * XML_PARSE_IGNORE_ENC
13725
 * XML_PARSE_BIG_LINES
13726
 *
13727
 * Returns 0 in case of success, the set of unknown or unimplemented options
13728
 *         in case of error.
13729
 */
13730
int
13731
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13732
216k
{
13733
216k
    int keepMask;
13734
13735
216k
#ifdef LIBXML_HTML_ENABLED
13736
216k
    if ((ctxt != NULL) && (ctxt->html))
13737
0
        return(htmlCtxtUseOptions(ctxt, options));
13738
216k
#endif
13739
13740
    /*
13741
     * For historic reasons, some options can only be enabled.
13742
     */
13743
216k
    keepMask = XML_PARSE_NOERROR |
13744
216k
               XML_PARSE_NOWARNING |
13745
216k
               XML_PARSE_NONET |
13746
216k
               XML_PARSE_NSCLEAN |
13747
216k
               XML_PARSE_NOCDATA |
13748
216k
               XML_PARSE_COMPACT |
13749
216k
               XML_PARSE_OLD10 |
13750
216k
               XML_PARSE_HUGE |
13751
216k
               XML_PARSE_OLDSAX |
13752
216k
               XML_PARSE_IGNORE_ENC |
13753
216k
               XML_PARSE_BIG_LINES;
13754
13755
216k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13756
216k
}
13757
13758
/**
13759
 * xmlCtxtSetMaxAmplification:
13760
 * @ctxt: an XML parser context
13761
 * @maxAmpl:  maximum amplification factor
13762
 *
13763
 * To protect against exponential entity expansion ("billion laughs"), the
13764
 * size of serialized output is (roughly) limited to the input size
13765
 * multiplied by this factor. The default value is 5.
13766
 *
13767
 * When working with documents making heavy use of entity expansion, it can
13768
 * be necessary to increase the value. For security reasons, this should only
13769
 * be considered when processing trusted input.
13770
 */
13771
void
13772
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
13773
0
{
13774
0
    ctxt->maxAmpl = maxAmpl;
13775
0
}
13776
13777
/**
13778
 * xmlCtxtParseDocument:
13779
 * @ctxt:  an XML parser context
13780
 * @input:  parser input
13781
 *
13782
 * Parse an XML document and return the resulting document tree.
13783
 * Takes ownership of the input object.
13784
 *
13785
 * Available since 2.13.0.
13786
 *
13787
 * Returns the resulting document tree or NULL
13788
 */
13789
xmlDocPtr
13790
xmlCtxtParseDocument(xmlParserCtxtPtr ctxt, xmlParserInputPtr input)
13791
5.80k
{
13792
5.80k
    xmlDocPtr ret = NULL;
13793
13794
5.80k
    if ((ctxt == NULL) || (input == NULL)) {
13795
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
13796
0
        xmlFreeInputStream(input);
13797
0
        return(NULL);
13798
0
    }
13799
13800
    /* assert(ctxt->inputNr == 0); */
13801
5.80k
    while (ctxt->inputNr > 0)
13802
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13803
13804
5.80k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13805
0
        xmlFreeInputStream(input);
13806
0
        return(NULL);
13807
0
    }
13808
13809
5.80k
    xmlParseDocument(ctxt);
13810
13811
5.80k
    ret = xmlCtxtGetDocument(ctxt);
13812
13813
    /* assert(ctxt->inputNr == 1); */
13814
11.6k
    while (ctxt->inputNr > 0)
13815
5.80k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13816
13817
5.80k
    return(ret);
13818
5.80k
}
13819
13820
/**
13821
 * xmlReadDoc:
13822
 * @cur:  a pointer to a zero terminated string
13823
 * @URL:  base URL (optional)
13824
 * @encoding:  the document encoding (optional)
13825
 * @options:  a combination of xmlParserOption
13826
 *
13827
 * Convenience function to parse an XML document from a
13828
 * zero-terminated string.
13829
 *
13830
 * See xmlCtxtReadDoc for details.
13831
 *
13832
 * Returns the resulting document tree
13833
 */
13834
xmlDocPtr
13835
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13836
           int options)
13837
0
{
13838
0
    xmlParserCtxtPtr ctxt;
13839
0
    xmlParserInputPtr input;
13840
0
    xmlDocPtr doc = NULL;
13841
13842
0
    ctxt = xmlNewParserCtxt();
13843
0
    if (ctxt == NULL)
13844
0
        return(NULL);
13845
13846
0
    xmlCtxtUseOptions(ctxt, options);
13847
13848
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13849
0
                                      XML_INPUT_BUF_STATIC);
13850
13851
0
    if (input != NULL)
13852
0
        doc = xmlCtxtParseDocument(ctxt, input);
13853
13854
0
    xmlFreeParserCtxt(ctxt);
13855
0
    return(doc);
13856
0
}
13857
13858
/**
13859
 * xmlReadFile:
13860
 * @filename:  a file or URL
13861
 * @encoding:  the document encoding (optional)
13862
 * @options:  a combination of xmlParserOption
13863
 *
13864
 * Convenience function to parse an XML file from the filesystem,
13865
 * the network or a global user-define resource loader.
13866
 *
13867
 * This function always enables the XML_PARSE_UNZIP option for
13868
 * backward compatibility. If a "-" filename is passed, it will
13869
 * read from stdin. Both of these features are potentially
13870
 * insecure and might be removed from later versions.
13871
 *
13872
 * See xmlCtxtReadFile for details.
13873
 *
13874
 * Returns the resulting document tree
13875
 */
13876
xmlDocPtr
13877
xmlReadFile(const char *filename, const char *encoding, int options)
13878
0
{
13879
0
    xmlParserCtxtPtr ctxt;
13880
0
    xmlParserInputPtr input;
13881
0
    xmlDocPtr doc = NULL;
13882
13883
0
    ctxt = xmlNewParserCtxt();
13884
0
    if (ctxt == NULL)
13885
0
        return(NULL);
13886
13887
0
    options |= XML_PARSE_UNZIP;
13888
13889
0
    xmlCtxtUseOptions(ctxt, options);
13890
13891
    /*
13892
     * Backward compatibility for users of command line utilities like
13893
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13894
     * should be removed at some point.
13895
     */
13896
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13897
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13898
0
                                      encoding, 0);
13899
0
    else
13900
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13901
13902
0
    if (input != NULL)
13903
0
        doc = xmlCtxtParseDocument(ctxt, input);
13904
13905
0
    xmlFreeParserCtxt(ctxt);
13906
0
    return(doc);
13907
0
}
13908
13909
/**
13910
 * xmlReadMemory:
13911
 * @buffer:  a pointer to a char array
13912
 * @size:  the size of the array
13913
 * @url:  base URL (optional)
13914
 * @encoding:  the document encoding (optional)
13915
 * @options:  a combination of xmlParserOption
13916
 *
13917
 * Parse an XML in-memory document and build a tree. The input buffer must
13918
 * not contain a terminating null byte.
13919
 *
13920
 * See xmlCtxtReadMemory for details.
13921
 *
13922
 * Returns the resulting document tree
13923
 */
13924
xmlDocPtr
13925
xmlReadMemory(const char *buffer, int size, const char *url,
13926
              const char *encoding, int options)
13927
0
{
13928
0
    xmlParserCtxtPtr ctxt;
13929
0
    xmlParserInputPtr input;
13930
0
    xmlDocPtr doc = NULL;
13931
13932
0
    if (size < 0)
13933
0
  return(NULL);
13934
13935
0
    ctxt = xmlNewParserCtxt();
13936
0
    if (ctxt == NULL)
13937
0
        return(NULL);
13938
13939
0
    xmlCtxtUseOptions(ctxt, options);
13940
13941
0
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13942
0
                                      XML_INPUT_BUF_STATIC);
13943
13944
0
    if (input != NULL)
13945
0
        doc = xmlCtxtParseDocument(ctxt, input);
13946
13947
0
    xmlFreeParserCtxt(ctxt);
13948
0
    return(doc);
13949
0
}
13950
13951
/**
13952
 * xmlReadFd:
13953
 * @fd:  an open file descriptor
13954
 * @URL:  base URL (optional)
13955
 * @encoding:  the document encoding (optional)
13956
 * @options:  a combination of xmlParserOption
13957
 *
13958
 * Parse an XML from a file descriptor and build a tree.
13959
 *
13960
 * See xmlCtxtReadFd for details.
13961
 *
13962
 * NOTE that the file descriptor will not be closed when the
13963
 * context is freed or reset.
13964
 *
13965
 * Returns the resulting document tree
13966
 */
13967
xmlDocPtr
13968
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13969
0
{
13970
0
    xmlParserCtxtPtr ctxt;
13971
0
    xmlParserInputPtr input;
13972
0
    xmlDocPtr doc = NULL;
13973
13974
0
    ctxt = xmlNewParserCtxt();
13975
0
    if (ctxt == NULL)
13976
0
        return(NULL);
13977
13978
0
    xmlCtxtUseOptions(ctxt, options);
13979
13980
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13981
13982
0
    if (input != NULL)
13983
0
        doc = xmlCtxtParseDocument(ctxt, input);
13984
13985
0
    xmlFreeParserCtxt(ctxt);
13986
0
    return(doc);
13987
0
}
13988
13989
/**
13990
 * xmlReadIO:
13991
 * @ioread:  an I/O read function
13992
 * @ioclose:  an I/O close function (optional)
13993
 * @ioctx:  an I/O handler
13994
 * @URL:  base URL (optional)
13995
 * @encoding:  the document encoding (optional)
13996
 * @options:  a combination of xmlParserOption
13997
 *
13998
 * Parse an XML document from I/O functions and context and build a tree.
13999
 *
14000
 * See xmlCtxtReadIO for details.
14001
 *
14002
 * Returns the resulting document tree
14003
 */
14004
xmlDocPtr
14005
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14006
          void *ioctx, const char *URL, const char *encoding, int options)
14007
0
{
14008
0
    xmlParserCtxtPtr ctxt;
14009
0
    xmlParserInputPtr input;
14010
0
    xmlDocPtr doc = NULL;
14011
14012
0
    ctxt = xmlNewParserCtxt();
14013
0
    if (ctxt == NULL)
14014
0
        return(NULL);
14015
14016
0
    xmlCtxtUseOptions(ctxt, options);
14017
14018
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
14019
0
                                  encoding, 0);
14020
14021
0
    if (input != NULL)
14022
0
        doc = xmlCtxtParseDocument(ctxt, input);
14023
14024
0
    xmlFreeParserCtxt(ctxt);
14025
0
    return(doc);
14026
0
}
14027
14028
/**
14029
 * xmlCtxtReadDoc:
14030
 * @ctxt:  an XML parser context
14031
 * @str:  a pointer to a zero terminated string
14032
 * @URL:  base URL (optional)
14033
 * @encoding:  the document encoding (optional)
14034
 * @options:  a combination of xmlParserOption
14035
 *
14036
 * Parse an XML in-memory document and build a tree.
14037
 *
14038
 * @URL is used as base to resolve external entities and for error
14039
 * reporting.
14040
 *
14041
 * See xmlCtxtUseOptions for details.
14042
 *
14043
 * Returns the resulting document tree
14044
 */
14045
xmlDocPtr
14046
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
14047
               const char *URL, const char *encoding, int options)
14048
0
{
14049
0
    xmlParserInputPtr input;
14050
14051
0
    if (ctxt == NULL)
14052
0
        return(NULL);
14053
14054
0
    xmlCtxtReset(ctxt);
14055
0
    xmlCtxtUseOptions(ctxt, options);
14056
14057
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
14058
0
                                      XML_INPUT_BUF_STATIC);
14059
0
    if (input == NULL)
14060
0
        return(NULL);
14061
14062
0
    return(xmlCtxtParseDocument(ctxt, input));
14063
0
}
14064
14065
/**
14066
 * xmlCtxtReadFile:
14067
 * @ctxt:  an XML parser context
14068
 * @filename:  a file or URL
14069
 * @encoding:  the document encoding (optional)
14070
 * @options:  a combination of xmlParserOption
14071
 *
14072
 * Parse an XML file from the filesystem, the network or a user-defined
14073
 * resource loader.
14074
 *
14075
 * This function always enables the XML_PARSE_UNZIP option for
14076
 * backward compatibility. This feature is potentially insecure
14077
 * and might be removed from later versions.
14078
 *
14079
 * Returns the resulting document tree
14080
 */
14081
xmlDocPtr
14082
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14083
                const char *encoding, int options)
14084
0
{
14085
0
    xmlParserInputPtr input;
14086
14087
0
    if (ctxt == NULL)
14088
0
        return(NULL);
14089
14090
0
    options |= XML_PARSE_UNZIP;
14091
14092
0
    xmlCtxtReset(ctxt);
14093
0
    xmlCtxtUseOptions(ctxt, options);
14094
14095
0
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
14096
0
    if (input == NULL)
14097
0
        return(NULL);
14098
14099
0
    return(xmlCtxtParseDocument(ctxt, input));
14100
0
}
14101
14102
/**
14103
 * xmlCtxtReadMemory:
14104
 * @ctxt:  an XML parser context
14105
 * @buffer:  a pointer to a char array
14106
 * @size:  the size of the array
14107
 * @URL:  base URL (optional)
14108
 * @encoding:  the document encoding (optional)
14109
 * @options:  a combination of xmlParserOption
14110
 *
14111
 * Parse an XML in-memory document and build a tree. The input buffer must
14112
 * not contain a terminating null byte.
14113
 *
14114
 * @URL is used as base to resolve external entities and for error
14115
 * reporting.
14116
 *
14117
 * See xmlCtxtUseOptions for details.
14118
 *
14119
 * Returns the resulting document tree
14120
 */
14121
xmlDocPtr
14122
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14123
                  const char *URL, const char *encoding, int options)
14124
0
{
14125
0
    xmlParserInputPtr input;
14126
14127
0
    if ((ctxt == NULL) || (size < 0))
14128
0
        return(NULL);
14129
14130
0
    xmlCtxtReset(ctxt);
14131
0
    xmlCtxtUseOptions(ctxt, options);
14132
14133
0
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
14134
0
                                      XML_INPUT_BUF_STATIC);
14135
0
    if (input == NULL)
14136
0
        return(NULL);
14137
14138
0
    return(xmlCtxtParseDocument(ctxt, input));
14139
0
}
14140
14141
/**
14142
 * xmlCtxtReadFd:
14143
 * @ctxt:  an XML parser context
14144
 * @fd:  an open file descriptor
14145
 * @URL:  base URL (optional)
14146
 * @encoding:  the document encoding (optional)
14147
 * @options:  a combination of xmlParserOption
14148
 *
14149
 * Parse an XML document from a file descriptor and build a tree.
14150
 *
14151
 * NOTE that the file descriptor will not be closed when the
14152
 * context is freed or reset.
14153
 *
14154
 * @URL is used as base to resolve external entities and for error
14155
 * reporting.
14156
 *
14157
 * See xmlCtxtUseOptions for details.
14158
 *
14159
 * Returns the resulting document tree
14160
 */
14161
xmlDocPtr
14162
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14163
              const char *URL, const char *encoding, int options)
14164
0
{
14165
0
    xmlParserInputPtr input;
14166
14167
0
    if (ctxt == NULL)
14168
0
        return(NULL);
14169
14170
0
    xmlCtxtReset(ctxt);
14171
0
    xmlCtxtUseOptions(ctxt, options);
14172
14173
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
14174
0
    if (input == NULL)
14175
0
        return(NULL);
14176
14177
0
    return(xmlCtxtParseDocument(ctxt, input));
14178
0
}
14179
14180
/**
14181
 * xmlCtxtReadIO:
14182
 * @ctxt:  an XML parser context
14183
 * @ioread:  an I/O read function
14184
 * @ioclose:  an I/O close function
14185
 * @ioctx:  an I/O handler
14186
 * @URL:  the base URL to use for the document
14187
 * @encoding:  the document encoding, or NULL
14188
 * @options:  a combination of xmlParserOption
14189
 *
14190
 * parse an XML document from I/O functions and source and build a tree.
14191
 * This reuses the existing @ctxt parser context
14192
 *
14193
 * @URL is used as base to resolve external entities and for error
14194
 * reporting.
14195
 *
14196
 * See xmlCtxtUseOptions for details.
14197
 *
14198
 * Returns the resulting document tree
14199
 */
14200
xmlDocPtr
14201
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14202
              xmlInputCloseCallback ioclose, void *ioctx,
14203
        const char *URL,
14204
              const char *encoding, int options)
14205
5.80k
{
14206
5.80k
    xmlParserInputPtr input;
14207
14208
5.80k
    if (ctxt == NULL)
14209
0
        return(NULL);
14210
14211
5.80k
    xmlCtxtReset(ctxt);
14212
5.80k
    xmlCtxtUseOptions(ctxt, options);
14213
14214
5.80k
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
14215
5.80k
                                  encoding, 0);
14216
5.80k
    if (input == NULL)
14217
0
        return(NULL);
14218
14219
5.80k
    return(xmlCtxtParseDocument(ctxt, input));
14220
5.80k
}
14221