Coverage Report

Created: 2025-12-08 09:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/workdir/UnpackedTarball/libxml2/parser.c
Line
Count
Source
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#include <libxml/HTMLparser.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
70
#include "private/buf.h"
71
#include "private/dict.h"
72
#include "private/entities.h"
73
#include "private/error.h"
74
#include "private/html.h"
75
#include "private/io.h"
76
#include "private/memory.h"
77
#include "private/parser.h"
78
79
32.6M
#define NS_INDEX_EMPTY  INT_MAX
80
6.10M
#define NS_INDEX_XML    (INT_MAX - 1)
81
11.5M
#define URI_HASH_EMPTY  0xD943A04E
82
57.0k
#define URI_HASH_XML    0xF0451F02
83
84
#ifndef STDIN_FILENO
85
0
  #define STDIN_FILENO 0
86
#endif
87
88
#ifndef SIZE_MAX
89
  #define SIZE_MAX ((size_t) -1)
90
#endif
91
92
1.19M
#define XML_MAX_ATTRS 100000000 /* 100 million */
93
94
struct _xmlStartTag {
95
    const xmlChar *prefix;
96
    const xmlChar *URI;
97
    int line;
98
    int nsNr;
99
};
100
101
/*
 * Extra per-namespace data.
 * NOTE(review): field roles below are inferred from names; verify
 * against the namespace table code before relying on them.
 */
typedef struct {
    void *saxData;            /* opaque pointer, presumably owned by SAX user */
    unsigned prefixHashValue; /* presumably the hash of the prefix */
    unsigned uriHashValue;    /* presumably the hash of the namespace URI */
    unsigned elementId;       /* presumably the id of the declaring element */
    int oldIndex;             /* presumably the index of a shadowed binding */
} xmlParserNsExtra;
108
109
/*
 * One slot of the namespace hash table (see the "hash" member of
 * struct _xmlParserNsData).
 */
typedef struct {
    unsigned hashValue; /* hash stored for this slot */
    int index;          /* NOTE(review): presumably an index into the
                           namespace table - confirm with the lookup code */
} xmlParserNsBucket;
113
114
struct _xmlParserNsData {
115
    xmlParserNsExtra *extra;
116
117
    unsigned hashSize;
118
    unsigned hashElems;
119
    xmlParserNsBucket *hash;
120
121
    unsigned elementId;
122
    int defaultNsIndex;
123
    int minNsIndex;
124
};
125
126
static int
127
xmlParseElementStart(xmlParserCtxtPtr ctxt);
128
129
static void
130
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
131
132
static xmlEntityPtr
133
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
134
135
static const xmlChar *
136
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
137
138
/************************************************************************
139
 *                  *
140
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
141
 *                  *
142
 ************************************************************************/
143
144
#define XML_PARSER_BIG_ENTITY 1000
145
#define XML_PARSER_LOT_ENTITY 5000
146
147
/*
148
 * Constants for protection against abusive entity expansion
149
 * ("billion laughs").
150
 */
151
152
/*
153
 * A certain amount of entity expansion which is always allowed.
154
 */
155
1.00M
#define XML_PARSER_ALLOWED_EXPANSION 1000000
156
157
/*
158
 * Fixed cost for each entity reference. This crudely models processing time
159
 * as well to protect, for example, against exponential expansion of empty
160
 * or very short entities.
161
 */
162
1.00M
#define XML_ENT_FIXED_COST 20
163
164
93.9M
#define XML_PARSER_BIG_BUFFER_SIZE 300
165
273k
#define XML_PARSER_BUFFER_SIZE 100
166
53.3k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
167
168
/**
169
 * XML_PARSER_CHUNK_SIZE
170
 *
171
 * When calling GROW, this is the minimal amount of data
172
 * the parser expects to have received. It is not a hard
173
 * limit but an optimization when reading strings like Names.
174
 * It is not strictly needed as long as the input's available characters
175
 * are followed by 0, which should be provided by the I/O level.
176
 */
177
#define XML_PARSER_CHUNK_SIZE 100
178
179
/**
180
 * xmlParserVersion:
181
 *
182
 * Constant string describing the internal version of the library
183
 */
184
const char *const
185
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
186
187
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets that W3C specifications allow.
 */
static const char* const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL    /* end-of-list sentinel */
};
196
197
198
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
199
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
200
                                              const xmlChar **str);
201
202
static void
203
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
204
205
static int
206
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
207
208
/************************************************************************
209
 *                  *
210
 *    Some factorized error routines        *
211
 *                  *
212
 ************************************************************************/
213
214
static void
215
0
xmlErrMemory(xmlParserCtxtPtr ctxt) {
216
0
    xmlCtxtErrMemory(ctxt);
217
0
}
218
219
/**
220
 * xmlErrAttributeDup:
221
 * @ctxt:  an XML parser context
222
 * @prefix:  the attribute prefix
223
 * @localname:  the attribute localname
224
 *
225
 * Handle a redefinition of attribute error
226
 */
227
static void
228
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
229
                   const xmlChar * localname)
230
258k
{
231
258k
    if (prefix == NULL)
232
188k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
233
188k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
234
188k
                   "Attribute %s redefined\n", localname);
235
70.1k
    else
236
70.1k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
237
70.1k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
238
70.1k
                   "Attribute %s:%s redefined\n", prefix, localname);
239
258k
}
240
241
/**
242
 * xmlFatalErrMsg:
243
 * @ctxt:  an XML parser context
244
 * @error:  the error number
245
 * @msg:  the error message
246
 *
247
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
248
 */
249
static void LIBXML_ATTR_FORMAT(3,0)
250
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
251
               const char *msg)
252
2.01M
{
253
2.01M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
254
2.01M
               NULL, NULL, NULL, 0, "%s", msg);
255
2.01M
}
256
257
/**
258
 * xmlWarningMsg:
259
 * @ctxt:  an XML parser context
260
 * @error:  the error number
261
 * @msg:  the error message
262
 * @str1:  extra data
263
 * @str2:  extra data
264
 *
265
 * Handle a warning.
266
 */
267
void LIBXML_ATTR_FORMAT(3,0)
268
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
269
              const char *msg, const xmlChar *str1, const xmlChar *str2)
270
36.1k
{
271
36.1k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
272
36.1k
               str1, str2, NULL, 0, msg, str1, str2);
273
36.1k
}
274
275
/**
276
 * xmlValidityError:
277
 * @ctxt:  an XML parser context
278
 * @error:  the error number
279
 * @msg:  the error message
280
 * @str1:  extra data
281
 *
282
 * Handle a validity error.
283
 */
284
static void LIBXML_ATTR_FORMAT(3,0)
285
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
286
              const char *msg, const xmlChar *str1, const xmlChar *str2)
287
0
{
288
0
    ctxt->valid = 0;
289
290
0
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
291
0
               str1, str2, NULL, 0, msg, str1, str2);
292
0
}
293
294
/**
295
 * xmlFatalErrMsgInt:
296
 * @ctxt:  an XML parser context
297
 * @error:  the error number
298
 * @msg:  the error message
299
 * @val:  an integer value
300
 *
301
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
302
 */
303
static void LIBXML_ATTR_FORMAT(3,0)
304
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
305
                  const char *msg, int val)
306
14.1k
{
307
14.1k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
308
14.1k
               NULL, NULL, NULL, val, msg, val);
309
14.1k
}
310
311
/**
312
 * xmlFatalErrMsgStrIntStr:
313
 * @ctxt:  an XML parser context
314
 * @error:  the error number
315
 * @msg:  the error message
316
 * @str1:  an string info
317
 * @val:  an integer value
318
 * @str2:  an string info
319
 *
320
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
321
 */
322
static void LIBXML_ATTR_FORMAT(3,0)
323
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
324
                  const char *msg, const xmlChar *str1, int val,
325
      const xmlChar *str2)
326
69.6k
{
327
69.6k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
328
69.6k
               str1, str2, NULL, val, msg, str1, val, str2);
329
69.6k
}
330
331
/**
332
 * xmlFatalErrMsgStr:
333
 * @ctxt:  an XML parser context
334
 * @error:  the error number
335
 * @msg:  the error message
336
 * @val:  a string value
337
 *
338
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
339
 */
340
static void LIBXML_ATTR_FORMAT(3,0)
341
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
342
                  const char *msg, const xmlChar * val)
343
146k
{
344
146k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
345
146k
               val, NULL, NULL, 0, msg, val);
346
146k
}
347
348
/**
349
 * xmlErrMsgStr:
350
 * @ctxt:  an XML parser context
351
 * @error:  the error number
352
 * @msg:  the error message
353
 * @val:  a string value
354
 *
355
 * Handle a non fatal parser error
356
 */
357
static void LIBXML_ATTR_FORMAT(3,0)
358
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
359
                  const char *msg, const xmlChar * val)
360
15.3k
{
361
15.3k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
362
15.3k
               val, NULL, NULL, 0, msg, val);
363
15.3k
}
364
365
/**
366
 * xmlNsErr:
367
 * @ctxt:  an XML parser context
368
 * @error:  the error number
369
 * @msg:  the message
370
 * @info1:  extra information string
371
 * @info2:  extra information string
372
 *
373
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
374
 */
375
static void LIBXML_ATTR_FORMAT(3,0)
376
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
377
         const char *msg,
378
         const xmlChar * info1, const xmlChar * info2,
379
         const xmlChar * info3)
380
2.21M
{
381
2.21M
    ctxt->nsWellFormed = 0;
382
383
2.21M
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
384
2.21M
               info1, info2, info3, 0, msg, info1, info2, info3);
385
2.21M
}
386
387
/**
388
 * xmlNsWarn
389
 * @ctxt:  an XML parser context
390
 * @error:  the error number
391
 * @msg:  the message
392
 * @info1:  extra information string
393
 * @info2:  extra information string
394
 *
395
 * Handle a namespace warning error
396
 */
397
static void LIBXML_ATTR_FORMAT(3,0)
398
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
399
         const char *msg,
400
         const xmlChar * info1, const xmlChar * info2,
401
         const xmlChar * info3)
402
92.9k
{
403
92.9k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
404
92.9k
               info1, info2, info3, 0, msg, info1, info2, info3);
405
92.9k
}
406
407
/**
 * xmlSaturatedAdd:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around on overflow.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without overflowing. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
414
415
/**
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator, updated in place
 * @val:  amount to add, as a size_t
 *
 * Same as xmlSaturatedAdd() but for a size_t operand: add @val to
 * *@dst, clamping the result to ULONG_MAX on overflow.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    /* Largest amount that can still be added without overflowing. */
    unsigned long room = ULONG_MAX - *dst;

    if (val > room) {
        *dst = ULONG_MAX;
        return;
    }

    *dst += val;
}
422
423
/**
424
 * xmlParserEntityCheck:
425
 * @ctxt:  parser context
426
 * @extra:  sum of unexpanded entity sizes
427
 *
428
 * Check for non-linear entity expansion behaviour.
429
 *
430
 * In some cases like xmlExpandEntityInAttValue, this function is called
431
 * for each, possibly nested entity and its unexpanded content length.
432
 *
433
 * In other cases like xmlParseReference, it's only called for each
434
 * top-level entity with its unexpanded content length plus the sum of
435
 * the unexpanded content lengths (plus fixed cost) of all nested
436
 * entities.
437
 *
438
 * Summing the unexpanded lengths also adds the length of the reference.
439
 * This is by design. Taking the length of the entity name into account
440
 * discourages attacks that try to waste CPU time with abusively long
441
 * entity names. See test/recurse/lol6.xml for example. Each call also
442
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
443
 * short entities.
444
 *
445
 * Returns 1 on error, 0 on success.
446
 */
447
static int
448
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
449
1.00M
{
450
1.00M
    unsigned long consumed;
451
1.00M
    unsigned long *expandedSize;
452
1.00M
    xmlParserInputPtr input = ctxt->input;
453
1.00M
    xmlEntityPtr entity = input->entity;
454
455
1.00M
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
456
0
        return(0);
457
458
    /*
459
     * Compute total consumed bytes so far, including input streams of
460
     * external entities.
461
     */
462
1.00M
    consumed = input->consumed;
463
1.00M
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
464
1.00M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
465
466
1.00M
    if (entity)
467
0
        expandedSize = &entity->expandedSize;
468
1.00M
    else
469
1.00M
        expandedSize = &ctxt->sizeentcopy;
470
471
    /*
472
     * Add extra cost and some fixed cost.
473
     */
474
1.00M
    xmlSaturatedAdd(expandedSize, extra);
475
1.00M
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
476
477
    /*
478
     * It's important to always use saturation arithmetic when tracking
479
     * entity sizes to make the size checks reliable. If "sizeentcopy"
480
     * overflows, we have to abort.
481
     */
482
1.00M
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
483
180
        ((*expandedSize >= ULONG_MAX) ||
484
180
         (*expandedSize / ctxt->maxAmpl > consumed))) {
485
180
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
486
180
                       "Maximum entity amplification factor exceeded, see "
487
180
                       "xmlCtxtSetMaxAmplification.\n");
488
180
        xmlHaltParser(ctxt);
489
180
        return(1);
490
180
    }
491
492
1.00M
    return(0);
493
1.00M
}
494
495
/************************************************************************
496
 *                  *
497
 *    Library wide options          *
498
 *                  *
499
 ************************************************************************/
500
501
/**
502
  * xmlHasFeature:
503
  * @feature: the feature to be examined
504
  *
505
  * Examines if the library has been compiled with a given feature.
506
  *
507
  * Returns a non-zero value if the feature exist, otherwise zero.
508
  * Returns zero (0) if the feature does not exist or an unknown
509
  * unknown feature is requested, non-zero otherwise.
510
  */
511
int
512
xmlHasFeature(xmlFeature feature)
513
0
{
514
0
    switch (feature) {
515
0
  case XML_WITH_THREAD:
516
0
#ifdef LIBXML_THREAD_ENABLED
517
0
      return(1);
518
#else
519
      return(0);
520
#endif
521
0
        case XML_WITH_TREE:
522
0
            return(1);
523
0
        case XML_WITH_OUTPUT:
524
0
#ifdef LIBXML_OUTPUT_ENABLED
525
0
            return(1);
526
#else
527
            return(0);
528
#endif
529
0
        case XML_WITH_PUSH:
530
0
#ifdef LIBXML_PUSH_ENABLED
531
0
            return(1);
532
#else
533
            return(0);
534
#endif
535
0
        case XML_WITH_READER:
536
0
#ifdef LIBXML_READER_ENABLED
537
0
            return(1);
538
#else
539
            return(0);
540
#endif
541
0
        case XML_WITH_PATTERN:
542
0
#ifdef LIBXML_PATTERN_ENABLED
543
0
            return(1);
544
#else
545
            return(0);
546
#endif
547
0
        case XML_WITH_WRITER:
548
0
#ifdef LIBXML_WRITER_ENABLED
549
0
            return(1);
550
#else
551
            return(0);
552
#endif
553
0
        case XML_WITH_SAX1:
554
0
#ifdef LIBXML_SAX1_ENABLED
555
0
            return(1);
556
#else
557
            return(0);
558
#endif
559
0
        case XML_WITH_HTTP:
560
#ifdef LIBXML_HTTP_ENABLED
561
            return(1);
562
#else
563
0
            return(0);
564
0
#endif
565
0
        case XML_WITH_VALID:
566
0
#ifdef LIBXML_VALID_ENABLED
567
0
            return(1);
568
#else
569
            return(0);
570
#endif
571
0
        case XML_WITH_HTML:
572
0
#ifdef LIBXML_HTML_ENABLED
573
0
            return(1);
574
#else
575
            return(0);
576
#endif
577
0
        case XML_WITH_LEGACY:
578
0
            return(0);
579
0
        case XML_WITH_C14N:
580
0
#ifdef LIBXML_C14N_ENABLED
581
0
            return(1);
582
#else
583
            return(0);
584
#endif
585
0
        case XML_WITH_CATALOG:
586
0
#ifdef LIBXML_CATALOG_ENABLED
587
0
            return(1);
588
#else
589
            return(0);
590
#endif
591
0
        case XML_WITH_XPATH:
592
0
#ifdef LIBXML_XPATH_ENABLED
593
0
            return(1);
594
#else
595
            return(0);
596
#endif
597
0
        case XML_WITH_XPTR:
598
0
#ifdef LIBXML_XPTR_ENABLED
599
0
            return(1);
600
#else
601
            return(0);
602
#endif
603
0
        case XML_WITH_XINCLUDE:
604
0
#ifdef LIBXML_XINCLUDE_ENABLED
605
0
            return(1);
606
#else
607
            return(0);
608
#endif
609
0
        case XML_WITH_ICONV:
610
#ifdef LIBXML_ICONV_ENABLED
611
            return(1);
612
#else
613
0
            return(0);
614
0
#endif
615
0
        case XML_WITH_ISO8859X:
616
0
#ifdef LIBXML_ISO8859X_ENABLED
617
0
            return(1);
618
#else
619
            return(0);
620
#endif
621
0
        case XML_WITH_UNICODE:
622
0
            return(0);
623
0
        case XML_WITH_REGEXP:
624
0
#ifdef LIBXML_REGEXP_ENABLED
625
0
            return(1);
626
#else
627
            return(0);
628
#endif
629
0
        case XML_WITH_AUTOMATA:
630
0
#ifdef LIBXML_REGEXP_ENABLED
631
0
            return(1);
632
#else
633
            return(0);
634
#endif
635
0
        case XML_WITH_EXPR:
636
#ifdef LIBXML_EXPR_ENABLED
637
            return(1);
638
#else
639
0
            return(0);
640
0
#endif
641
0
        case XML_WITH_RELAXNG:
642
0
#ifdef LIBXML_RELAXNG_ENABLED
643
0
            return(1);
644
#else
645
            return(0);
646
#endif
647
0
        case XML_WITH_SCHEMAS:
648
0
#ifdef LIBXML_SCHEMAS_ENABLED
649
0
            return(1);
650
#else
651
            return(0);
652
#endif
653
0
        case XML_WITH_SCHEMATRON:
654
0
#ifdef LIBXML_SCHEMATRON_ENABLED
655
0
            return(1);
656
#else
657
            return(0);
658
#endif
659
0
        case XML_WITH_MODULES:
660
0
#ifdef LIBXML_MODULES_ENABLED
661
0
            return(1);
662
#else
663
            return(0);
664
#endif
665
0
        case XML_WITH_DEBUG:
666
0
#ifdef LIBXML_DEBUG_ENABLED
667
0
            return(1);
668
#else
669
            return(0);
670
#endif
671
0
        case XML_WITH_DEBUG_MEM:
672
0
            return(0);
673
0
        case XML_WITH_ZLIB:
674
#ifdef LIBXML_ZLIB_ENABLED
675
            return(1);
676
#else
677
0
            return(0);
678
0
#endif
679
0
        case XML_WITH_LZMA:
680
#ifdef LIBXML_LZMA_ENABLED
681
            return(1);
682
#else
683
0
            return(0);
684
0
#endif
685
0
        case XML_WITH_ICU:
686
#ifdef LIBXML_ICU_ENABLED
687
            return(1);
688
#else
689
0
            return(0);
690
0
#endif
691
0
        default:
692
0
      break;
693
0
     }
694
0
     return(0);
695
0
}
696
697
/************************************************************************
698
 *                  *
699
 *      Simple string buffer        *
700
 *                  *
701
 ************************************************************************/
702
703
typedef struct {
704
    xmlChar *mem;
705
    unsigned size;
706
    unsigned cap; /* size < cap */
707
    unsigned max; /* size <= max */
708
    xmlParserErrors code;
709
} xmlSBuf;
710
711
static void
712
24.0M
xmlSBufInit(xmlSBuf *buf, unsigned max) {
713
24.0M
    buf->mem = NULL;
714
24.0M
    buf->size = 0;
715
24.0M
    buf->cap = 0;
716
24.0M
    buf->max = max;
717
24.0M
    buf->code = XML_ERR_OK;
718
24.0M
}
719
720
static int
721
343k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
722
343k
    xmlChar *mem;
723
343k
    unsigned cap;
724
725
343k
    if (len >= UINT_MAX / 2 - buf->size) {
726
0
        if (buf->code == XML_ERR_OK)
727
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
728
0
        return(-1);
729
0
    }
730
731
343k
    cap = (buf->size + len) * 2;
732
343k
    if (cap < 240)
733
286k
        cap = 240;
734
735
343k
    mem = xmlRealloc(buf->mem, cap);
736
343k
    if (mem == NULL) {
737
0
        buf->code = XML_ERR_NO_MEMORY;
738
0
        return(-1);
739
0
    }
740
741
343k
    buf->mem = mem;
742
343k
    buf->cap = cap;
743
744
343k
    return(0);
745
343k
}
746
747
static void
748
6.43M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
749
6.43M
    if (buf->max - buf->size < len) {
750
0
        if (buf->code == XML_ERR_OK)
751
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
752
0
        return;
753
0
    }
754
755
6.43M
    if (buf->cap - buf->size <= len) {
756
339k
        if (xmlSBufGrow(buf, len) < 0)
757
0
            return;
758
339k
    }
759
760
6.43M
    if (len > 0)
761
6.43M
        memcpy(buf->mem + buf->size, str, len);
762
6.43M
    buf->size += len;
763
6.43M
}
764
765
static void
766
5.08M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
767
5.08M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
768
5.08M
}
769
770
static void
771
74.9k
xmlSBufAddChar(xmlSBuf *buf, int c) {
772
74.9k
    xmlChar *end;
773
774
74.9k
    if (buf->max - buf->size < 4) {
775
0
        if (buf->code == XML_ERR_OK)
776
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
777
0
        return;
778
0
    }
779
780
74.9k
    if (buf->cap - buf->size <= 4) {
781
3.54k
        if (xmlSBufGrow(buf, 4) < 0)
782
0
            return;
783
3.54k
    }
784
785
74.9k
    end = buf->mem + buf->size;
786
787
74.9k
    if (c < 0x80) {
788
24.2k
        *end = (xmlChar) c;
789
24.2k
        buf->size += 1;
790
50.7k
    } else {
791
50.7k
        buf->size += xmlCopyCharMultiByte(end, c);
792
50.7k
    }
793
74.9k
}
794
795
static void
796
4.18M
xmlSBufAddReplChar(xmlSBuf *buf) {
797
4.18M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
798
4.18M
}
799
800
static void
801
0
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
802
0
    if (buf->code == XML_ERR_NO_MEMORY)
803
0
        xmlCtxtErrMemory(ctxt);
804
0
    else
805
0
        xmlFatalErr(ctxt, buf->code, errMsg);
806
0
}
807
808
static xmlChar *
809
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
810
331k
              const char *errMsg) {
811
331k
    if (buf->mem == NULL) {
812
33.9k
        buf->mem = xmlMalloc(1);
813
33.9k
        if (buf->mem == NULL) {
814
0
            buf->code = XML_ERR_NO_MEMORY;
815
33.9k
        } else {
816
33.9k
            buf->mem[0] = 0;
817
33.9k
        }
818
297k
    } else {
819
297k
        buf->mem[buf->size] = 0;
820
297k
    }
821
822
331k
    if (buf->code == XML_ERR_OK) {
823
331k
        if (sizeOut != NULL)
824
232k
            *sizeOut = buf->size;
825
331k
        return(buf->mem);
826
331k
    }
827
828
0
    xmlSBufReportError(buf, ctxt, errMsg);
829
830
0
    xmlFree(buf->mem);
831
832
0
    if (sizeOut != NULL)
833
0
        *sizeOut = 0;
834
0
    return(NULL);
835
331k
}
836
837
static void
838
23.7M
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
839
23.7M
    if (buf->code != XML_ERR_OK)
840
0
        xmlSBufReportError(buf, ctxt, errMsg);
841
842
23.7M
    xmlFree(buf->mem);
843
23.7M
}
844
845
static int
846
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
847
7.62M
                    const char *errMsg) {
848
7.62M
    int c = str[0];
849
7.62M
    int c1 = str[1];
850
851
7.62M
    if ((c1 & 0xC0) != 0x80)
852
1.89M
        goto encoding_error;
853
854
5.72M
    if (c < 0xE0) {
855
        /* 2-byte sequence */
856
1.72M
        if (c < 0xC2)
857
674k
            goto encoding_error;
858
859
1.05M
        return(2);
860
4.00M
    } else {
861
4.00M
        int c2 = str[2];
862
863
4.00M
        if ((c2 & 0xC0) != 0x80)
864
24.6k
            goto encoding_error;
865
866
3.97M
        if (c < 0xF0) {
867
            /* 3-byte sequence */
868
3.83M
            if (c == 0xE0) {
869
                /* overlong */
870
155k
                if (c1 < 0xA0)
871
2.08k
                    goto encoding_error;
872
3.68M
            } else if (c == 0xED) {
873
                /* surrogate */
874
17.6k
                if (c1 >= 0xA0)
875
4.35k
                    goto encoding_error;
876
3.66M
            } else if (c == 0xEF) {
877
                /* U+FFFE and U+FFFF are invalid Chars */
878
1.78M
                if ((c1 == 0xBF) && (c2 >= 0xBE))
879
31.2k
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
880
1.78M
            }
881
882
3.82M
            return(3);
883
3.83M
        } else {
884
            /* 4-byte sequence */
885
142k
            if ((str[3] & 0xC0) != 0x80)
886
8.13k
                goto encoding_error;
887
134k
            if (c == 0xF0) {
888
                /* overlong */
889
4.61k
                if (c1 < 0x90)
890
1.69k
                    goto encoding_error;
891
129k
            } else if (c >= 0xF4) {
892
                /* greater than 0x10FFFF */
893
12.6k
                if ((c > 0xF4) || (c1 >= 0x90))
894
4.13k
                    goto encoding_error;
895
12.6k
            }
896
897
128k
            return(4);
898
134k
        }
899
3.97M
    }
900
901
2.61M
encoding_error:
902
    /* Only report the first error */
903
2.61M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
904
5.69k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
905
5.69k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
906
5.69k
    }
907
908
2.61M
    return(0);
909
5.72M
}
910
911
/************************************************************************
912
 *                  *
913
 *    SAX2 defaulted attributes handling      *
914
 *                  *
915
 ************************************************************************/
916
917
/**
918
 * xmlCtxtInitializeLate:
919
 * @ctxt:  an XML parser context
920
 *
921
 * Final initialization of the parser context before starting to parse.
922
 *
923
 * This accounts for users modifying struct members of parser context
924
 * directly.
925
 */
926
static void
927
250k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
928
250k
    xmlSAXHandlerPtr sax;
929
930
    /* Avoid unused variable warning if features are disabled. */
931
250k
    (void) sax;
932
933
    /*
934
     * Changing the SAX struct directly is still widespread practice
935
     * in internal and external code.
936
     */
937
250k
    if (ctxt == NULL) return;
938
250k
    sax = ctxt->sax;
939
250k
#ifdef LIBXML_SAX1_ENABLED
940
    /*
941
     * Only enable SAX2 if there SAX2 element handlers, except when there
942
     * are no element handlers at all.
943
     */
944
250k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
945
250k
        (sax) &&
946
250k
        (sax->initialized == XML_SAX2_MAGIC) &&
947
250k
        ((sax->startElementNs != NULL) ||
948
0
         (sax->endElementNs != NULL) ||
949
0
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
950
250k
        ctxt->sax2 = 1;
951
#else
952
    ctxt->sax2 = 1;
953
#endif /* LIBXML_SAX1_ENABLED */
954
955
    /*
956
     * Some users replace the dictionary directly in the context struct.
957
     * We really need an API function to do that cleanly.
958
     */
959
250k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
960
250k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
961
250k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
962
250k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
963
250k
    (ctxt->str_xml_ns == NULL)) {
964
0
        xmlErrMemory(ctxt);
965
0
    }
966
967
250k
    xmlDictSetLimit(ctxt->dict,
968
250k
                    (ctxt->options & XML_PARSE_HUGE) ?
969
244k
                        0 :
970
250k
                        XML_MAX_DICTIONARY_LIMIT);
971
250k
}
972
973
typedef struct {
974
    xmlHashedString prefix;
975
    xmlHashedString name;
976
    xmlHashedString value;
977
    const xmlChar *valueEnd;
978
    int external;
979
    int expandedSize;
980
} xmlDefAttr;
981
982
typedef struct _xmlDefAttrs xmlDefAttrs;
983
typedef xmlDefAttrs *xmlDefAttrsPtr;
984
struct _xmlDefAttrs {
985
    int nbAttrs;  /* number of defaulted attributes on that element */
986
    int maxAttrs;       /* the size of the array */
987
#if __STDC_VERSION__ >= 199901L
988
    /* Using a C99 flexible array member avoids UBSan errors. */
989
    xmlDefAttr attrs[] ATTRIBUTE_COUNTED_BY(maxAttrs);
990
#else
991
    xmlDefAttr attrs[1];
992
#endif
993
};
994
995
/**
996
 * xmlAttrNormalizeSpace:
997
 * @src: the source string
998
 * @dst: the target string
999
 *
1000
 * Normalize the space in non CDATA attribute values:
1001
 * If the attribute type is not CDATA, then the XML processor MUST further
1002
 * process the normalized attribute value by discarding any leading and
1003
 * trailing space (#x20) characters, and by replacing sequences of space
1004
 * (#x20) characters by a single space (#x20) character.
1005
 * Note that the size of dst need to be at least src, and if one doesn't need
1006
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1007
 * passing src as dst is just fine.
1008
 *
1009
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1010
 *         is needed.
1011
 */
1012
static xmlChar *
1013
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1014
40.5k
{
1015
40.5k
    if ((src == NULL) || (dst == NULL))
1016
0
        return(NULL);
1017
1018
57.1k
    while (*src == 0x20) src++;
1019
907k
    while (*src != 0) {
1020
866k
  if (*src == 0x20) {
1021
161k
      while (*src == 0x20) src++;
1022
47.1k
      if (*src != 0)
1023
40.3k
    *dst++ = 0x20;
1024
819k
  } else {
1025
819k
      *dst++ = *src++;
1026
819k
  }
1027
866k
    }
1028
40.5k
    *dst = 0;
1029
40.5k
    if (dst == src)
1030
26.6k
       return(NULL);
1031
13.9k
    return(dst);
1032
40.5k
}
1033
1034
/**
1035
 * xmlAddDefAttrs:
1036
 * @ctxt:  an XML parser context
1037
 * @fullname:  the element fullname
1038
 * @fullattr:  the attribute fullname
1039
 * @value:  the attribute value
1040
 *
1041
 * Add a defaulted attribute for an element
1042
 */
1043
static void
1044
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1045
               const xmlChar *fullname,
1046
               const xmlChar *fullattr,
1047
45.4k
               const xmlChar *value) {
1048
45.4k
    xmlDefAttrsPtr defaults;
1049
45.4k
    xmlDefAttr *attr;
1050
45.4k
    int len, expandedSize;
1051
45.4k
    xmlHashedString name;
1052
45.4k
    xmlHashedString prefix;
1053
45.4k
    xmlHashedString hvalue;
1054
45.4k
    const xmlChar *localname;
1055
1056
    /*
1057
     * Allows to detect attribute redefinitions
1058
     */
1059
45.4k
    if (ctxt->attsSpecial != NULL) {
1060
41.7k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1061
11.9k
      return;
1062
41.7k
    }
1063
1064
33.4k
    if (ctxt->attsDefault == NULL) {
1065
3.73k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1066
3.73k
  if (ctxt->attsDefault == NULL)
1067
0
      goto mem_error;
1068
3.73k
    }
1069
1070
    /*
1071
     * split the element name into prefix:localname , the string found
1072
     * are within the DTD and then not associated to namespace names.
1073
     */
1074
33.4k
    localname = xmlSplitQName3(fullname, &len);
1075
33.4k
    if (localname == NULL) {
1076
30.7k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1077
30.7k
  prefix.name = NULL;
1078
30.7k
    } else {
1079
2.72k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1080
2.72k
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1081
2.72k
        if (prefix.name == NULL)
1082
0
            goto mem_error;
1083
2.72k
    }
1084
33.4k
    if (name.name == NULL)
1085
0
        goto mem_error;
1086
1087
    /*
1088
     * make sure there is some storage
1089
     */
1090
33.4k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1091
33.4k
    if ((defaults == NULL) ||
1092
26.7k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1093
10.7k
        xmlDefAttrsPtr temp;
1094
10.7k
        int newSize;
1095
1096
10.7k
        if (defaults == NULL) {
1097
6.69k
            newSize = 4;
1098
6.69k
        } else {
1099
4.03k
            if ((defaults->maxAttrs >= XML_MAX_ATTRS) ||
1100
4.03k
                ((size_t) defaults->maxAttrs >
1101
4.03k
                     SIZE_MAX / 2 / sizeof(temp[0]) - sizeof(*defaults)))
1102
0
                goto mem_error;
1103
1104
4.03k
            if (defaults->maxAttrs > XML_MAX_ATTRS / 2)
1105
0
                newSize = XML_MAX_ATTRS;
1106
4.03k
            else
1107
4.03k
                newSize = defaults->maxAttrs * 2;
1108
4.03k
        }
1109
10.7k
        temp = xmlRealloc(defaults,
1110
10.7k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1111
10.7k
  if (temp == NULL)
1112
0
      goto mem_error;
1113
10.7k
        if (defaults == NULL)
1114
6.69k
            temp->nbAttrs = 0;
1115
10.7k
  temp->maxAttrs = newSize;
1116
10.7k
        defaults = temp;
1117
10.7k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1118
10.7k
                          defaults, NULL) < 0) {
1119
0
      xmlFree(defaults);
1120
0
      goto mem_error;
1121
0
  }
1122
10.7k
    }
1123
1124
    /*
1125
     * Split the attribute name into prefix:localname , the string found
1126
     * are within the DTD and hen not associated to namespace names.
1127
     */
1128
33.4k
    localname = xmlSplitQName3(fullattr, &len);
1129
33.4k
    if (localname == NULL) {
1130
15.1k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1131
15.1k
  prefix.name = NULL;
1132
18.3k
    } else {
1133
18.3k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1134
18.3k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1135
18.3k
        if (prefix.name == NULL)
1136
0
            goto mem_error;
1137
18.3k
    }
1138
33.4k
    if (name.name == NULL)
1139
0
        goto mem_error;
1140
1141
    /* intern the string and precompute the end */
1142
33.4k
    len = strlen((const char *) value);
1143
33.4k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1144
33.4k
    if (hvalue.name == NULL)
1145
0
        goto mem_error;
1146
1147
33.4k
    expandedSize = strlen((const char *) name.name);
1148
33.4k
    if (prefix.name != NULL)
1149
18.3k
        expandedSize += strlen((const char *) prefix.name);
1150
33.4k
    expandedSize += len;
1151
1152
33.4k
    attr = &defaults->attrs[defaults->nbAttrs++];
1153
33.4k
    attr->name = name;
1154
33.4k
    attr->prefix = prefix;
1155
33.4k
    attr->value = hvalue;
1156
33.4k
    attr->valueEnd = hvalue.name + len;
1157
33.4k
    attr->external = PARSER_EXTERNAL(ctxt);
1158
33.4k
    attr->expandedSize = expandedSize;
1159
1160
33.4k
    return;
1161
1162
0
mem_error:
1163
0
    xmlErrMemory(ctxt);
1164
0
}
1165
1166
/**
1167
 * xmlAddSpecialAttr:
1168
 * @ctxt:  an XML parser context
1169
 * @fullname:  the element fullname
1170
 * @fullattr:  the attribute fullname
1171
 * @type:  the attribute type
1172
 *
1173
 * Register this attribute type
1174
 */
1175
static void
1176
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1177
      const xmlChar *fullname,
1178
      const xmlChar *fullattr,
1179
      int type)
1180
54.1k
{
1181
54.1k
    if (ctxt->attsSpecial == NULL) {
1182
4.12k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1183
4.12k
  if (ctxt->attsSpecial == NULL)
1184
0
      goto mem_error;
1185
4.12k
    }
1186
1187
54.1k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1188
54.1k
                    XML_INT_TO_PTR(type)) < 0)
1189
0
        goto mem_error;
1190
54.1k
    return;
1191
1192
54.1k
mem_error:
1193
0
    xmlErrMemory(ctxt);
1194
0
}
1195
1196
/**
1197
 * xmlCleanSpecialAttrCallback:
1198
 *
1199
 * Removes CDATA attributes from the special attribute table
1200
 */
1201
static void
1202
xmlCleanSpecialAttrCallback(void *payload, void *data,
1203
                            const xmlChar *fullname, const xmlChar *fullattr,
1204
37.8k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1205
37.8k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1206
1207
37.8k
    if (XML_PTR_TO_INT(payload) == XML_ATTRIBUTE_CDATA) {
1208
3.56k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1209
3.56k
    }
1210
37.8k
}
1211
1212
/**
1213
 * xmlCleanSpecialAttr:
1214
 * @ctxt:  an XML parser context
1215
 *
1216
 * Trim the list of attributes defined to remove all those of type
1217
 * CDATA as they are not special. This call should be done when finishing
1218
 * to parse the DTD and before starting to parse the document root.
1219
 */
1220
static void
1221
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1222
9.31k
{
1223
9.31k
    if (ctxt->attsSpecial == NULL)
1224
5.18k
        return;
1225
1226
4.12k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1227
1228
4.12k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1229
288
        xmlHashFree(ctxt->attsSpecial, NULL);
1230
288
        ctxt->attsSpecial = NULL;
1231
288
    }
1232
4.12k
}
1233
1234
/**
1235
 * xmlCheckLanguageID:
1236
 * @lang:  pointer to the string value
1237
 *
1238
 * DEPRECATED: Internal function, do not use.
1239
 *
1240
 * Checks that the value conforms to the LanguageID production:
1241
 *
1242
 * NOTE: this is somewhat deprecated, those productions were removed from
1243
 *       the XML Second edition.
1244
 *
1245
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1246
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1247
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1248
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1249
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1250
 * [38] Subcode ::= ([a-z] | [A-Z])+
1251
 *
1252
 * The current REC reference the successors of RFC 1766, currently 5646
1253
 *
1254
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1255
 * langtag       = language
1256
 *                 ["-" script]
1257
 *                 ["-" region]
1258
 *                 *("-" variant)
1259
 *                 *("-" extension)
1260
 *                 ["-" privateuse]
1261
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1262
 *                 ["-" extlang]       ; sometimes followed by
1263
 *                                     ; extended language subtags
1264
 *               / 4ALPHA              ; or reserved for future use
1265
 *               / 5*8ALPHA            ; or registered language subtag
1266
 *
1267
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1268
 *                 *2("-" 3ALPHA)      ; permanently reserved
1269
 *
1270
 * script        = 4ALPHA              ; ISO 15924 code
1271
 *
1272
 * region        = 2ALPHA              ; ISO 3166-1 code
1273
 *               / 3DIGIT              ; UN M.49 code
1274
 *
1275
 * variant       = 5*8alphanum         ; registered variants
1276
 *               / (DIGIT 3alphanum)
1277
 *
1278
 * extension     = singleton 1*("-" (2*8alphanum))
1279
 *
1280
 *                                     ; Single alphanumerics
1281
 *                                     ; "x" reserved for private use
1282
 * singleton     = DIGIT               ; 0 - 9
1283
 *               / %x41-57             ; A - W
1284
 *               / %x59-5A             ; Y - Z
1285
 *               / %x61-77             ; a - w
1286
 *               / %x79-7A             ; y - z
1287
 *
1288
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1289
 * The parser below doesn't try to cope with extension or privateuse
1290
 * that could be added but that's not interoperable anyway
1291
 *
1292
 * Returns 1 if correct 0 otherwise
1293
 **/
1294
int
1295
xmlCheckLanguageID(const xmlChar * lang)
1296
0
{
1297
0
    const xmlChar *cur = lang, *nxt;
1298
1299
0
    if (cur == NULL)
1300
0
        return (0);
1301
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1302
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1303
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1304
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1305
        /*
1306
         * Still allow IANA code and user code which were coming
1307
         * from the previous version of the XML-1.0 specification
1308
         * it's deprecated but we should not fail
1309
         */
1310
0
        cur += 2;
1311
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1312
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1313
0
            cur++;
1314
0
        return(cur[0] == 0);
1315
0
    }
1316
0
    nxt = cur;
1317
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1318
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1319
0
           nxt++;
1320
0
    if (nxt - cur >= 4) {
1321
        /*
1322
         * Reserved
1323
         */
1324
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1325
0
            return(0);
1326
0
        return(1);
1327
0
    }
1328
0
    if (nxt - cur < 2)
1329
0
        return(0);
1330
    /* we got an ISO 639 code */
1331
0
    if (nxt[0] == 0)
1332
0
        return(1);
1333
0
    if (nxt[0] != '-')
1334
0
        return(0);
1335
1336
0
    nxt++;
1337
0
    cur = nxt;
1338
    /* now we can have extlang or script or region or variant */
1339
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1340
0
        goto region_m49;
1341
1342
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1343
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1344
0
           nxt++;
1345
0
    if (nxt - cur == 4)
1346
0
        goto script;
1347
0
    if (nxt - cur == 2)
1348
0
        goto region;
1349
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1350
0
        goto variant;
1351
0
    if (nxt - cur != 3)
1352
0
        return(0);
1353
    /* we parsed an extlang */
1354
0
    if (nxt[0] == 0)
1355
0
        return(1);
1356
0
    if (nxt[0] != '-')
1357
0
        return(0);
1358
1359
0
    nxt++;
1360
0
    cur = nxt;
1361
    /* now we can have script or region or variant */
1362
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1363
0
        goto region_m49;
1364
1365
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1366
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1367
0
           nxt++;
1368
0
    if (nxt - cur == 2)
1369
0
        goto region;
1370
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1371
0
        goto variant;
1372
0
    if (nxt - cur != 4)
1373
0
        return(0);
1374
    /* we parsed a script */
1375
0
script:
1376
0
    if (nxt[0] == 0)
1377
0
        return(1);
1378
0
    if (nxt[0] != '-')
1379
0
        return(0);
1380
1381
0
    nxt++;
1382
0
    cur = nxt;
1383
    /* now we can have region or variant */
1384
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1385
0
        goto region_m49;
1386
1387
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1388
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1389
0
           nxt++;
1390
1391
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1392
0
        goto variant;
1393
0
    if (nxt - cur != 2)
1394
0
        return(0);
1395
    /* we parsed a region */
1396
0
region:
1397
0
    if (nxt[0] == 0)
1398
0
        return(1);
1399
0
    if (nxt[0] != '-')
1400
0
        return(0);
1401
1402
0
    nxt++;
1403
0
    cur = nxt;
1404
    /* now we can just have a variant */
1405
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1406
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1407
0
           nxt++;
1408
1409
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1410
0
        return(0);
1411
1412
    /* we parsed a variant */
1413
0
variant:
1414
0
    if (nxt[0] == 0)
1415
0
        return(1);
1416
0
    if (nxt[0] != '-')
1417
0
        return(0);
1418
    /* extensions and private use subtags not checked */
1419
0
    return (1);
1420
1421
0
region_m49:
1422
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1423
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1424
0
        nxt += 3;
1425
0
        goto region;
1426
0
    }
1427
0
    return(0);
1428
0
}
1429
1430
/************************************************************************
1431
 *                  *
1432
 *    Parser stacks related functions and macros    *
1433
 *                  *
1434
 ************************************************************************/
1435
1436
static xmlChar *
1437
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1438
1439
/**
1440
 * xmlParserNsCreate:
1441
 *
1442
 * Create a new namespace database.
1443
 *
1444
 * Returns the new obejct.
1445
 */
1446
xmlParserNsData *
1447
250k
xmlParserNsCreate(void) {
1448
250k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1449
1450
250k
    if (nsdb == NULL)
1451
0
        return(NULL);
1452
250k
    memset(nsdb, 0, sizeof(*nsdb));
1453
250k
    nsdb->defaultNsIndex = INT_MAX;
1454
1455
250k
    return(nsdb);
1456
250k
}
1457
1458
/**
1459
 * xmlParserNsFree:
1460
 * @nsdb: namespace database
1461
 *
1462
 * Free a namespace database.
1463
 */
1464
void
1465
250k
xmlParserNsFree(xmlParserNsData *nsdb) {
1466
250k
    if (nsdb == NULL)
1467
0
        return;
1468
1469
250k
    xmlFree(nsdb->extra);
1470
250k
    xmlFree(nsdb->hash);
1471
250k
    xmlFree(nsdb);
1472
250k
}
1473
1474
/**
1475
 * xmlParserNsReset:
1476
 * @nsdb: namespace database
1477
 *
1478
 * Reset a namespace database.
1479
 */
1480
static void
1481
6.01k
xmlParserNsReset(xmlParserNsData *nsdb) {
1482
6.01k
    if (nsdb == NULL)
1483
0
        return;
1484
1485
6.01k
    nsdb->hashElems = 0;
1486
6.01k
    nsdb->elementId = 0;
1487
6.01k
    nsdb->defaultNsIndex = INT_MAX;
1488
1489
6.01k
    if (nsdb->hash)
1490
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1491
6.01k
}
1492
1493
/**
1494
 * xmlParserStartElement:
1495
 * @nsdb: namespace database
1496
 *
1497
 * Signal that a new element has started.
1498
 *
1499
 * Returns 0 on success, -1 if the element counter overflowed.
1500
 */
1501
static int
1502
38.6M
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1503
38.6M
    if (nsdb->elementId == UINT_MAX)
1504
0
        return(-1);
1505
38.6M
    nsdb->elementId++;
1506
1507
38.6M
    return(0);
1508
38.6M
}
1509
1510
/**
1511
 * xmlParserNsLookup:
1512
 * @ctxt: parser context
1513
 * @prefix: namespace prefix
1514
 * @bucketPtr: optional bucket (return value)
1515
 *
1516
 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1517
 * be set to the matching bucket, or the first empty bucket if no match
1518
 * was found.
1519
 *
1520
 * Returns the namespace index on success, INT_MAX if no namespace was
1521
 * found.
1522
 */
1523
static int
1524
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1525
50.3M
                  xmlParserNsBucket **bucketPtr) {
1526
50.3M
    xmlParserNsBucket *bucket, *tombstone;
1527
50.3M
    unsigned index, hashValue;
1528
1529
50.3M
    if (prefix->name == NULL)
1530
25.0M
        return(ctxt->nsdb->defaultNsIndex);
1531
1532
25.2M
    if (ctxt->nsdb->hashSize == 0)
1533
533k
        return(INT_MAX);
1534
1535
24.7M
    hashValue = prefix->hashValue;
1536
24.7M
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1537
24.7M
    bucket = &ctxt->nsdb->hash[index];
1538
24.7M
    tombstone = NULL;
1539
1540
26.4M
    while (bucket->hashValue) {
1541
24.4M
        if (bucket->index == INT_MAX) {
1542
329k
            if (tombstone == NULL)
1543
320k
                tombstone = bucket;
1544
24.1M
        } else if (bucket->hashValue == hashValue) {
1545
22.7M
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1546
22.7M
                if (bucketPtr != NULL)
1547
1.15M
                    *bucketPtr = bucket;
1548
22.7M
                return(bucket->index);
1549
22.7M
            }
1550
22.7M
        }
1551
1552
1.68M
        index++;
1553
1.68M
        bucket++;
1554
1.68M
        if (index == ctxt->nsdb->hashSize) {
1555
21.9k
            index = 0;
1556
21.9k
            bucket = ctxt->nsdb->hash;
1557
21.9k
        }
1558
1.68M
    }
1559
1560
1.97M
    if (bucketPtr != NULL)
1561
1.03M
        *bucketPtr = tombstone ? tombstone : bucket;
1562
1.97M
    return(INT_MAX);
1563
24.7M
}
1564
1565
/**
1566
 * xmlParserNsLookupUri:
1567
 * @ctxt: parser context
1568
 * @prefix: namespace prefix
1569
 *
1570
 * Lookup namespace URI with given prefix.
1571
 *
1572
 * Returns the namespace URI on success, NULL if no namespace was found.
1573
 */
1574
static const xmlChar *
1575
38.6M
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1576
38.6M
    const xmlChar *ret;
1577
38.6M
    int nsIndex;
1578
1579
38.6M
    if (prefix->name == ctxt->str_xml)
1580
37.8k
        return(ctxt->str_xml_ns);
1581
1582
    /*
1583
     * minNsIndex is used when building an entity tree. We must
1584
     * ignore namespaces declared outside the entity.
1585
     */
1586
38.5M
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1587
38.5M
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1588
20.8M
        return(NULL);
1589
1590
17.7M
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1591
17.7M
    if (ret[0] == 0)
1592
37.8k
        ret = NULL;
1593
17.7M
    return(ret);
1594
38.5M
}
1595
1596
/**
1597
 * xmlParserNsLookupSax:
1598
 * @ctxt: parser context
1599
 * @prefix: namespace prefix
1600
 *
1601
 * Lookup extra data for the given prefix. This returns data stored
1602
 * with xmlParserNsUdpateSax.
1603
 *
1604
 * Returns the data on success, NULL if no namespace was found.
1605
 */
1606
void *
1607
929k
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1608
929k
    xmlHashedString hprefix;
1609
929k
    int nsIndex;
1610
1611
929k
    if (prefix == ctxt->str_xml)
1612
0
        return(NULL);
1613
1614
929k
    hprefix.name = prefix;
1615
929k
    if (prefix != NULL)
1616
928k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1617
1.23k
    else
1618
1.23k
        hprefix.hashValue = 0;
1619
929k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1620
929k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1621
0
        return(NULL);
1622
1623
929k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1624
929k
}
1625
1626
/**
1627
 * xmlParserNsUpdateSax:
1628
 * @ctxt: parser context
1629
 * @prefix: namespace prefix
1630
 * @saxData: extra data for SAX handler
1631
 *
1632
 * Sets or updates extra data for the given prefix. This value will be
1633
 * returned by xmlParserNsLookupSax as long as the namespace with the
1634
 * given prefix is in scope.
1635
 *
1636
 * Returns the data on success, NULL if no namespace was found.
1637
 */
1638
int
1639
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1640
11.3k
                     void *saxData) {
1641
11.3k
    xmlHashedString hprefix;
1642
11.3k
    int nsIndex;
1643
1644
11.3k
    if (prefix == ctxt->str_xml)
1645
0
        return(-1);
1646
1647
11.3k
    hprefix.name = prefix;
1648
11.3k
    if (prefix != NULL)
1649
10.5k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1650
794
    else
1651
794
        hprefix.hashValue = 0;
1652
11.3k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1653
11.3k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1654
0
        return(-1);
1655
1656
11.3k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1657
11.3k
    return(0);
1658
11.3k
}
1659
1660
/**
1661
 * xmlParserNsGrow:
1662
 * @ctxt: parser context
1663
 *
1664
 * Grows the namespace tables.
1665
 *
1666
 * Returns 0 on success, -1 if a memory allocation failed.
1667
 */
1668
static int
1669
613k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1670
613k
    const xmlChar **table;
1671
613k
    xmlParserNsExtra *extra;
1672
613k
    int newSize;
1673
1674
613k
    newSize = xmlGrowCapacity(ctxt->nsMax,
1675
613k
                              sizeof(table[0]) + sizeof(extra[0]),
1676
613k
                              16, XML_MAX_ITEMS);
1677
613k
    if (newSize < 0)
1678
0
        goto error;
1679
1680
613k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1681
613k
    if (table == NULL)
1682
0
        goto error;
1683
613k
    ctxt->nsTab = table;
1684
1685
613k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1686
613k
    if (extra == NULL)
1687
0
        goto error;
1688
613k
    ctxt->nsdb->extra = extra;
1689
1690
613k
    ctxt->nsMax = newSize;
1691
613k
    return(0);
1692
1693
0
error:
1694
0
    xmlErrMemory(ctxt);
1695
0
    return(-1);
1696
613k
}
1697
1698
/**
1699
 * xmlParserNsPush:
1700
 * @ctxt: parser context
1701
 * @prefix: prefix with hash value
1702
 * @uri: uri with hash value
1703
 * @saxData: extra data for SAX handler
1704
 * @defAttr: whether the namespace comes from a default attribute
1705
 *
1706
 * Push a new namespace on the table.
1707
 *
1708
 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1709
 * -1 if a memory allocation failed.
1710
 */
1711
static int
1712
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1713
1.88M
                const xmlHashedString *uri, void *saxData, int defAttr) {
1714
1.88M
    xmlParserNsBucket *bucket = NULL;
1715
1.88M
    xmlParserNsExtra *extra;
1716
1.88M
    const xmlChar **ns;
1717
1.88M
    unsigned hashValue, nsIndex, oldIndex;
1718
1719
1.88M
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1720
85
        return(0);
1721
1722
1.88M
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1723
0
        xmlErrMemory(ctxt);
1724
0
        return(-1);
1725
0
    }
1726
1727
    /*
1728
     * Default namespace and 'xml' namespace
1729
     */
1730
1.88M
    if ((prefix == NULL) || (prefix->name == NULL)) {
1731
423k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1732
1733
423k
        if (oldIndex != INT_MAX) {
1734
249k
            extra = &ctxt->nsdb->extra[oldIndex];
1735
1736
249k
            if (extra->elementId == ctxt->nsdb->elementId) {
1737
130k
                if (defAttr == 0)
1738
124k
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1739
130k
                return(0);
1740
130k
            }
1741
1742
119k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1743
0
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1744
0
                return(0);
1745
119k
        }
1746
1747
293k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1748
293k
        goto populate_entry;
1749
423k
    }
1750
1751
    /*
1752
     * Hash table lookup
1753
     */
1754
1.46M
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1755
1.46M
    if (oldIndex != INT_MAX) {
1756
298k
        extra = &ctxt->nsdb->extra[oldIndex];
1757
1758
        /*
1759
         * Check for duplicate definitions on the same element.
1760
         */
1761
298k
        if (extra->elementId == ctxt->nsdb->elementId) {
1762
67.7k
            if (defAttr == 0)
1763
67.7k
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1764
67.7k
            return(0);
1765
67.7k
        }
1766
1767
230k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1768
0
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1769
0
            return(0);
1770
1771
230k
        bucket->index = ctxt->nsNr;
1772
230k
        goto populate_entry;
1773
230k
    }
1774
1775
    /*
1776
     * Insert new bucket
1777
     */
1778
1779
1.16M
    hashValue = prefix->hashValue;
1780
1781
    /*
1782
     * Grow hash table, 50% fill factor
1783
     */
1784
1.16M
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1785
181k
        xmlParserNsBucket *newHash;
1786
181k
        unsigned newSize, i, index;
1787
1788
181k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1789
0
            xmlErrMemory(ctxt);
1790
0
            return(-1);
1791
0
        }
1792
181k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1793
181k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1794
181k
        if (newHash == NULL) {
1795
0
            xmlErrMemory(ctxt);
1796
0
            return(-1);
1797
0
        }
1798
181k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1799
1800
2.10M
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1801
1.92M
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1802
1.92M
            unsigned newIndex;
1803
1804
1.92M
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1805
1.40M
                continue;
1806
517k
            newIndex = hv & (newSize - 1);
1807
1808
534k
            while (newHash[newIndex].hashValue != 0) {
1809
16.8k
                newIndex++;
1810
16.8k
                if (newIndex == newSize)
1811
105
                    newIndex = 0;
1812
16.8k
            }
1813
1814
517k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1815
517k
        }
1816
1817
181k
        xmlFree(ctxt->nsdb->hash);
1818
181k
        ctxt->nsdb->hash = newHash;
1819
181k
        ctxt->nsdb->hashSize = newSize;
1820
1821
        /*
1822
         * Relookup
1823
         */
1824
181k
        index = hashValue & (newSize - 1);
1825
1826
184k
        while (newHash[index].hashValue != 0) {
1827
3.12k
            index++;
1828
3.12k
            if (index == newSize)
1829
54
                index = 0;
1830
3.12k
        }
1831
1832
181k
        bucket = &newHash[index];
1833
181k
    }
1834
1835
1.16M
    bucket->hashValue = hashValue;
1836
1.16M
    bucket->index = ctxt->nsNr;
1837
1.16M
    ctxt->nsdb->hashElems++;
1838
1.16M
    oldIndex = INT_MAX;
1839
1840
1.69M
populate_entry:
1841
1.69M
    nsIndex = ctxt->nsNr;
1842
1843
1.69M
    ns = &ctxt->nsTab[nsIndex * 2];
1844
1.69M
    ns[0] = prefix ? prefix->name : NULL;
1845
1.69M
    ns[1] = uri->name;
1846
1847
1.69M
    extra = &ctxt->nsdb->extra[nsIndex];
1848
1.69M
    extra->saxData = saxData;
1849
1.69M
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1850
1.69M
    extra->uriHashValue = uri->hashValue;
1851
1.69M
    extra->elementId = ctxt->nsdb->elementId;
1852
1.69M
    extra->oldIndex = oldIndex;
1853
1854
1.69M
    ctxt->nsNr++;
1855
1856
1.69M
    return(1);
1857
1.16M
}
1858
1859
/**
1860
 * xmlParserNsPop:
1861
 * @ctxt: an XML parser context
1862
 * @nr:  the number to pop
1863
 *
1864
 * Pops the top @nr namespaces and restores the hash table.
1865
 *
1866
 * Returns the number of namespaces popped.
1867
 */
1868
static int
1869
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1870
389k
{
1871
389k
    int i;
1872
1873
    /* assert(nr <= ctxt->nsNr); */
1874
1875
1.44M
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1876
1.05M
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1877
1.05M
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1878
1879
1.05M
        if (prefix == NULL) {
1880
195k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1881
856k
        } else {
1882
856k
            xmlHashedString hprefix;
1883
856k
            xmlParserNsBucket *bucket = NULL;
1884
1885
856k
            hprefix.name = prefix;
1886
856k
            hprefix.hashValue = extra->prefixHashValue;
1887
856k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1888
            /* assert(bucket && bucket->hashValue); */
1889
856k
            bucket->index = extra->oldIndex;
1890
856k
        }
1891
1.05M
    }
1892
1893
389k
    ctxt->nsNr -= nr;
1894
389k
    return(nr);
1895
389k
}
1896
1897
static int
1898
641k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt) {
1899
641k
    const xmlChar **atts;
1900
641k
    unsigned *attallocs;
1901
641k
    int newSize;
1902
1903
641k
    newSize = xmlGrowCapacity(ctxt->maxatts / 5,
1904
641k
                              sizeof(atts[0]) * 5 + sizeof(attallocs[0]),
1905
641k
                              10, XML_MAX_ATTRS);
1906
641k
    if (newSize < 0) {
1907
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
1908
0
                    "Maximum number of attributes exceeded");
1909
0
        return(-1);
1910
0
    }
1911
1912
641k
    atts = xmlRealloc(ctxt->atts, newSize * sizeof(atts[0]) * 5);
1913
641k
    if (atts == NULL)
1914
0
        goto mem_error;
1915
641k
    ctxt->atts = atts;
1916
1917
641k
    attallocs = xmlRealloc(ctxt->attallocs,
1918
641k
                           newSize * sizeof(attallocs[0]));
1919
641k
    if (attallocs == NULL)
1920
0
        goto mem_error;
1921
641k
    ctxt->attallocs = attallocs;
1922
1923
641k
    ctxt->maxatts = newSize * 5;
1924
1925
641k
    return(0);
1926
1927
0
mem_error:
1928
0
    xmlErrMemory(ctxt);
1929
0
    return(-1);
1930
641k
}
1931
1932
/**
1933
 * xmlCtxtPushInput:
1934
 * @ctxt:  an XML parser context
1935
 * @value:  the parser input
1936
 *
1937
 * Pushes a new parser input on top of the input stack
1938
 *
1939
 * Returns -1 in case of error, the index in the stack otherwise
1940
 */
1941
int
1942
xmlCtxtPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1943
250k
{
1944
250k
    char *directory = NULL;
1945
250k
    int maxDepth;
1946
1947
250k
    if ((ctxt == NULL) || (value == NULL))
1948
0
        return(-1);
1949
1950
250k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
1951
1952
250k
    if (ctxt->inputNr >= ctxt->inputMax) {
1953
0
        xmlParserInputPtr *tmp;
1954
0
        int newSize;
1955
1956
0
        newSize = xmlGrowCapacity(ctxt->inputMax, sizeof(tmp[0]),
1957
0
                                  5, maxDepth);
1958
0
        if (newSize < 0) {
1959
0
            xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
1960
0
                           "Maximum entity nesting depth exceeded");
1961
0
            xmlHaltParser(ctxt);
1962
0
            return(-1);
1963
0
        }
1964
0
        tmp = xmlRealloc(ctxt->inputTab, newSize * sizeof(tmp[0]));
1965
0
        if (tmp == NULL) {
1966
0
            xmlErrMemory(ctxt);
1967
0
            return(-1);
1968
0
        }
1969
0
        ctxt->inputTab = tmp;
1970
0
        ctxt->inputMax = newSize;
1971
0
    }
1972
1973
250k
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1974
0
        directory = xmlParserGetDirectory(value->filename);
1975
0
        if (directory == NULL) {
1976
0
            xmlErrMemory(ctxt);
1977
0
            return(-1);
1978
0
        }
1979
0
    }
1980
1981
250k
    if (ctxt->input_id >= INT_MAX) {
1982
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1983
0
        return(-1);
1984
0
    }
1985
1986
250k
    ctxt->inputTab[ctxt->inputNr] = value;
1987
250k
    ctxt->input = value;
1988
1989
250k
    if (ctxt->inputNr == 0) {
1990
250k
        xmlFree(ctxt->directory);
1991
250k
        ctxt->directory = directory;
1992
250k
    }
1993
1994
    /*
1995
     * Internally, the input ID is only used to detect parameter entity
1996
     * boundaries. But there are entity loaders in downstream code that
1997
     * detect the main document by checking for "input_id == 1".
1998
     */
1999
250k
    value->id = ctxt->input_id++;
2000
2001
250k
    return(ctxt->inputNr++);
2002
250k
}
2003
2004
/**
2005
 * xmlCtxtPopInput:
2006
 * @ctxt: an XML parser context
2007
 *
2008
 * Pops the top parser input from the input stack
2009
 *
2010
 * Returns the input just removed
2011
 */
2012
xmlParserInputPtr
2013
xmlCtxtPopInput(xmlParserCtxtPtr ctxt)
2014
757k
{
2015
757k
    xmlParserInputPtr ret;
2016
2017
757k
    if (ctxt == NULL)
2018
0
        return(NULL);
2019
757k
    if (ctxt->inputNr <= 0)
2020
506k
        return (NULL);
2021
250k
    ctxt->inputNr--;
2022
250k
    if (ctxt->inputNr > 0)
2023
0
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
2024
250k
    else
2025
250k
        ctxt->input = NULL;
2026
250k
    ret = ctxt->inputTab[ctxt->inputNr];
2027
250k
    ctxt->inputTab[ctxt->inputNr] = NULL;
2028
250k
    return (ret);
2029
757k
}
2030
2031
/**
2032
 * nodePush:
2033
 * @ctxt:  an XML parser context
2034
 * @value:  the element node
2035
 *
2036
 * DEPRECATED: Internal function, do not use.
2037
 *
2038
 * Pushes a new element node on top of the node stack
2039
 *
2040
 * Returns -1 in case of error, the index in the stack otherwise
2041
 */
2042
int
2043
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
2044
932k
{
2045
932k
    if (ctxt == NULL)
2046
0
        return(0);
2047
2048
932k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
2049
24.0k
        int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2050
24.0k
        xmlNodePtr *tmp;
2051
24.0k
        int newSize;
2052
2053
24.0k
        newSize = xmlGrowCapacity(ctxt->nodeMax, sizeof(tmp[0]),
2054
24.0k
                                  10, maxDepth);
2055
24.0k
        if (newSize < 0) {
2056
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2057
0
                    "Excessive depth in document: %d,"
2058
0
                    " use XML_PARSE_HUGE option\n",
2059
0
                    ctxt->nodeNr);
2060
0
            xmlHaltParser(ctxt);
2061
0
            return(-1);
2062
0
        }
2063
2064
24.0k
  tmp = xmlRealloc(ctxt->nodeTab, newSize * sizeof(tmp[0]));
2065
24.0k
        if (tmp == NULL) {
2066
0
            xmlErrMemory(ctxt);
2067
0
            return (-1);
2068
0
        }
2069
24.0k
        ctxt->nodeTab = tmp;
2070
24.0k
  ctxt->nodeMax = newSize;
2071
24.0k
    }
2072
2073
932k
    ctxt->nodeTab[ctxt->nodeNr] = value;
2074
932k
    ctxt->node = value;
2075
932k
    return (ctxt->nodeNr++);
2076
932k
}
2077
2078
/**
2079
 * nodePop:
2080
 * @ctxt: an XML parser context
2081
 *
2082
 * DEPRECATED: Internal function, do not use.
2083
 *
2084
 * Pops the top element node from the node stack
2085
 *
2086
 * Returns the node just removed
2087
 */
2088
xmlNodePtr
2089
nodePop(xmlParserCtxtPtr ctxt)
2090
1.03M
{
2091
1.03M
    xmlNodePtr ret;
2092
2093
1.03M
    if (ctxt == NULL) return(NULL);
2094
1.03M
    if (ctxt->nodeNr <= 0)
2095
104k
        return (NULL);
2096
932k
    ctxt->nodeNr--;
2097
932k
    if (ctxt->nodeNr > 0)
2098
926k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2099
5.92k
    else
2100
5.92k
        ctxt->node = NULL;
2101
932k
    ret = ctxt->nodeTab[ctxt->nodeNr];
2102
932k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2103
932k
    return (ret);
2104
1.03M
}
2105
2106
/**
2107
 * nameNsPush:
2108
 * @ctxt:  an XML parser context
2109
 * @value:  the element name
2110
 * @prefix:  the element prefix
2111
 * @URI:  the element namespace name
2112
 * @line:  the current line number for error messages
2113
 * @nsNr:  the number of namespaces pushed on the namespace table
2114
 *
2115
 * Pushes a new element name/prefix/URL on top of the name stack
2116
 *
2117
 * Returns -1 in case of error, the index in the stack otherwise
2118
 */
2119
static int
2120
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2121
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2122
29.3M
{
2123
29.3M
    xmlStartTag *tag;
2124
2125
29.3M
    if (ctxt->nameNr >= ctxt->nameMax) {
2126
905k
        const xmlChar **tmp;
2127
905k
        xmlStartTag *tmp2;
2128
905k
        int newSize;
2129
2130
905k
        newSize = xmlGrowCapacity(ctxt->nameMax,
2131
905k
                                  sizeof(tmp[0]) + sizeof(tmp2[0]),
2132
905k
                                  10, XML_MAX_ITEMS);
2133
905k
        if (newSize < 0)
2134
0
            goto mem_error;
2135
2136
905k
        tmp = xmlRealloc(ctxt->nameTab, newSize * sizeof(tmp[0]));
2137
905k
        if (tmp == NULL)
2138
0
      goto mem_error;
2139
905k
  ctxt->nameTab = tmp;
2140
2141
905k
        tmp2 = xmlRealloc(ctxt->pushTab, newSize * sizeof(tmp2[0]));
2142
905k
        if (tmp2 == NULL)
2143
0
      goto mem_error;
2144
905k
  ctxt->pushTab = tmp2;
2145
2146
905k
        ctxt->nameMax = newSize;
2147
28.4M
    } else if (ctxt->pushTab == NULL) {
2148
227k
        ctxt->pushTab = xmlMalloc(ctxt->nameMax * sizeof(ctxt->pushTab[0]));
2149
227k
        if (ctxt->pushTab == NULL)
2150
0
            goto mem_error;
2151
227k
    }
2152
29.3M
    ctxt->nameTab[ctxt->nameNr] = value;
2153
29.3M
    ctxt->name = value;
2154
29.3M
    tag = &ctxt->pushTab[ctxt->nameNr];
2155
29.3M
    tag->prefix = prefix;
2156
29.3M
    tag->URI = URI;
2157
29.3M
    tag->line = line;
2158
29.3M
    tag->nsNr = nsNr;
2159
29.3M
    return (ctxt->nameNr++);
2160
0
mem_error:
2161
0
    xmlErrMemory(ctxt);
2162
0
    return (-1);
2163
29.3M
}
2164
#ifdef LIBXML_PUSH_ENABLED
2165
/**
2166
 * nameNsPop:
2167
 * @ctxt: an XML parser context
2168
 *
2169
 * Pops the top element/prefix/URI name from the name stack
2170
 *
2171
 * Returns the name just removed
2172
 */
2173
static const xmlChar *
2174
nameNsPop(xmlParserCtxtPtr ctxt)
2175
8.30M
{
2176
8.30M
    const xmlChar *ret;
2177
2178
8.30M
    if (ctxt->nameNr <= 0)
2179
0
        return (NULL);
2180
8.30M
    ctxt->nameNr--;
2181
8.30M
    if (ctxt->nameNr > 0)
2182
8.16M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2183
135k
    else
2184
135k
        ctxt->name = NULL;
2185
8.30M
    ret = ctxt->nameTab[ctxt->nameNr];
2186
8.30M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2187
8.30M
    return (ret);
2188
8.30M
}
2189
#endif /* LIBXML_PUSH_ENABLED */
2190
2191
/**
2192
 * namePop:
2193
 * @ctxt: an XML parser context
2194
 *
2195
 * DEPRECATED: Internal function, do not use.
2196
 *
2197
 * Pops the top element name from the name stack
2198
 *
2199
 * Returns the name just removed
2200
 */
2201
static const xmlChar *
2202
namePop(xmlParserCtxtPtr ctxt)
2203
1.01M
{
2204
1.01M
    const xmlChar *ret;
2205
2206
1.01M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2207
0
        return (NULL);
2208
1.01M
    ctxt->nameNr--;
2209
1.01M
    if (ctxt->nameNr > 0)
2210
1.00M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2211
5.79k
    else
2212
5.79k
        ctxt->name = NULL;
2213
1.01M
    ret = ctxt->nameTab[ctxt->nameNr];
2214
1.01M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2215
1.01M
    return (ret);
2216
1.01M
}
2217
2218
38.6M
/*
 * Pushes @val on the stack of space values and points ctxt->space at the
 * new top entry. The stack is grown on demand, bounded by XML_MAX_ITEMS.
 *
 * Returns the index of the new entry, or -1 after reporting a memory
 * error on @ctxt.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int index;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown = NULL;
        int capacity;

        capacity = xmlGrowCapacity(ctxt->spaceMax, sizeof(grown[0]),
                                   10, XML_MAX_ITEMS);
        if (capacity >= 0)
            grown = xmlRealloc(ctxt->spaceTab, capacity * sizeof(grown[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        ctxt->spaceTab = grown;
        ctxt->spaceMax = capacity;
    }

    index = ctxt->spaceNr;
    ctxt->spaceTab[index] = val;
    ctxt->space = &ctxt->spaceTab[index];
    ctxt->spaceNr = index + 1;
    return(index);
}
2243
2244
18.5M
/*
 * Pops the top entry from the stack of space values. ctxt->space is
 * moved to the entry below, or to the first slot of the table when the
 * stack becomes empty. The vacated slot is overwritten with -1.
 *
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2256
2257
/*
2258
 * Macros for accessing the content. Those should be used only by the parser,
2259
 * and not exported.
2260
 *
2261
 * Dirty macros, i.e. one often need to make assumption on the context to
2262
 * use them
2263
 *
2264
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2265
 *           To be used with extreme caution since operations consuming
2266
 *           characters may move the input buffer to a different location !
2267
 *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
2268
 *           This should be used internally by the parser
2269
 *           only to compare to ASCII values otherwise it would break when
2270
 *           running with UTF-8 encoding.
2271
 *   RAW     same as CUR but in the input buffer, bypass any token
2272
 *           extraction that may have been done
2273
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
2274
 *           to compare on ASCII based substring.
2275
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2276
 *           strings without newlines within the parser.
2277
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2278
 *           defined char within the parser.
2279
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2280
 *
2281
 *   NEXT    Skip to the next character, this does the proper decoding
2282
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2283
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2284
 *   CUR_SCHAR  same but operate on a string instead of the context
2285
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2286
 *            the index
2287
 *   GROW, SHRINK  handling of input buffers
2288
 */
2289
2290
279M
/* Current byte of the current input. RAW and CUR are identical. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* Byte at offset val from the current position. */
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true if the first n bytes at s equal c1..cn.
 * Comparison stops at the first mismatch.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/*
 * Advance by val bytes, adding val to the column as well. Grows the
 * input if a zero byte is reached. val is evaluated twice.
 */
#define SKIP(val) do {                                                  \
    ctxt->input->cur += (val),ctxt->input->col+=(val);                  \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserGrow(ctxt);                                            \
  } while (0)

/*
 * Advance by val bytes one at a time, keeping line and column in sync
 * when newlines are crossed. val is evaluated on every iteration.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
        ctxt->input->line++; ctxt->input->col = 1;                      \
        } else ctxt->input->col++;                                      \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserGrow(ctxt);                                            \
  } while (0)

/*
 * SHRINK and GROW expand to a bare "if" statement that already ends in
 * a semicolon. Their shape is kept as is since call sites depend on it.
 */
#define SHRINK \
    if (!PARSER_PROGRESSIVE(ctxt)) \
        xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
        xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

/* Advance by one byte and one column; expands to a brace block. */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserGrow(ctxt);                                        \
    }

/*
 * Step over the current character, which is l bytes long, updating line
 * and column according to the first byte.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
  } while (0)

#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character v to buffer b at index i, advancing i. Expands to an
 * unbraced if/else, and v is evaluated more than once.
 */
#define COPY_BUF(b, i, v)                                               \
    if ((v) < 0x80) (b)[(i)++] = (v);                                   \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2366
2367
static int
2368
153M
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2369
153M
    int c = xmlCurrentChar(ctxt, len);
2370
2371
153M
    if (c == XML_INVALID_CHAR)
2372
535k
        c = 0xFFFD; /* replacement character */
2373
2374
153M
    return(c);
2375
153M
}
2376
2377
/**
2378
 * xmlSkipBlankChars:
2379
 * @ctxt:  the XML parser context
2380
 *
2381
 * DEPRECATED: Internal function, do not use.
2382
 *
2383
 * Skip whitespace in the input stream.
2384
 *
2385
 * Returns the number of space chars skipped
2386
 */
2387
int
2388
111M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2389
111M
    const xmlChar *cur;
2390
111M
    int res = 0;
2391
2392
111M
    cur = ctxt->input->cur;
2393
111M
    while (IS_BLANK_CH(*cur)) {
2394
33.4M
        if (*cur == '\n') {
2395
1.02M
            ctxt->input->line++; ctxt->input->col = 1;
2396
32.4M
        } else {
2397
32.4M
            ctxt->input->col++;
2398
32.4M
        }
2399
33.4M
        cur++;
2400
33.4M
        if (res < INT_MAX)
2401
33.4M
            res++;
2402
33.4M
        if (*cur == 0) {
2403
12.7k
            ctxt->input->cur = cur;
2404
12.7k
            xmlParserGrow(ctxt);
2405
12.7k
            cur = ctxt->input->cur;
2406
12.7k
        }
2407
33.4M
    }
2408
111M
    ctxt->input->cur = cur;
2409
2410
111M
    if (res > 4)
2411
403k
        GROW;
2412
2413
111M
    return(res);
2414
111M
}
2415
2416
static void
2417
0
xmlPopPE(xmlParserCtxtPtr ctxt) {
2418
0
    unsigned long consumed;
2419
0
    xmlEntityPtr ent;
2420
2421
0
    ent = ctxt->input->entity;
2422
2423
0
    ent->flags &= ~XML_ENT_EXPANDING;
2424
2425
0
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2426
0
        int result;
2427
2428
        /*
2429
         * Read the rest of the stream in case of errors. We want
2430
         * to account for the whole entity size.
2431
         */
2432
0
        do {
2433
0
            ctxt->input->cur = ctxt->input->end;
2434
0
            xmlParserShrink(ctxt);
2435
0
            result = xmlParserGrow(ctxt);
2436
0
        } while (result > 0);
2437
2438
0
        consumed = ctxt->input->consumed;
2439
0
        xmlSaturatedAddSizeT(&consumed,
2440
0
                             ctxt->input->end - ctxt->input->base);
2441
2442
0
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2443
2444
        /*
2445
         * Add to sizeentities when parsing an external entity
2446
         * for the first time.
2447
         */
2448
0
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2449
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2450
0
        }
2451
2452
0
        ent->flags |= XML_ENT_CHECKED;
2453
0
    }
2454
2455
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
2456
2457
0
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2458
2459
0
    GROW;
2460
0
}
2461
2462
/**
2463
 * xmlSkipBlankCharsPE:
2464
 * @ctxt:  the XML parser context
2465
 *
2466
 * Skip whitespace in the input stream, also handling parameter
2467
 * entities.
2468
 *
2469
 * Returns the number of space chars skipped
2470
 */
2471
static int
2472
1.20M
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2473
1.20M
    int res = 0;
2474
1.20M
    int inParam;
2475
1.20M
    int expandParam;
2476
2477
1.20M
    inParam = PARSER_IN_PE(ctxt);
2478
1.20M
    expandParam = PARSER_EXTERNAL(ctxt);
2479
2480
1.20M
    if (!inParam && !expandParam)
2481
1.20M
        return(xmlSkipBlankChars(ctxt));
2482
2483
    /*
2484
     * It's Okay to use CUR/NEXT here since all the blanks are on
2485
     * the ASCII range.
2486
     */
2487
0
    while (PARSER_STOPPED(ctxt) == 0) {
2488
0
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2489
0
            NEXT;
2490
0
        } else if (CUR == '%') {
2491
0
            if ((expandParam == 0) ||
2492
0
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2493
0
                break;
2494
2495
            /*
2496
             * Expand parameter entity. We continue to consume
2497
             * whitespace at the start of the entity and possible
2498
             * even consume the whole entity and pop it. We might
2499
             * even pop multiple PEs in this loop.
2500
             */
2501
0
            xmlParsePEReference(ctxt);
2502
2503
0
            inParam = PARSER_IN_PE(ctxt);
2504
0
            expandParam = PARSER_EXTERNAL(ctxt);
2505
0
        } else if (CUR == 0) {
2506
0
            if (inParam == 0)
2507
0
                break;
2508
2509
0
            xmlPopPE(ctxt);
2510
2511
0
            inParam = PARSER_IN_PE(ctxt);
2512
0
            expandParam = PARSER_EXTERNAL(ctxt);
2513
0
        } else {
2514
0
            break;
2515
0
        }
2516
2517
        /*
2518
         * Also increase the counter when entering or exiting a PERef.
2519
         * The spec says: "When a parameter-entity reference is recognized
2520
         * in the DTD and included, its replacement text MUST be enlarged
2521
         * by the attachment of one leading and one following space (#x20)
2522
         * character."
2523
         */
2524
0
        if (res < INT_MAX)
2525
0
            res++;
2526
0
    }
2527
2528
0
    return(res);
2529
1.20M
}
2530
2531
/************************************************************************
2532
 *                  *
2533
 *    Commodity functions to handle entities      *
2534
 *                  *
2535
 ************************************************************************/
2536
2537
/**
2538
 * xmlPopInput:
2539
 * @ctxt:  an XML parser context
2540
 *
2541
 * DEPRECATED: Internal function, don't use.
2542
 *
2543
 * Returns the current xmlChar in the parser context
2544
 */
2545
xmlChar
2546
0
xmlPopInput(xmlParserCtxtPtr ctxt) {
2547
0
    xmlParserInputPtr input;
2548
2549
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2550
0
    input = xmlCtxtPopInput(ctxt);
2551
0
    xmlFreeInputStream(input);
2552
0
    if (*ctxt->input->cur == 0)
2553
0
        xmlParserGrow(ctxt);
2554
0
    return(CUR);
2555
0
}
2556
2557
/**
2558
 * xmlPushInput:
2559
 * @ctxt:  an XML parser context
2560
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2561
 *
2562
 * DEPRECATED: Internal function, don't use.
2563
 *
2564
 * Push an input stream onto the stack.
2565
 *
2566
 * Returns -1 in case of error or the index in the input stack
2567
 */
2568
int
2569
0
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2570
0
    int ret;
2571
2572
0
    if ((ctxt == NULL) || (input == NULL))
2573
0
        return(-1);
2574
2575
0
    ret = xmlCtxtPushInput(ctxt, input);
2576
0
    if (ret >= 0)
2577
0
        GROW;
2578
0
    return(ret);
2579
0
}
2580
2581
/**
2582
 * xmlParseCharRef:
2583
 * @ctxt:  an XML parser context
2584
 *
2585
 * DEPRECATED: Internal function, don't use.
2586
 *
2587
 * Parse a numeric character reference. Always consumes '&'.
2588
 *
2589
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2590
 *                  '&#x' [0-9a-fA-F]+ ';'
2591
 *
2592
 * [ WFC: Legal Character ]
2593
 * Characters referred to using character references must match the
2594
 * production for Char.
2595
 *
2596
 * Returns the value parsed (as an int), 0 in case of error
2597
 */
2598
int
2599
178k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2600
178k
    int val = 0;
2601
178k
    int count = 0;
2602
2603
    /*
2604
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2605
     */
2606
178k
    if ((RAW == '&') && (NXT(1) == '#') &&
2607
178k
        (NXT(2) == 'x')) {
2608
124k
  SKIP(3);
2609
124k
  GROW;
2610
596k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2611
472k
      if (count++ > 20) {
2612
299
    count = 0;
2613
299
    GROW;
2614
299
      }
2615
472k
      if ((RAW >= '0') && (RAW <= '9'))
2616
235k
          val = val * 16 + (CUR - '0');
2617
236k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2618
185k
          val = val * 16 + (CUR - 'a') + 10;
2619
51.2k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2620
50.8k
          val = val * 16 + (CUR - 'A') + 10;
2621
414
      else {
2622
414
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2623
414
    val = 0;
2624
414
    break;
2625
414
      }
2626
471k
      if (val > 0x110000)
2627
3.30k
          val = 0x110000;
2628
2629
471k
      NEXT;
2630
471k
      count++;
2631
471k
  }
2632
124k
  if (RAW == ';') {
2633
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2634
124k
      ctxt->input->col++;
2635
124k
      ctxt->input->cur++;
2636
124k
  }
2637
124k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2638
53.7k
  SKIP(2);
2639
53.7k
  GROW;
2640
181k
  while (RAW != ';') { /* loop blocked by count */
2641
127k
      if (count++ > 20) {
2642
510
    count = 0;
2643
510
    GROW;
2644
510
      }
2645
127k
      if ((RAW >= '0') && (RAW <= '9'))
2646
127k
          val = val * 10 + (CUR - '0');
2647
346
      else {
2648
346
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2649
346
    val = 0;
2650
346
    break;
2651
346
      }
2652
127k
      if (val > 0x110000)
2653
4.95k
          val = 0x110000;
2654
2655
127k
      NEXT;
2656
127k
      count++;
2657
127k
  }
2658
53.7k
  if (RAW == ';') {
2659
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2660
53.4k
      ctxt->input->col++;
2661
53.4k
      ctxt->input->cur++;
2662
53.4k
  }
2663
53.7k
    } else {
2664
0
        if (RAW == '&')
2665
0
            SKIP(1);
2666
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2667
0
    }
2668
2669
    /*
2670
     * [ WFC: Legal Character ]
2671
     * Characters referred to using character references must match the
2672
     * production for Char.
2673
     */
2674
178k
    if (val >= 0x110000) {
2675
136
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2676
136
                "xmlParseCharRef: character reference out of bounds\n",
2677
136
          val);
2678
136
        val = 0xFFFD;
2679
178k
    } else if (!IS_CHAR(val)) {
2680
1.68k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2681
1.68k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2682
1.68k
                    val);
2683
1.68k
    }
2684
178k
    return(val);
2685
178k
}
2686
2687
/**
2688
 * xmlParseStringCharRef:
2689
 * @ctxt:  an XML parser context
2690
 * @str:  a pointer to an index in the string
2691
 *
2692
 * parse Reference declarations, variant parsing from a string rather
2693
 * than an an input flow.
2694
 *
2695
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2696
 *                  '&#x' [0-9a-fA-F]+ ';'
2697
 *
2698
 * [ WFC: Legal Character ]
2699
 * Characters referred to using character references must match the
2700
 * production for Char.
2701
 *
2702
 * Returns the value parsed (as an int), 0 in case of error, str will be
2703
 *         updated to the current value of the index
2704
 */
2705
static int
2706
49.8k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2707
49.8k
    const xmlChar *ptr;
2708
49.8k
    xmlChar cur;
2709
49.8k
    int val = 0;
2710
2711
49.8k
    if ((str == NULL) || (*str == NULL)) return(0);
2712
49.8k
    ptr = *str;
2713
49.8k
    cur = *ptr;
2714
49.8k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2715
17.0k
  ptr += 3;
2716
17.0k
  cur = *ptr;
2717
75.1k
  while (cur != ';') { /* Non input consuming loop */
2718
61.8k
      if ((cur >= '0') && (cur <= '9'))
2719
32.5k
          val = val * 16 + (cur - '0');
2720
29.3k
      else if ((cur >= 'a') && (cur <= 'f'))
2721
14.7k
          val = val * 16 + (cur - 'a') + 10;
2722
14.6k
      else if ((cur >= 'A') && (cur <= 'F'))
2723
10.9k
          val = val * 16 + (cur - 'A') + 10;
2724
3.67k
      else {
2725
3.67k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2726
3.67k
    val = 0;
2727
3.67k
    break;
2728
3.67k
      }
2729
58.1k
      if (val > 0x110000)
2730
8.40k
          val = 0x110000;
2731
2732
58.1k
      ptr++;
2733
58.1k
      cur = *ptr;
2734
58.1k
  }
2735
17.0k
  if (cur == ';')
2736
13.3k
      ptr++;
2737
32.8k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2738
32.8k
  ptr += 2;
2739
32.8k
  cur = *ptr;
2740
185k
  while (cur != ';') { /* Non input consuming loops */
2741
156k
      if ((cur >= '0') && (cur <= '9'))
2742
153k
          val = val * 10 + (cur - '0');
2743
2.85k
      else {
2744
2.85k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2745
2.85k
    val = 0;
2746
2.85k
    break;
2747
2.85k
      }
2748
153k
      if (val > 0x110000)
2749
2.70k
          val = 0x110000;
2750
2751
153k
      ptr++;
2752
153k
      cur = *ptr;
2753
153k
  }
2754
32.8k
  if (cur == ';')
2755
29.9k
      ptr++;
2756
32.8k
    } else {
2757
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2758
0
  return(0);
2759
0
    }
2760
49.8k
    *str = ptr;
2761
2762
    /*
2763
     * [ WFC: Legal Character ]
2764
     * Characters referred to using character references must match the
2765
     * production for Char.
2766
     */
2767
49.8k
    if (val >= 0x110000) {
2768
249
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2769
249
                "xmlParseStringCharRef: character reference out of bounds\n",
2770
249
                val);
2771
49.5k
    } else if (IS_CHAR(val)) {
2772
41.4k
        return(val);
2773
41.4k
    } else {
2774
8.13k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2775
8.13k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2776
8.13k
        val);
2777
8.13k
    }
2778
8.38k
    return(0);
2779
49.8k
}
2780
2781
/**
2782
 * xmlParserHandlePEReference:
2783
 * @ctxt:  the parser context
2784
 *
2785
 * DEPRECATED: Internal function, do not use.
2786
 *
2787
 * [69] PEReference ::= '%' Name ';'
2788
 *
2789
 * [ WFC: No Recursion ]
2790
 * A parsed entity must not contain a recursive
2791
 * reference to itself, either directly or indirectly.
2792
 *
2793
 * [ WFC: Entity Declared ]
2794
 * In a document without any DTD, a document with only an internal DTD
2795
 * subset which contains no parameter entity references, or a document
2796
 * with "standalone='yes'", ...  ... The declaration of a parameter
2797
 * entity must precede any reference to it...
2798
 *
2799
 * [ VC: Entity Declared ]
2800
 * In a document with an external subset or external parameter entities
2801
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2802
 * must precede any reference to it...
2803
 *
2804
 * [ WFC: In DTD ]
2805
 * Parameter-entity references may only appear in the DTD.
2806
 * NOTE: misleading but this is handled.
2807
 *
2808
 * A PEReference may have been detected in the current input stream
2809
 * the handling is done accordingly to
2810
 *      http://www.w3.org/TR/REC-xml#entproc
2811
 * i.e.
2812
 *   - Included in literal in entity values
2813
 *   - Included as Parameter Entity reference within DTDs
2814
 */
2815
void
2816
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2817
0
    xmlParsePEReference(ctxt);
2818
0
}
2819
2820
/**
2821
 * xmlStringLenDecodeEntities:
2822
 * @ctxt:  the parser context
2823
 * @str:  the input string
2824
 * @len: the string length
2825
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2826
 * @end:  an end marker xmlChar, 0 if none
2827
 * @end2:  an end marker xmlChar, 0 if none
2828
 * @end3:  an end marker xmlChar, 0 if none
2829
 *
2830
 * DEPRECATED: Internal function, don't use.
2831
 *
2832
 * Returns A newly allocated string with the substitution done. The caller
2833
 *      must deallocate it !
2834
 */
2835
xmlChar *
2836
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2837
                           int what ATTRIBUTE_UNUSED,
2838
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2839
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2840
0
        return(NULL);
2841
2842
0
    if ((str[len] != 0) ||
2843
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2844
0
        return(NULL);
2845
2846
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2847
0
}
2848
2849
/**
2850
 * xmlStringDecodeEntities:
2851
 * @ctxt:  the parser context
2852
 * @str:  the input string
2853
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2854
 * @end:  an end marker xmlChar, 0 if none
2855
 * @end2:  an end marker xmlChar, 0 if none
2856
 * @end3:  an end marker xmlChar, 0 if none
2857
 *
2858
 * DEPRECATED: Internal function, don't use.
2859
 *
2860
 * Returns A newly allocated string with the substitution done. The caller
2861
 *      must deallocate it !
2862
 */
2863
xmlChar *
2864
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2865
                        int what ATTRIBUTE_UNUSED,
2866
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2867
0
    if ((ctxt == NULL) || (str == NULL))
2868
0
        return(NULL);
2869
2870
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2871
0
        return(NULL);
2872
2873
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2874
0
}
2875
2876
/************************************************************************
2877
 *                  *
2878
 *    Commodity functions, cleanup needed ?     *
2879
 *                  *
2880
 ************************************************************************/
2881
2882
/**
2883
 * areBlanks:
2884
 * @ctxt:  an XML parser context
2885
 * @str:  a xmlChar *
2886
 * @len:  the size of @str
2887
 * @blank_chars: we know the chars are blanks
2888
 *
2889
 * Is this a sequence of blank chars that one can ignore ?
2890
 *
2891
 * Returns 1 if ignorable 0 otherwise.
2892
 */
2893
2894
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2895
14.0M
                     int blank_chars) {
2896
14.0M
    int i;
2897
14.0M
    xmlNodePtr lastChild;
2898
2899
    /*
2900
     * Check for xml:space value.
2901
     */
2902
14.0M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2903
13.9M
        (*(ctxt->space) == -2))
2904
5.73M
  return(0);
2905
2906
    /*
2907
     * Check that the string is made of blanks
2908
     */
2909
8.30M
    if (blank_chars == 0) {
2910
8.72M
  for (i = 0;i < len;i++)
2911
8.58M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2912
7.28M
    }
2913
2914
    /*
2915
     * Look if the element is mixed content in the DTD if available
2916
     */
2917
1.16M
    if (ctxt->node == NULL) return(0);
2918
16
    if (ctxt->myDoc != NULL) {
2919
0
        xmlElementPtr elemDecl = NULL;
2920
0
        xmlDocPtr doc = ctxt->myDoc;
2921
0
        const xmlChar *prefix = NULL;
2922
2923
0
        if (ctxt->node->ns)
2924
0
            prefix = ctxt->node->ns->prefix;
2925
0
        if (doc->intSubset != NULL)
2926
0
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2927
0
                                      prefix);
2928
0
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2929
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2930
0
                                      prefix);
2931
0
        if (elemDecl != NULL) {
2932
0
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2933
0
                return(1);
2934
0
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2935
0
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2936
0
                return(0);
2937
0
        }
2938
0
    }
2939
2940
    /*
2941
     * Otherwise, heuristic :-\
2942
     *
2943
     * When push parsing, we could be at the end of a chunk.
2944
     * This makes the look-ahead and consequently the NOBLANKS
2945
     * option unreliable.
2946
     */
2947
16
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2948
16
    if ((ctxt->node->children == NULL) &&
2949
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2950
2951
16
    lastChild = xmlGetLastChild(ctxt->node);
2952
16
    if (lastChild == NULL) {
2953
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2954
0
            (ctxt->node->content != NULL)) return(0);
2955
16
    } else if (xmlNodeIsText(lastChild))
2956
0
        return(0);
2957
16
    else if ((ctxt->node->children != NULL) &&
2958
0
             (xmlNodeIsText(ctxt->node->children)))
2959
0
        return(0);
2960
16
    return(1);
2961
16
}
2962
2963
/************************************************************************
2964
 *                  *
2965
 *    Extra stuff for namespace support     *
2966
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2967
 *                  *
2968
 ************************************************************************/
2969
2970
/**
2971
 * xmlSplitQName:
2972
 * @ctxt:  an XML parser context
2973
 * @name:  an XML parser context
2974
 * @prefixOut:  a xmlChar **
2975
 *
2976
 * DEPRECATED: Don't use.
2977
 *
2978
 * parse an UTF8 encoded XML qualified name string
2979
 *
2980
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2981
 *
2982
 * [NS 6] Prefix ::= NCName
2983
 *
2984
 * [NS 7] LocalPart ::= NCName
2985
 *
2986
 * Returns the local part, and prefix is updated
2987
 *   to get the Prefix if any.
2988
 */
2989
2990
xmlChar *
2991
0
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
2992
0
    xmlChar buf[XML_MAX_NAMELEN + 5];
2993
0
    xmlChar *buffer = NULL;
2994
0
    int len = 0;
2995
0
    int max = XML_MAX_NAMELEN;
2996
0
    xmlChar *ret = NULL;
2997
0
    xmlChar *prefix;
2998
0
    const xmlChar *cur = name;
2999
0
    int c;
3000
3001
0
    if (prefixOut == NULL) return(NULL);
3002
0
    *prefixOut = NULL;
3003
3004
0
    if (cur == NULL) return(NULL);
3005
3006
    /* nasty but well=formed */
3007
0
    if (cur[0] == ':')
3008
0
  return(xmlStrdup(name));
3009
3010
0
    c = *cur++;
3011
0
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3012
0
  buf[len++] = c;
3013
0
  c = *cur++;
3014
0
    }
3015
0
    if (len >= max) {
3016
  /*
3017
   * Okay someone managed to make a huge name, so he's ready to pay
3018
   * for the processing speed.
3019
   */
3020
0
  max = len * 2;
3021
3022
0
  buffer = xmlMalloc(max);
3023
0
  if (buffer == NULL) {
3024
0
      xmlErrMemory(ctxt);
3025
0
      return(NULL);
3026
0
  }
3027
0
  memcpy(buffer, buf, len);
3028
0
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3029
0
      if (len + 10 > max) {
3030
0
          xmlChar *tmp;
3031
0
                int newSize;
3032
3033
0
                newSize = xmlGrowCapacity(max, 1, 1, XML_MAX_ITEMS);
3034
0
                if (newSize < 0) {
3035
0
        xmlErrMemory(ctxt);
3036
0
        xmlFree(buffer);
3037
0
        return(NULL);
3038
0
                }
3039
0
    tmp = xmlRealloc(buffer, newSize);
3040
0
    if (tmp == NULL) {
3041
0
        xmlErrMemory(ctxt);
3042
0
        xmlFree(buffer);
3043
0
        return(NULL);
3044
0
    }
3045
0
    buffer = tmp;
3046
0
    max = newSize;
3047
0
      }
3048
0
      buffer[len++] = c;
3049
0
      c = *cur++;
3050
0
  }
3051
0
  buffer[len] = 0;
3052
0
    }
3053
3054
0
    if ((c == ':') && (*cur == 0)) {
3055
0
        if (buffer != NULL)
3056
0
      xmlFree(buffer);
3057
0
  return(xmlStrdup(name));
3058
0
    }
3059
3060
0
    if (buffer == NULL) {
3061
0
  ret = xmlStrndup(buf, len);
3062
0
        if (ret == NULL) {
3063
0
      xmlErrMemory(ctxt);
3064
0
      return(NULL);
3065
0
        }
3066
0
    } else {
3067
0
  ret = buffer;
3068
0
  buffer = NULL;
3069
0
  max = XML_MAX_NAMELEN;
3070
0
    }
3071
3072
3073
0
    if (c == ':') {
3074
0
  c = *cur;
3075
0
        prefix = ret;
3076
0
  if (c == 0) {
3077
0
      ret = xmlStrndup(BAD_CAST "", 0);
3078
0
            if (ret == NULL) {
3079
0
                xmlFree(prefix);
3080
0
                return(NULL);
3081
0
            }
3082
0
            *prefixOut = prefix;
3083
0
            return(ret);
3084
0
  }
3085
0
  len = 0;
3086
3087
  /*
3088
   * Check that the first character is proper to start
3089
   * a new name
3090
   */
3091
0
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3092
0
        ((c >= 0x41) && (c <= 0x5A)) ||
3093
0
        (c == '_') || (c == ':'))) {
3094
0
      int l;
3095
0
      int first = CUR_SCHAR(cur, l);
3096
3097
0
      if (!IS_LETTER(first) && (first != '_')) {
3098
0
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3099
0
          "Name %s is not XML Namespace compliant\n",
3100
0
          name);
3101
0
      }
3102
0
  }
3103
0
  cur++;
3104
3105
0
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3106
0
      buf[len++] = c;
3107
0
      c = *cur++;
3108
0
  }
3109
0
  if (len >= max) {
3110
      /*
3111
       * Okay someone managed to make a huge name, so he's ready to pay
3112
       * for the processing speed.
3113
       */
3114
0
      max = len * 2;
3115
3116
0
      buffer = xmlMalloc(max);
3117
0
      if (buffer == NULL) {
3118
0
          xmlErrMemory(ctxt);
3119
0
                xmlFree(prefix);
3120
0
    return(NULL);
3121
0
      }
3122
0
      memcpy(buffer, buf, len);
3123
0
      while (c != 0) { /* tested bigname2.xml */
3124
0
    if (len + 10 > max) {
3125
0
        xmlChar *tmp;
3126
0
                    int newSize;
3127
3128
0
                    newSize = xmlGrowCapacity(max, 1, 1, XML_MAX_ITEMS);
3129
0
                    if (newSize < 0) {
3130
0
                        xmlErrMemory(ctxt);
3131
0
                        xmlFree(buffer);
3132
0
                        return(NULL);
3133
0
                    }
3134
0
        tmp = xmlRealloc(buffer, newSize);
3135
0
        if (tmp == NULL) {
3136
0
      xmlErrMemory(ctxt);
3137
0
                        xmlFree(prefix);
3138
0
      xmlFree(buffer);
3139
0
      return(NULL);
3140
0
        }
3141
0
        buffer = tmp;
3142
0
                    max = newSize;
3143
0
    }
3144
0
    buffer[len++] = c;
3145
0
    c = *cur++;
3146
0
      }
3147
0
      buffer[len] = 0;
3148
0
  }
3149
3150
0
  if (buffer == NULL) {
3151
0
      ret = xmlStrndup(buf, len);
3152
0
            if (ret == NULL) {
3153
0
                xmlFree(prefix);
3154
0
                return(NULL);
3155
0
            }
3156
0
  } else {
3157
0
      ret = buffer;
3158
0
  }
3159
3160
0
        *prefixOut = prefix;
3161
0
    }
3162
3163
0
    return(ret);
3164
0
}
3165
3166
/************************************************************************
3167
 *                  *
3168
 *      The parser itself       *
3169
 *  Relates to http://www.w3.org/TR/REC-xml       *
3170
 *                  *
3171
 ************************************************************************/
3172
3173
/************************************************************************
3174
 *                  *
3175
 *  Routines to parse Name, NCName and NmToken      *
3176
 *                  *
3177
 ************************************************************************/
3178
3179
/*
3180
 * The two following functions are related to the change of accepted
3181
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3182
 * They correspond to the modified production [4] and the new production [4a]
3183
 * changes in that revision. Also note that the macros used for the
3184
 * productions Letter, Digit, CombiningChar and Extender are not needed
3185
 * anymore.
3186
 * We still keep compatibility to pre-revision5 parsing semantic if the
3187
 * new XML_PARSE_OLD10 option is given to the parser.
3188
 */
3189
static int
3190
1.92M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3191
1.92M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3192
        /*
3193
   * Use the new checks of production [4] [4a] amd [5] of the
3194
   * Update 5 of XML-1.0
3195
   */
3196
1.92M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3197
1.92M
      (((c >= 'a') && (c <= 'z')) ||
3198
894k
       ((c >= 'A') && (c <= 'Z')) ||
3199
551k
       (c == '_') || (c == ':') ||
3200
311k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3201
308k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3202
307k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3203
303k
       ((c >= 0x370) && (c <= 0x37D)) ||
3204
303k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3205
301k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3206
300k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3207
300k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3208
300k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3209
287k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3210
287k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3211
145k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3212
1.77M
      return(1);
3213
1.92M
    } else {
3214
0
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3215
0
      return(1);
3216
0
    }
3217
147k
    return(0);
3218
1.92M
}
3219
3220
static int
3221
42.2M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3222
42.2M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3223
        /*
3224
   * Use the new checks of production [4] [4a] amd [5] of the
3225
   * Update 5 of XML-1.0
3226
   */
3227
42.2M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3228
41.7M
      (((c >= 'a') && (c <= 'z')) ||
3229
23.6M
       ((c >= 'A') && (c <= 'Z')) ||
3230
18.2M
       ((c >= '0') && (c <= '9')) || /* !start */
3231
16.0M
       (c == '_') || (c == ':') ||
3232
15.1M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3233
14.5M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3234
14.4M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3235
14.3M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3236
14.1M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3237
14.1M
       ((c >= 0x370) && (c <= 0x37D)) ||
3238
14.1M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3239
14.0M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3240
14.0M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3241
14.0M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3242
14.0M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3243
14.0M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3244
13.8M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3245
13.8M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3246
265k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3247
41.5M
       return(1);
3248
42.2M
    } else {
3249
0
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3250
0
            (c == '.') || (c == '-') ||
3251
0
      (c == '_') || (c == ':') ||
3252
0
      (IS_COMBINING(c)) ||
3253
0
      (IS_EXTENDER(c)))
3254
0
      return(1);
3255
0
    }
3256
738k
    return(0);
3257
42.2M
}
3258
3259
static const xmlChar *
3260
414k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3261
414k
    const xmlChar *ret;
3262
414k
    int len = 0, l;
3263
414k
    int c;
3264
414k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3265
414k
                    XML_MAX_TEXT_LENGTH :
3266
414k
                    XML_MAX_NAME_LENGTH;
3267
3268
    /*
3269
     * Handler for more complex cases
3270
     */
3271
414k
    c = xmlCurrentChar(ctxt, &l);
3272
414k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3273
        /*
3274
   * Use the new checks of production [4] [4a] amd [5] of the
3275
   * Update 5 of XML-1.0
3276
   */
3277
414k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3278
405k
      (!(((c >= 'a') && (c <= 'z')) ||
3279
351k
         ((c >= 'A') && (c <= 'Z')) ||
3280
345k
         (c == '_') || (c == ':') ||
3281
337k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3282
335k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3283
321k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3284
308k
         ((c >= 0x370) && (c <= 0x37D)) ||
3285
292k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3286
287k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3287
282k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3288
263k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3289
253k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3290
243k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3291
240k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3292
230k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3293
230k
      return(NULL);
3294
230k
  }
3295
184k
  len += l;
3296
184k
  NEXTL(l);
3297
184k
  c = xmlCurrentChar(ctxt, &l);
3298
1.11M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3299
1.10M
         (((c >= 'a') && (c <= 'z')) ||
3300
855k
          ((c >= 'A') && (c <= 'Z')) ||
3301
802k
          ((c >= '0') && (c <= '9')) || /* !start */
3302
743k
          (c == '_') || (c == ':') ||
3303
722k
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3304
698k
          ((c >= 0xC0) && (c <= 0xD6)) ||
3305
693k
          ((c >= 0xD8) && (c <= 0xF6)) ||
3306
682k
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3307
635k
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3308
633k
          ((c >= 0x370) && (c <= 0x37D)) ||
3309
632k
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3310
610k
          ((c >= 0x200C) && (c <= 0x200D)) ||
3311
608k
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3312
607k
          ((c >= 0x2070) && (c <= 0x218F)) ||
3313
584k
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3314
568k
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3315
359k
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3316
357k
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3317
174k
          ((c >= 0x10000) && (c <= 0xEFFFF))
3318
1.10M
    )) {
3319
933k
            if (len <= INT_MAX - l)
3320
933k
          len += l;
3321
933k
      NEXTL(l);
3322
933k
      c = xmlCurrentChar(ctxt, &l);
3323
933k
  }
3324
184k
    } else {
3325
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3326
0
      (!IS_LETTER(c) && (c != '_') &&
3327
0
       (c != ':'))) {
3328
0
      return(NULL);
3329
0
  }
3330
0
  len += l;
3331
0
  NEXTL(l);
3332
0
  c = xmlCurrentChar(ctxt, &l);
3333
3334
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3335
0
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3336
0
    (c == '.') || (c == '-') ||
3337
0
    (c == '_') || (c == ':') ||
3338
0
    (IS_COMBINING(c)) ||
3339
0
    (IS_EXTENDER(c)))) {
3340
0
            if (len <= INT_MAX - l)
3341
0
          len += l;
3342
0
      NEXTL(l);
3343
0
      c = xmlCurrentChar(ctxt, &l);
3344
0
  }
3345
0
    }
3346
184k
    if (len > maxLength) {
3347
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3348
0
        return(NULL);
3349
0
    }
3350
184k
    if (ctxt->input->cur - ctxt->input->base < len) {
3351
        /*
3352
         * There were a couple of bugs where PERefs lead to to a change
3353
         * of the buffer. Check the buffer size to avoid passing an invalid
3354
         * pointer to xmlDictLookup.
3355
         */
3356
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3357
0
                    "unexpected change of input buffer");
3358
0
        return (NULL);
3359
0
    }
3360
184k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3361
631
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3362
183k
    else
3363
183k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3364
184k
    if (ret == NULL)
3365
0
        xmlErrMemory(ctxt);
3366
184k
    return(ret);
3367
184k
}
3368
3369
/**
3370
 * xmlParseName:
3371
 * @ctxt:  an XML parser context
3372
 *
3373
 * DEPRECATED: Internal function, don't use.
3374
 *
3375
 * parse an XML name.
3376
 *
3377
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3378
 *                  CombiningChar | Extender
3379
 *
3380
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3381
 *
3382
 * [6] Names ::= Name (#x20 Name)*
3383
 *
3384
 * Returns the Name parsed or NULL
3385
 */
3386
3387
const xmlChar *
3388
1.32M
xmlParseName(xmlParserCtxtPtr ctxt) {
3389
1.32M
    const xmlChar *in;
3390
1.32M
    const xmlChar *ret;
3391
1.32M
    size_t count = 0;
3392
1.32M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3393
1.32M
                       XML_MAX_TEXT_LENGTH :
3394
1.32M
                       XML_MAX_NAME_LENGTH;
3395
3396
1.32M
    GROW;
3397
3398
    /*
3399
     * Accelerator for simple ASCII names
3400
     */
3401
1.32M
    in = ctxt->input->cur;
3402
1.32M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3403
446k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3404
979k
  (*in == '_') || (*in == ':')) {
3405
979k
  in++;
3406
4.58M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3407
1.78M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3408
1.44M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3409
1.17M
         (*in == '_') || (*in == '-') ||
3410
1.05M
         (*in == ':') || (*in == '.'))
3411
3.60M
      in++;
3412
979k
  if ((*in > 0) && (*in < 0x80)) {
3413
911k
      count = in - ctxt->input->cur;
3414
911k
            if (count > maxLength) {
3415
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3416
0
                return(NULL);
3417
0
            }
3418
911k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3419
911k
      ctxt->input->cur = in;
3420
911k
      ctxt->input->col += count;
3421
911k
      if (ret == NULL)
3422
0
          xmlErrMemory(ctxt);
3423
911k
      return(ret);
3424
911k
  }
3425
979k
    }
3426
    /* accelerator for special cases */
3427
414k
    return(xmlParseNameComplex(ctxt));
3428
1.32M
}
3429
3430
static xmlHashedString
3431
1.94M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3432
1.94M
    xmlHashedString ret;
3433
1.94M
    int len = 0, l;
3434
1.94M
    int c;
3435
1.94M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3436
1.89M
                    XML_MAX_TEXT_LENGTH :
3437
1.94M
                    XML_MAX_NAME_LENGTH;
3438
1.94M
    size_t startPosition = 0;
3439
3440
1.94M
    ret.name = NULL;
3441
1.94M
    ret.hashValue = 0;
3442
3443
    /*
3444
     * Handler for more complex cases
3445
     */
3446
1.94M
    startPosition = CUR_PTR - BASE_PTR;
3447
1.94M
    c = xmlCurrentChar(ctxt, &l);
3448
1.94M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3449
1.84M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3450
468k
  return(ret);
3451
468k
    }
3452
3453
37.0M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3454
35.7M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3455
35.5M
        if (len <= INT_MAX - l)
3456
35.5M
      len += l;
3457
35.5M
  NEXTL(l);
3458
35.5M
  c = xmlCurrentChar(ctxt, &l);
3459
35.5M
    }
3460
1.47M
    if (len > maxLength) {
3461
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3462
0
        return(ret);
3463
0
    }
3464
1.47M
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3465
1.47M
    if (ret.name == NULL)
3466
0
        xmlErrMemory(ctxt);
3467
1.47M
    return(ret);
3468
1.47M
}
3469
3470
/**
3471
 * xmlParseNCName:
3472
 * @ctxt:  an XML parser context
3473
 * @len:  length of the string parsed
3474
 *
3475
 * parse an XML name.
3476
 *
3477
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3478
 *                      CombiningChar | Extender
3479
 *
3480
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3481
 *
3482
 * Returns the Name parsed or NULL
3483
 */
3484
3485
static xmlHashedString
3486
86.1M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3487
86.1M
    const xmlChar *in, *e;
3488
86.1M
    xmlHashedString ret;
3489
86.1M
    size_t count = 0;
3490
86.1M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3491
82.8M
                       XML_MAX_TEXT_LENGTH :
3492
86.1M
                       XML_MAX_NAME_LENGTH;
3493
3494
86.1M
    ret.name = NULL;
3495
3496
    /*
3497
     * Accelerator for simple ASCII names
3498
     */
3499
86.1M
    in = ctxt->input->cur;
3500
86.1M
    e = ctxt->input->end;
3501
86.1M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3502
6.76M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3503
85.4M
   (*in == '_')) && (in < e)) {
3504
85.4M
  in++;
3505
497M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3506
161M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3507
109M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3508
94.7M
          (*in == '_') || (*in == '-') ||
3509
411M
          (*in == '.')) && (in < e))
3510
411M
      in++;
3511
85.4M
  if (in >= e)
3512
15.5k
      goto complex;
3513
85.4M
  if ((*in > 0) && (*in < 0x80)) {
3514
84.1M
      count = in - ctxt->input->cur;
3515
84.1M
            if (count > maxLength) {
3516
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3517
0
                return(ret);
3518
0
            }
3519
84.1M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3520
84.1M
      ctxt->input->cur = in;
3521
84.1M
      ctxt->input->col += count;
3522
84.1M
      if (ret.name == NULL) {
3523
0
          xmlErrMemory(ctxt);
3524
0
      }
3525
84.1M
      return(ret);
3526
84.1M
  }
3527
85.4M
    }
3528
1.94M
complex:
3529
1.94M
    return(xmlParseNCNameComplex(ctxt));
3530
86.1M
}
3531
3532
/**
3533
 * xmlParseNameAndCompare:
3534
 * @ctxt:  an XML parser context
3535
 *
3536
 * parse an XML name and compares for match
3537
 * (specialized for endtag parsing)
3538
 *
3539
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3540
 * and the name for mismatch
3541
 */
3542
3543
static const xmlChar *
3544
2.66M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3545
2.66M
    register const xmlChar *cmp = other;
3546
2.66M
    register const xmlChar *in;
3547
2.66M
    const xmlChar *ret;
3548
3549
2.66M
    GROW;
3550
3551
2.66M
    in = ctxt->input->cur;
3552
8.76M
    while (*in != 0 && *in == *cmp) {
3553
6.10M
  ++in;
3554
6.10M
  ++cmp;
3555
6.10M
    }
3556
2.66M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3557
  /* success */
3558
2.65M
  ctxt->input->col += in - ctxt->input->cur;
3559
2.65M
  ctxt->input->cur = in;
3560
2.65M
  return (const xmlChar*) 1;
3561
2.65M
    }
3562
    /* failure (or end of input buffer), check with full function */
3563
4.80k
    ret = xmlParseName (ctxt);
3564
    /* strings coming from the dictionary direct compare possible */
3565
4.80k
    if (ret == other) {
3566
54
  return (const xmlChar*) 1;
3567
54
    }
3568
4.75k
    return ret;
3569
4.80k
}
3570
3571
/**
3572
 * xmlParseStringName:
3573
 * @ctxt:  an XML parser context
3574
 * @str:  a pointer to the string pointer (IN/OUT)
3575
 *
3576
 * parse an XML name.
3577
 *
3578
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3579
 *                  CombiningChar | Extender
3580
 *
3581
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3582
 *
3583
 * [6] Names ::= Name (#x20 Name)*
3584
 *
3585
 * Returns the Name parsed or NULL. The @str pointer
3586
 * is updated to the current location in the string.
3587
 */
3588
3589
static xmlChar *
3590
81.0k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3591
81.0k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3592
81.0k
    xmlChar *ret;
3593
81.0k
    const xmlChar *cur = *str;
3594
81.0k
    int len = 0, l;
3595
81.0k
    int c;
3596
81.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3597
81.0k
                    XML_MAX_TEXT_LENGTH :
3598
81.0k
                    XML_MAX_NAME_LENGTH;
3599
3600
81.0k
    c = CUR_SCHAR(cur, l);
3601
81.0k
    if (!xmlIsNameStartChar(ctxt, c)) {
3602
13.9k
  return(NULL);
3603
13.9k
    }
3604
3605
67.0k
    COPY_BUF(buf, len, c);
3606
67.0k
    cur += l;
3607
67.0k
    c = CUR_SCHAR(cur, l);
3608
322k
    while (xmlIsNameChar(ctxt, c)) {
3609
258k
  COPY_BUF(buf, len, c);
3610
258k
  cur += l;
3611
258k
  c = CUR_SCHAR(cur, l);
3612
258k
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3613
      /*
3614
       * Okay someone managed to make a huge name, so he's ready to pay
3615
       * for the processing speed.
3616
       */
3617
2.93k
      xmlChar *buffer;
3618
2.93k
      int max = len * 2;
3619
3620
2.93k
      buffer = xmlMalloc(max);
3621
2.93k
      if (buffer == NULL) {
3622
0
          xmlErrMemory(ctxt);
3623
0
    return(NULL);
3624
0
      }
3625
2.93k
      memcpy(buffer, buf, len);
3626
261k
      while (xmlIsNameChar(ctxt, c)) {
3627
258k
    if (len + 10 > max) {
3628
1.53k
        xmlChar *tmp;
3629
1.53k
                    int newSize;
3630
3631
1.53k
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3632
1.53k
                    if (newSize < 0) {
3633
0
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3634
0
                        xmlFree(buffer);
3635
0
                        return(NULL);
3636
0
                    }
3637
1.53k
        tmp = xmlRealloc(buffer, newSize);
3638
1.53k
        if (tmp == NULL) {
3639
0
      xmlErrMemory(ctxt);
3640
0
      xmlFree(buffer);
3641
0
      return(NULL);
3642
0
        }
3643
1.53k
        buffer = tmp;
3644
1.53k
                    max = newSize;
3645
1.53k
    }
3646
258k
    COPY_BUF(buffer, len, c);
3647
258k
    cur += l;
3648
258k
    c = CUR_SCHAR(cur, l);
3649
258k
      }
3650
2.93k
      buffer[len] = 0;
3651
2.93k
      *str = cur;
3652
2.93k
      return(buffer);
3653
2.93k
  }
3654
258k
    }
3655
64.1k
    if (len > maxLength) {
3656
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3657
0
        return(NULL);
3658
0
    }
3659
64.1k
    *str = cur;
3660
64.1k
    ret = xmlStrndup(buf, len);
3661
64.1k
    if (ret == NULL)
3662
0
        xmlErrMemory(ctxt);
3663
64.1k
    return(ret);
3664
64.1k
}
3665
3666
/**
3667
 * xmlParseNmtoken:
3668
 * @ctxt:  an XML parser context
3669
 *
3670
 * DEPRECATED: Internal function, don't use.
3671
 *
3672
 * parse an XML Nmtoken.
3673
 *
3674
 * [7] Nmtoken ::= (NameChar)+
3675
 *
3676
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3677
 *
3678
 * Returns the Nmtoken parsed or NULL
3679
 */
3680
3681
xmlChar *
3682
615k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3683
615k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3684
615k
    xmlChar *ret;
3685
615k
    int len = 0, l;
3686
615k
    int c;
3687
615k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3688
609k
                    XML_MAX_TEXT_LENGTH :
3689
615k
                    XML_MAX_NAME_LENGTH;
3690
3691
615k
    c = xmlCurrentChar(ctxt, &l);
3692
3693
5.57M
    while (xmlIsNameChar(ctxt, c)) {
3694
4.96M
  COPY_BUF(buf, len, c);
3695
4.96M
  NEXTL(l);
3696
4.96M
  c = xmlCurrentChar(ctxt, &l);
3697
4.96M
  if (len >= XML_MAX_NAMELEN) {
3698
      /*
3699
       * Okay someone managed to make a huge token, so he's ready to pay
3700
       * for the processing speed.
3701
       */
3702
5.67k
      xmlChar *buffer;
3703
5.67k
      int max = len * 2;
3704
3705
5.67k
      buffer = xmlMalloc(max);
3706
5.67k
      if (buffer == NULL) {
3707
0
          xmlErrMemory(ctxt);
3708
0
    return(NULL);
3709
0
      }
3710
5.67k
      memcpy(buffer, buf, len);
3711
393k
      while (xmlIsNameChar(ctxt, c)) {
3712
388k
    if (len + 10 > max) {
3713
1.74k
        xmlChar *tmp;
3714
1.74k
                    int newSize;
3715
3716
1.74k
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3717
1.74k
                    if (newSize < 0) {
3718
0
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3719
0
                        xmlFree(buffer);
3720
0
                        return(NULL);
3721
0
                    }
3722
1.74k
        tmp = xmlRealloc(buffer, newSize);
3723
1.74k
        if (tmp == NULL) {
3724
0
      xmlErrMemory(ctxt);
3725
0
      xmlFree(buffer);
3726
0
      return(NULL);
3727
0
        }
3728
1.74k
        buffer = tmp;
3729
1.74k
                    max = newSize;
3730
1.74k
    }
3731
388k
    COPY_BUF(buffer, len, c);
3732
388k
    NEXTL(l);
3733
388k
    c = xmlCurrentChar(ctxt, &l);
3734
388k
      }
3735
5.67k
      buffer[len] = 0;
3736
5.67k
      return(buffer);
3737
5.67k
  }
3738
4.96M
    }
3739
609k
    if (len == 0)
3740
93.0k
        return(NULL);
3741
516k
    if (len > maxLength) {
3742
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3743
0
        return(NULL);
3744
0
    }
3745
516k
    ret = xmlStrndup(buf, len);
3746
516k
    if (ret == NULL)
3747
0
        xmlErrMemory(ctxt);
3748
516k
    return(ret);
3749
516k
}
3750
3751
/**
3752
 * xmlExpandPEsInEntityValue:
3753
 * @ctxt:  parser context
3754
 * @buf:  string buffer
3755
 * @str:  entity value
3756
 * @length:  size of entity value
3757
 * @depth:  nesting depth
3758
 *
3759
 * Validate an entity value and expand parameter entities.
3760
 */
3761
static void
3762
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3763
53.2k
                          const xmlChar *str, int length, int depth) {
3764
53.2k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3765
53.2k
    const xmlChar *end, *chunk;
3766
53.2k
    int c, l;
3767
3768
53.2k
    if (str == NULL)
3769
0
        return;
3770
3771
53.2k
    depth += 1;
3772
53.2k
    if (depth > maxDepth) {
3773
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3774
0
                       "Maximum entity nesting depth exceeded");
3775
0
  return;
3776
0
    }
3777
3778
53.2k
    end = str + length;
3779
53.2k
    chunk = str;
3780
3781
3.33M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3782
3.31M
        c = *str;
3783
3784
3.31M
        if (c >= 0x80) {
3785
557k
            l = xmlUTF8MultibyteLen(ctxt, str,
3786
557k
                    "invalid character in entity value\n");
3787
557k
            if (l == 0) {
3788
248k
                if (chunk < str)
3789
29.3k
                    xmlSBufAddString(buf, chunk, str - chunk);
3790
248k
                xmlSBufAddReplChar(buf);
3791
248k
                str += 1;
3792
248k
                chunk = str;
3793
308k
            } else {
3794
308k
                str += l;
3795
308k
            }
3796
2.75M
        } else if (c == '&') {
3797
126k
            if (str[1] == '#') {
3798
49.8k
                if (chunk < str)
3799
33.6k
                    xmlSBufAddString(buf, chunk, str - chunk);
3800
3801
49.8k
                c = xmlParseStringCharRef(ctxt, &str);
3802
49.8k
                if (c == 0)
3803
8.38k
                    return;
3804
3805
41.4k
                xmlSBufAddChar(buf, c);
3806
3807
41.4k
                chunk = str;
3808
76.5k
            } else {
3809
76.5k
                xmlChar *name;
3810
3811
                /*
3812
                 * General entity references are checked for
3813
                 * syntactic validity.
3814
                 */
3815
76.5k
                str++;
3816
76.5k
                name = xmlParseStringName(ctxt, &str);
3817
3818
76.5k
                if ((name == NULL) || (*str++ != ';')) {
3819
23.9k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3820
23.9k
                            "EntityValue: '&' forbidden except for entities "
3821
23.9k
                            "references\n");
3822
23.9k
                    xmlFree(name);
3823
23.9k
                    return;
3824
23.9k
                }
3825
3826
52.5k
                xmlFree(name);
3827
52.5k
            }
3828
2.63M
        } else if (c == '%') {
3829
4.46k
            xmlEntityPtr ent;
3830
3831
4.46k
            if (chunk < str)
3832
3.72k
                xmlSBufAddString(buf, chunk, str - chunk);
3833
3834
4.46k
            ent = xmlParseStringPEReference(ctxt, &str);
3835
4.46k
            if (ent == NULL)
3836
4.46k
                return;
3837
3838
0
            if (!PARSER_EXTERNAL(ctxt)) {
3839
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3840
0
                return;
3841
0
            }
3842
3843
0
            if (ent->content == NULL) {
3844
                /*
3845
                 * Note: external parsed entities will not be loaded,
3846
                 * it is not required for a non-validating parser to
3847
                 * complete external PEReferences coming from the
3848
                 * internal subset
3849
                 */
3850
0
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3851
0
                    ((ctxt->replaceEntities) ||
3852
0
                     (ctxt->validate))) {
3853
0
                    xmlLoadEntityContent(ctxt, ent);
3854
0
                } else {
3855
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3856
0
                                  "not validating will not read content for "
3857
0
                                  "PE entity %s\n", ent->name, NULL);
3858
0
                }
3859
0
            }
3860
3861
            /*
3862
             * TODO: Skip if ent->content is still NULL.
3863
             */
3864
3865
0
            if (xmlParserEntityCheck(ctxt, ent->length))
3866
0
                return;
3867
3868
0
            if (ent->flags & XML_ENT_EXPANDING) {
3869
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3870
0
                xmlHaltParser(ctxt);
3871
0
                return;
3872
0
            }
3873
3874
0
            ent->flags |= XML_ENT_EXPANDING;
3875
0
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3876
0
                                      depth);
3877
0
            ent->flags &= ~XML_ENT_EXPANDING;
3878
3879
0
            chunk = str;
3880
2.62M
        } else {
3881
            /* Normal ASCII char */
3882
2.62M
            if (!IS_BYTE_CHAR(c)) {
3883
286k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3884
286k
                        "invalid character in entity value\n");
3885
286k
                if (chunk < str)
3886
12.2k
                    xmlSBufAddString(buf, chunk, str - chunk);
3887
286k
                xmlSBufAddReplChar(buf);
3888
286k
                str += 1;
3889
286k
                chunk = str;
3890
2.34M
            } else {
3891
2.34M
                str += 1;
3892
2.34M
            }
3893
2.62M
        }
3894
3.31M
    }
3895
3896
16.4k
    if (chunk < str)
3897
11.8k
        xmlSBufAddString(buf, chunk, str - chunk);
3898
16.4k
}
3899
3900
/**
3901
 * xmlParseEntityValue:
3902
 * @ctxt:  an XML parser context
3903
 * @orig:  if non-NULL store a copy of the original entity value
3904
 *
3905
 * DEPRECATED: Internal function, don't use.
3906
 *
3907
 * parse a value for ENTITY declarations
3908
 *
3909
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3910
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3911
 *
3912
 * Returns the EntityValue parsed with reference substituted or NULL
3913
 */
3914
xmlChar *
3915
53.7k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3916
53.7k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3917
53.7k
                         XML_MAX_HUGE_LENGTH :
3918
53.7k
                         XML_MAX_TEXT_LENGTH;
3919
53.7k
    xmlSBuf buf;
3920
53.7k
    const xmlChar *start;
3921
53.7k
    int quote, length;
3922
3923
53.7k
    xmlSBufInit(&buf, maxLength);
3924
3925
53.7k
    GROW;
3926
3927
53.7k
    quote = CUR;
3928
53.7k
    if ((quote != '"') && (quote != '\'')) {
3929
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3930
0
  return(NULL);
3931
0
    }
3932
53.7k
    CUR_PTR++;
3933
3934
53.7k
    length = 0;
3935
3936
    /*
3937
     * Copy raw content of the entity into a buffer
3938
     */
3939
8.83M
    while (1) {
3940
8.83M
        int c;
3941
3942
8.83M
        if (PARSER_STOPPED(ctxt))
3943
0
            goto error;
3944
3945
8.83M
        if (CUR_PTR >= ctxt->input->end) {
3946
293
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3947
293
            goto error;
3948
293
        }
3949
3950
8.83M
        c = CUR;
3951
3952
8.83M
        if (c == 0) {
3953
131
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3954
131
                    "invalid character in entity value\n");
3955
131
            goto error;
3956
131
        }
3957
8.83M
        if (c == quote)
3958
53.2k
            break;
3959
8.78M
        NEXTL(1);
3960
8.78M
        length += 1;
3961
3962
        /*
3963
         * TODO: Check growth threshold
3964
         */
3965
8.78M
        if (ctxt->input->end - CUR_PTR < 10)
3966
3.89k
            GROW;
3967
8.78M
    }
3968
3969
53.2k
    start = CUR_PTR - length;
3970
3971
53.2k
    if (orig != NULL) {
3972
53.2k
        *orig = xmlStrndup(start, length);
3973
53.2k
        if (*orig == NULL)
3974
0
            xmlErrMemory(ctxt);
3975
53.2k
    }
3976
3977
53.2k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3978
3979
53.2k
    NEXTL(1);
3980
3981
53.2k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3982
3983
424
error:
3984
424
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3985
424
    return(NULL);
3986
53.7k
}
3987
3988
/**
3989
 * xmlCheckEntityInAttValue:
3990
 * @ctxt:  parser context
3991
 * @pent:  entity
3992
 * @depth:  nesting depth
3993
 *
3994
 * Check an entity reference in an attribute value for validity
3995
 * without expanding it.
3996
 */
3997
static void
3998
0
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3999
0
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4000
0
    const xmlChar *str;
4001
0
    unsigned long expandedSize = pent->length;
4002
0
    int c, flags;
4003
4004
0
    depth += 1;
4005
0
    if (depth > maxDepth) {
4006
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4007
0
                       "Maximum entity nesting depth exceeded");
4008
0
  return;
4009
0
    }
4010
4011
0
    if (pent->flags & XML_ENT_EXPANDING) {
4012
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4013
0
        xmlHaltParser(ctxt);
4014
0
        return;
4015
0
    }
4016
4017
    /*
4018
     * If we're parsing a default attribute value in DTD content,
4019
     * the entity might reference other entities which weren't
4020
     * defined yet, so the check isn't reliable.
4021
     */
4022
0
    if (ctxt->inSubset == 0)
4023
0
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4024
0
    else
4025
0
        flags = XML_ENT_VALIDATED;
4026
4027
0
    str = pent->content;
4028
0
    if (str == NULL)
4029
0
        goto done;
4030
4031
    /*
4032
     * Note that entity values are already validated. We only check
4033
     * for illegal less-than signs and compute the expanded size
4034
     * of the entity. No special handling for multi-byte characters
4035
     * is needed.
4036
     */
4037
0
    while (!PARSER_STOPPED(ctxt)) {
4038
0
        c = *str;
4039
4040
0
  if (c != '&') {
4041
0
            if (c == 0)
4042
0
                break;
4043
4044
0
            if (c == '<')
4045
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4046
0
                        "'<' in entity '%s' is not allowed in attributes "
4047
0
                        "values\n", pent->name);
4048
4049
0
            str += 1;
4050
0
        } else if (str[1] == '#') {
4051
0
            int val;
4052
4053
0
      val = xmlParseStringCharRef(ctxt, &str);
4054
0
      if (val == 0) {
4055
0
                pent->content[0] = 0;
4056
0
                break;
4057
0
            }
4058
0
  } else {
4059
0
            xmlChar *name;
4060
0
            xmlEntityPtr ent;
4061
4062
0
      name = xmlParseStringEntityRef(ctxt, &str);
4063
0
      if (name == NULL) {
4064
0
                pent->content[0] = 0;
4065
0
                break;
4066
0
            }
4067
4068
0
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4069
0
            xmlFree(name);
4070
4071
0
            if ((ent != NULL) &&
4072
0
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4073
0
                if ((ent->flags & flags) != flags) {
4074
0
                    pent->flags |= XML_ENT_EXPANDING;
4075
0
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
4076
0
                    pent->flags &= ~XML_ENT_EXPANDING;
4077
0
                }
4078
4079
0
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4080
0
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4081
0
            }
4082
0
        }
4083
0
    }
4084
4085
0
done:
4086
0
    if (ctxt->inSubset == 0)
4087
0
        pent->expandedSize = expandedSize;
4088
4089
0
    pent->flags |= flags;
4090
0
}
4091
4092
/**
4093
 * xmlExpandEntityInAttValue:
4094
 * @ctxt:  parser context
4095
 * @buf:  string buffer
4096
 * @str:  entity or attribute value
4097
 * @pent:  entity for entity value, NULL for attribute values
4098
 * @normalize:  whether to collapse whitespace
4099
 * @inSpace:  whitespace state
4100
 * @depth:  nesting depth
4101
 * @check:  whether to check for amplification
4102
 *
4103
 * Expand general entity references in an entity or attribute value.
4104
 * Perform attribute value normalization.
4105
 */
4106
static void
4107
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4108
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
4109
0
                          int *inSpace, int depth, int check) {
4110
0
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4111
0
    int c, chunkSize;
4112
4113
0
    if (str == NULL)
4114
0
        return;
4115
4116
0
    depth += 1;
4117
0
    if (depth > maxDepth) {
4118
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4119
0
                       "Maximum entity nesting depth exceeded");
4120
0
  return;
4121
0
    }
4122
4123
0
    if (pent != NULL) {
4124
0
        if (pent->flags & XML_ENT_EXPANDING) {
4125
0
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4126
0
            xmlHaltParser(ctxt);
4127
0
            return;
4128
0
        }
4129
4130
0
        if (check) {
4131
0
            if (xmlParserEntityCheck(ctxt, pent->length))
4132
0
                return;
4133
0
        }
4134
0
    }
4135
4136
0
    chunkSize = 0;
4137
4138
    /*
4139
     * Note that entity values are already validated. No special
4140
     * handling for multi-byte characters is needed.
4141
     */
4142
0
    while (!PARSER_STOPPED(ctxt)) {
4143
0
        c = *str;
4144
4145
0
  if (c != '&') {
4146
0
            if (c == 0)
4147
0
                break;
4148
4149
            /*
4150
             * If this function is called without an entity, it is used to
4151
             * expand entities in an attribute content where less-than was
4152
             * already unscaped and is allowed.
4153
             */
4154
0
            if ((pent != NULL) && (c == '<')) {
4155
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4156
0
                        "'<' in entity '%s' is not allowed in attributes "
4157
0
                        "values\n", pent->name);
4158
0
                break;
4159
0
            }
4160
4161
0
            if (c <= 0x20) {
4162
0
                if ((normalize) && (*inSpace)) {
4163
                    /* Skip char */
4164
0
                    if (chunkSize > 0) {
4165
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4166
0
                        chunkSize = 0;
4167
0
                    }
4168
0
                } else if (c < 0x20) {
4169
0
                    if (chunkSize > 0) {
4170
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4171
0
                        chunkSize = 0;
4172
0
                    }
4173
4174
0
                    xmlSBufAddCString(buf, " ", 1);
4175
0
                } else {
4176
0
                    chunkSize += 1;
4177
0
                }
4178
4179
0
                *inSpace = 1;
4180
0
            } else {
4181
0
                chunkSize += 1;
4182
0
                *inSpace = 0;
4183
0
            }
4184
4185
0
            str += 1;
4186
0
        } else if (str[1] == '#') {
4187
0
            int val;
4188
4189
0
            if (chunkSize > 0) {
4190
0
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4191
0
                chunkSize = 0;
4192
0
            }
4193
4194
0
      val = xmlParseStringCharRef(ctxt, &str);
4195
0
      if (val == 0) {
4196
0
                if (pent != NULL)
4197
0
                    pent->content[0] = 0;
4198
0
                break;
4199
0
            }
4200
4201
0
            if (val == ' ') {
4202
0
                if ((!normalize) || (!*inSpace))
4203
0
                    xmlSBufAddCString(buf, " ", 1);
4204
0
                *inSpace = 1;
4205
0
            } else {
4206
0
                xmlSBufAddChar(buf, val);
4207
0
                *inSpace = 0;
4208
0
            }
4209
0
  } else {
4210
0
            xmlChar *name;
4211
0
            xmlEntityPtr ent;
4212
4213
0
            if (chunkSize > 0) {
4214
0
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4215
0
                chunkSize = 0;
4216
0
            }
4217
4218
0
      name = xmlParseStringEntityRef(ctxt, &str);
4219
0
            if (name == NULL) {
4220
0
                if (pent != NULL)
4221
0
                    pent->content[0] = 0;
4222
0
                break;
4223
0
            }
4224
4225
0
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4226
0
            xmlFree(name);
4227
4228
0
      if ((ent != NULL) &&
4229
0
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4230
0
    if (ent->content == NULL) {
4231
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4232
0
          "predefined entity has no content\n");
4233
0
                    break;
4234
0
                }
4235
4236
0
                xmlSBufAddString(buf, ent->content, ent->length);
4237
4238
0
                *inSpace = 0;
4239
0
      } else if ((ent != NULL) && (ent->content != NULL)) {
4240
0
                if (pent != NULL)
4241
0
                    pent->flags |= XML_ENT_EXPANDING;
4242
0
    xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4243
0
                                          normalize, inSpace, depth, check);
4244
0
                if (pent != NULL)
4245
0
                    pent->flags &= ~XML_ENT_EXPANDING;
4246
0
      }
4247
0
        }
4248
0
    }
4249
4250
0
    if (chunkSize > 0)
4251
0
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4252
0
}
4253
4254
/**
4255
 * xmlExpandEntitiesInAttValue:
4256
 * @ctxt:  parser context
4257
 * @str:  entity or attribute value
4258
 * @normalize:  whether to collapse whitespace
4259
 *
4260
 * Expand general entity references in an entity or attribute value.
4261
 * Perform attribute value normalization.
4262
 *
4263
 * Returns the expanded attribtue value.
4264
 */
4265
xmlChar *
4266
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4267
0
                            int normalize) {
4268
0
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4269
0
                         XML_MAX_HUGE_LENGTH :
4270
0
                         XML_MAX_TEXT_LENGTH;
4271
0
    xmlSBuf buf;
4272
0
    int inSpace = 1;
4273
4274
0
    xmlSBufInit(&buf, maxLength);
4275
4276
0
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4277
0
                              ctxt->inputNr, /* check */ 0);
4278
4279
0
    if ((normalize) && (inSpace) && (buf.size > 0))
4280
0
        buf.size--;
4281
4282
0
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4283
0
}
4284
4285
/**
4286
 * xmlParseAttValueInternal:
4287
 * @ctxt:  an XML parser context
4288
 * @len:  attribute len result
4289
 * @alloc:  whether the attribute was reallocated as a new string
4290
 * @normalize:  if 1 then further non-CDATA normalization must be done
4291
 *
4292
 * parse a value for an attribute.
4293
 * NOTE: if no normalization is needed, the routine will return pointers
4294
 *       directly from the data buffer.
4295
 *
4296
 * 3.3.3 Attribute-Value Normalization:
4297
 * Before the value of an attribute is passed to the application or
4298
 * checked for validity, the XML processor must normalize it as follows:
4299
 * - a character reference is processed by appending the referenced
4300
 *   character to the attribute value
4301
 * - an entity reference is processed by recursively processing the
4302
 *   replacement text of the entity
4303
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4304
 *   appending #x20 to the normalized value, except that only a single
4305
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4306
 *   parsed entity or the literal entity value of an internal parsed entity
4307
 * - other characters are processed by appending them to the normalized value
4308
 * If the declared value is not CDATA, then the XML processor must further
4309
 * process the normalized attribute value by discarding any leading and
4310
 * trailing space (#x20) characters, and by replacing sequences of space
4311
 * (#x20) characters by a single space (#x20) character.
4312
 * All attributes for which no declaration has been read should be treated
4313
 * by a non-validating parser as if declared CDATA.
4314
 *
4315
 * Returns the AttValue parsed or NULL. The value has to be freed by the
4316
 *     caller if it was copied, this can be detected by val[*len] == 0.
4317
 */
4318
static xmlChar *
4319
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4320
23.9M
                         int normalize, int isNamespace) {
4321
23.9M
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4322
22.7M
                         XML_MAX_HUGE_LENGTH :
4323
23.9M
                         XML_MAX_TEXT_LENGTH;
4324
23.9M
    xmlSBuf buf;
4325
23.9M
    xmlChar *ret;
4326
23.9M
    int c, l, quote, flags, chunkSize;
4327
23.9M
    int inSpace = 1;
4328
23.9M
    int replaceEntities;
4329
4330
    /* Always expand namespace URIs */
4331
23.9M
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4332
4333
23.9M
    xmlSBufInit(&buf, maxLength);
4334
4335
23.9M
    GROW;
4336
4337
23.9M
    quote = CUR;
4338
23.9M
    if ((quote != '"') && (quote != '\'')) {
4339
3.02k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4340
3.02k
  return(NULL);
4341
3.02k
    }
4342
23.9M
    NEXTL(1);
4343
4344
23.9M
    if (ctxt->inSubset == 0)
4345
23.9M
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4346
45.6k
    else
4347
45.6k
        flags = XML_ENT_VALIDATED;
4348
4349
23.9M
    inSpace = 1;
4350
23.9M
    chunkSize = 0;
4351
4352
279M
    while (1) {
4353
279M
        if (PARSER_STOPPED(ctxt))
4354
0
            goto error;
4355
4356
279M
        if (CUR_PTR >= ctxt->input->end) {
4357
5.01k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4358
5.01k
                           "AttValue: ' expected\n");
4359
5.01k
            goto error;
4360
5.01k
        }
4361
4362
        /*
4363
         * TODO: Check growth threshold
4364
         */
4365
279M
        if (ctxt->input->end - CUR_PTR < 10)
4366
55.1k
            GROW;
4367
4368
279M
        c = CUR;
4369
4370
279M
        if (c >= 0x80) {
4371
7.06M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4372
7.06M
                    "invalid character in attribute value\n");
4373
7.06M
            if (l == 0) {
4374
2.36M
                if (chunkSize > 0) {
4375
253k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4376
253k
                    chunkSize = 0;
4377
253k
                }
4378
2.36M
                xmlSBufAddReplChar(&buf);
4379
2.36M
                NEXTL(1);
4380
4.70M
            } else {
4381
4.70M
                chunkSize += l;
4382
4.70M
                NEXTL(l);
4383
4.70M
            }
4384
4385
7.06M
            inSpace = 0;
4386
272M
        } else if (c != '&') {
4387
271M
            if (c > 0x20) {
4388
267M
                if (c == quote)
4389
23.9M
                    break;
4390
4391
243M
                if (c == '<')
4392
208k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4393
4394
243M
                chunkSize += 1;
4395
243M
                inSpace = 0;
4396
243M
            } else if (!IS_BYTE_CHAR(c)) {
4397
1.28M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4398
1.28M
                        "invalid character in attribute value\n");
4399
1.28M
                if (chunkSize > 0) {
4400
64.5k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4401
64.5k
                    chunkSize = 0;
4402
64.5k
                }
4403
1.28M
                xmlSBufAddReplChar(&buf);
4404
1.28M
                inSpace = 0;
4405
3.02M
            } else {
4406
                /* Whitespace */
4407
3.02M
                if ((normalize) && (inSpace)) {
4408
                    /* Skip char */
4409
69.9k
                    if (chunkSize > 0) {
4410
3.30k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4411
3.30k
                        chunkSize = 0;
4412
3.30k
                    }
4413
2.95M
                } else if (c < 0x20) {
4414
                    /* Convert to space */
4415
871k
                    if (chunkSize > 0) {
4416
193k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4417
193k
                        chunkSize = 0;
4418
193k
                    }
4419
4420
871k
                    xmlSBufAddCString(&buf, " ", 1);
4421
2.08M
                } else {
4422
2.08M
                    chunkSize += 1;
4423
2.08M
                }
4424
4425
3.02M
                inSpace = 1;
4426
4427
3.02M
                if ((c == 0xD) && (NXT(1) == 0xA))
4428
31.2k
                    CUR_PTR++;
4429
3.02M
            }
4430
4431
247M
            NEXTL(1);
4432
247M
        } else if (NXT(1) == '#') {
4433
72.8k
            int val;
4434
4435
72.8k
            if (chunkSize > 0) {
4436
35.0k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4437
35.0k
                chunkSize = 0;
4438
35.0k
            }
4439
4440
72.8k
            val = xmlParseCharRef(ctxt);
4441
72.8k
            if (val == 0)
4442
720
                goto error;
4443
4444
72.0k
            if ((val == '&') && (!replaceEntities)) {
4445
                /*
4446
                 * The reparsing will be done in xmlNodeParseContent()
4447
                 * called from SAX2.c
4448
                 */
4449
0
                xmlSBufAddCString(&buf, "&#38;", 5);
4450
0
                inSpace = 0;
4451
72.0k
            } else if (val == ' ') {
4452
38.5k
                if ((!normalize) || (!inSpace))
4453
31.5k
                    xmlSBufAddCString(&buf, " ", 1);
4454
38.5k
                inSpace = 1;
4455
38.5k
            } else {
4456
33.5k
                xmlSBufAddChar(&buf, val);
4457
33.5k
                inSpace = 0;
4458
33.5k
            }
4459
633k
        } else {
4460
633k
            const xmlChar *name;
4461
633k
            xmlEntityPtr ent;
4462
4463
633k
            if (chunkSize > 0) {
4464
325k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4465
325k
                chunkSize = 0;
4466
325k
            }
4467
4468
633k
            name = xmlParseEntityRefInternal(ctxt);
4469
633k
            if (name == NULL) {
4470
                /*
4471
                 * Probably a literal '&' which wasn't escaped.
4472
                 * TODO: Handle gracefully in recovery mode.
4473
                 */
4474
411k
                continue;
4475
411k
            }
4476
4477
221k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4478
221k
            if (ent == NULL)
4479
27.2k
                continue;
4480
4481
194k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4482
194k
                if ((ent->content[0] == '&') && (!replaceEntities))
4483
0
                    xmlSBufAddCString(&buf, "&#38;", 5);
4484
194k
                else
4485
194k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4486
194k
                inSpace = 0;
4487
18.4E
            } else if (replaceEntities) {
4488
0
                xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4489
0
                                          normalize, &inSpace, ctxt->inputNr,
4490
0
                                          /* check */ 1);
4491
18.4E
            } else {
4492
18.4E
                if ((ent->flags & flags) != flags)
4493
0
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4494
4495
18.4E
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4496
0
                    ent->content[0] = 0;
4497
0
                    goto error;
4498
0
                }
4499
4500
                /*
4501
                 * Just output the reference
4502
                 */
4503
18.4E
                xmlSBufAddCString(&buf, "&", 1);
4504
18.4E
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4505
18.4E
                xmlSBufAddCString(&buf, ";", 1);
4506
4507
18.4E
                inSpace = 0;
4508
18.4E
            }
4509
194k
  }
4510
279M
    }
4511
4512
23.9M
    if ((buf.mem == NULL) && (alloc != NULL)) {
4513
23.7M
        ret = (xmlChar *) CUR_PTR - chunkSize;
4514
4515
23.7M
        if (attlen != NULL)
4516
23.7M
            *attlen = chunkSize;
4517
23.7M
        if ((normalize) && (inSpace) && (chunkSize > 0))
4518
371
            *attlen -= 1;
4519
23.7M
        *alloc = 0;
4520
4521
        /* Report potential error */
4522
23.7M
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4523
23.7M
    } else {
4524
277k
        if (chunkSize > 0)
4525
188k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4526
4527
277k
        if ((normalize) && (inSpace) && (buf.size > 0))
4528
4.48k
            buf.size--;
4529
4530
277k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4531
4532
278k
        if (ret != NULL) {
4533
278k
            if (attlen != NULL)
4534
232k
                *attlen = buf.size;
4535
278k
            if (alloc != NULL)
4536
232k
                *alloc = 1;
4537
278k
        }
4538
277k
    }
4539
4540
23.9M
    NEXTL(1);
4541
4542
23.9M
    return(ret);
4543
4544
5.73k
error:
4545
5.73k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4546
5.73k
    return(NULL);
4547
23.9M
}
4548
4549
/**
4550
 * xmlParseAttValue:
4551
 * @ctxt:  an XML parser context
4552
 *
4553
 * DEPRECATED: Internal function, don't use.
4554
 *
4555
 * parse a value for an attribute
4556
 * Note: the parser won't do substitution of entities here, this
4557
 * will be handled later in xmlStringGetNodeList
4558
 *
4559
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4560
 *                   "'" ([^<&'] | Reference)* "'"
4561
 *
4562
 * 3.3.3 Attribute-Value Normalization:
4563
 * Before the value of an attribute is passed to the application or
4564
 * checked for validity, the XML processor must normalize it as follows:
4565
 * - a character reference is processed by appending the referenced
4566
 *   character to the attribute value
4567
 * - an entity reference is processed by recursively processing the
4568
 *   replacement text of the entity
4569
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4570
 *   appending #x20 to the normalized value, except that only a single
4571
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4572
 *   parsed entity or the literal entity value of an internal parsed entity
4573
 * - other characters are processed by appending them to the normalized value
4574
 * If the declared value is not CDATA, then the XML processor must further
4575
 * process the normalized attribute value by discarding any leading and
4576
 * trailing space (#x20) characters, and by replacing sequences of space
4577
 * (#x20) characters by a single space (#x20) character.
4578
 * All attributes for which no declaration has been read should be treated
4579
 * by a non-validating parser as if declared CDATA.
4580
 *
4581
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4582
 */
4583
4584
4585
xmlChar *
4586
46.0k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4587
46.0k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4588
46.0k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4589
46.0k
}
4590
4591
/**
4592
 * xmlParseSystemLiteral:
4593
 * @ctxt:  an XML parser context
4594
 *
4595
 * DEPRECATED: Internal function, don't use.
4596
 *
4597
 * parse an XML Literal
4598
 *
4599
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4600
 *
4601
 * Returns the SystemLiteral parsed or NULL
4602
 */
4603
4604
xmlChar *
4605
11.4k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4606
11.4k
    xmlChar *buf = NULL;
4607
11.4k
    int len = 0;
4608
11.4k
    int size = XML_PARSER_BUFFER_SIZE;
4609
11.4k
    int cur, l;
4610
11.4k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4611
11.4k
                    XML_MAX_TEXT_LENGTH :
4612
11.4k
                    XML_MAX_NAME_LENGTH;
4613
11.4k
    xmlChar stop;
4614
4615
11.4k
    if (RAW == '"') {
4616
8.94k
        NEXT;
4617
8.94k
  stop = '"';
4618
8.94k
    } else if (RAW == '\'') {
4619
1.68k
        NEXT;
4620
1.68k
  stop = '\'';
4621
1.68k
    } else {
4622
829
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4623
829
  return(NULL);
4624
829
    }
4625
4626
10.6k
    buf = xmlMalloc(size);
4627
10.6k
    if (buf == NULL) {
4628
0
        xmlErrMemory(ctxt);
4629
0
  return(NULL);
4630
0
    }
4631
10.6k
    cur = xmlCurrentCharRecover(ctxt, &l);
4632
452k
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4633
442k
  if (len + 5 >= size) {
4634
1.88k
      xmlChar *tmp;
4635
1.88k
            int newSize;
4636
4637
1.88k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4638
1.88k
            if (newSize < 0) {
4639
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4640
0
                xmlFree(buf);
4641
0
                return(NULL);
4642
0
            }
4643
1.88k
      tmp = xmlRealloc(buf, newSize);
4644
1.88k
      if (tmp == NULL) {
4645
0
          xmlFree(buf);
4646
0
    xmlErrMemory(ctxt);
4647
0
    return(NULL);
4648
0
      }
4649
1.88k
      buf = tmp;
4650
1.88k
            size = newSize;
4651
1.88k
  }
4652
442k
  COPY_BUF(buf, len, cur);
4653
442k
  NEXTL(l);
4654
442k
  cur = xmlCurrentCharRecover(ctxt, &l);
4655
442k
    }
4656
10.6k
    buf[len] = 0;
4657
10.6k
    if (!IS_CHAR(cur)) {
4658
174
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4659
10.4k
    } else {
4660
10.4k
  NEXT;
4661
10.4k
    }
4662
10.6k
    return(buf);
4663
10.6k
}
4664
4665
/**
4666
 * xmlParsePubidLiteral:
4667
 * @ctxt:  an XML parser context
4668
 *
4669
 * DEPRECATED: Internal function, don't use.
4670
 *
4671
 * parse an XML public literal
4672
 *
4673
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4674
 *
4675
 * Returns the PubidLiteral parsed or NULL.
4676
 */
4677
4678
xmlChar *
4679
7.18k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4680
7.18k
    xmlChar *buf = NULL;
4681
7.18k
    int len = 0;
4682
7.18k
    int size = XML_PARSER_BUFFER_SIZE;
4683
7.18k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4684
7.18k
                    XML_MAX_TEXT_LENGTH :
4685
7.18k
                    XML_MAX_NAME_LENGTH;
4686
7.18k
    xmlChar cur;
4687
7.18k
    xmlChar stop;
4688
4689
7.18k
    if (RAW == '"') {
4690
3.55k
        NEXT;
4691
3.55k
  stop = '"';
4692
3.63k
    } else if (RAW == '\'') {
4693
2.84k
        NEXT;
4694
2.84k
  stop = '\'';
4695
2.84k
    } else {
4696
788
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4697
788
  return(NULL);
4698
788
    }
4699
6.40k
    buf = xmlMalloc(size);
4700
6.40k
    if (buf == NULL) {
4701
0
  xmlErrMemory(ctxt);
4702
0
  return(NULL);
4703
0
    }
4704
6.40k
    cur = CUR;
4705
244k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4706
238k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4707
238k
  if (len + 1 >= size) {
4708
171
      xmlChar *tmp;
4709
171
            int newSize;
4710
4711
171
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4712
171
            if (newSize < 0) {
4713
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4714
0
                xmlFree(buf);
4715
0
                return(NULL);
4716
0
            }
4717
171
      tmp = xmlRealloc(buf, newSize);
4718
171
      if (tmp == NULL) {
4719
0
    xmlErrMemory(ctxt);
4720
0
    xmlFree(buf);
4721
0
    return(NULL);
4722
0
      }
4723
171
      buf = tmp;
4724
171
            size = newSize;
4725
171
  }
4726
238k
  buf[len++] = cur;
4727
238k
  NEXT;
4728
238k
  cur = CUR;
4729
238k
    }
4730
6.40k
    buf[len] = 0;
4731
6.40k
    if (cur != stop) {
4732
184
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4733
6.21k
    } else {
4734
6.21k
  NEXTL(1);
4735
6.21k
    }
4736
6.40k
    return(buf);
4737
6.40k
}
4738
4739
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4740
4741
/*
 * Lookup table used for the test in the inner loop of the char data
 * scanner. An entry is non-zero (and equal to its own index) for the
 * bytes the loop may step over directly: TAB and 0x20 - 0x7F except
 * '&', '<' and ']'. LF and CR are zero because they are handled
 * separately, as is every byte >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00 - 0x07 */
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08 - 0x0F: TAB */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10 - 0x17 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18 - 0x1F */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* 0x26 '&' is 0 */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, /* 0x28 - 0x2F */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30 - 0x37 */
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* 0x3C '<' is 0 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40 - 0x47 */
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, /* 0x48 - 0x4F */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50 - 0x57 */
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* 0x5D ']' is 0 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60 - 0x67 */
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, /* 0x68 - 0x6F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70 - 0x77 */
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F  /* 0x78 - 0x7F */
    /* 0x80 - 0xFF (non-ascii): left to implicit zero initialization */
};
4778
4779
static void
4780
xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size,
4781
14.4M
              int isBlank) {
4782
14.4M
    int checkBlanks;
4783
4784
14.4M
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
4785
58.7k
        return;
4786
4787
14.3M
    checkBlanks = (!ctxt->keepBlanks) ||
4788
14.3M
                  (ctxt->sax->ignorableWhitespace != ctxt->sax->characters);
4789
4790
    /*
4791
     * Calling areBlanks with only parts of a text node
4792
     * is fundamentally broken, making the NOBLANKS option
4793
     * essentially unusable.
4794
     */
4795
14.3M
    if ((checkBlanks) &&
4796
14.0M
        (areBlanks(ctxt, buf, size, isBlank))) {
4797
0
        if ((ctxt->sax->ignorableWhitespace != NULL) &&
4798
0
            (ctxt->keepBlanks))
4799
0
            ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
4800
14.3M
    } else {
4801
14.3M
        if (ctxt->sax->characters != NULL)
4802
14.3M
            ctxt->sax->characters(ctxt->userData, buf, size);
4803
4804
        /*
4805
         * The old code used to update this value for "complex" data
4806
         * even if checkBlanks was false. This was probably a bug.
4807
         */
4808
14.3M
        if ((checkBlanks) && (*ctxt->space == -1))
4809
8.30M
            *ctxt->space = -2;
4810
14.3M
    }
4811
14.3M
}
4812
4813
/**
 * xmlParseCharDataInternal:
 * @ctxt:  an XML parser context
 * @partial:  buffer may contain partial UTF-8 sequences
 *
 * Parse character data. Always makes progress if the first char isn't
 * '<' or '&'.
 *
 * This is the fast path: bytes accepted by the test_char_data table,
 * plus line feeds, are scanned directly in the input buffer and passed
 * to xmlCharacters() without being copied. When the scan stops on a
 * byte that the loop condition does not accept (anything other than
 * 0x20 - 0x7F, TAB or LF), the work is handed over to
 * xmlParseCharDataComplex().
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 */
4828
static void
4829
13.4M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4830
13.4M
    const xmlChar *in;
4831
13.4M
    int nbchar = 0;
    /*
     * Position snapshot. It is refreshed after each chunk reported below
     * and written back to the input before xmlParseCharDataComplex() runs.
     */
4832
13.4M
    int line = ctxt->input->line;
4833
13.4M
    int col = ctxt->input->col;
4834
13.4M
    int ccol;
4835
4836
13.4M
    GROW;
4837
    /*
     * Accelerated common case where the input doesn't need to be
     * modified before passing it to the handler.
     */
4841
13.4M
    in = ctxt->input->cur;
4842
13.8M
    do {
        /* Step 1: skip a leading run made only of spaces and line feeds. */
4843
18.0M
get_more_space:
4844
42.7M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4845
18.0M
        if (*in == 0xA) {
4846
4.42M
            do {
4847
4.42M
                ctxt->input->line++; ctxt->input->col = 1;
4848
4.42M
                in++;
4849
4.42M
            } while (*in == 0xA);
4850
4.24M
            goto get_more_space;
4851
4.24M
        }
        /*
         * A '<' right after the run: report everything scanned so far
         * with the isBlank argument of xmlCharacters() set to 1.
         */
4852
13.8M
        if (*in == '<') {
4853
4.24M
            nbchar = in - ctxt->input->cur;
4854
4.24M
            if (nbchar > 0) {
4855
4.24M
                const xmlChar *tmp = ctxt->input->cur;
                /* Move the input position before invoking the callback. */
4856
4.24M
                ctxt->input->cur = in;
4857
4858
4.24M
                xmlCharacters(ctxt, tmp, nbchar, 1);
4859
4.24M
            }
4860
4.24M
            return;
4861
4.24M
        }
4862
4863
10.7M
        /* Step 2: scan bytes accepted by test_char_data. */
get_more:
        /* The column is tracked in a local and stored back after the loop. */
4864
10.7M
        ccol = ctxt->input->col;
4865
139M
        while (test_char_data[*in]) {
4866
129M
            in++;
4867
129M
            ccol++;
4868
129M
        }
4869
10.7M
        ctxt->input->col = ccol;
        /* Line feeds are part of the run; only the position is updated. */
4870
10.7M
        if (*in == 0xA) {
4871
1.14M
            do {
4872
1.14M
                ctxt->input->line++; ctxt->input->col = 1;
4873
1.14M
                in++;
4874
1.14M
            } while (*in == 0xA);
4875
904k
            goto get_more;
4876
904k
        }
4877
9.81M
        if (*in == ']') {
            /* "]]>" is an error here: step over the first ']' and stop. */
4878
216k
            if ((in[1] == ']') && (in[2] == '>')) {
4879
20
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4880
20
                ctxt->input->cur = in + 1;
4881
20
                return;
4882
20
            }
            /*
             * Otherwise the ']' is ordinary data, unless @partial is set
             * and fewer than two bytes remain after it in the buffer.
             */
4883
216k
            if ((!partial) || (ctxt->input->end - in >= 2)) {
4884
216k
                in++;
4885
216k
                ctxt->input->col++;
4886
216k
                goto get_more;
4887
216k
            }
4888
216k
        }
        /* Step 3: report the run scanned since the last input position. */
4889
9.59M
        nbchar = in - ctxt->input->cur;
4890
9.59M
        if (nbchar > 0) {
4891
8.81M
            const xmlChar *tmp = ctxt->input->cur;
4892
8.81M
            ctxt->input->cur = in;
4893
4894
8.81M
            xmlCharacters(ctxt, tmp, nbchar, 0);
4895
4896
8.81M
            line = ctxt->input->line;
4897
8.81M
            col = ctxt->input->col;
4898
8.81M
        }
4899
9.59M
        ctxt->input->cur = in;
        /*
         * CR LF: the input position is moved onto the LF, so the CR is
         * not part of the next run, and scanning resumes after the LF.
         * A CR not followed by LF is left in place.
         */
4900
9.59M
        if (*in == 0xD) {
4901
475k
            in++;
4902
475k
            if (*in == 0xA) {
4903
435k
                ctxt->input->cur = in;
4904
435k
                in++;
4905
435k
                ctxt->input->line++; ctxt->input->col = 1;
4906
435k
                continue; /* while */
4907
435k
            }
4908
39.9k
            in--;
4909
39.9k
        }
        /* '<' and '&' end the character data. */
4910
9.16M
        if (*in == '<') {
4911
7.83M
            return;
4912
7.83M
        }
4913
1.32M
        if (*in == '&') {
4914
225k
            return;
4915
225k
        }
        /* A ']' too close to the end of a partial buffer is left unread. */
4916
1.10M
        if ((partial) && (*in == ']') && (ctxt->input->end - in < 2)) {
4917
1
            return;
4918
1
        }
        /* The buffer may change here, so the scan pointer is reloaded. */
4919
1.10M
        SHRINK;
4920
1.10M
        GROW;
4921
1.10M
        in = ctxt->input->cur;
4922
1.53M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4923
1.38M
             (*in == 0x09) || (*in == 0x0a));
    /*
     * The next byte is not handled by the fast path: write back the
     * saved position and continue with the character-by-character parser.
     */
4924
1.12M
    ctxt->input->line = line;
4925
1.12M
    ctxt->input->col = col;
4926
1.12M
    xmlParseCharDataComplex(ctxt, partial);
4927
1.12M
}
4928
4929
/**
4930
 * xmlParseCharDataComplex:
4931
 * @ctxt:  an XML parser context
4932
 * @cdata:  int indicating whether we are within a CDATA section
4933
 *
4934
 * Always makes progress if the first char isn't '<' or '&'.
4935
 *
4936
 * parse a CharData section.this is the fallback function
4937
 * of xmlParseCharData() when the parsing requires handling
4938
 * of non-ASCII characters.
4939
 */
4940
static void
4941
1.12M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4942
1.12M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4943
1.12M
    int nbchar = 0;
4944
1.12M
    int cur, l;
4945
4946
1.12M
    cur = xmlCurrentCharRecover(ctxt, &l);
4947
81.9M
    while ((cur != '<') && /* checked */
4948
80.9M
           (cur != '&') &&
4949
80.8M
           ((!partial) || (cur != ']') ||
4950
26.3k
            (ctxt->input->end - ctxt->input->cur >= 2)) &&
4951
80.8M
     (IS_CHAR(cur))) {
4952
80.8M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4953
471
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4954
471
  }
4955
80.8M
  COPY_BUF(buf, nbchar, cur);
4956
  /* move current position before possible calling of ctxt->sax->characters */
4957
80.8M
  NEXTL(l);
4958
80.8M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4959
231k
      buf[nbchar] = 0;
4960
4961
231k
            xmlCharacters(ctxt, buf, nbchar, 0);
4962
231k
      nbchar = 0;
4963
231k
            SHRINK;
4964
231k
  }
4965
80.8M
  cur = xmlCurrentCharRecover(ctxt, &l);
4966
80.8M
    }
4967
1.12M
    if (nbchar != 0) {
4968
1.11M
        buf[nbchar] = 0;
4969
4970
1.11M
        xmlCharacters(ctxt, buf, nbchar, 0);
4971
1.11M
    }
4972
    /*
4973
     * cur == 0 can mean
4974
     *
4975
     * - End of buffer.
4976
     * - An actual 0 character.
4977
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4978
     */
4979
1.12M
    if (ctxt->input->cur < ctxt->input->end) {
4980
1.11M
        if ((cur == 0) && (CUR != 0)) {
4981
275
            if (partial == 0) {
4982
266
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4983
266
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4984
266
                NEXTL(1);
4985
266
            }
4986
1.11M
        } else if ((cur != '<') && (cur != '&') && (cur != ']')) {
4987
            /* Generate the error and skip the offending character */
4988
3.48k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4989
3.48k
                              "PCDATA invalid Char value %d\n", cur);
4990
3.48k
            NEXTL(l);
4991
3.48k
        }
4992
1.11M
    }
4993
1.12M
}
4994
4995
/**
4996
 * xmlParseCharData:
4997
 * @ctxt:  an XML parser context
4998
 * @cdata:  unused
4999
 *
5000
 * DEPRECATED: Internal function, don't use.
5001
 */
5002
void
5003
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
5004
0
    xmlParseCharDataInternal(ctxt, 0);
5005
0
}
5006
5007
/**
5008
 * xmlParseExternalID:
5009
 * @ctxt:  an XML parser context
5010
 * @publicID:  a xmlChar** receiving PubidLiteral
5011
 * @strict: indicate whether we should restrict parsing to only
5012
 *          production [75], see NOTE below
5013
 *
5014
 * DEPRECATED: Internal function, don't use.
5015
 *
5016
 * Parse an External ID or a Public ID
5017
 *
5018
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
5019
 *       'PUBLIC' S PubidLiteral S SystemLiteral
5020
 *
5021
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5022
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
5023
 *
5024
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5025
 *
5026
 * Returns the function returns SystemLiteral and in the second
5027
 *                case publicID receives PubidLiteral, is strict is off
5028
 *                it is possible to return NULL and have publicID set.
5029
 */
5030
5031
xmlChar *
5032
23.3k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5033
23.3k
    xmlChar *URI = NULL;
5034
5035
23.3k
    *publicID = NULL;
5036
23.3k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5037
6.70k
        SKIP(6);
5038
6.70k
  if (SKIP_BLANKS == 0) {
5039
193
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5040
193
                     "Space required after 'SYSTEM'\n");
5041
193
  }
5042
6.70k
  URI = xmlParseSystemLiteral(ctxt);
5043
6.70k
  if (URI == NULL) {
5044
142
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5045
142
        }
5046
16.6k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5047
7.18k
        SKIP(6);
5048
7.18k
  if (SKIP_BLANKS == 0) {
5049
75
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5050
75
        "Space required after 'PUBLIC'\n");
5051
75
  }
5052
7.18k
  *publicID = xmlParsePubidLiteral(ctxt);
5053
7.18k
  if (*publicID == NULL) {
5054
788
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5055
788
  }
5056
7.18k
  if (strict) {
5057
      /*
5058
       * We don't handle [83] so "S SystemLiteral" is required.
5059
       */
5060
4.23k
      if (SKIP_BLANKS == 0) {
5061
698
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5062
698
      "Space required after the Public Identifier\n");
5063
698
      }
5064
4.23k
  } else {
5065
      /*
5066
       * We handle [83] so we return immediately, if
5067
       * "S SystemLiteral" is not detected. We skip blanks if no
5068
             * system literal was found, but this is harmless since we must
5069
             * be at the end of a NotationDecl.
5070
       */
5071
2.95k
      if (SKIP_BLANKS == 0) return(NULL);
5072
1.45k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
5073
1.45k
  }
5074
4.75k
  URI = xmlParseSystemLiteral(ctxt);
5075
4.75k
  if (URI == NULL) {
5076
687
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5077
687
        }
5078
4.75k
    }
5079
20.9k
    return(URI);
5080
23.3k
}
5081
5082
/**
5083
 * xmlParseCommentComplex:
5084
 * @ctxt:  an XML parser context
5085
 * @buf:  the already parsed part of the buffer
5086
 * @len:  number of bytes in the buffer
5087
 * @size:  allocated size of the buffer
5088
 *
5089
 * Skip an XML (SGML) comment <!-- .... -->
5090
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5091
 *  must not occur within comments. "
5092
 * This is the slow routine in case the accelerator for ascii didn't work
5093
 *
5094
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5095
 */
5096
static void
5097
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5098
40.7k
                       size_t len, size_t size) {
5099
40.7k
    int q, ql;
5100
40.7k
    int r, rl;
5101
40.7k
    int cur, l;
5102
40.7k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5103
40.7k
                    XML_MAX_HUGE_LENGTH :
5104
40.7k
                    XML_MAX_TEXT_LENGTH;
5105
5106
40.7k
    if (buf == NULL) {
5107
10.2k
        len = 0;
5108
10.2k
  size = XML_PARSER_BUFFER_SIZE;
5109
10.2k
  buf = xmlMalloc(size);
5110
10.2k
  if (buf == NULL) {
5111
0
      xmlErrMemory(ctxt);
5112
0
      return;
5113
0
  }
5114
10.2k
    }
5115
40.7k
    q = xmlCurrentCharRecover(ctxt, &ql);
5116
40.7k
    if (q == 0)
5117
285
        goto not_terminated;
5118
40.4k
    if (!IS_CHAR(q)) {
5119
46
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5120
46
                          "xmlParseComment: invalid xmlChar value %d\n",
5121
46
                    q);
5122
46
  xmlFree (buf);
5123
46
  return;
5124
46
    }
5125
40.4k
    NEXTL(ql);
5126
40.4k
    r = xmlCurrentCharRecover(ctxt, &rl);
5127
40.4k
    if (r == 0)
5128
32
        goto not_terminated;
5129
40.4k
    if (!IS_CHAR(r)) {
5130
26
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5131
26
                          "xmlParseComment: invalid xmlChar value %d\n",
5132
26
                    r);
5133
26
  xmlFree (buf);
5134
26
  return;
5135
26
    }
5136
40.3k
    NEXTL(rl);
5137
40.3k
    cur = xmlCurrentCharRecover(ctxt, &l);
5138
40.3k
    if (cur == 0)
5139
29
        goto not_terminated;
5140
3.03M
    while (IS_CHAR(cur) && /* checked */
5141
3.03M
           ((cur != '>') ||
5142
2.99M
      (r != '-') || (q != '-'))) {
5143
2.99M
  if ((r == '-') && (q == '-')) {
5144
14.4k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5145
14.4k
  }
5146
2.99M
  if (len + 5 >= size) {
5147
17.2k
      xmlChar *tmp;
5148
17.2k
            int newSize;
5149
5150
17.2k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5151
17.2k
            if (newSize < 0) {
5152
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5153
0
                             "Comment too big found", NULL);
5154
0
                xmlFree (buf);
5155
0
                return;
5156
0
            }
5157
17.2k
      tmp = xmlRealloc(buf, newSize);
5158
17.2k
      if (tmp == NULL) {
5159
0
    xmlErrMemory(ctxt);
5160
0
    xmlFree(buf);
5161
0
    return;
5162
0
      }
5163
17.2k
      buf = tmp;
5164
17.2k
            size = newSize;
5165
17.2k
  }
5166
2.99M
  COPY_BUF(buf, len, q);
5167
5168
2.99M
  q = r;
5169
2.99M
  ql = rl;
5170
2.99M
  r = cur;
5171
2.99M
  rl = l;
5172
5173
2.99M
  NEXTL(l);
5174
2.99M
  cur = xmlCurrentCharRecover(ctxt, &l);
5175
5176
2.99M
    }
5177
40.3k
    buf[len] = 0;
5178
40.3k
    if (cur == 0) {
5179
558
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5180
558
                       "Comment not terminated \n<!--%.50s\n", buf);
5181
39.7k
    } else if (!IS_CHAR(cur)) {
5182
120
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5183
120
                          "xmlParseComment: invalid xmlChar value %d\n",
5184
120
                    cur);
5185
39.6k
    } else {
5186
39.6k
        NEXT;
5187
39.6k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5188
0
      (!ctxt->disableSAX))
5189
0
      ctxt->sax->comment(ctxt->userData, buf);
5190
39.6k
    }
5191
40.3k
    xmlFree(buf);
5192
40.3k
    return;
5193
346
not_terminated:
5194
346
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5195
346
       "Comment not terminated\n", NULL);
5196
346
    xmlFree(buf);
5197
346
}
5198
5199
/**
5200
 * xmlParseComment:
5201
 * @ctxt:  an XML parser context
5202
 *
5203
 * DEPRECATED: Internal function, don't use.
5204
 *
5205
 * Parse an XML (SGML) comment. Always consumes '<!'.
5206
 *
5207
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5208
 *  must not occur within comments. "
5209
 *
5210
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5211
 */
5212
void
5213
88.7k
xmlParseComment(xmlParserCtxtPtr ctxt) {
5214
88.7k
    xmlChar *buf = NULL;
5215
88.7k
    size_t size = XML_PARSER_BUFFER_SIZE;
5216
88.7k
    size_t len = 0;
5217
88.7k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5218
88.7k
                       XML_MAX_HUGE_LENGTH :
5219
88.7k
                       XML_MAX_TEXT_LENGTH;
5220
88.7k
    const xmlChar *in;
5221
88.7k
    size_t nbchar = 0;
5222
88.7k
    int ccol;
5223
5224
    /*
5225
     * Check that there is a comment right here.
5226
     */
5227
88.7k
    if ((RAW != '<') || (NXT(1) != '!'))
5228
0
        return;
5229
88.7k
    SKIP(2);
5230
88.7k
    if ((RAW != '-') || (NXT(1) != '-'))
5231
28
        return;
5232
88.7k
    SKIP(2);
5233
88.7k
    GROW;
5234
5235
    /*
5236
     * Accelerated common case where input don't need to be
5237
     * modified before passing it to the handler.
5238
     */
5239
88.7k
    in = ctxt->input->cur;
5240
88.7k
    do {
5241
88.7k
  if (*in == 0xA) {
5242
11.9k
      do {
5243
11.9k
    ctxt->input->line++; ctxt->input->col = 1;
5244
11.9k
    in++;
5245
11.9k
      } while (*in == 0xA);
5246
4.93k
  }
5247
167k
get_more:
5248
167k
        ccol = ctxt->input->col;
5249
1.81M
  while (((*in > '-') && (*in <= 0x7F)) ||
5250
387k
         ((*in >= 0x20) && (*in < '-')) ||
5251
1.64M
         (*in == 0x09)) {
5252
1.64M
        in++;
5253
1.64M
        ccol++;
5254
1.64M
  }
5255
167k
  ctxt->input->col = ccol;
5256
167k
  if (*in == 0xA) {
5257
24.2k
      do {
5258
24.2k
    ctxt->input->line++; ctxt->input->col = 1;
5259
24.2k
    in++;
5260
24.2k
      } while (*in == 0xA);
5261
14.2k
      goto get_more;
5262
14.2k
  }
5263
153k
  nbchar = in - ctxt->input->cur;
5264
  /*
5265
   * save current set of data
5266
   */
5267
153k
  if (nbchar > 0) {
5268
124k
            if (nbchar > maxLength - len) {
5269
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5270
0
                                  "Comment too big found", NULL);
5271
0
                xmlFree(buf);
5272
0
                return;
5273
0
            }
5274
124k
            if (buf == NULL) {
5275
72.2k
                if ((*in == '-') && (in[1] == '-'))
5276
32.3k
                    size = nbchar + 1;
5277
39.9k
                else
5278
39.9k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5279
72.2k
                buf = xmlMalloc(size);
5280
72.2k
                if (buf == NULL) {
5281
0
                    xmlErrMemory(ctxt);
5282
0
                    return;
5283
0
                }
5284
72.2k
                len = 0;
5285
72.2k
            } else if (len + nbchar + 1 >= size) {
5286
3.14k
                xmlChar *new_buf;
5287
3.14k
                size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5288
3.14k
                new_buf = xmlRealloc(buf, size);
5289
3.14k
                if (new_buf == NULL) {
5290
0
                    xmlErrMemory(ctxt);
5291
0
                    xmlFree(buf);
5292
0
                    return;
5293
0
                }
5294
3.14k
                buf = new_buf;
5295
3.14k
            }
5296
124k
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5297
124k
            len += nbchar;
5298
124k
            buf[len] = 0;
5299
124k
  }
5300
153k
  ctxt->input->cur = in;
5301
153k
  if (*in == 0xA) {
5302
0
      in++;
5303
0
      ctxt->input->line++; ctxt->input->col = 1;
5304
0
  }
5305
153k
  if (*in == 0xD) {
5306
15.4k
      in++;
5307
15.4k
      if (*in == 0xA) {
5308
6.93k
    ctxt->input->cur = in;
5309
6.93k
    in++;
5310
6.93k
    ctxt->input->line++; ctxt->input->col = 1;
5311
6.93k
    goto get_more;
5312
6.93k
      }
5313
8.56k
      in--;
5314
8.56k
  }
5315
146k
  SHRINK;
5316
146k
  GROW;
5317
146k
  in = ctxt->input->cur;
5318
146k
  if (*in == '-') {
5319
106k
      if (in[1] == '-') {
5320
73.0k
          if (in[2] == '>') {
5321
47.9k
        SKIP(3);
5322
47.9k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5323
0
            (!ctxt->disableSAX)) {
5324
0
      if (buf != NULL)
5325
0
          ctxt->sax->comment(ctxt->userData, buf);
5326
0
      else
5327
0
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5328
0
        }
5329
47.9k
        if (buf != NULL)
5330
41.7k
            xmlFree(buf);
5331
47.9k
        return;
5332
47.9k
    }
5333
25.0k
    if (buf != NULL) {
5334
20.6k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5335
20.6k
                          "Double hyphen within comment: "
5336
20.6k
                                      "<!--%.50s\n",
5337
20.6k
              buf);
5338
20.6k
    } else
5339
4.45k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5340
4.45k
                          "Double hyphen within comment\n", NULL);
5341
25.0k
    in++;
5342
25.0k
    ctxt->input->col++;
5343
25.0k
      }
5344
58.0k
      in++;
5345
58.0k
      ctxt->input->col++;
5346
58.0k
      goto get_more;
5347
106k
  }
5348
146k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5349
40.7k
    xmlParseCommentComplex(ctxt, buf, len, size);
5350
40.7k
}
5351
5352
5353
/**
5354
 * xmlParsePITarget:
5355
 * @ctxt:  an XML parser context
5356
 *
5357
 * DEPRECATED: Internal function, don't use.
5358
 *
5359
 * parse the name of a PI
5360
 *
5361
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5362
 *
5363
 * Returns the PITarget name or NULL
5364
 */
5365
5366
const xmlChar *
5367
97.6k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5368
97.6k
    const xmlChar *name;
5369
5370
97.6k
    name = xmlParseName(ctxt);
5371
97.6k
    if ((name != NULL) &&
5372
96.7k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5373
51.2k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5374
43.2k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5375
33.2k
  int i;
5376
33.2k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5377
23.9k
      (name[2] == 'l') && (name[3] == 0)) {
5378
1.31k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5379
1.31k
     "XML declaration allowed only at the start of the document\n");
5380
1.31k
      return(name);
5381
31.9k
  } else if (name[3] == 0) {
5382
2.24k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5383
2.24k
      return(name);
5384
2.24k
  }
5385
86.2k
  for (i = 0;;i++) {
5386
86.2k
      if (xmlW3CPIs[i] == NULL) break;
5387
58.1k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5388
1.62k
          return(name);
5389
58.1k
  }
5390
28.0k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5391
28.0k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5392
28.0k
          NULL, NULL);
5393
28.0k
    }
5394
92.4k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5395
8.16k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5396
8.16k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5397
8.16k
    }
5398
92.4k
    return(name);
5399
97.6k
}
5400
5401
#ifdef LIBXML_CATALOG_ENABLED
5402
/**
5403
 * xmlParseCatalogPI:
5404
 * @ctxt:  an XML parser context
5405
 * @catalog:  the PI value string
5406
 *
5407
 * parse an XML Catalog Processing Instruction.
5408
 *
5409
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5410
 *
5411
 * Occurs only if allowed by the user and if happening in the Misc
5412
 * part of the document before any doctype information
5413
 * This will add the given catalog to the parsing context in order
5414
 * to be used if there is a resolution need further down in the document
5415
 */
5416
5417
static void
5418
0
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5419
0
    xmlChar *URL = NULL;
5420
0
    const xmlChar *tmp, *base;
5421
0
    xmlChar marker;
5422
5423
0
    tmp = catalog;
5424
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5425
0
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5426
0
  goto error;
5427
0
    tmp += 7;
5428
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5429
0
    if (*tmp != '=') {
5430
0
  return;
5431
0
    }
5432
0
    tmp++;
5433
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5434
0
    marker = *tmp;
5435
0
    if ((marker != '\'') && (marker != '"'))
5436
0
  goto error;
5437
0
    tmp++;
5438
0
    base = tmp;
5439
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5440
0
    if (*tmp == 0)
5441
0
  goto error;
5442
0
    URL = xmlStrndup(base, tmp - base);
5443
0
    tmp++;
5444
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5445
0
    if (*tmp != 0)
5446
0
  goto error;
5447
5448
0
    if (URL != NULL) {
5449
        /*
5450
         * Unfortunately, the catalog API doesn't report OOM errors.
5451
         * xmlGetLastError isn't very helpful since we don't know
5452
         * where the last error came from. We'd have to reset it
5453
         * before this call and restore it afterwards.
5454
         */
5455
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5456
0
  xmlFree(URL);
5457
0
    }
5458
0
    return;
5459
5460
0
error:
5461
0
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5462
0
            "Catalog PI syntax error: %s\n",
5463
0
      catalog, NULL);
5464
0
    if (URL != NULL)
5465
0
  xmlFree(URL);
5466
0
}
5467
#endif
5468
5469
/**
5470
 * xmlParsePI:
5471
 * @ctxt:  an XML parser context
5472
 *
5473
 * DEPRECATED: Internal function, don't use.
5474
 *
5475
 * parse an XML Processing Instruction.
5476
 *
5477
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5478
 *
5479
 * The processing is transferred to SAX once parsed.
5480
 */
5481
5482
void
5483
97.6k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5484
97.6k
    xmlChar *buf = NULL;
5485
97.6k
    size_t len = 0;
5486
97.6k
    size_t size = XML_PARSER_BUFFER_SIZE;
5487
97.6k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5488
97.4k
                       XML_MAX_HUGE_LENGTH :
5489
97.6k
                       XML_MAX_TEXT_LENGTH;
5490
97.6k
    int cur, l;
5491
97.6k
    const xmlChar *target;
5492
5493
97.6k
    if ((RAW == '<') && (NXT(1) == '?')) {
5494
  /*
5495
   * this is a Processing Instruction.
5496
   */
5497
97.6k
  SKIP(2);
5498
5499
  /*
5500
   * Parse the target name and check for special support like
5501
   * namespace.
5502
   */
5503
97.6k
        target = xmlParsePITarget(ctxt);
5504
97.6k
  if (target != NULL) {
5505
96.7k
      if ((RAW == '?') && (NXT(1) == '>')) {
5506
20.1k
    SKIP(2);
5507
5508
    /*
5509
     * SAX: PI detected.
5510
     */
5511
20.1k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5512
9.80k
        (ctxt->sax->processingInstruction != NULL))
5513
9.80k
        ctxt->sax->processingInstruction(ctxt->userData,
5514
9.80k
                                         target, NULL);
5515
20.1k
    return;
5516
20.1k
      }
5517
76.5k
      buf = xmlMalloc(size);
5518
76.5k
      if (buf == NULL) {
5519
0
    xmlErrMemory(ctxt);
5520
0
    return;
5521
0
      }
5522
76.5k
      if (SKIP_BLANKS == 0) {
5523
12.1k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5524
12.1k
        "ParsePI: PI %s space expected\n", target);
5525
12.1k
      }
5526
76.5k
      cur = xmlCurrentCharRecover(ctxt, &l);
5527
7.50M
      while (IS_CHAR(cur) && /* checked */
5528
7.49M
       ((cur != '?') || (NXT(1) != '>'))) {
5529
7.42M
    if (len + 5 >= size) {
5530
47.0k
        xmlChar *tmp;
5531
47.0k
                    int newSize;
5532
5533
47.0k
                    newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5534
47.0k
                    if (newSize < 0) {
5535
0
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5536
0
                                          "PI %s too big found", target);
5537
0
                        xmlFree(buf);
5538
0
                        return;
5539
0
                    }
5540
47.0k
        tmp = xmlRealloc(buf, newSize);
5541
47.0k
        if (tmp == NULL) {
5542
0
      xmlErrMemory(ctxt);
5543
0
      xmlFree(buf);
5544
0
      return;
5545
0
        }
5546
47.0k
        buf = tmp;
5547
47.0k
                    size = newSize;
5548
47.0k
    }
5549
7.42M
    COPY_BUF(buf, len, cur);
5550
7.42M
    NEXTL(l);
5551
7.42M
    cur = xmlCurrentCharRecover(ctxt, &l);
5552
7.42M
      }
5553
76.5k
      buf[len] = 0;
5554
76.5k
      if (cur != '?') {
5555
2.24k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5556
2.24k
          "ParsePI: PI %s never end ...\n", target);
5557
74.2k
      } else {
5558
74.2k
    SKIP(2);
5559
5560
74.2k
#ifdef LIBXML_CATALOG_ENABLED
5561
74.2k
    if ((ctxt->inSubset == 0) &&
5562
58.5k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5563
15.9k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5564
5565
15.9k
        if ((ctxt->options & XML_PARSE_CATALOG_PI) &&
5566
0
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5567
0
       (allow == XML_CATA_ALLOW_ALL)))
5568
0
      xmlParseCatalogPI(ctxt, buf);
5569
15.9k
    }
5570
74.2k
#endif
5571
5572
    /*
5573
     * SAX: PI detected.
5574
     */
5575
74.2k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5576
61.4k
        (ctxt->sax->processingInstruction != NULL))
5577
61.4k
        ctxt->sax->processingInstruction(ctxt->userData,
5578
61.4k
                                         target, buf);
5579
74.2k
      }
5580
76.5k
      xmlFree(buf);
5581
76.5k
  } else {
5582
929
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5583
929
  }
5584
97.6k
    }
5585
97.6k
}
5586
5587
/**
5588
 * xmlParseNotationDecl:
5589
 * @ctxt:  an XML parser context
5590
 *
5591
 * DEPRECATED: Internal function, don't use.
5592
 *
5593
 * Parse a notation declaration. Always consumes '<!'.
5594
 *
5595
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5596
 *
5597
 * Hence there is actually 3 choices:
5598
 *     'PUBLIC' S PubidLiteral
5599
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5600
 * and 'SYSTEM' S SystemLiteral
5601
 *
5602
 * See the NOTE on xmlParseExternalID().
5603
 */
5604
5605
void
5606
4.12k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5607
4.12k
    const xmlChar *name;
5608
4.12k
    xmlChar *Pubid;
5609
4.12k
    xmlChar *Systemid;
5610
5611
4.12k
    if ((CUR != '<') || (NXT(1) != '!'))
5612
0
        return;
5613
4.12k
    SKIP(2);
5614
5615
4.12k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5616
4.03k
  int inputid = ctxt->input->id;
5617
4.03k
  SKIP(8);
5618
4.03k
  if (SKIP_BLANKS_PE == 0) {
5619
37
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5620
37
         "Space required after '<!NOTATION'\n");
5621
37
      return;
5622
37
  }
5623
5624
4.00k
        name = xmlParseName(ctxt);
5625
4.00k
  if (name == NULL) {
5626
179
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5627
179
      return;
5628
179
  }
5629
3.82k
  if (xmlStrchr(name, ':') != NULL) {
5630
22
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5631
22
         "colons are forbidden from notation names '%s'\n",
5632
22
         name, NULL, NULL);
5633
22
  }
5634
3.82k
  if (SKIP_BLANKS_PE == 0) {
5635
28
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5636
28
         "Space required after the NOTATION name'\n");
5637
28
      return;
5638
28
  }
5639
5640
  /*
5641
   * Parse the IDs.
5642
   */
5643
3.79k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5644
3.79k
  SKIP_BLANKS_PE;
5645
5646
3.79k
  if (RAW == '>') {
5647
3.39k
      if (inputid != ctxt->input->id) {
5648
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5649
0
                         "Notation declaration doesn't start and stop"
5650
0
                               " in the same entity\n");
5651
0
      }
5652
3.39k
      NEXT;
5653
3.39k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5654
2.68k
    (ctxt->sax->notationDecl != NULL))
5655
0
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5656
3.39k
  } else {
5657
395
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5658
395
  }
5659
3.79k
  if (Systemid != NULL) xmlFree(Systemid);
5660
3.79k
  if (Pubid != NULL) xmlFree(Pubid);
5661
3.79k
    }
5662
4.12k
}
5663
5664
/**
5665
 * xmlParseEntityDecl:
5666
 * @ctxt:  an XML parser context
5667
 *
5668
 * DEPRECATED: Internal function, don't use.
5669
 *
5670
 * Parse an entity declaration. Always consumes '<!'.
5671
 *
5672
 * [70] EntityDecl ::= GEDecl | PEDecl
5673
 *
5674
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5675
 *
5676
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5677
 *
5678
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5679
 *
5680
 * [74] PEDef ::= EntityValue | ExternalID
5681
 *
5682
 * [76] NDataDecl ::= S 'NDATA' S Name
5683
 *
5684
 * [ VC: Notation Declared ]
5685
 * The Name must match the declared name of a notation.
5686
 */
5687
5688
void
5689
64.2k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5690
64.2k
    const xmlChar *name = NULL;
5691
64.2k
    xmlChar *value = NULL;
5692
64.2k
    xmlChar *URI = NULL, *literal = NULL;
5693
64.2k
    const xmlChar *ndata = NULL;
5694
64.2k
    int isParameter = 0;
5695
64.2k
    xmlChar *orig = NULL;
5696
5697
64.2k
    if ((CUR != '<') || (NXT(1) != '!'))
5698
0
        return;
5699
64.2k
    SKIP(2);
5700
5701
    /* GROW; done in the caller */
5702
64.2k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5703
64.0k
  int inputid = ctxt->input->id;
5704
64.0k
  SKIP(6);
5705
64.0k
  if (SKIP_BLANKS_PE == 0) {
5706
12.5k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5707
12.5k
         "Space required after '<!ENTITY'\n");
5708
12.5k
  }
5709
5710
64.0k
  if (RAW == '%') {
5711
13.5k
      NEXT;
5712
13.5k
      if (SKIP_BLANKS_PE == 0) {
5713
6.71k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5714
6.71k
             "Space required after '%%'\n");
5715
6.71k
      }
5716
13.5k
      isParameter = 1;
5717
13.5k
  }
5718
5719
64.0k
        name = xmlParseName(ctxt);
5720
64.0k
  if (name == NULL) {
5721
221
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5722
221
                     "xmlParseEntityDecl: no name\n");
5723
221
            return;
5724
221
  }
5725
63.8k
  if (xmlStrchr(name, ':') != NULL) {
5726
1.74k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5727
1.74k
         "colons are forbidden from entities names '%s'\n",
5728
1.74k
         name, NULL, NULL);
5729
1.74k
  }
5730
63.8k
  if (SKIP_BLANKS_PE == 0) {
5731
25.3k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5732
25.3k
         "Space required after the entity name\n");
5733
25.3k
  }
5734
5735
  /*
5736
   * handle the various case of definitions...
5737
   */
5738
63.8k
  if (isParameter) {
5739
13.4k
      if ((RAW == '"') || (RAW == '\'')) {
5740
11.7k
          value = xmlParseEntityValue(ctxt, &orig);
5741
11.7k
    if (value) {
5742
11.7k
        if ((ctxt->sax != NULL) &&
5743
11.7k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5744
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5745
0
                        XML_INTERNAL_PARAMETER_ENTITY,
5746
0
            NULL, NULL, value);
5747
11.7k
    }
5748
11.7k
      } else {
5749
1.74k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5750
1.74k
    if ((URI == NULL) && (literal == NULL)) {
5751
50
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5752
50
    }
5753
1.74k
    if (URI) {
5754
1.63k
                    if (xmlStrchr(URI, '#')) {
5755
6
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5756
1.63k
                    } else {
5757
1.63k
                        if ((ctxt->sax != NULL) &&
5758
1.63k
                            (!ctxt->disableSAX) &&
5759
1.10k
                            (ctxt->sax->entityDecl != NULL))
5760
0
                            ctxt->sax->entityDecl(ctxt->userData, name,
5761
0
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5762
0
                                        literal, URI, NULL);
5763
1.63k
                    }
5764
1.63k
    }
5765
1.74k
      }
5766
50.3k
  } else {
5767
50.3k
      if ((RAW == '"') || (RAW == '\'')) {
5768
41.9k
          value = xmlParseEntityValue(ctxt, &orig);
5769
41.9k
    if ((ctxt->sax != NULL) &&
5770
41.9k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5771
0
        ctxt->sax->entityDecl(ctxt->userData, name,
5772
0
        XML_INTERNAL_GENERAL_ENTITY,
5773
0
        NULL, NULL, value);
5774
    /*
5775
     * For expat compatibility in SAX mode.
5776
     */
5777
41.9k
    if ((ctxt->myDoc == NULL) ||
5778
41.9k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5779
41.9k
        if (ctxt->myDoc == NULL) {
5780
1.59k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5781
1.59k
      if (ctxt->myDoc == NULL) {
5782
0
          xmlErrMemory(ctxt);
5783
0
          goto done;
5784
0
      }
5785
1.59k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5786
1.59k
        }
5787
41.9k
        if (ctxt->myDoc->intSubset == NULL) {
5788
1.59k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5789
1.59k
              BAD_CAST "fake", NULL, NULL);
5790
1.59k
                        if (ctxt->myDoc->intSubset == NULL) {
5791
0
                            xmlErrMemory(ctxt);
5792
0
                            goto done;
5793
0
                        }
5794
1.59k
                    }
5795
5796
41.9k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5797
41.9k
                    NULL, NULL, value);
5798
41.9k
    }
5799
41.9k
      } else {
5800
8.40k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5801
8.40k
    if ((URI == NULL) && (literal == NULL)) {
5802
1.05k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5803
1.05k
    }
5804
8.40k
    if (URI) {
5805
7.29k
                    if (xmlStrchr(URI, '#')) {
5806
243
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5807
243
                    }
5808
7.29k
    }
5809
8.40k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5810
844
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5811
844
           "Space required before 'NDATA'\n");
5812
844
    }
5813
8.40k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5814
2.95k
        SKIP(5);
5815
2.95k
        if (SKIP_BLANKS_PE == 0) {
5816
52
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5817
52
               "Space required after 'NDATA'\n");
5818
52
        }
5819
2.95k
        ndata = xmlParseName(ctxt);
5820
2.95k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5821
905
            (ctxt->sax->unparsedEntityDecl != NULL))
5822
0
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5823
0
            literal, URI, ndata);
5824
5.44k
    } else {
5825
5.44k
        if ((ctxt->sax != NULL) &&
5826
5.44k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5827
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5828
0
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5829
0
            literal, URI, NULL);
5830
        /*
5831
         * For expat compatibility in SAX mode.
5832
         * assuming the entity replacement was asked for
5833
         */
5834
5.44k
        if ((ctxt->replaceEntities != 0) &&
5835
5.44k
      ((ctxt->myDoc == NULL) ||
5836
5.44k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5837
5.44k
      if (ctxt->myDoc == NULL) {
5838
334
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5839
334
          if (ctxt->myDoc == NULL) {
5840
0
              xmlErrMemory(ctxt);
5841
0
        goto done;
5842
0
          }
5843
334
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5844
334
      }
5845
5846
5.44k
      if (ctxt->myDoc->intSubset == NULL) {
5847
334
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5848
334
            BAD_CAST "fake", NULL, NULL);
5849
334
                            if (ctxt->myDoc->intSubset == NULL) {
5850
0
                                xmlErrMemory(ctxt);
5851
0
                                goto done;
5852
0
                            }
5853
334
                        }
5854
5.44k
      xmlSAX2EntityDecl(ctxt, name,
5855
5.44k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5856
5.44k
                  literal, URI, NULL);
5857
5.44k
        }
5858
5.44k
    }
5859
8.40k
      }
5860
50.3k
  }
5861
63.8k
  SKIP_BLANKS_PE;
5862
63.8k
  if (RAW != '>') {
5863
1.41k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5864
1.41k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5865
1.41k
      xmlHaltParser(ctxt);
5866
62.4k
  } else {
5867
62.4k
      if (inputid != ctxt->input->id) {
5868
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5869
0
                         "Entity declaration doesn't start and stop in"
5870
0
                               " the same entity\n");
5871
0
      }
5872
62.4k
      NEXT;
5873
62.4k
  }
5874
63.8k
  if (orig != NULL) {
5875
      /*
5876
       * Ugly mechanism to save the raw entity value.
5877
       */
5878
53.2k
      xmlEntityPtr cur = NULL;
5879
5880
53.2k
      if (isParameter) {
5881
11.7k
          if ((ctxt->sax != NULL) &&
5882
11.7k
        (ctxt->sax->getParameterEntity != NULL))
5883
0
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5884
41.5k
      } else {
5885
41.5k
          if ((ctxt->sax != NULL) &&
5886
41.5k
        (ctxt->sax->getEntity != NULL))
5887
41.5k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5888
41.5k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5889
0
        cur = xmlSAX2GetEntity(ctxt, name);
5890
0
    }
5891
41.5k
      }
5892
53.2k
            if ((cur != NULL) && (cur->orig == NULL)) {
5893
0
    cur->orig = orig;
5894
0
                orig = NULL;
5895
0
      }
5896
53.2k
  }
5897
5898
63.8k
done:
5899
63.8k
  if (value != NULL) xmlFree(value);
5900
63.8k
  if (URI != NULL) xmlFree(URI);
5901
63.8k
  if (literal != NULL) xmlFree(literal);
5902
63.8k
        if (orig != NULL) xmlFree(orig);
5903
63.8k
    }
5904
64.2k
}
5905
5906
/**
5907
 * xmlParseDefaultDecl:
5908
 * @ctxt:  an XML parser context
5909
 * @value:  Receive a possible fixed default value for the attribute
5910
 *
5911
 * DEPRECATED: Internal function, don't use.
5912
 *
5913
 * Parse an attribute default declaration
5914
 *
5915
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5916
 *
5917
 * [ VC: Required Attribute ]
5918
 * if the default declaration is the keyword #REQUIRED, then the
5919
 * attribute must be specified for all elements of the type in the
5920
 * attribute-list declaration.
5921
 *
5922
 * [ VC: Attribute Default Legal ]
5923
 * The declared default value must meet the lexical constraints of
5924
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5925
 *
5926
 * [ VC: Fixed Attribute Default ]
5927
 * if an attribute has a default value declared with the #FIXED
5928
 * keyword, instances of that attribute must match the default value.
5929
 *
5930
 * [ WFC: No < in Attribute Values ]
5931
 * handled in xmlParseAttValue()
5932
 *
5933
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5934
 *          or XML_ATTRIBUTE_FIXED.
5935
 */
5936
5937
int
5938
54.7k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5939
54.7k
    int val;
5940
54.7k
    xmlChar *ret;
5941
5942
54.7k
    *value = NULL;
5943
54.7k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5944
3.35k
  SKIP(9);
5945
3.35k
  return(XML_ATTRIBUTE_REQUIRED);
5946
3.35k
    }
5947
51.4k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5948
5.36k
  SKIP(8);
5949
5.36k
  return(XML_ATTRIBUTE_IMPLIED);
5950
5.36k
    }
5951
46.0k
    val = XML_ATTRIBUTE_NONE;
5952
46.0k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5953
1.35k
  SKIP(6);
5954
1.35k
  val = XML_ATTRIBUTE_FIXED;
5955
1.35k
  if (SKIP_BLANKS_PE == 0) {
5956
53
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5957
53
         "Space required after '#FIXED'\n");
5958
53
  }
5959
1.35k
    }
5960
46.0k
    ret = xmlParseAttValue(ctxt);
5961
46.0k
    if (ret == NULL) {
5962
458
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5963
458
           "Attribute default value declaration error\n");
5964
458
    } else
5965
45.6k
        *value = ret;
5966
46.0k
    return(val);
5967
51.4k
}
5968
5969
/**
5970
 * xmlParseNotationType:
5971
 * @ctxt:  an XML parser context
5972
 *
5973
 * DEPRECATED: Internal function, don't use.
5974
 *
5975
 * parse an Notation attribute type.
5976
 *
5977
 * Note: the leading 'NOTATION' S part has already being parsed...
5978
 *
5979
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5980
 *
5981
 * [ VC: Notation Attributes ]
5982
 * Values of this type must match one of the notation names included
5983
 * in the declaration; all notation names in the declaration must be declared.
5984
 *
5985
 * Returns: the notation attribute tree built while parsing
5986
 */
5987
5988
xmlEnumerationPtr
5989
807
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5990
807
    const xmlChar *name;
5991
807
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5992
5993
807
    if (RAW != '(') {
5994
9
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5995
9
  return(NULL);
5996
9
    }
5997
1.98k
    do {
5998
1.98k
        NEXT;
5999
1.98k
  SKIP_BLANKS_PE;
6000
1.98k
        name = xmlParseName(ctxt);
6001
1.98k
  if (name == NULL) {
6002
57
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6003
57
         "Name expected in NOTATION declaration\n");
6004
57
            xmlFreeEnumeration(ret);
6005
57
      return(NULL);
6006
57
  }
6007
1.93k
        tmp = NULL;
6008
1.93k
#ifdef LIBXML_VALID_ENABLED
6009
1.93k
        if (ctxt->validate) {
6010
0
            tmp = ret;
6011
0
            while (tmp != NULL) {
6012
0
                if (xmlStrEqual(name, tmp->name)) {
6013
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6014
0
              "standalone: attribute notation value token %s duplicated\n",
6015
0
                                     name, NULL);
6016
0
                    if (!xmlDictOwns(ctxt->dict, name))
6017
0
                        xmlFree((xmlChar *) name);
6018
0
                    break;
6019
0
                }
6020
0
                tmp = tmp->next;
6021
0
            }
6022
0
        }
6023
1.93k
#endif /* LIBXML_VALID_ENABLED */
6024
1.93k
  if (tmp == NULL) {
6025
1.93k
      cur = xmlCreateEnumeration(name);
6026
1.93k
      if (cur == NULL) {
6027
0
                xmlErrMemory(ctxt);
6028
0
                xmlFreeEnumeration(ret);
6029
0
                return(NULL);
6030
0
            }
6031
1.93k
      if (last == NULL) ret = last = cur;
6032
1.17k
      else {
6033
1.17k
    last->next = cur;
6034
1.17k
    last = cur;
6035
1.17k
      }
6036
1.93k
  }
6037
1.93k
  SKIP_BLANKS_PE;
6038
1.93k
    } while (RAW == '|');
6039
741
    if (RAW != ')') {
6040
42
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
6041
42
        xmlFreeEnumeration(ret);
6042
42
  return(NULL);
6043
42
    }
6044
699
    NEXT;
6045
699
    return(ret);
6046
741
}
6047
6048
/**
6049
 * xmlParseEnumerationType:
6050
 * @ctxt:  an XML parser context
6051
 *
6052
 * DEPRECATED: Internal function, don't use.
6053
 *
6054
 * parse an Enumeration attribute type.
6055
 *
6056
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
6057
 *
6058
 * [ VC: Enumeration ]
6059
 * Values of this type must match one of the Nmtoken tokens in
6060
 * the declaration
6061
 *
6062
 * Returns: the enumeration attribute tree built while parsing
6063
 */
6064
6065
xmlEnumerationPtr
6066
8.77k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
6067
8.77k
    xmlChar *name;
6068
8.77k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6069
6070
8.77k
    if (RAW != '(') {
6071
319
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
6072
319
  return(NULL);
6073
319
    }
6074
11.1k
    do {
6075
11.1k
        NEXT;
6076
11.1k
  SKIP_BLANKS_PE;
6077
11.1k
        name = xmlParseNmtoken(ctxt);
6078
11.1k
  if (name == NULL) {
6079
70
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
6080
70
      return(ret);
6081
70
  }
6082
11.0k
        tmp = NULL;
6083
11.0k
#ifdef LIBXML_VALID_ENABLED
6084
11.0k
        if (ctxt->validate) {
6085
0
            tmp = ret;
6086
0
            while (tmp != NULL) {
6087
0
                if (xmlStrEqual(name, tmp->name)) {
6088
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6089
0
              "standalone: attribute enumeration value token %s duplicated\n",
6090
0
                                     name, NULL);
6091
0
                    if (!xmlDictOwns(ctxt->dict, name))
6092
0
                        xmlFree(name);
6093
0
                    break;
6094
0
                }
6095
0
                tmp = tmp->next;
6096
0
            }
6097
0
        }
6098
11.0k
#endif /* LIBXML_VALID_ENABLED */
6099
11.0k
  if (tmp == NULL) {
6100
11.0k
      cur = xmlCreateEnumeration(name);
6101
11.0k
      if (!xmlDictOwns(ctxt->dict, name))
6102
11.0k
    xmlFree(name);
6103
11.0k
      if (cur == NULL) {
6104
0
                xmlErrMemory(ctxt);
6105
0
                xmlFreeEnumeration(ret);
6106
0
                return(NULL);
6107
0
            }
6108
11.0k
      if (last == NULL) ret = last = cur;
6109
2.58k
      else {
6110
2.58k
    last->next = cur;
6111
2.58k
    last = cur;
6112
2.58k
      }
6113
11.0k
  }
6114
11.0k
  SKIP_BLANKS_PE;
6115
11.0k
    } while (RAW == '|');
6116
8.38k
    if (RAW != ')') {
6117
113
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
6118
113
  return(ret);
6119
113
    }
6120
8.27k
    NEXT;
6121
8.27k
    return(ret);
6122
8.38k
}
6123
6124
/**
6125
 * xmlParseEnumeratedType:
6126
 * @ctxt:  an XML parser context
6127
 * @tree:  the enumeration tree built while parsing
6128
 *
6129
 * DEPRECATED: Internal function, don't use.
6130
 *
6131
 * parse an Enumerated attribute type.
6132
 *
6133
 * [57] EnumeratedType ::= NotationType | Enumeration
6134
 *
6135
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6136
 *
6137
 *
6138
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6139
 */
6140
6141
int
6142
9.59k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6143
9.59k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6144
813
  SKIP(8);
6145
813
  if (SKIP_BLANKS_PE == 0) {
6146
6
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6147
6
         "Space required after 'NOTATION'\n");
6148
6
      return(0);
6149
6
  }
6150
807
  *tree = xmlParseNotationType(ctxt);
6151
807
  if (*tree == NULL) return(0);
6152
699
  return(XML_ATTRIBUTE_NOTATION);
6153
807
    }
6154
8.77k
    *tree = xmlParseEnumerationType(ctxt);
6155
8.77k
    if (*tree == NULL) return(0);
6156
8.44k
    return(XML_ATTRIBUTE_ENUMERATION);
6157
8.77k
}
6158
6159
/**
6160
 * xmlParseAttributeType:
6161
 * @ctxt:  an XML parser context
6162
 * @tree:  the enumeration tree built while parsing
6163
 *
6164
 * DEPRECATED: Internal function, don't use.
6165
 *
6166
 * parse the Attribute list def for an element
6167
 *
6168
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6169
 *
6170
 * [55] StringType ::= 'CDATA'
6171
 *
6172
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6173
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6174
 *
6175
 * Validity constraints for attribute values syntax are checked in
6176
 * xmlValidateAttributeValue()
6177
 *
6178
 * [ VC: ID ]
6179
 * Values of type ID must match the Name production. A name must not
6180
 * appear more than once in an XML document as a value of this type;
6181
 * i.e., ID values must uniquely identify the elements which bear them.
6182
 *
6183
 * [ VC: One ID per Element Type ]
6184
 * No element type may have more than one ID attribute specified.
6185
 *
6186
 * [ VC: ID Attribute Default ]
6187
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6188
 *
6189
 * [ VC: IDREF ]
6190
 * Values of type IDREF must match the Name production, and values
6191
 * of type IDREFS must match Names; each IDREF Name must match the value
6192
 * of an ID attribute on some element in the XML document; i.e. IDREF
6193
 * values must match the value of some ID attribute.
6194
 *
6195
 * [ VC: Entity Name ]
6196
 * Values of type ENTITY must match the Name production, values
6197
 * of type ENTITIES must match Names; each Entity Name must match the
6198
 * name of an unparsed entity declared in the DTD.
6199
 *
6200
 * [ VC: Name Token ]
6201
 * Values of type NMTOKEN must match the Nmtoken production; values
6202
 * of type NMTOKENS must match Nmtokens.
6203
 *
6204
 * Returns the attribute type
6205
 */
6206
int
6207
55.4k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6208
55.4k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6209
5.65k
  SKIP(5);
6210
5.65k
  return(XML_ATTRIBUTE_CDATA);
6211
49.8k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6212
927
  SKIP(6);
6213
927
  return(XML_ATTRIBUTE_IDREFS);
6214
48.9k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6215
696
  SKIP(5);
6216
696
  return(XML_ATTRIBUTE_IDREF);
6217
48.2k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6218
32.0k
        SKIP(2);
6219
32.0k
  return(XML_ATTRIBUTE_ID);
6220
32.0k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6221
600
  SKIP(6);
6222
600
  return(XML_ATTRIBUTE_ENTITY);
6223
15.5k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6224
4.62k
  SKIP(8);
6225
4.62k
  return(XML_ATTRIBUTE_ENTITIES);
6226
10.9k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6227
364
  SKIP(8);
6228
364
  return(XML_ATTRIBUTE_NMTOKENS);
6229
10.5k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6230
945
  SKIP(7);
6231
945
  return(XML_ATTRIBUTE_NMTOKEN);
6232
945
     }
6233
9.59k
     return(xmlParseEnumeratedType(ctxt, tree));
6234
55.4k
}
6235
6236
/**
6237
 * xmlParseAttributeListDecl:
6238
 * @ctxt:  an XML parser context
6239
 *
6240
 * DEPRECATED: Internal function, don't use.
6241
 *
6242
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6243
 *
6244
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6245
 *
6246
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6247
 *
6248
 */
6249
void
6250
12.6k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6251
12.6k
    const xmlChar *elemName;
6252
12.6k
    const xmlChar *attrName;
6253
12.6k
    xmlEnumerationPtr tree;
6254
6255
12.6k
    if ((CUR != '<') || (NXT(1) != '!'))
6256
0
        return;
6257
12.6k
    SKIP(2);
6258
6259
12.6k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6260
12.6k
  int inputid = ctxt->input->id;
6261
6262
12.6k
  SKIP(7);
6263
12.6k
  if (SKIP_BLANKS_PE == 0) {
6264
238
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6265
238
                     "Space required after '<!ATTLIST'\n");
6266
238
  }
6267
12.6k
        elemName = xmlParseName(ctxt);
6268
12.6k
  if (elemName == NULL) {
6269
76
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6270
76
         "ATTLIST: no name for Element\n");
6271
76
      return;
6272
76
  }
6273
12.5k
  SKIP_BLANKS_PE;
6274
12.5k
  GROW;
6275
66.7k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6276
56.0k
      int type;
6277
56.0k
      int def;
6278
56.0k
      xmlChar *defaultValue = NULL;
6279
6280
56.0k
      GROW;
6281
56.0k
            tree = NULL;
6282
56.0k
      attrName = xmlParseName(ctxt);
6283
56.0k
      if (attrName == NULL) {
6284
182
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6285
182
             "ATTLIST: no name for Attribute\n");
6286
182
    break;
6287
182
      }
6288
55.8k
      GROW;
6289
55.8k
      if (SKIP_BLANKS_PE == 0) {
6290
383
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6291
383
            "Space required after the attribute name\n");
6292
383
    break;
6293
383
      }
6294
6295
55.4k
      type = xmlParseAttributeType(ctxt, &tree);
6296
55.4k
      if (type <= 0) {
6297
444
          break;
6298
444
      }
6299
6300
55.0k
      GROW;
6301
55.0k
      if (SKIP_BLANKS_PE == 0) {
6302
259
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6303
259
             "Space required after the attribute type\n");
6304
259
          if (tree != NULL)
6305
185
        xmlFreeEnumeration(tree);
6306
259
    break;
6307
259
      }
6308
6309
54.7k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6310
54.7k
      if (def <= 0) {
6311
0
                if (defaultValue != NULL)
6312
0
        xmlFree(defaultValue);
6313
0
          if (tree != NULL)
6314
0
        xmlFreeEnumeration(tree);
6315
0
          break;
6316
0
      }
6317
54.7k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6318
40.5k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6319
6320
54.7k
      GROW;
6321
54.7k
            if (RAW != '>') {
6322
46.7k
    if (SKIP_BLANKS_PE == 0) {
6323
624
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6324
624
      "Space required after the attribute default value\n");
6325
624
        if (defaultValue != NULL)
6326
156
      xmlFree(defaultValue);
6327
624
        if (tree != NULL)
6328
69
      xmlFreeEnumeration(tree);
6329
624
        break;
6330
624
    }
6331
46.7k
      }
6332
54.1k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6333
46.8k
    (ctxt->sax->attributeDecl != NULL))
6334
0
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6335
0
                          type, def, defaultValue, tree);
6336
54.1k
      else if (tree != NULL)
6337
8.89k
    xmlFreeEnumeration(tree);
6338
6339
54.1k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6340
45.4k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6341
45.4k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6342
45.4k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6343
45.4k
      }
6344
54.1k
      if (ctxt->sax2) {
6345
54.1k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6346
54.1k
      }
6347
54.1k
      if (defaultValue != NULL)
6348
45.4k
          xmlFree(defaultValue);
6349
54.1k
      GROW;
6350
54.1k
  }
6351
12.5k
  if (RAW == '>') {
6352
10.7k
      if (inputid != ctxt->input->id) {
6353
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6354
0
                               "Attribute list declaration doesn't start and"
6355
0
                               " stop in the same entity\n");
6356
0
      }
6357
10.7k
      NEXT;
6358
10.7k
  }
6359
12.5k
    }
6360
12.6k
}
6361
6362
/**
6363
 * xmlParseElementMixedContentDecl:
6364
 * @ctxt:  an XML parser context
6365
 * @inputchk:  the input used for the current entity, needed for boundary checks
6366
 *
6367
 * DEPRECATED: Internal function, don't use.
6368
 *
6369
 * parse the declaration for a Mixed Element content
6370
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6371
 *
6372
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6373
 *                '(' S? '#PCDATA' S? ')'
6374
 *
6375
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6376
 *
6377
 * [ VC: No Duplicate Types ]
6378
 * The same name must not appear more than once in a single
6379
 * mixed-content declaration.
6380
 *
6381
 * returns: the list of the xmlElementContentPtr describing the element choices
6382
 */
6383
xmlElementContentPtr
6384
6.94k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6385
6.94k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6386
6.94k
    const xmlChar *elem = NULL;
6387
6388
6.94k
    GROW;
6389
6.94k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6390
6.94k
  SKIP(7);
6391
6.94k
  SKIP_BLANKS_PE;
6392
6.94k
  if (RAW == ')') {
6393
2.78k
      if (ctxt->input->id != inputchk) {
6394
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6395
0
                               "Element content declaration doesn't start and"
6396
0
                               " stop in the same entity\n");
6397
0
      }
6398
2.78k
      NEXT;
6399
2.78k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6400
2.78k
      if (ret == NULL)
6401
0
                goto mem_error;
6402
2.78k
      if (RAW == '*') {
6403
643
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6404
643
    NEXT;
6405
643
      }
6406
2.78k
      return(ret);
6407
2.78k
  }
6408
4.15k
  if ((RAW == '(') || (RAW == '|')) {
6409
3.99k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6410
3.99k
      if (ret == NULL)
6411
0
                goto mem_error;
6412
3.99k
  }
6413
12.4k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6414
8.41k
      NEXT;
6415
8.41k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6416
8.41k
            if (n == NULL)
6417
0
                goto mem_error;
6418
8.41k
      if (elem == NULL) {
6419
3.98k
    n->c1 = cur;
6420
3.98k
    if (cur != NULL)
6421
3.98k
        cur->parent = n;
6422
3.98k
    ret = cur = n;
6423
4.42k
      } else {
6424
4.42k
          cur->c2 = n;
6425
4.42k
    n->parent = cur;
6426
4.42k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6427
4.42k
                if (n->c1 == NULL)
6428
0
                    goto mem_error;
6429
4.42k
    n->c1->parent = n;
6430
4.42k
    cur = n;
6431
4.42k
      }
6432
8.41k
      SKIP_BLANKS_PE;
6433
8.41k
      elem = xmlParseName(ctxt);
6434
8.41k
      if (elem == NULL) {
6435
115
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6436
115
      "xmlParseElementMixedContentDecl : Name expected\n");
6437
115
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6438
115
    return(NULL);
6439
115
      }
6440
8.29k
      SKIP_BLANKS_PE;
6441
8.29k
      GROW;
6442
8.29k
  }
6443
4.04k
  if ((RAW == ')') && (NXT(1) == '*')) {
6444
2.72k
      if (elem != NULL) {
6445
2.72k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6446
2.72k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6447
2.72k
    if (cur->c2 == NULL)
6448
0
                    goto mem_error;
6449
2.72k
    cur->c2->parent = cur;
6450
2.72k
            }
6451
2.72k
            if (ret != NULL)
6452
2.72k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6453
2.72k
      if (ctxt->input->id != inputchk) {
6454
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6455
0
                               "Element content declaration doesn't start and"
6456
0
                               " stop in the same entity\n");
6457
0
      }
6458
2.72k
      SKIP(2);
6459
2.72k
  } else {
6460
1.31k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6461
1.31k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6462
1.31k
      return(NULL);
6463
1.31k
  }
6464
6465
4.04k
    } else {
6466
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6467
0
    }
6468
2.72k
    return(ret);
6469
6470
0
mem_error:
6471
0
    xmlErrMemory(ctxt);
6472
0
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6473
0
    return(NULL);
6474
6.94k
}
6475
6476
/**
6477
 * xmlParseElementChildrenContentDeclPriv:
6478
 * @ctxt:  an XML parser context
6479
 * @inputchk:  the input used for the current entity, needed for boundary checks
6480
 * @depth: the level of recursion
6481
 *
6482
 * parse the declaration for a Mixed Element content
6483
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6484
 *
6485
 *
6486
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6487
 *
6488
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6489
 *
6490
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6491
 *
6492
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6493
 *
6494
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6495
 * TODO Parameter-entity replacement text must be properly nested
6496
 *  with parenthesized groups. That is to say, if either of the
6497
 *  opening or closing parentheses in a choice, seq, or Mixed
6498
 *  construct is contained in the replacement text for a parameter
6499
 *  entity, both must be contained in the same replacement text. For
6500
 *  interoperability, if a parameter-entity reference appears in a
6501
 *  choice, seq, or Mixed construct, its replacement text should not
6502
 *  be empty, and neither the first nor last non-blank character of
6503
 *  the replacement text should be a connector (| or ,).
6504
 *
6505
 * Returns the tree of xmlElementContentPtr describing the element
6506
 *          hierarchy.
6507
 */
6508
static xmlElementContentPtr
6509
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6510
129k
                                       int depth) {
6511
129k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6512
129k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6513
129k
    const xmlChar *elem;
6514
129k
    xmlChar type = 0;
6515
6516
129k
    if (depth > maxDepth) {
6517
4
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6518
4
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6519
4
                "use XML_PARSE_HUGE\n", depth);
6520
4
  return(NULL);
6521
4
    }
6522
129k
    SKIP_BLANKS_PE;
6523
129k
    GROW;
6524
129k
    if (RAW == '(') {
6525
109k
  int inputid = ctxt->input->id;
6526
6527
        /* Recurse on first child */
6528
109k
  NEXT;
6529
109k
  SKIP_BLANKS_PE;
6530
109k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6531
109k
                                                           depth + 1);
6532
109k
        if (cur == NULL)
6533
85.8k
            return(NULL);
6534
23.3k
  SKIP_BLANKS_PE;
6535
23.3k
  GROW;
6536
23.3k
    } else {
6537
19.9k
  elem = xmlParseName(ctxt);
6538
19.9k
  if (elem == NULL) {
6539
201
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6540
201
      return(NULL);
6541
201
  }
6542
19.7k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6543
19.7k
  if (cur == NULL) {
6544
0
      xmlErrMemory(ctxt);
6545
0
      return(NULL);
6546
0
  }
6547
19.7k
  GROW;
6548
19.7k
  if (RAW == '?') {
6549
2.23k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6550
2.23k
      NEXT;
6551
17.4k
  } else if (RAW == '*') {
6552
861
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6553
861
      NEXT;
6554
16.6k
  } else if (RAW == '+') {
6555
2.27k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6556
2.27k
      NEXT;
6557
14.3k
  } else {
6558
14.3k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6559
14.3k
  }
6560
19.7k
  GROW;
6561
19.7k
    }
6562
43.0k
    SKIP_BLANKS_PE;
6563
134k
    while ((RAW != ')') && (PARSER_STOPPED(ctxt) == 0)) {
6564
        /*
6565
   * Each loop we parse one separator and one element.
6566
   */
6567
98.6k
        if (RAW == ',') {
6568
72.5k
      if (type == 0) type = CUR;
6569
6570
      /*
6571
       * Detect "Name | Name , Name" error
6572
       */
6573
66.0k
      else if (type != CUR) {
6574
10
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6575
10
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6576
10
                      type);
6577
10
    if ((last != NULL) && (last != ret))
6578
10
        xmlFreeDocElementContent(ctxt->myDoc, last);
6579
10
    if (ret != NULL)
6580
10
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6581
10
    return(NULL);
6582
10
      }
6583
72.5k
      NEXT;
6584
6585
72.5k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6586
72.5k
      if (op == NULL) {
6587
0
                xmlErrMemory(ctxt);
6588
0
    if ((last != NULL) && (last != ret))
6589
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6590
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6591
0
    return(NULL);
6592
0
      }
6593
72.5k
      if (last == NULL) {
6594
6.58k
    op->c1 = ret;
6595
6.58k
    if (ret != NULL)
6596
6.58k
        ret->parent = op;
6597
6.58k
    ret = cur = op;
6598
65.9k
      } else {
6599
65.9k
          cur->c2 = op;
6600
65.9k
    if (op != NULL)
6601
65.9k
        op->parent = cur;
6602
65.9k
    op->c1 = last;
6603
65.9k
    if (last != NULL)
6604
65.9k
        last->parent = op;
6605
65.9k
    cur =op;
6606
65.9k
    last = NULL;
6607
65.9k
      }
6608
72.5k
  } else if (RAW == '|') {
6609
24.7k
      if (type == 0) type = CUR;
6610
6611
      /*
6612
       * Detect "Name , Name | Name" error
6613
       */
6614
12.8k
      else if (type != CUR) {
6615
4
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6616
4
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6617
4
          type);
6618
4
    if ((last != NULL) && (last != ret))
6619
4
        xmlFreeDocElementContent(ctxt->myDoc, last);
6620
4
    if (ret != NULL)
6621
4
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6622
4
    return(NULL);
6623
4
      }
6624
24.7k
      NEXT;
6625
6626
24.7k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6627
24.7k
      if (op == NULL) {
6628
0
                xmlErrMemory(ctxt);
6629
0
    if ((last != NULL) && (last != ret))
6630
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6631
0
    if (ret != NULL)
6632
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6633
0
    return(NULL);
6634
0
      }
6635
24.7k
      if (last == NULL) {
6636
11.9k
    op->c1 = ret;
6637
11.9k
    if (ret != NULL)
6638
11.9k
        ret->parent = op;
6639
11.9k
    ret = cur = op;
6640
12.8k
      } else {
6641
12.8k
          cur->c2 = op;
6642
12.8k
    if (op != NULL)
6643
12.8k
        op->parent = cur;
6644
12.8k
    op->c1 = last;
6645
12.8k
    if (last != NULL)
6646
12.8k
        last->parent = op;
6647
12.8k
    cur =op;
6648
12.8k
    last = NULL;
6649
12.8k
      }
6650
24.7k
  } else {
6651
1.29k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6652
1.29k
      if ((last != NULL) && (last != ret))
6653
1.00k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6654
1.29k
      if (ret != NULL)
6655
1.29k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6656
1.29k
      return(NULL);
6657
1.29k
  }
6658
97.3k
  GROW;
6659
97.3k
  SKIP_BLANKS_PE;
6660
97.3k
  GROW;
6661
97.3k
  if (RAW == '(') {
6662
13.8k
      int inputid = ctxt->input->id;
6663
      /* Recurse on second child */
6664
13.8k
      NEXT;
6665
13.8k
      SKIP_BLANKS_PE;
6666
13.8k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6667
13.8k
                                                          depth + 1);
6668
13.8k
            if (last == NULL) {
6669
6.32k
    if (ret != NULL)
6670
6.32k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6671
6.32k
    return(NULL);
6672
6.32k
            }
6673
7.55k
      SKIP_BLANKS_PE;
6674
83.4k
  } else {
6675
83.4k
      elem = xmlParseName(ctxt);
6676
83.4k
      if (elem == NULL) {
6677
68
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6678
68
    if (ret != NULL)
6679
68
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6680
68
    return(NULL);
6681
68
      }
6682
83.3k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6683
83.3k
      if (last == NULL) {
6684
0
                xmlErrMemory(ctxt);
6685
0
    if (ret != NULL)
6686
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6687
0
    return(NULL);
6688
0
      }
6689
83.3k
      if (RAW == '?') {
6690
641
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6691
641
    NEXT;
6692
82.7k
      } else if (RAW == '*') {
6693
1.64k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6694
1.64k
    NEXT;
6695
81.0k
      } else if (RAW == '+') {
6696
1.26k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6697
1.26k
    NEXT;
6698
79.8k
      } else {
6699
79.8k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6700
79.8k
      }
6701
83.3k
  }
6702
90.9k
  SKIP_BLANKS_PE;
6703
90.9k
  GROW;
6704
90.9k
    }
6705
35.3k
    if ((cur != NULL) && (last != NULL)) {
6706
11.1k
        cur->c2 = last;
6707
11.1k
  if (last != NULL)
6708
11.1k
      last->parent = cur;
6709
11.1k
    }
6710
35.3k
    if (ctxt->input->id != inputchk) {
6711
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6712
0
                       "Element content declaration doesn't start and stop in"
6713
0
                       " the same entity\n");
6714
0
    }
6715
35.3k
    NEXT;
6716
35.3k
    if (RAW == '?') {
6717
4.78k
  if (ret != NULL) {
6718
4.78k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6719
4.37k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6720
1.17k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6721
3.61k
      else
6722
3.61k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6723
4.78k
  }
6724
4.78k
  NEXT;
6725
30.6k
    } else if (RAW == '*') {
6726
4.90k
  if (ret != NULL) {
6727
4.90k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6728
4.90k
      cur = ret;
6729
      /*
6730
       * Some normalization:
6731
       * (a | b* | c?)* == (a | b | c)*
6732
       */
6733
12.3k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6734
7.44k
    if ((cur->c1 != NULL) &&
6735
7.44k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6736
7.31k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6737
2.44k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6738
7.44k
    if ((cur->c2 != NULL) &&
6739
7.44k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6740
7.44k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6741
178
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6742
7.44k
    cur = cur->c2;
6743
7.44k
      }
6744
4.90k
  }
6745
4.90k
  NEXT;
6746
25.6k
    } else if (RAW == '+') {
6747
8.98k
  if (ret != NULL) {
6748
8.98k
      int found = 0;
6749
6750
8.98k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6751
7.38k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6752
3.27k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6753
5.71k
      else
6754
5.71k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6755
      /*
6756
       * Some normalization:
6757
       * (a | b*)+ == (a | b)*
6758
       * (a | b?)+ == (a | b)*
6759
       */
6760
14.9k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6761
5.94k
    if ((cur->c1 != NULL) &&
6762
5.94k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6763
5.20k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6764
1.77k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6765
1.77k
        found = 1;
6766
1.77k
    }
6767
5.94k
    if ((cur->c2 != NULL) &&
6768
5.94k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6769
5.80k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6770
274
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6771
274
        found = 1;
6772
274
    }
6773
5.94k
    cur = cur->c2;
6774
5.94k
      }
6775
8.98k
      if (found)
6776
1.84k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6777
8.98k
  }
6778
8.98k
  NEXT;
6779
8.98k
    }
6780
35.3k
    return(ret);
6781
43.0k
}
6782
6783
/**
6784
 * xmlParseElementChildrenContentDecl:
6785
 * @ctxt:  an XML parser context
6786
 * @inputchk:  the input used for the current entity, needed for boundary checks
6787
 *
6788
 * DEPRECATED: Internal function, don't use.
6789
 *
6790
 * parse the declaration for a Mixed Element content
6791
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6792
 *
6793
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6794
 *
6795
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6796
 *
6797
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6798
 *
6799
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6800
 *
6801
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6802
 * TODO Parameter-entity replacement text must be properly nested
6803
 *  with parenthesized groups. That is to say, if either of the
6804
 *  opening or closing parentheses in a choice, seq, or Mixed
6805
 *  construct is contained in the replacement text for a parameter
6806
 *  entity, both must be contained in the same replacement text. For
6807
 *  interoperability, if a parameter-entity reference appears in a
6808
 *  choice, seq, or Mixed construct, its replacement text should not
6809
 *  be empty, and neither the first nor last non-blank character of
6810
 *  the replacement text should be a connector (| or ,).
6811
 *
6812
 * Returns the tree of xmlElementContentPtr describing the element
6813
 *          hierarchy.
6814
 */
6815
xmlElementContentPtr
6816
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6817
    /* stub left for API/ABI compat */
6818
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6819
0
}
6820
6821
/**
6822
 * xmlParseElementContentDecl:
6823
 * @ctxt:  an XML parser context
6824
 * @name:  the name of the element being defined.
6825
 * @result:  the Element Content pointer will be stored here if any
6826
 *
6827
 * DEPRECATED: Internal function, don't use.
6828
 *
6829
 * parse the declaration for an Element content either Mixed or Children,
6830
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6831
 *
6832
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6833
 *
6834
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6835
 */
6836
6837
int
6838
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6839
12.9k
                           xmlElementContentPtr *result) {
6840
6841
12.9k
    xmlElementContentPtr tree = NULL;
6842
12.9k
    int inputid = ctxt->input->id;
6843
12.9k
    int res;
6844
6845
12.9k
    *result = NULL;
6846
6847
12.9k
    if (RAW != '(') {
6848
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6849
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6850
0
  return(-1);
6851
0
    }
6852
12.9k
    NEXT;
6853
12.9k
    GROW;
6854
12.9k
    SKIP_BLANKS_PE;
6855
12.9k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6856
6.94k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6857
6.94k
  res = XML_ELEMENT_TYPE_MIXED;
6858
6.94k
    } else {
6859
6.04k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6860
6.04k
  res = XML_ELEMENT_TYPE_ELEMENT;
6861
6.04k
    }
6862
12.9k
    SKIP_BLANKS_PE;
6863
12.9k
    *result = tree;
6864
12.9k
    return(res);
6865
12.9k
}
6866
6867
/**
6868
 * xmlParseElementDecl:
6869
 * @ctxt:  an XML parser context
6870
 *
6871
 * DEPRECATED: Internal function, don't use.
6872
 *
6873
 * Parse an element declaration. Always consumes '<!'.
6874
 *
6875
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6876
 *
6877
 * [ VC: Unique Element Type Declaration ]
6878
 * No element type may be declared more than once
6879
 *
6880
 * Returns the type of the element, or -1 in case of error
6881
 */
6882
int
6883
15.0k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6884
15.0k
    const xmlChar *name;
6885
15.0k
    int ret = -1;
6886
15.0k
    xmlElementContentPtr content  = NULL;
6887
6888
15.0k
    if ((CUR != '<') || (NXT(1) != '!'))
6889
0
        return(ret);
6890
15.0k
    SKIP(2);
6891
6892
    /* GROW; done in the caller */
6893
15.0k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6894
14.9k
  int inputid = ctxt->input->id;
6895
6896
14.9k
  SKIP(7);
6897
14.9k
  if (SKIP_BLANKS_PE == 0) {
6898
26
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6899
26
               "Space required after 'ELEMENT'\n");
6900
26
      return(-1);
6901
26
  }
6902
14.8k
        name = xmlParseName(ctxt);
6903
14.8k
  if (name == NULL) {
6904
29
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6905
29
         "xmlParseElementDecl: no name for Element\n");
6906
29
      return(-1);
6907
29
  }
6908
14.8k
  if (SKIP_BLANKS_PE == 0) {
6909
160
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6910
160
         "Space required after the element name\n");
6911
160
  }
6912
14.8k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6913
534
      SKIP(5);
6914
      /*
6915
       * Element must always be empty.
6916
       */
6917
534
      ret = XML_ELEMENT_TYPE_EMPTY;
6918
14.3k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6919
804
             (NXT(2) == 'Y')) {
6920
799
      SKIP(3);
6921
      /*
6922
       * Element is a generic container.
6923
       */
6924
799
      ret = XML_ELEMENT_TYPE_ANY;
6925
13.5k
  } else if (RAW == '(') {
6926
12.9k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6927
12.9k
  } else {
6928
      /*
6929
       * [ WFC: PEs in Internal Subset ] error handling.
6930
       */
6931
536
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6932
536
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6933
536
      return(-1);
6934
536
  }
6935
6936
14.3k
  SKIP_BLANKS_PE;
6937
6938
14.3k
  if (RAW != '>') {
6939
1.23k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6940
1.23k
      if (content != NULL) {
6941
68
    xmlFreeDocElementContent(ctxt->myDoc, content);
6942
68
      }
6943
13.0k
  } else {
6944
13.0k
      if (inputid != ctxt->input->id) {
6945
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6946
0
                               "Element declaration doesn't start and stop in"
6947
0
                               " the same entity\n");
6948
0
      }
6949
6950
13.0k
      NEXT;
6951
13.0k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6952
5.63k
    (ctxt->sax->elementDecl != NULL)) {
6953
0
    if (content != NULL)
6954
0
        content->parent = NULL;
6955
0
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6956
0
                           content);
6957
0
    if ((content != NULL) && (content->parent == NULL)) {
6958
        /*
6959
         * this is a trick: if xmlAddElementDecl is called,
6960
         * instead of copying the full tree it is plugged directly
6961
         * if called from the parser. Avoid duplicating the
6962
         * interfaces or change the API/ABI
6963
         */
6964
0
        xmlFreeDocElementContent(ctxt->myDoc, content);
6965
0
    }
6966
13.0k
      } else if (content != NULL) {
6967
9.90k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6968
9.90k
      }
6969
13.0k
  }
6970
14.3k
    }
6971
14.4k
    return(ret);
6972
15.0k
}
6973
6974
/**
6975
 * xmlParseConditionalSections
6976
 * @ctxt:  an XML parser context
6977
 *
6978
 * Parse a conditional section. Always consumes '<!['.
6979
 *
6980
 * [61] conditionalSect ::= includeSect | ignoreSect
6981
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6982
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6983
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6984
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6985
 */
6986
6987
static void
6988
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6989
0
    int *inputIds = NULL;
6990
0
    size_t inputIdsSize = 0;
6991
0
    size_t depth = 0;
6992
6993
0
    while (PARSER_STOPPED(ctxt) == 0) {
6994
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6995
0
            int id = ctxt->input->id;
6996
6997
0
            SKIP(3);
6998
0
            SKIP_BLANKS_PE;
6999
7000
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
7001
0
                SKIP(7);
7002
0
                SKIP_BLANKS_PE;
7003
0
                if (RAW != '[') {
7004
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7005
0
                    xmlHaltParser(ctxt);
7006
0
                    goto error;
7007
0
                }
7008
0
                if (ctxt->input->id != id) {
7009
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7010
0
                                   "All markup of the conditional section is"
7011
0
                                   " not in the same entity\n");
7012
0
                }
7013
0
                NEXT;
7014
7015
0
                if (inputIdsSize <= depth) {
7016
0
                    int *tmp;
7017
0
                    int newSize;
7018
7019
0
                    newSize = xmlGrowCapacity(inputIdsSize, sizeof(tmp[0]),
7020
0
                                              4, 1000);
7021
0
                    if (newSize < 0) {
7022
0
                        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
7023
0
                                       "Maximum conditional section nesting"
7024
0
                                       " depth exceeded\n");
7025
0
                        goto error;
7026
0
                    }
7027
0
                    tmp = xmlRealloc(inputIds, newSize * sizeof(tmp[0]));
7028
0
                    if (tmp == NULL) {
7029
0
                        xmlErrMemory(ctxt);
7030
0
                        goto error;
7031
0
                    }
7032
0
                    inputIds = tmp;
7033
0
                    inputIdsSize = newSize;
7034
0
                }
7035
0
                inputIds[depth] = id;
7036
0
                depth++;
7037
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
7038
0
                size_t ignoreDepth = 0;
7039
7040
0
                SKIP(6);
7041
0
                SKIP_BLANKS_PE;
7042
0
                if (RAW != '[') {
7043
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7044
0
                    xmlHaltParser(ctxt);
7045
0
                    goto error;
7046
0
                }
7047
0
                if (ctxt->input->id != id) {
7048
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7049
0
                                   "All markup of the conditional section is"
7050
0
                                   " not in the same entity\n");
7051
0
                }
7052
0
                NEXT;
7053
7054
0
                while (PARSER_STOPPED(ctxt) == 0) {
7055
0
                    if (RAW == 0) {
7056
0
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
7057
0
                        goto error;
7058
0
                    }
7059
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7060
0
                        SKIP(3);
7061
0
                        ignoreDepth++;
7062
                        /* Check for integer overflow */
7063
0
                        if (ignoreDepth == 0) {
7064
0
                            xmlErrMemory(ctxt);
7065
0
                            goto error;
7066
0
                        }
7067
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
7068
0
                               (NXT(2) == '>')) {
7069
0
                        SKIP(3);
7070
0
                        if (ignoreDepth == 0)
7071
0
                            break;
7072
0
                        ignoreDepth--;
7073
0
                    } else {
7074
0
                        NEXT;
7075
0
                    }
7076
0
                }
7077
7078
0
                if (ctxt->input->id != id) {
7079
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7080
0
                                   "All markup of the conditional section is"
7081
0
                                   " not in the same entity\n");
7082
0
                }
7083
0
            } else {
7084
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
7085
0
                xmlHaltParser(ctxt);
7086
0
                goto error;
7087
0
            }
7088
0
        } else if ((depth > 0) &&
7089
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
7090
0
            depth--;
7091
0
            if (ctxt->input->id != inputIds[depth]) {
7092
0
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7093
0
                               "All markup of the conditional section is not"
7094
0
                               " in the same entity\n");
7095
0
            }
7096
0
            SKIP(3);
7097
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7098
0
            xmlParseMarkupDecl(ctxt);
7099
0
        } else {
7100
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7101
0
            xmlHaltParser(ctxt);
7102
0
            goto error;
7103
0
        }
7104
7105
0
        if (depth == 0)
7106
0
            break;
7107
7108
0
        SKIP_BLANKS_PE;
7109
0
        SHRINK;
7110
0
        GROW;
7111
0
    }
7112
7113
0
error:
7114
0
    xmlFree(inputIds);
7115
0
}
7116
7117
/**
7118
 * xmlParseMarkupDecl:
7119
 * @ctxt:  an XML parser context
7120
 *
7121
 * DEPRECATED: Internal function, don't use.
7122
 *
7123
 * Parse markup declarations. Always consumes '<!' or '<?'.
7124
 *
7125
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7126
 *                     NotationDecl | PI | Comment
7127
 *
7128
 * [ VC: Proper Declaration/PE Nesting ]
7129
 * Parameter-entity replacement text must be properly nested with
7130
 * markup declarations. That is to say, if either the first character
7131
 * or the last character of a markup declaration (markupdecl above) is
7132
 * contained in the replacement text for a parameter-entity reference,
7133
 * both must be contained in the same replacement text.
7134
 *
7135
 * [ WFC: PEs in Internal Subset ]
7136
 * In the internal DTD subset, parameter-entity references can occur
7137
 * only where markup declarations can occur, not within markup declarations.
7138
 * (This does not apply to references that occur in external parameter
7139
 * entities or to the external subset.)
7140
 */
7141
void
7142
145k
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7143
145k
    GROW;
7144
145k
    if (CUR == '<') {
7145
145k
        if (NXT(1) == '!') {
7146
117k
      switch (NXT(2)) {
7147
79.3k
          case 'E':
7148
79.3k
        if (NXT(3) == 'L')
7149
15.0k
      xmlParseElementDecl(ctxt);
7150
64.3k
        else if (NXT(3) == 'N')
7151
64.2k
      xmlParseEntityDecl(ctxt);
7152
66
                    else
7153
66
                        SKIP(2);
7154
79.3k
        break;
7155
12.6k
          case 'A':
7156
12.6k
        xmlParseAttributeListDecl(ctxt);
7157
12.6k
        break;
7158
4.12k
          case 'N':
7159
4.12k
        xmlParseNotationDecl(ctxt);
7160
4.12k
        break;
7161
21.0k
          case '-':
7162
21.0k
        xmlParseComment(ctxt);
7163
21.0k
        break;
7164
201
    default:
7165
201
                    xmlFatalErr(ctxt,
7166
201
                                ctxt->inSubset == 2 ?
7167
0
                                    XML_ERR_EXT_SUBSET_NOT_FINISHED :
7168
201
                                    XML_ERR_INT_SUBSET_NOT_FINISHED,
7169
201
                                NULL);
7170
201
                    SKIP(2);
7171
201
        break;
7172
117k
      }
7173
117k
  } else if (NXT(1) == '?') {
7174
27.9k
      xmlParsePI(ctxt);
7175
27.9k
  }
7176
145k
    }
7177
145k
}
7178
7179
/**
7180
 * xmlParseTextDecl:
7181
 * @ctxt:  an XML parser context
7182
 *
7183
 * DEPRECATED: Internal function, don't use.
7184
 *
7185
 * parse an XML declaration header for external entities
7186
 *
7187
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7188
 */
7189
7190
void
7191
0
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7192
0
    xmlChar *version;
7193
7194
    /*
7195
     * We know that '<?xml' is here.
7196
     */
7197
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7198
0
  SKIP(5);
7199
0
    } else {
7200
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7201
0
  return;
7202
0
    }
7203
7204
0
    if (SKIP_BLANKS == 0) {
7205
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7206
0
           "Space needed after '<?xml'\n");
7207
0
    }
7208
7209
    /*
7210
     * We may have the VersionInfo here.
7211
     */
7212
0
    version = xmlParseVersionInfo(ctxt);
7213
0
    if (version == NULL) {
7214
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7215
0
        if (version == NULL) {
7216
0
            xmlErrMemory(ctxt);
7217
0
            return;
7218
0
        }
7219
0
    } else {
7220
0
  if (SKIP_BLANKS == 0) {
7221
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7222
0
               "Space needed here\n");
7223
0
  }
7224
0
    }
7225
0
    ctxt->input->version = version;
7226
7227
    /*
7228
     * We must have the encoding declaration
7229
     */
7230
0
    xmlParseEncodingDecl(ctxt);
7231
7232
0
    SKIP_BLANKS;
7233
0
    if ((RAW == '?') && (NXT(1) == '>')) {
7234
0
        SKIP(2);
7235
0
    } else if (RAW == '>') {
7236
        /* Deprecated old WD ... */
7237
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7238
0
  NEXT;
7239
0
    } else {
7240
0
        int c;
7241
7242
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7243
0
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7244
0
            NEXT;
7245
0
            if (c == '>')
7246
0
                break;
7247
0
        }
7248
0
    }
7249
0
}
7250
7251
/**
7252
 * xmlParseExternalSubset:
7253
 * @ctxt:  an XML parser context
7254
 * @ExternalID: the external identifier
7255
 * @SystemID: the system identifier (or URL)
7256
 *
7257
 * DEPRECATED: Internal function, don't use.
7258
 *
7259
 * parse Markup declarations from an external subset
7260
 *
7261
 * [30] extSubset ::= textDecl? extSubsetDecl
7262
 *
7263
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7264
 */
7265
void
7266
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7267
0
                       const xmlChar *SystemID) {
7268
0
    int oldInputNr;
7269
7270
0
    xmlCtxtInitializeLate(ctxt);
7271
7272
0
    xmlDetectEncoding(ctxt);
7273
7274
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7275
0
  xmlParseTextDecl(ctxt);
7276
0
    }
7277
0
    if (ctxt->myDoc == NULL) {
7278
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7279
0
  if (ctxt->myDoc == NULL) {
7280
0
      xmlErrMemory(ctxt);
7281
0
      return;
7282
0
  }
7283
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7284
0
    }
7285
0
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) &&
7286
0
        (xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID) == NULL)) {
7287
0
        xmlErrMemory(ctxt);
7288
0
    }
7289
7290
0
    ctxt->inSubset = 2;
7291
0
    oldInputNr = ctxt->inputNr;
7292
7293
0
    SKIP_BLANKS_PE;
7294
0
    while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) &&
7295
0
           (!PARSER_STOPPED(ctxt))) {
7296
0
  GROW;
7297
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7298
0
            xmlParseConditionalSections(ctxt);
7299
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7300
0
            xmlParseMarkupDecl(ctxt);
7301
0
        } else {
7302
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7303
0
            xmlHaltParser(ctxt);
7304
0
            return;
7305
0
        }
7306
0
        SKIP_BLANKS_PE;
7307
0
        SHRINK;
7308
0
    }
7309
7310
0
    while (ctxt->inputNr > oldInputNr)
7311
0
        xmlPopPE(ctxt);
7312
7313
0
    xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
7314
0
}
7315
7316
/**
7317
 * xmlParseReference:
7318
 * @ctxt:  an XML parser context
7319
 *
7320
 * DEPRECATED: Internal function, don't use.
7321
 *
7322
 * parse and handle entity references in content, depending on the SAX
7323
 * interface, this may end-up in a call to character() if this is a
7324
 * CharRef, a predefined entity, if there is no reference() callback.
7325
 * or if the parser was asked to switch to that mode.
7326
 *
7327
 * Always consumes '&'.
7328
 *
7329
 * [67] Reference ::= EntityRef | CharRef
7330
 */
7331
void
7332
404k
xmlParseReference(xmlParserCtxtPtr ctxt) {
7333
404k
    xmlEntityPtr ent = NULL;
7334
404k
    const xmlChar *name;
7335
404k
    xmlChar *val;
7336
7337
404k
    if (RAW != '&')
7338
0
        return;
7339
7340
    /*
7341
     * Simple case of a CharRef
7342
     */
7343
404k
    if (NXT(1) == '#') {
7344
105k
  int i = 0;
7345
105k
  xmlChar out[16];
7346
105k
  int value = xmlParseCharRef(ctxt);
7347
7348
105k
  if (value == 0)
7349
214
      return;
7350
7351
        /*
7352
         * Just encode the value in UTF-8
7353
         */
7354
105k
        COPY_BUF(out, i, value);
7355
105k
        out[i] = 0;
7356
105k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7357
105k
            (!ctxt->disableSAX))
7358
105k
            ctxt->sax->characters(ctxt->userData, out, i);
7359
105k
  return;
7360
105k
    }
7361
7362
    /*
7363
     * We are seeing an entity reference
7364
     */
7365
298k
    name = xmlParseEntityRefInternal(ctxt);
7366
298k
    if (name == NULL)
7367
266
        return;
7368
298k
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7369
298k
    if (ent == NULL) {
7370
        /*
7371
         * Create a reference for undeclared entities.
7372
         */
7373
1.31k
        if ((ctxt->replaceEntities == 0) &&
7374
0
            (ctxt->sax != NULL) &&
7375
0
            (ctxt->disableSAX == 0) &&
7376
0
            (ctxt->sax->reference != NULL)) {
7377
0
            ctxt->sax->reference(ctxt->userData, name);
7378
0
        }
7379
1.31k
        return;
7380
1.31k
    }
7381
297k
    if (!ctxt->wellFormed)
7382
0
  return;
7383
7384
    /* special case of predefined entities */
7385
297k
    if ((ent->name == NULL) ||
7386
297k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7387
297k
  val = ent->content;
7388
297k
  if (val == NULL) return;
7389
  /*
7390
   * inline the entity.
7391
   */
7392
297k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7393
297k
      (!ctxt->disableSAX))
7394
297k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7395
297k
  return;
7396
297k
    }
7397
7398
    /*
7399
     * Some users try to parse entities on their own and used to set
7400
     * the renamed "checked" member. Fix the flags to cover this
7401
     * case.
7402
     */
7403
18.4E
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7404
0
        ent->flags |= XML_ENT_PARSED;
7405
7406
    /*
7407
     * The first reference to the entity triggers a parsing phase
7408
     * where the ent->children is filled with the result from
7409
     * the parsing.
7410
     * Note: external parsed entities will not be loaded, it is not
7411
     * required for a non-validating parser, unless the parsing option
7412
     * of validating, or substituting entities were given. Doing so is
7413
     * far more secure as the parser will only process data coming from
7414
     * the document entity by default.
7415
     *
7416
     * FIXME: This doesn't work correctly since entities can be
7417
     * expanded with different namespace declarations in scope.
7418
     * For example:
7419
     *
7420
     * <!DOCTYPE doc [
7421
     *   <!ENTITY ent "<ns:elem/>">
7422
     * ]>
7423
     * <doc>
7424
     *   <decl1 xmlns:ns="urn:ns1">
7425
     *     &ent;
7426
     *   </decl1>
7427
     *   <decl2 xmlns:ns="urn:ns2">
7428
     *     &ent;
7429
     *   </decl2>
7430
     * </doc>
7431
     *
7432
     * Proposed fix:
7433
     *
7434
     * - Ignore current namespace declarations when parsing the
7435
     *   entity. If a prefix can't be resolved, don't report an error
7436
     *   but mark it as unresolved.
7437
     * - Try to resolve these prefixes when expanding the entity.
7438
     *   This will require a specialized version of xmlStaticCopyNode
7439
     *   which can also make use of the namespace hash table to avoid
7440
     *   quadratic behavior.
7441
     *
7442
     * Alternatively, we could simply reparse the entity on each
7443
     * expansion like we already do with custom SAX callbacks.
7444
     * External entity content should be cached in this case.
7445
     */
7446
18.4E
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7447
0
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7448
0
         ((ctxt->replaceEntities) ||
7449
0
          (ctxt->validate)))) {
7450
0
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7451
0
            xmlCtxtParseEntity(ctxt, ent);
7452
0
        } else if (ent->children == NULL) {
7453
            /*
7454
             * Probably running in SAX mode and the callbacks don't
7455
             * build the entity content. Parse the entity again.
7456
             *
7457
             * This will also be triggered in normal tree builder mode
7458
             * if an entity happens to be empty, causing unnecessary
7459
             * reloads. It's hard to come up with a reliable check in
7460
             * which mode we're running.
7461
             */
7462
0
            xmlCtxtParseEntity(ctxt, ent);
7463
0
        }
7464
0
    }
7465
7466
    /*
7467
     * We also check for amplification if entities aren't substituted.
7468
     * They might be expanded later.
7469
     */
7470
18.4E
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7471
0
        return;
7472
7473
18.4E
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7474
0
        return;
7475
7476
18.4E
    if (ctxt->replaceEntities == 0) {
7477
  /*
7478
   * Create a reference
7479
   */
7480
0
        if (ctxt->sax->reference != NULL)
7481
0
      ctxt->sax->reference(ctxt->userData, ent->name);
7482
18.4E
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7483
0
        xmlNodePtr copy, cur;
7484
7485
        /*
7486
         * Seems we are generating the DOM content, copy the tree
7487
   */
7488
0
        cur = ent->children;
7489
7490
        /*
7491
         * Handle first text node with SAX to coalesce text efficiently
7492
         */
7493
0
        if ((cur->type == XML_TEXT_NODE) ||
7494
0
            (cur->type == XML_CDATA_SECTION_NODE)) {
7495
0
            int len = xmlStrlen(cur->content);
7496
7497
0
            if ((cur->type == XML_TEXT_NODE) ||
7498
0
                (ctxt->options & XML_PARSE_NOCDATA)) {
7499
0
                if (ctxt->sax->characters != NULL)
7500
0
                    ctxt->sax->characters(ctxt, cur->content, len);
7501
0
            } else {
7502
0
                if (ctxt->sax->cdataBlock != NULL)
7503
0
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7504
0
            }
7505
7506
0
            cur = cur->next;
7507
0
        }
7508
7509
0
        while (cur != NULL) {
7510
0
            xmlNodePtr last;
7511
7512
            /*
7513
             * Handle last text node with SAX to coalesce text efficiently
7514
             */
7515
0
            if ((cur->next == NULL) &&
7516
0
                ((cur->type == XML_TEXT_NODE) ||
7517
0
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7518
0
                int len = xmlStrlen(cur->content);
7519
7520
0
                if ((cur->type == XML_TEXT_NODE) ||
7521
0
                    (ctxt->options & XML_PARSE_NOCDATA)) {
7522
0
                    if (ctxt->sax->characters != NULL)
7523
0
                        ctxt->sax->characters(ctxt, cur->content, len);
7524
0
                } else {
7525
0
                    if (ctxt->sax->cdataBlock != NULL)
7526
0
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7527
0
                }
7528
7529
0
                break;
7530
0
            }
7531
7532
            /*
7533
             * Reset coalesce buffer stats only for non-text nodes.
7534
             */
7535
0
            ctxt->nodemem = 0;
7536
0
            ctxt->nodelen = 0;
7537
7538
0
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7539
7540
0
            if (copy == NULL) {
7541
0
                xmlErrMemory(ctxt);
7542
0
                break;
7543
0
            }
7544
7545
0
            if (ctxt->parseMode == XML_PARSE_READER) {
7546
                /* Needed for reader */
7547
0
                copy->extra = cur->extra;
7548
                /* Maybe needed for reader */
7549
0
                copy->_private = cur->_private;
7550
0
            }
7551
7552
0
            copy->parent = ctxt->node;
7553
0
            last = ctxt->node->last;
7554
0
            if (last == NULL) {
7555
0
                ctxt->node->children = copy;
7556
0
            } else {
7557
0
                last->next = copy;
7558
0
                copy->prev = last;
7559
0
            }
7560
0
            ctxt->node->last = copy;
7561
7562
0
            cur = cur->next;
7563
0
        }
7564
0
    }
7565
18.4E
}
7566
7567
static void
7568
41.2k
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7569
    /*
7570
     * [ WFC: Entity Declared ]
7571
     * In a document without any DTD, a document with only an
7572
     * internal DTD subset which contains no parameter entity
7573
     * references, or a document with "standalone='yes'", the
7574
     * Name given in the entity reference must match that in an
7575
     * entity declaration, except that well-formed documents
7576
     * need not declare any of the following entities: amp, lt,
7577
     * gt, apos, quot.
7578
     * The declaration of a parameter entity must precede any
7579
     * reference to it.
7580
     * Similarly, the declaration of a general entity must
7581
     * precede any reference to it which appears in a default
7582
     * value in an attribute-list declaration. Note that if
7583
     * entities are declared in the external subset or in
7584
     * external parameter entities, a non-validating processor
7585
     * is not obligated to read and process their declarations;
7586
     * for such documents, the rule that an entity must be
7587
     * declared is a well-formedness constraint only if
7588
     * standalone='yes'.
7589
     */
7590
41.2k
    if ((ctxt->standalone == 1) ||
7591
39.4k
        ((ctxt->hasExternalSubset == 0) &&
7592
37.2k
         (ctxt->hasPErefs == 0))) {
7593
25.9k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7594
25.9k
                          "Entity '%s' not defined\n", name);
7595
25.9k
    } else if (ctxt->validate) {
7596
        /*
7597
         * [ VC: Entity Declared ]
7598
         * In a document with an external subset or external
7599
         * parameter entities with "standalone='no'", ...
7600
         * ... The declaration of a parameter entity must
7601
         * precede any reference to it...
7602
         */
7603
0
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7604
0
                         "Entity '%s' not defined\n", name, NULL);
7605
15.3k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7606
15.3k
               ((ctxt->replaceEntities) &&
7607
15.3k
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7608
        /*
7609
         * Also raise a non-fatal error
7610
         *
7611
         * - if the external subset is loaded and all entity declarations
7612
         *   should be available, or
7613
         * - entity substitution was requested without restricting
7614
         *   external entity access.
7615
         */
7616
15.3k
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7617
15.3k
                     "Entity '%s' not defined\n", name);
7618
15.3k
    } else {
7619
0
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7620
0
                      "Entity '%s' not defined\n", name, NULL);
7621
0
    }
7622
7623
41.2k
    ctxt->valid = 0;
7624
41.2k
}
7625
7626
static xmlEntityPtr
7627
520k
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7628
520k
    xmlEntityPtr ent = NULL;
7629
7630
    /*
7631
     * Predefined entities override any extra definition
7632
     */
7633
520k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7634
520k
        ent = xmlGetPredefinedEntity(name);
7635
520k
        if (ent != NULL)
7636
492k
            return(ent);
7637
520k
    }
7638
7639
    /*
7640
     * Ask first SAX for entity resolution, otherwise try the
7641
     * entities which may have stored in the parser context.
7642
     */
7643
28.5k
    if (ctxt->sax != NULL) {
7644
28.5k
  if (ctxt->sax->getEntity != NULL)
7645
28.5k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7646
28.5k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7647
2.84k
      (ctxt->options & XML_PARSE_OLDSAX))
7648
0
      ent = xmlGetPredefinedEntity(name);
7649
28.5k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7650
2.84k
      (ctxt->userData==ctxt)) {
7651
0
      ent = xmlSAX2GetEntity(ctxt, name);
7652
0
  }
7653
28.5k
    }
7654
7655
28.5k
    if (ent == NULL) {
7656
28.5k
        xmlHandleUndeclaredEntity(ctxt, name);
7657
28.5k
    }
7658
7659
    /*
7660
     * [ WFC: Parsed Entity ]
7661
     * An entity reference must not contain the name of an
7662
     * unparsed entity
7663
     */
7664
0
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7665
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7666
0
     "Entity reference to unparsed entity %s\n", name);
7667
0
        ent = NULL;
7668
0
    }
7669
7670
    /*
7671
     * [ WFC: No External Entity References ]
7672
     * Attribute values cannot contain direct or indirect
7673
     * entity references to external entities.
7674
     */
7675
0
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7676
0
        if (inAttr) {
7677
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7678
0
                 "Attribute references external entity '%s'\n", name);
7679
0
            ent = NULL;
7680
0
        }
7681
0
    }
7682
7683
28.5k
    return(ent);
7684
520k
}
7685
7686
/**
7687
 * xmlParseEntityRefInternal:
7688
 * @ctxt:  an XML parser context
7689
 * @inAttr:  whether we are in an attribute value
7690
 *
7691
 * Parse an entity reference. Always consumes '&'.
7692
 *
7693
 * [68] EntityRef ::= '&' Name ';'
7694
 *
7695
 * Returns the name, or NULL in case of error.
7696
 */
7697
static const xmlChar *
7698
932k
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7699
932k
    const xmlChar *name;
7700
7701
932k
    GROW;
7702
7703
932k
    if (RAW != '&')
7704
0
        return(NULL);
7705
932k
    NEXT;
7706
932k
    name = xmlParseName(ctxt);
7707
932k
    if (name == NULL) {
7708
225k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7709
225k
           "xmlParseEntityRef: no name\n");
7710
225k
        return(NULL);
7711
225k
    }
7712
707k
    if (RAW != ';') {
7713
186k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7714
186k
  return(NULL);
7715
186k
    }
7716
520k
    NEXT;
7717
7718
520k
    return(name);
7719
707k
}
7720
7721
/**
7722
 * xmlParseEntityRef:
7723
 * @ctxt:  an XML parser context
7724
 *
7725
 * DEPRECATED: Internal function, don't use.
7726
 *
7727
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7728
 */
7729
xmlEntityPtr
7730
0
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7731
0
    const xmlChar *name;
7732
7733
0
    if (ctxt == NULL)
7734
0
        return(NULL);
7735
7736
0
    name = xmlParseEntityRefInternal(ctxt);
7737
0
    if (name == NULL)
7738
0
        return(NULL);
7739
7740
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7741
0
}
7742
7743
/**
7744
 * xmlParseStringEntityRef:
7745
 * @ctxt:  an XML parser context
7746
 * @str:  a pointer to an index in the string
7747
 *
7748
 * parse an entity reference, but this version parses it from
7749
 * a string value.
7750
 *
7751
 * [68] EntityRef ::= '&' Name ';'
7752
 *
7753
 * [ WFC: Entity Declared ]
7754
 * In a document without any DTD, a document with only an internal DTD
7755
 * subset which contains no parameter entity references, or a document
7756
 * with "standalone='yes'", the Name given in the entity reference
7757
 * must match that in an entity declaration, except that well-formed
7758
 * documents need not declare any of the following entities: amp, lt,
7759
 * gt, apos, quot.  The declaration of a parameter entity must precede
7760
 * any reference to it.  Similarly, the declaration of a general entity
7761
 * must precede any reference to it which appears in a default value in an
7762
 * attribute-list declaration. Note that if entities are declared in the
7763
 * external subset or in external parameter entities, a non-validating
7764
 * processor is not obligated to read and process their declarations;
7765
 * for such documents, the rule that an entity must be declared is a
7766
 * well-formedness constraint only if standalone='yes'.
7767
 *
7768
 * [ WFC: Parsed Entity ]
7769
 * An entity reference must not contain the name of an unparsed entity
7770
 *
7771
 * Returns the entity name, or NULL in case of error. The str pointer
7772
 * is updated to the current location in the string.
7773
 */
7774
static xmlChar *
7775
0
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7776
0
    xmlChar *name;
7777
0
    const xmlChar *ptr;
7778
0
    xmlChar cur;
7779
7780
0
    if ((str == NULL) || (*str == NULL))
7781
0
        return(NULL);
7782
0
    ptr = *str;
7783
0
    cur = *ptr;
7784
0
    if (cur != '&')
7785
0
  return(NULL);
7786
7787
0
    ptr++;
7788
0
    name = xmlParseStringName(ctxt, &ptr);
7789
0
    if (name == NULL) {
7790
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7791
0
           "xmlParseStringEntityRef: no name\n");
7792
0
  *str = ptr;
7793
0
  return(NULL);
7794
0
    }
7795
0
    if (*ptr != ';') {
7796
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7797
0
        xmlFree(name);
7798
0
  *str = ptr;
7799
0
  return(NULL);
7800
0
    }
7801
0
    ptr++;
7802
7803
0
    *str = ptr;
7804
0
    return(name);
7805
0
}
7806
7807
/**
7808
 * xmlParsePEReference:
7809
 * @ctxt:  an XML parser context
7810
 *
7811
 * DEPRECATED: Internal function, don't use.
7812
 *
7813
 * Parse a parameter entity reference. Always consumes '%'.
7814
 *
7815
 * The entity content is handled directly by pushing it's content as
7816
 * a new input stream.
7817
 *
7818
 * [69] PEReference ::= '%' Name ';'
7819
 *
7820
 * [ WFC: No Recursion ]
7821
 * A parsed entity must not contain a recursive
7822
 * reference to itself, either directly or indirectly.
7823
 *
7824
 * [ WFC: Entity Declared ]
7825
 * In a document without any DTD, a document with only an internal DTD
7826
 * subset which contains no parameter entity references, or a document
7827
 * with "standalone='yes'", ...  ... The declaration of a parameter
7828
 * entity must precede any reference to it...
7829
 *
7830
 * [ VC: Entity Declared ]
7831
 * In a document with an external subset or external parameter entities
7832
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7833
 * must precede any reference to it...
7834
 *
7835
 * [ WFC: In DTD ]
7836
 * Parameter-entity references may only appear in the DTD.
7837
 * NOTE: misleading but this is handled.
7838
 */
7839
void
7840
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7841
13.7k
{
7842
13.7k
    const xmlChar *name;
7843
13.7k
    xmlEntityPtr entity = NULL;
7844
13.7k
    xmlParserInputPtr input;
7845
7846
13.7k
    if (RAW != '%')
7847
0
        return;
7848
13.7k
    NEXT;
7849
13.7k
    name = xmlParseName(ctxt);
7850
13.7k
    if (name == NULL) {
7851
2.56k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7852
2.56k
  return;
7853
2.56k
    }
7854
11.2k
    if (RAW != ';') {
7855
507
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7856
507
        return;
7857
507
    }
7858
7859
10.7k
    NEXT;
7860
7861
    /* Must be set before xmlHandleUndeclaredEntity */
7862
10.7k
    ctxt->hasPErefs = 1;
7863
7864
    /*
7865
     * Request the entity from SAX
7866
     */
7867
10.7k
    if ((ctxt->sax != NULL) &&
7868
10.7k
  (ctxt->sax->getParameterEntity != NULL))
7869
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7870
7871
10.7k
    if (entity == NULL) {
7872
10.7k
        xmlHandleUndeclaredEntity(ctxt, name);
7873
10.7k
    } else {
7874
  /*
7875
   * Internal checking in case the entity quest barfed
7876
   */
7877
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7878
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7879
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7880
0
      "Internal: %%%s; is not a parameter entity\n",
7881
0
        name, NULL);
7882
0
  } else {
7883
0
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7884
0
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7885
0
     ((ctxt->loadsubset == 0) &&
7886
0
      (ctxt->replaceEntities == 0) &&
7887
0
      (ctxt->validate == 0))))
7888
0
    return;
7889
7890
0
            if (entity->flags & XML_ENT_EXPANDING) {
7891
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7892
0
                xmlHaltParser(ctxt);
7893
0
                return;
7894
0
            }
7895
7896
0
      input = xmlNewEntityInputStream(ctxt, entity);
7897
0
      if (xmlCtxtPushInput(ctxt, input) < 0) {
7898
0
                xmlFreeInputStream(input);
7899
0
    return;
7900
0
            }
7901
7902
0
            entity->flags |= XML_ENT_EXPANDING;
7903
7904
0
            GROW;
7905
7906
0
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7907
0
                xmlDetectEncoding(ctxt);
7908
7909
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7910
0
                    (IS_BLANK_CH(NXT(5)))) {
7911
0
                    xmlParseTextDecl(ctxt);
7912
0
                }
7913
0
            }
7914
0
  }
7915
0
    }
7916
10.7k
}
7917
7918
/**
7919
 * xmlLoadEntityContent:
7920
 * @ctxt:  an XML parser context
7921
 * @entity: an unloaded system entity
7922
 *
7923
 * Load the content of an entity.
7924
 *
7925
 * Returns 0 in case of success and -1 in case of failure
7926
 */
7927
static int
7928
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7929
0
    xmlParserInputPtr oldinput, input = NULL;
7930
0
    xmlParserInputPtr *oldinputTab;
7931
0
    const xmlChar *oldencoding;
7932
0
    xmlChar *content = NULL;
7933
0
    xmlResourceType rtype;
7934
0
    size_t length, i;
7935
0
    int oldinputNr, oldinputMax;
7936
0
    int ret = -1;
7937
0
    int res;
7938
7939
0
    if ((ctxt == NULL) || (entity == NULL) ||
7940
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7941
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7942
0
  (entity->content != NULL)) {
7943
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7944
0
              "xmlLoadEntityContent parameter error");
7945
0
        return(-1);
7946
0
    }
7947
7948
0
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7949
0
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7950
0
    else
7951
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7952
7953
0
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7954
0
                            (char *) entity->ExternalID, rtype);
7955
0
    if (input == NULL)
7956
0
        return(-1);
7957
7958
0
    oldinput = ctxt->input;
7959
0
    oldinputNr = ctxt->inputNr;
7960
0
    oldinputMax = ctxt->inputMax;
7961
0
    oldinputTab = ctxt->inputTab;
7962
0
    oldencoding = ctxt->encoding;
7963
7964
0
    ctxt->input = NULL;
7965
0
    ctxt->inputNr = 0;
7966
0
    ctxt->inputMax = 1;
7967
0
    ctxt->encoding = NULL;
7968
0
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7969
0
    if (ctxt->inputTab == NULL) {
7970
0
        xmlErrMemory(ctxt);
7971
0
        xmlFreeInputStream(input);
7972
0
        goto error;
7973
0
    }
7974
7975
0
    xmlBufResetInput(input->buf->buffer, input);
7976
7977
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
7978
0
        xmlFreeInputStream(input);
7979
0
        goto error;
7980
0
    }
7981
7982
0
    xmlDetectEncoding(ctxt);
7983
7984
    /*
7985
     * Parse a possible text declaration first
7986
     */
7987
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7988
0
  xmlParseTextDecl(ctxt);
7989
        /*
7990
         * An XML-1.0 document can't reference an entity not XML-1.0
7991
         */
7992
0
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7993
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7994
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7995
0
                           "Version mismatch between document and entity\n");
7996
0
        }
7997
0
    }
7998
7999
0
    length = input->cur - input->base;
8000
0
    xmlBufShrink(input->buf->buffer, length);
8001
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
8002
8003
0
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
8004
0
        ;
8005
8006
0
    xmlBufResetInput(input->buf->buffer, input);
8007
8008
0
    if (res < 0) {
8009
0
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
8010
0
        goto error;
8011
0
    }
8012
8013
0
    length = xmlBufUse(input->buf->buffer);
8014
0
    if (length > INT_MAX) {
8015
0
        xmlErrMemory(ctxt);
8016
0
        goto error;
8017
0
    }
8018
8019
0
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
8020
0
    if (content == NULL) {
8021
0
        xmlErrMemory(ctxt);
8022
0
        goto error;
8023
0
    }
8024
8025
0
    for (i = 0; i < length; ) {
8026
0
        int clen = length - i;
8027
0
        int c = xmlGetUTF8Char(content + i, &clen);
8028
8029
0
        if ((c < 0) || (!IS_CHAR(c))) {
8030
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8031
0
                              "xmlLoadEntityContent: invalid char value %d\n",
8032
0
                              content[i]);
8033
0
            goto error;
8034
0
        }
8035
0
        i += clen;
8036
0
    }
8037
8038
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
8039
0
    entity->content = content;
8040
0
    entity->length = length;
8041
0
    content = NULL;
8042
0
    ret = 0;
8043
8044
0
error:
8045
0
    while (ctxt->inputNr > 0)
8046
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
8047
0
    xmlFree(ctxt->inputTab);
8048
0
    xmlFree((xmlChar *) ctxt->encoding);
8049
8050
0
    ctxt->input = oldinput;
8051
0
    ctxt->inputNr = oldinputNr;
8052
0
    ctxt->inputMax = oldinputMax;
8053
0
    ctxt->inputTab = oldinputTab;
8054
0
    ctxt->encoding = oldencoding;
8055
8056
0
    xmlFree(content);
8057
8058
0
    return(ret);
8059
0
}
8060
8061
/**
8062
 * xmlParseStringPEReference:
8063
 * @ctxt:  an XML parser context
8064
 * @str:  a pointer to an index in the string
8065
 *
8066
 * parse PEReference declarations
8067
 *
8068
 * [69] PEReference ::= '%' Name ';'
8069
 *
8070
 * [ WFC: No Recursion ]
8071
 * A parsed entity must not contain a recursive
8072
 * reference to itself, either directly or indirectly.
8073
 *
8074
 * [ WFC: Entity Declared ]
8075
 * In a document without any DTD, a document with only an internal DTD
8076
 * subset which contains no parameter entity references, or a document
8077
 * with "standalone='yes'", ...  ... The declaration of a parameter
8078
 * entity must precede any reference to it...
8079
 *
8080
 * [ VC: Entity Declared ]
8081
 * In a document with an external subset or external parameter entities
8082
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8083
 * must precede any reference to it...
8084
 *
8085
 * [ WFC: In DTD ]
8086
 * Parameter-entity references may only appear in the DTD.
8087
 * NOTE: misleading but this is handled.
8088
 *
8089
 * Returns the string of the entity content.
8090
 *         str is updated to the current value of the index
8091
 */
8092
static xmlEntityPtr
8093
4.46k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8094
4.46k
    const xmlChar *ptr;
8095
4.46k
    xmlChar cur;
8096
4.46k
    xmlChar *name;
8097
4.46k
    xmlEntityPtr entity = NULL;
8098
8099
4.46k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8100
4.46k
    ptr = *str;
8101
4.46k
    cur = *ptr;
8102
4.46k
    if (cur != '%')
8103
0
        return(NULL);
8104
4.46k
    ptr++;
8105
4.46k
    name = xmlParseStringName(ctxt, &ptr);
8106
4.46k
    if (name == NULL) {
8107
1.37k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8108
1.37k
           "xmlParseStringPEReference: no name\n");
8109
1.37k
  *str = ptr;
8110
1.37k
  return(NULL);
8111
1.37k
    }
8112
3.08k
    cur = *ptr;
8113
3.08k
    if (cur != ';') {
8114
1.11k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8115
1.11k
  xmlFree(name);
8116
1.11k
  *str = ptr;
8117
1.11k
  return(NULL);
8118
1.11k
    }
8119
1.96k
    ptr++;
8120
8121
    /* Must be set before xmlHandleUndeclaredEntity */
8122
1.96k
    ctxt->hasPErefs = 1;
8123
8124
    /*
8125
     * Request the entity from SAX
8126
     */
8127
1.96k
    if ((ctxt->sax != NULL) &&
8128
1.96k
  (ctxt->sax->getParameterEntity != NULL))
8129
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8130
8131
1.96k
    if (entity == NULL) {
8132
1.96k
        xmlHandleUndeclaredEntity(ctxt, name);
8133
1.96k
    } else {
8134
  /*
8135
   * Internal checking in case the entity quest barfed
8136
   */
8137
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8138
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8139
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8140
0
        "%%%s; is not a parameter entity\n",
8141
0
        name, NULL);
8142
0
  }
8143
0
    }
8144
8145
1.96k
    xmlFree(name);
8146
1.96k
    *str = ptr;
8147
1.96k
    return(entity);
8148
3.08k
}
8149
8150
/**
8151
 * xmlParseDocTypeDecl:
8152
 * @ctxt:  an XML parser context
8153
 *
8154
 * DEPRECATED: Internal function, don't use.
8155
 *
8156
 * parse a DOCTYPE declaration
8157
 *
8158
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8159
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8160
 *
8161
 * [ VC: Root Element Type ]
8162
 * The Name in the document type declaration must match the element
8163
 * type of the root element.
8164
 */
8165
8166
void
8167
9.39k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8168
9.39k
    const xmlChar *name = NULL;
8169
9.39k
    xmlChar *ExternalID = NULL;
8170
9.39k
    xmlChar *URI = NULL;
8171
8172
    /*
8173
     * We know that '<!DOCTYPE' has been detected.
8174
     */
8175
9.39k
    SKIP(9);
8176
8177
9.39k
    if (SKIP_BLANKS == 0) {
8178
169
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8179
169
                       "Space required after 'DOCTYPE'\n");
8180
169
    }
8181
8182
    /*
8183
     * Parse the DOCTYPE name.
8184
     */
8185
9.39k
    name = xmlParseName(ctxt);
8186
9.39k
    if (name == NULL) {
8187
15
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8188
15
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8189
15
    }
8190
9.39k
    ctxt->intSubName = name;
8191
8192
9.39k
    SKIP_BLANKS;
8193
8194
    /*
8195
     * Check for SystemID and ExternalID
8196
     */
8197
9.39k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8198
8199
9.39k
    if ((URI != NULL) || (ExternalID != NULL)) {
8200
457
        ctxt->hasExternalSubset = 1;
8201
457
    }
8202
9.39k
    ctxt->extSubURI = URI;
8203
9.39k
    ctxt->extSubSystem = ExternalID;
8204
8205
9.39k
    SKIP_BLANKS;
8206
8207
    /*
8208
     * Create and update the internal subset.
8209
     */
8210
9.39k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8211
0
  (!ctxt->disableSAX))
8212
0
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8213
8214
9.39k
    if ((RAW != '[') && (RAW != '>')) {
8215
246
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8216
246
    }
8217
9.39k
}
8218
8219
/**
8220
 * xmlParseInternalSubset:
8221
 * @ctxt:  an XML parser context
8222
 *
8223
 * parse the internal subset declaration
8224
 *
8225
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8226
 */
8227
8228
static void
8229
8.75k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8230
    /*
8231
     * Is there any DTD definition ?
8232
     */
8233
8.75k
    if (RAW == '[') {
8234
8.75k
        int oldInputNr = ctxt->inputNr;
8235
8236
8.75k
        NEXT;
8237
  /*
8238
   * Parse the succession of Markup declarations and
8239
   * PEReferences.
8240
   * Subsequence (markupdecl | PEReference | S)*
8241
   */
8242
8.75k
  SKIP_BLANKS;
8243
167k
  while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) &&
8244
164k
               (PARSER_STOPPED(ctxt) == 0)) {
8245
8246
            /*
8247
             * Conditional sections are allowed from external entities included
8248
             * by PE References in the internal subset.
8249
             */
8250
163k
            if ((PARSER_EXTERNAL(ctxt)) &&
8251
0
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8252
0
                xmlParseConditionalSections(ctxt);
8253
163k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8254
145k
          xmlParseMarkupDecl(ctxt);
8255
145k
            } else if (RAW == '%') {
8256
13.7k
          xmlParsePEReference(ctxt);
8257
13.7k
            } else {
8258
4.03k
    xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8259
4.03k
                break;
8260
4.03k
            }
8261
159k
      SKIP_BLANKS_PE;
8262
159k
            SHRINK;
8263
159k
            GROW;
8264
159k
  }
8265
8266
8.75k
        while (ctxt->inputNr > oldInputNr)
8267
0
            xmlPopPE(ctxt);
8268
8269
8.75k
  if (RAW == ']') {
8270
3.31k
      NEXT;
8271
3.31k
      SKIP_BLANKS;
8272
3.31k
  }
8273
8.75k
    }
8274
8275
    /*
8276
     * We should be at the end of the DOCTYPE declaration.
8277
     */
8278
8.75k
    if ((ctxt->wellFormed) && (RAW != '>')) {
8279
19
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8280
19
  return;
8281
19
    }
8282
8.73k
    NEXT;
8283
8.73k
}
8284
8285
#ifdef LIBXML_SAX1_ENABLED
8286
/**
8287
 * xmlParseAttribute:
8288
 * @ctxt:  an XML parser context
8289
 * @value:  a xmlChar ** used to store the value of the attribute
8290
 *
8291
 * DEPRECATED: Internal function, don't use.
8292
 *
8293
 * parse an attribute
8294
 *
8295
 * [41] Attribute ::= Name Eq AttValue
8296
 *
8297
 * [ WFC: No External Entity References ]
8298
 * Attribute values cannot contain direct or indirect entity references
8299
 * to external entities.
8300
 *
8301
 * [ WFC: No < in Attribute Values ]
8302
 * The replacement text of any entity referred to directly or indirectly in
8303
 * an attribute value (other than "&lt;") must not contain a <.
8304
 *
8305
 * [ VC: Attribute Value Type ]
8306
 * The attribute must have been declared; the value must be of the type
8307
 * declared for it.
8308
 *
8309
 * [25] Eq ::= S? '=' S?
8310
 *
8311
 * With namespace:
8312
 *
8313
 * [NS 11] Attribute ::= QName Eq AttValue
8314
 *
8315
 * Also the case QName == xmlns:??? is handled independently as a namespace
8316
 * definition.
8317
 *
8318
 * Returns the attribute name, and the value in *value.
8319
 */
8320
8321
const xmlChar *
8322
0
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8323
0
    const xmlChar *name;
8324
0
    xmlChar *val;
8325
8326
0
    *value = NULL;
8327
0
    GROW;
8328
0
    name = xmlParseName(ctxt);
8329
0
    if (name == NULL) {
8330
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8331
0
                 "error parsing attribute name\n");
8332
0
        return(NULL);
8333
0
    }
8334
8335
    /*
8336
     * read the value
8337
     */
8338
0
    SKIP_BLANKS;
8339
0
    if (RAW == '=') {
8340
0
        NEXT;
8341
0
  SKIP_BLANKS;
8342
0
  val = xmlParseAttValue(ctxt);
8343
0
    } else {
8344
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8345
0
         "Specification mandates value for attribute %s\n", name);
8346
0
  return(name);
8347
0
    }
8348
8349
    /*
8350
     * Check that xml:lang conforms to the specification
8351
     * No more registered as an error, just generate a warning now
8352
     * since this was deprecated in XML second edition
8353
     */
8354
0
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8355
0
  if (!xmlCheckLanguageID(val)) {
8356
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8357
0
              "Malformed value for xml:lang : %s\n",
8358
0
        val, NULL);
8359
0
  }
8360
0
    }
8361
8362
    /*
8363
     * Check that xml:space conforms to the specification
8364
     */
8365
0
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8366
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8367
0
      *(ctxt->space) = 0;
8368
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8369
0
      *(ctxt->space) = 1;
8370
0
  else {
8371
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8372
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8373
0
                                 val, NULL);
8374
0
  }
8375
0
    }
8376
8377
0
    *value = val;
8378
0
    return(name);
8379
0
}
8380
8381
/**
8382
 * xmlParseStartTag:
8383
 * @ctxt:  an XML parser context
8384
 *
8385
 * DEPRECATED: Internal function, don't use.
8386
 *
8387
 * Parse a start tag. Always consumes '<'.
8388
 *
8389
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8390
 *
8391
 * [ WFC: Unique Att Spec ]
8392
 * No attribute name may appear more than once in the same start-tag or
8393
 * empty-element tag.
8394
 *
8395
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8396
 *
8397
 * [ WFC: Unique Att Spec ]
8398
 * No attribute name may appear more than once in the same start-tag or
8399
 * empty-element tag.
8400
 *
8401
 * With namespace:
8402
 *
8403
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8404
 *
8405
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8406
 *
8407
 * Returns the element name parsed
8408
 */
8409
8410
const xmlChar *
8411
0
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8412
0
    const xmlChar *name;
8413
0
    const xmlChar *attname;
8414
0
    xmlChar *attvalue;
8415
0
    const xmlChar **atts = ctxt->atts;
8416
0
    int nbatts = 0;
8417
0
    int maxatts = ctxt->maxatts;
8418
0
    int i;
8419
8420
0
    if (RAW != '<') return(NULL);
8421
0
    NEXT1;
8422
8423
0
    name = xmlParseName(ctxt);
8424
0
    if (name == NULL) {
8425
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8426
0
       "xmlParseStartTag: invalid element name\n");
8427
0
        return(NULL);
8428
0
    }
8429
8430
    /*
8431
     * Now parse the attributes, it ends up with the ending
8432
     *
8433
     * (S Attribute)* S?
8434
     */
8435
0
    SKIP_BLANKS;
8436
0
    GROW;
8437
8438
0
    while (((RAW != '>') &&
8439
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8440
0
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8441
0
  attname = xmlParseAttribute(ctxt, &attvalue);
8442
0
        if (attname == NULL)
8443
0
      break;
8444
0
        if (attvalue != NULL) {
8445
      /*
8446
       * [ WFC: Unique Att Spec ]
8447
       * No attribute name may appear more than once in the same
8448
       * start-tag or empty-element tag.
8449
       */
8450
0
      for (i = 0; i < nbatts;i += 2) {
8451
0
          if (xmlStrEqual(atts[i], attname)) {
8452
0
        xmlErrAttributeDup(ctxt, NULL, attname);
8453
0
        goto failed;
8454
0
    }
8455
0
      }
8456
      /*
8457
       * Add the pair to atts
8458
       */
8459
0
      if (nbatts + 4 > maxatts) {
8460
0
          const xmlChar **n;
8461
0
                int newSize;
8462
8463
0
                newSize = xmlGrowCapacity(maxatts, sizeof(n[0]) * 2,
8464
0
                                          11, XML_MAX_ATTRS);
8465
0
                if (newSize < 0) {
8466
0
        xmlErrMemory(ctxt);
8467
0
        goto failed;
8468
0
    }
8469
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
8470
0
                if (newSize < 2)
8471
0
                    newSize = 2;
8472
0
#endif
8473
0
          n = xmlRealloc(atts, newSize * sizeof(n[0]) * 2);
8474
0
    if (n == NULL) {
8475
0
        xmlErrMemory(ctxt);
8476
0
        goto failed;
8477
0
    }
8478
0
    atts = n;
8479
0
                maxatts = newSize * 2;
8480
0
    ctxt->atts = atts;
8481
0
    ctxt->maxatts = maxatts;
8482
0
      }
8483
8484
0
      atts[nbatts++] = attname;
8485
0
      atts[nbatts++] = attvalue;
8486
0
      atts[nbatts] = NULL;
8487
0
      atts[nbatts + 1] = NULL;
8488
8489
0
            attvalue = NULL;
8490
0
  }
8491
8492
0
failed:
8493
8494
0
        if (attvalue != NULL)
8495
0
            xmlFree(attvalue);
8496
8497
0
  GROW
8498
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8499
0
      break;
8500
0
  if (SKIP_BLANKS == 0) {
8501
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8502
0
         "attributes construct error\n");
8503
0
  }
8504
0
  SHRINK;
8505
0
        GROW;
8506
0
    }
8507
8508
    /*
8509
     * SAX: Start of Element !
8510
     */
8511
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8512
0
  (!ctxt->disableSAX)) {
8513
0
  if (nbatts > 0)
8514
0
      ctxt->sax->startElement(ctxt->userData, name, atts);
8515
0
  else
8516
0
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8517
0
    }
8518
8519
0
    if (atts != NULL) {
8520
        /* Free only the content strings */
8521
0
        for (i = 1;i < nbatts;i+=2)
8522
0
      if (atts[i] != NULL)
8523
0
         xmlFree((xmlChar *) atts[i]);
8524
0
    }
8525
0
    return(name);
8526
0
}
8527
8528
/**
8529
 * xmlParseEndTag1:
8530
 * @ctxt:  an XML parser context
8531
 * @line:  line of the start tag
8532
 * @nsNr:  number of namespaces on the start tag
8533
 *
8534
 * Parse an end tag. Always consumes '</'.
8535
 *
8536
 * [42] ETag ::= '</' Name S? '>'
8537
 *
8538
 * With namespace
8539
 *
8540
 * [NS 9] ETag ::= '</' QName S? '>'
8541
 */
8542
8543
static void
8544
0
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8545
0
    const xmlChar *name;
8546
8547
0
    GROW;
8548
0
    if ((RAW != '<') || (NXT(1) != '/')) {
8549
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8550
0
           "xmlParseEndTag: '</' not found\n");
8551
0
  return;
8552
0
    }
8553
0
    SKIP(2);
8554
8555
0
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8556
8557
    /*
8558
     * We should definitely be at the ending "S? '>'" part
8559
     */
8560
0
    GROW;
8561
0
    SKIP_BLANKS;
8562
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8563
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8564
0
    } else
8565
0
  NEXT1;
8566
8567
    /*
8568
     * [ WFC: Element Type Match ]
8569
     * The Name in an element's end-tag must match the element type in the
8570
     * start-tag.
8571
     *
8572
     */
8573
0
    if (name != (xmlChar*)1) {
8574
0
        if (name == NULL) name = BAD_CAST "unparsable";
8575
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8576
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
8577
0
                    ctxt->name, line, name);
8578
0
    }
8579
8580
    /*
8581
     * SAX: End of Tag
8582
     */
8583
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8584
0
  (!ctxt->disableSAX))
8585
0
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8586
8587
0
    namePop(ctxt);
8588
0
    spacePop(ctxt);
8589
0
}
8590
8591
/**
8592
 * xmlParseEndTag:
8593
 * @ctxt:  an XML parser context
8594
 *
8595
 * DEPRECATED: Internal function, don't use.
8596
 *
8597
 * parse an end of tag
8598
 *
8599
 * [42] ETag ::= '</' Name S? '>'
8600
 *
8601
 * With namespace
8602
 *
8603
 * [NS 9] ETag ::= '</' QName S? '>'
8604
 */
8605
8606
void
8607
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8608
0
    xmlParseEndTag1(ctxt, 0);
8609
0
}
8610
#endif /* LIBXML_SAX1_ENABLED */
8611
8612
/************************************************************************
8613
 *                  *
8614
 *          SAX 2 specific operations       *
8615
 *                  *
8616
 ************************************************************************/
8617
8618
/**
8619
 * xmlParseQNameHashed:
8620
 * @ctxt:  an XML parser context
8621
 * @prefix:  pointer to store the prefix part
8622
 *
8623
 * parse an XML Namespace QName
8624
 *
8625
 * [6]  QName  ::= (Prefix ':')? LocalPart
8626
 * [7]  Prefix  ::= NCName
8627
 * [8]  LocalPart  ::= NCName
8628
 *
8629
 * Returns the Name parsed or NULL
8630
 */
8631
8632
static xmlHashedString
8633
62.6M
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8634
62.6M
    xmlHashedString l, p;
8635
62.6M
    int start, isNCName = 0;
8636
8637
62.6M
    l.name = NULL;
8638
62.6M
    p.name = NULL;
8639
8640
62.6M
    GROW;
8641
62.6M
    start = CUR_PTR - BASE_PTR;
8642
8643
62.6M
    l = xmlParseNCName(ctxt);
8644
62.6M
    if (l.name != NULL) {
8645
62.4M
        isNCName = 1;
8646
62.4M
        if (CUR == ':') {
8647
23.4M
            NEXT;
8648
23.4M
            p = l;
8649
23.4M
            l = xmlParseNCName(ctxt);
8650
23.4M
        }
8651
62.4M
    }
8652
62.6M
    if ((l.name == NULL) || (CUR == ':')) {
8653
667k
        xmlChar *tmp;
8654
8655
667k
        l.name = NULL;
8656
667k
        p.name = NULL;
8657
667k
        if ((isNCName == 0) && (CUR != ':'))
8658
63.1k
            return(l);
8659
603k
        tmp = xmlParseNmtoken(ctxt);
8660
603k
        if (tmp != NULL)
8661
511k
            xmlFree(tmp);
8662
603k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8663
603k
                                CUR_PTR - (BASE_PTR + start));
8664
603k
        if (l.name == NULL) {
8665
0
            xmlErrMemory(ctxt);
8666
0
            return(l);
8667
0
        }
8668
603k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8669
603k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8670
603k
    }
8671
8672
62.6M
    *prefix = p;
8673
62.6M
    return(l);
8674
62.6M
}
8675
8676
/**
8677
 * xmlParseQName:
8678
 * @ctxt:  an XML parser context
8679
 * @prefix:  pointer to store the prefix part
8680
 *
8681
 * parse an XML Namespace QName
8682
 *
8683
 * [6]  QName  ::= (Prefix ':')? LocalPart
8684
 * [7]  Prefix  ::= NCName
8685
 * [8]  LocalPart  ::= NCName
8686
 *
8687
 * Returns the Name parsed or NULL
8688
 */
8689
8690
static const xmlChar *
8691
12.1k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8692
12.1k
    xmlHashedString n, p;
8693
8694
12.1k
    n = xmlParseQNameHashed(ctxt, &p);
8695
12.1k
    if (n.name == NULL)
8696
535
        return(NULL);
8697
11.5k
    *prefix = p.name;
8698
11.5k
    return(n.name);
8699
12.1k
}
8700
8701
/**
8702
 * xmlParseQNameAndCompare:
8703
 * @ctxt:  an XML parser context
8704
 * @name:  the localname
8705
 * @prefix:  the prefix, if any.
8706
 *
8707
 * parse an XML name and compares for match
8708
 * (specialized for endtag parsing)
8709
 *
8710
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8711
 * and the name for mismatch
8712
 */
8713
8714
static const xmlChar *
8715
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8716
6.05M
                        xmlChar const *prefix) {
8717
6.05M
    const xmlChar *cmp;
8718
6.05M
    const xmlChar *in;
8719
6.05M
    const xmlChar *ret;
8720
6.05M
    const xmlChar *prefix2;
8721
8722
6.05M
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8723
8724
6.05M
    GROW;
8725
6.05M
    in = ctxt->input->cur;
8726
8727
6.05M
    cmp = prefix;
8728
19.4M
    while (*in != 0 && *in == *cmp) {
8729
13.3M
  ++in;
8730
13.3M
  ++cmp;
8731
13.3M
    }
8732
6.05M
    if ((*cmp == 0) && (*in == ':')) {
8733
6.04M
        in++;
8734
6.04M
  cmp = name;
8735
42.0M
  while (*in != 0 && *in == *cmp) {
8736
35.9M
      ++in;
8737
35.9M
      ++cmp;
8738
35.9M
  }
8739
6.04M
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8740
      /* success */
8741
6.04M
            ctxt->input->col += in - ctxt->input->cur;
8742
6.04M
      ctxt->input->cur = in;
8743
6.04M
      return((const xmlChar*) 1);
8744
6.04M
  }
8745
6.04M
    }
8746
    /*
8747
     * all strings coms from the dictionary, equality can be done directly
8748
     */
8749
12.1k
    ret = xmlParseQName (ctxt, &prefix2);
8750
12.1k
    if (ret == NULL)
8751
535
        return(NULL);
8752
11.6k
    if ((ret == name) && (prefix == prefix2))
8753
260
  return((const xmlChar*) 1);
8754
11.3k
    return ret;
8755
11.6k
}
8756
8757
/**
8758
 * xmlParseAttribute2:
8759
 * @ctxt:  an XML parser context
8760
 * @pref:  the element prefix
8761
 * @elem:  the element name
8762
 * @hprefix:  a xmlHashedString * used to store the attribute prefix
8763
 * @value:  a xmlChar ** used to store the value of the attribute
8764
 * @len:  an int * to save the length of the attribute
8765
 * @alloc:  an int * to indicate if the attribute was allocated
8766
 *
8767
 * parse an attribute in the new SAX2 framework.
8768
 *
8769
 * Returns the attribute name, and the value in *value.
8770
 */
8771
8772
static xmlHashedString
8773
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8774
                   const xmlChar * pref, const xmlChar * elem,
8775
                   xmlHashedString * hprefix, xmlChar ** value,
8776
                   int *len, int *alloc)
8777
24.0M
{
8778
24.0M
    xmlHashedString hname;
8779
24.0M
    const xmlChar *prefix, *name;
8780
24.0M
    xmlChar *val = NULL, *internal_val = NULL;
8781
24.0M
    int normalize = 0;
8782
24.0M
    int isNamespace;
8783
8784
24.0M
    *value = NULL;
8785
24.0M
    GROW;
8786
24.0M
    hname = xmlParseQNameHashed(ctxt, hprefix);
8787
24.0M
    if (hname.name == NULL) {
8788
50.5k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8789
50.5k
                       "error parsing attribute name\n");
8790
50.5k
        return(hname);
8791
50.5k
    }
8792
23.9M
    name = hname.name;
8793
23.9M
    prefix = hprefix->name;
8794
8795
    /*
8796
     * get the type if needed
8797
     */
8798
23.9M
    if (ctxt->attsSpecial != NULL) {
8799
31.8k
        int type;
8800
8801
31.8k
        type = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
8802
31.8k
                                              prefix, name));
8803
31.8k
        if (type != 0)
8804
14.7k
            normalize = 1;
8805
31.8k
    }
8806
8807
    /*
8808
     * read the value
8809
     */
8810
23.9M
    SKIP_BLANKS;
8811
23.9M
    if (RAW == '=') {
8812
23.9M
        NEXT;
8813
23.9M
        SKIP_BLANKS;
8814
23.9M
        isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8815
23.5M
                       (prefix == ctxt->str_xmlns));
8816
23.9M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize,
8817
23.9M
                                       isNamespace);
8818
23.9M
        if (val == NULL)
8819
8.30k
            goto error;
8820
23.9M
    } else {
8821
17.4k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8822
17.4k
                          "Specification mandates value for attribute %s\n",
8823
17.4k
                          name);
8824
17.4k
        goto error;
8825
17.4k
    }
8826
8827
23.9M
    if (prefix == ctxt->str_xml) {
8828
        /*
8829
         * Check that xml:lang conforms to the specification
8830
         * No more registered as an error, just generate a warning now
8831
         * since this was deprecated in XML second edition
8832
         */
8833
105k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8834
0
            internal_val = xmlStrndup(val, *len);
8835
0
            if (internal_val == NULL)
8836
0
                goto mem_error;
8837
0
            if (!xmlCheckLanguageID(internal_val)) {
8838
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8839
0
                              "Malformed value for xml:lang : %s\n",
8840
0
                              internal_val, NULL);
8841
0
            }
8842
0
        }
8843
8844
        /*
8845
         * Check that xml:space conforms to the specification
8846
         */
8847
105k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8848
80.3k
            internal_val = xmlStrndup(val, *len);
8849
80.3k
            if (internal_val == NULL)
8850
0
                goto mem_error;
8851
80.3k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8852
163
                *(ctxt->space) = 0;
8853
80.1k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8854
78.5k
                *(ctxt->space) = 1;
8855
1.56k
            else {
8856
1.56k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8857
1.56k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8858
1.56k
                              internal_val, NULL);
8859
1.56k
            }
8860
80.3k
        }
8861
105k
        if (internal_val) {
8862
80.3k
            xmlFree(internal_val);
8863
80.3k
        }
8864
105k
    }
8865
8866
23.9M
    *value = val;
8867
23.9M
    return (hname);
8868
8869
0
mem_error:
8870
0
    xmlErrMemory(ctxt);
8871
25.1k
error:
8872
25.1k
    if ((val != NULL) && (*alloc != 0))
8873
0
        xmlFree(val);
8874
25.1k
    return(hname);
8875
0
}
8876
8877
/**
8878
 * xmlAttrHashInsert:
8879
 * @ctxt: parser context
8880
 * @size: size of the hash table
8881
 * @name: attribute name
8882
 * @uri: namespace uri
8883
 * @hashValue: combined hash value of name and uri
8884
 * @aindex: attribute index (this is a multiple of 5)
8885
 *
8886
 * Inserts a new attribute into the hash table.
8887
 *
8888
 * Returns INT_MAX if no existing attribute was found, the attribute
8889
 * index if an attribute was found, -1 if a memory allocation failed.
8890
 */
8891
static int
8892
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8893
17.5M
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8894
17.5M
    xmlAttrHashBucket *table = ctxt->attrHash;
8895
17.5M
    xmlAttrHashBucket *bucket;
8896
17.5M
    unsigned hindex;
8897
8898
17.5M
    hindex = hashValue & (size - 1);
8899
17.5M
    bucket = &table[hindex];
8900
8901
20.5M
    while (bucket->index >= 0) {
8902
3.09M
        const xmlChar **atts = &ctxt->atts[bucket->index];
8903
8904
3.09M
        if (name == atts[0]) {
8905
193k
            int nsIndex = XML_PTR_TO_INT(atts[2]);
8906
8907
193k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8908
193k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8909
3.30k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8910
80.4k
                return(bucket->index);
8911
193k
        }
8912
8913
3.01M
        hindex++;
8914
3.01M
        bucket++;
8915
3.01M
        if (hindex >= size) {
8916
983k
            hindex = 0;
8917
983k
            bucket = table;
8918
983k
        }
8919
3.01M
    }
8920
8921
17.4M
    bucket->index = aindex;
8922
8923
17.4M
    return(INT_MAX);
8924
17.5M
}
8925
8926
static int
8927
xmlAttrHashInsertQName(xmlParserCtxtPtr ctxt, unsigned size,
8928
                       const xmlChar *name, const xmlChar *prefix,
8929
103
                       unsigned hashValue, int aindex) {
8930
103
    xmlAttrHashBucket *table = ctxt->attrHash;
8931
103
    xmlAttrHashBucket *bucket;
8932
103
    unsigned hindex;
8933
8934
103
    hindex = hashValue & (size - 1);
8935
103
    bucket = &table[hindex];
8936
8937
148
    while (bucket->index >= 0) {
8938
83
        const xmlChar **atts = &ctxt->atts[bucket->index];
8939
8940
83
        if ((name == atts[0]) && (prefix == atts[1]))
8941
38
            return(bucket->index);
8942
8943
45
        hindex++;
8944
45
        bucket++;
8945
45
        if (hindex >= size) {
8946
0
            hindex = 0;
8947
0
            bucket = table;
8948
0
        }
8949
45
    }
8950
8951
65
    bucket->index = aindex;
8952
8953
65
    return(INT_MAX);
8954
103
}
8955
/**
8956
 * xmlParseStartTag2:
8957
 * @ctxt:  an XML parser context
8958
 *
8959
 * Parse a start tag. Always consumes '<'.
8960
 *
8961
 * This routine is called when running SAX2 parsing
8962
 *
8963
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8964
 *
8965
 * [ WFC: Unique Att Spec ]
8966
 * No attribute name may appear more than once in the same start-tag or
8967
 * empty-element tag.
8968
 *
8969
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8970
 *
8971
 * [ WFC: Unique Att Spec ]
8972
 * No attribute name may appear more than once in the same start-tag or
8973
 * empty-element tag.
8974
 *
8975
 * With namespace:
8976
 *
8977
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8978
 *
8979
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8980
 *
8981
 * Returns the element name parsed
8982
 */
8983
8984
static const xmlChar *
8985
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8986
38.6M
                  const xmlChar **URI, int *nbNsPtr) {
8987
38.6M
    xmlHashedString hlocalname;
8988
38.6M
    xmlHashedString hprefix;
8989
38.6M
    xmlHashedString hattname;
8990
38.6M
    xmlHashedString haprefix;
8991
38.6M
    const xmlChar *localname;
8992
38.6M
    const xmlChar *prefix;
8993
38.6M
    const xmlChar *attname;
8994
38.6M
    const xmlChar *aprefix;
8995
38.6M
    const xmlChar *uri;
8996
38.6M
    xmlChar *attvalue = NULL;
8997
38.6M
    const xmlChar **atts = ctxt->atts;
8998
38.6M
    unsigned attrHashSize = 0;
8999
38.6M
    int maxatts = ctxt->maxatts;
9000
38.6M
    int nratts, nbatts, nbdef;
9001
38.6M
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
9002
38.6M
    int alloc = 0;
9003
38.6M
    int numNsErr = 0;
9004
38.6M
    int numDupErr = 0;
9005
9006
38.6M
    if (RAW != '<') return(NULL);
9007
38.6M
    NEXT1;
9008
9009
38.6M
    nbatts = 0;
9010
38.6M
    nratts = 0;
9011
38.6M
    nbdef = 0;
9012
38.6M
    nbNs = 0;
9013
38.6M
    nbTotalDef = 0;
9014
38.6M
    attval = 0;
9015
9016
38.6M
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
9017
0
        xmlErrMemory(ctxt);
9018
0
        return(NULL);
9019
0
    }
9020
9021
38.6M
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
9022
38.6M
    if (hlocalname.name == NULL) {
9023
12.1k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9024
12.1k
           "StartTag: invalid element name\n");
9025
12.1k
        return(NULL);
9026
12.1k
    }
9027
38.6M
    localname = hlocalname.name;
9028
38.6M
    prefix = hprefix.name;
9029
9030
    /*
9031
     * Now parse the attributes, it ends up with the ending
9032
     *
9033
     * (S Attribute)* S?
9034
     */
9035
38.6M
    SKIP_BLANKS;
9036
38.6M
    GROW;
9037
9038
    /*
9039
     * The ctxt->atts array will be ultimately passed to the SAX callback
9040
     * containing five xmlChar pointers for each attribute:
9041
     *
9042
     * [0] attribute name
9043
     * [1] attribute prefix
9044
     * [2] namespace URI
9045
     * [3] attribute value
9046
     * [4] end of attribute value
9047
     *
9048
     * To save memory, we reuse this array temporarily and store integers
9049
     * in these pointer variables.
9050
     *
9051
     * [0] attribute name
9052
     * [1] attribute prefix
9053
     * [2] hash value of attribute prefix, and later namespace index
9054
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
9055
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
9056
     *
9057
     * The ctxt->attallocs array contains an additional unsigned int for
9058
     * each attribute, containing the hash value of the attribute name
9059
     * and the alloc flag in bit 31.
9060
     */
9061
9062
51.6M
    while (((RAW != '>') &&
9063
26.8M
     ((RAW != '/') || (NXT(1) != '>')) &&
9064
24.0M
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
9065
24.0M
  int len = -1;
9066
9067
24.0M
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
9068
24.0M
                                          &haprefix, &attvalue, &len,
9069
24.0M
                                          &alloc);
9070
24.0M
        if (hattname.name == NULL)
9071
50.5k
      break;
9072
23.9M
        if (attvalue == NULL)
9073
25.1k
            goto next_attr;
9074
23.9M
        attname = hattname.name;
9075
23.9M
        aprefix = haprefix.name;
9076
23.9M
  if (len < 0) len = xmlStrlen(attvalue);
9077
9078
23.9M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9079
387k
            xmlHashedString huri;
9080
387k
            xmlURIPtr parsedUri;
9081
9082
387k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9083
387k
            uri = huri.name;
9084
387k
            if (uri == NULL) {
9085
0
                xmlErrMemory(ctxt);
9086
0
                goto next_attr;
9087
0
            }
9088
387k
            if (*uri != 0) {
9089
382k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9090
0
                    xmlErrMemory(ctxt);
9091
0
                    goto next_attr;
9092
0
                }
9093
382k
                if (parsedUri == NULL) {
9094
128k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9095
128k
                             "xmlns: '%s' is not a valid URI\n",
9096
128k
                                       uri, NULL, NULL);
9097
253k
                } else {
9098
253k
                    if (parsedUri->scheme == NULL) {
9099
92.9k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9100
92.9k
                                  "xmlns: URI %s is not absolute\n",
9101
92.9k
                                  uri, NULL, NULL);
9102
92.9k
                    }
9103
253k
                    xmlFreeURI(parsedUri);
9104
253k
                }
9105
382k
                if (uri == ctxt->str_xml_ns) {
9106
144
                    if (attname != ctxt->str_xml) {
9107
144
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9108
144
                     "xml namespace URI cannot be the default namespace\n",
9109
144
                                 NULL, NULL, NULL);
9110
144
                    }
9111
144
                    goto next_attr;
9112
144
                }
9113
382k
                if ((len == 29) &&
9114
10.6k
                    (xmlStrEqual(uri,
9115
10.6k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9116
337
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9117
337
                         "reuse of the xmlns namespace name is forbidden\n",
9118
337
                             NULL, NULL, NULL);
9119
337
                    goto next_attr;
9120
337
                }
9121
382k
            }
9122
9123
386k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9124
261k
                nbNs++;
9125
23.5M
        } else if (aprefix == ctxt->str_xmlns) {
9126
1.03M
            xmlHashedString huri;
9127
1.03M
            xmlURIPtr parsedUri;
9128
9129
1.03M
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9130
1.03M
            uri = huri.name;
9131
1.03M
            if (uri == NULL) {
9132
0
                xmlErrMemory(ctxt);
9133
0
                goto next_attr;
9134
0
            }
9135
9136
1.03M
            if (attname == ctxt->str_xml) {
9137
361
                if (uri != ctxt->str_xml_ns) {
9138
116
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9139
116
                             "xml namespace prefix mapped to wrong URI\n",
9140
116
                             NULL, NULL, NULL);
9141
116
                }
9142
                /*
9143
                 * Do not keep a namespace definition node
9144
                 */
9145
361
                goto next_attr;
9146
361
            }
9147
1.03M
            if (uri == ctxt->str_xml_ns) {
9148
46
                if (attname != ctxt->str_xml) {
9149
46
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9150
46
                             "xml namespace URI mapped to wrong prefix\n",
9151
46
                             NULL, NULL, NULL);
9152
46
                }
9153
46
                goto next_attr;
9154
46
            }
9155
1.03M
            if (attname == ctxt->str_xmlns) {
9156
318
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9157
318
                         "redefinition of the xmlns prefix is forbidden\n",
9158
318
                         NULL, NULL, NULL);
9159
318
                goto next_attr;
9160
318
            }
9161
1.03M
            if ((len == 29) &&
9162
20.3k
                (xmlStrEqual(uri,
9163
20.3k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9164
56
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9165
56
                         "reuse of the xmlns namespace name is forbidden\n",
9166
56
                         NULL, NULL, NULL);
9167
56
                goto next_attr;
9168
56
            }
9169
1.03M
            if ((uri == NULL) || (uri[0] == 0)) {
9170
483
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9171
483
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9172
483
                              attname, NULL, NULL);
9173
483
                goto next_attr;
9174
1.03M
            } else {
9175
1.03M
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9176
0
                    xmlErrMemory(ctxt);
9177
0
                    goto next_attr;
9178
0
                }
9179
1.03M
                if (parsedUri == NULL) {
9180
117k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9181
117k
                         "xmlns:%s: '%s' is not a valid URI\n",
9182
117k
                                       attname, uri, NULL);
9183
921k
                } else {
9184
921k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9185
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9186
0
                                  "xmlns:%s: URI %s is not absolute\n",
9187
0
                                  attname, uri, NULL);
9188
0
                    }
9189
921k
                    xmlFreeURI(parsedUri);
9190
921k
                }
9191
1.03M
            }
9192
9193
1.03M
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9194
970k
                nbNs++;
9195
22.5M
        } else {
9196
            /*
9197
             * Populate attributes array, see above for repurposing
9198
             * of xmlChar pointers.
9199
             */
9200
22.5M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9201
633k
                int res = xmlCtxtGrowAttrs(ctxt);
9202
9203
633k
                maxatts = ctxt->maxatts;
9204
633k
                atts = ctxt->atts;
9205
9206
633k
                if (res < 0)
9207
0
                    goto next_attr;
9208
633k
            }
9209
22.5M
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9210
22.5M
                                        ((unsigned) alloc << 31);
9211
22.5M
            atts[nbatts++] = attname;
9212
22.5M
            atts[nbatts++] = aprefix;
9213
22.5M
            atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9214
22.5M
            if (alloc) {
9215
113k
                atts[nbatts++] = attvalue;
9216
113k
                attvalue += len;
9217
113k
                atts[nbatts++] = attvalue;
9218
22.4M
            } else {
9219
                /*
9220
                 * attvalue points into the input buffer which can be
9221
                 * reallocated. Store differences to input->base instead.
9222
                 * The pointers will be reconstructed later.
9223
                 */
9224
22.4M
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9225
22.4M
                attvalue += len;
9226
22.4M
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9227
22.4M
            }
9228
            /*
9229
             * tag if some deallocation is needed
9230
             */
9231
22.5M
            if (alloc != 0) attval = 1;
9232
22.5M
            attvalue = NULL; /* moved into atts */
9233
22.5M
        }
9234
9235
23.9M
next_attr:
9236
23.9M
        if ((attvalue != NULL) && (alloc != 0)) {
9237
118k
            xmlFree(attvalue);
9238
118k
            attvalue = NULL;
9239
118k
        }
9240
9241
23.9M
  GROW
9242
23.9M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9243
10.9M
      break;
9244
13.0M
  if (SKIP_BLANKS == 0) {
9245
33.2k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9246
33.2k
         "attributes construct error\n");
9247
33.2k
      break;
9248
33.2k
  }
9249
13.0M
        GROW;
9250
13.0M
    }
9251
9252
    /*
9253
     * Namespaces from default attributes
9254
     */
9255
38.6M
    if (ctxt->attsDefault != NULL) {
9256
172k
        xmlDefAttrsPtr defaults;
9257
9258
172k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9259
172k
  if (defaults != NULL) {
9260
1.12M
      for (i = 0; i < defaults->nbAttrs; i++) {
9261
1.00M
                xmlDefAttr *attr = &defaults->attrs[i];
9262
9263
1.00M
          attname = attr->name.name;
9264
1.00M
    aprefix = attr->prefix.name;
9265
9266
1.00M
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9267
37.0k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9268
9269
37.0k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9270
31.8k
                        nbNs++;
9271
971k
    } else if (aprefix == ctxt->str_xmlns) {
9272
426k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9273
9274
426k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9275
426k
                                      NULL, 1) > 0)
9276
426k
                        nbNs++;
9277
544k
    } else {
9278
544k
                    if (nratts + nbTotalDef >= XML_MAX_ATTRS) {
9279
0
                        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
9280
0
                                    "Maximum number of attributes exceeded");
9281
0
                        break;
9282
0
                    }
9283
544k
                    nbTotalDef += 1;
9284
544k
                }
9285
1.00M
      }
9286
119k
  }
9287
172k
    }
9288
9289
    /*
9290
     * Resolve attribute namespaces
9291
     */
9292
61.1M
    for (i = 0; i < nbatts; i += 5) {
9293
22.5M
        attname = atts[i];
9294
22.5M
        aprefix = atts[i+1];
9295
9296
        /*
9297
  * The default namespace does not apply to attribute names.
9298
  */
9299
22.5M
  if (aprefix == NULL) {
9300
14.0M
            nsIndex = NS_INDEX_EMPTY;
9301
14.0M
        } else if (aprefix == ctxt->str_xml) {
9302
105k
            nsIndex = NS_INDEX_XML;
9303
8.31M
        } else {
9304
8.31M
            haprefix.name = aprefix;
9305
8.31M
            haprefix.hashValue = (size_t) atts[i+2];
9306
8.31M
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9307
9308
8.31M
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9309
479k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9310
479k
        "Namespace prefix %s for %s on %s is not defined\n",
9311
479k
        aprefix, attname, localname);
9312
479k
                nsIndex = NS_INDEX_EMPTY;
9313
479k
            }
9314
8.31M
        }
9315
9316
22.5M
        atts[i+2] = XML_INT_TO_PTR(nsIndex);
9317
22.5M
    }
9318
9319
    /*
9320
     * Maximum number of attributes including default attributes.
9321
     */
9322
38.6M
    maxAtts = nratts + nbTotalDef;
9323
9324
    /*
9325
     * Verify that attribute names are unique.
9326
     */
9327
38.6M
    if (maxAtts > 1) {
9328
5.89M
        attrHashSize = 4;
9329
9.31M
        while (attrHashSize / 2 < (unsigned) maxAtts)
9330
3.42M
            attrHashSize *= 2;
9331
9332
5.89M
        if (attrHashSize > ctxt->attrHashMax) {
9333
258k
            xmlAttrHashBucket *tmp;
9334
9335
258k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9336
258k
            if (tmp == NULL) {
9337
0
                xmlErrMemory(ctxt);
9338
0
                goto done;
9339
0
            }
9340
9341
258k
            ctxt->attrHash = tmp;
9342
258k
            ctxt->attrHashMax = attrHashSize;
9343
258k
        }
9344
9345
5.89M
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9346
9347
23.3M
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9348
17.4M
            const xmlChar *nsuri;
9349
17.4M
            unsigned hashValue, nameHashValue, uriHashValue;
9350
17.4M
            int res;
9351
9352
17.4M
            attname = atts[i];
9353
17.4M
            aprefix = atts[i+1];
9354
17.4M
            nsIndex = XML_PTR_TO_INT(atts[i+2]);
9355
            /* Hash values always have bit 31 set, see dict.c */
9356
17.4M
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9357
9358
17.4M
            if (nsIndex == NS_INDEX_EMPTY) {
9359
                /*
9360
                 * Prefix with empty namespace means an undeclared
9361
                 * prefix which was already reported above.
9362
                 */
9363
11.4M
                if (aprefix != NULL)
9364
392k
                    continue;
9365
11.0M
                nsuri = NULL;
9366
11.0M
                uriHashValue = URI_HASH_EMPTY;
9367
11.0M
            } else if (nsIndex == NS_INDEX_XML) {
9368
3.05k
                nsuri = ctxt->str_xml_ns;
9369
3.05k
                uriHashValue = URI_HASH_XML;
9370
5.93M
            } else {
9371
5.93M
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9372
5.93M
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9373
5.93M
            }
9374
9375
17.0M
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9376
17.0M
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9377
17.0M
                                    hashValue, i);
9378
17.0M
            if (res < 0)
9379
0
                continue;
9380
9381
            /*
9382
             * [ WFC: Unique Att Spec ]
9383
             * No attribute name may appear more than once in the same
9384
             * start-tag or empty-element tag.
9385
             * As extended by the Namespace in XML REC.
9386
             */
9387
17.0M
            if (res < INT_MAX) {
9388
65.9k
                if (aprefix == atts[res+1]) {
9389
65.6k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9390
65.6k
                    numDupErr += 1;
9391
65.6k
                } else {
9392
356
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9393
356
                             "Namespaced Attribute %s in '%s' redefined\n",
9394
356
                             attname, nsuri, NULL);
9395
356
                    numNsErr += 1;
9396
356
                }
9397
65.9k
            }
9398
17.0M
        }
9399
5.89M
    }
9400
9401
    /*
9402
     * Default attributes
9403
     */
9404
38.6M
    if (ctxt->attsDefault != NULL) {
9405
172k
        xmlDefAttrsPtr defaults;
9406
9407
172k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9408
172k
  if (defaults != NULL) {
9409
1.12M
      for (i = 0; i < defaults->nbAttrs; i++) {
9410
1.00M
                xmlDefAttr *attr = &defaults->attrs[i];
9411
1.00M
                const xmlChar *nsuri = NULL;
9412
1.00M
                unsigned hashValue, uriHashValue = 0;
9413
1.00M
                int res;
9414
9415
1.00M
          attname = attr->name.name;
9416
1.00M
    aprefix = attr->prefix.name;
9417
9418
1.00M
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9419
37.0k
                    continue;
9420
971k
    if (aprefix == ctxt->str_xmlns)
9421
426k
                    continue;
9422
9423
544k
                if (aprefix == NULL) {
9424
304k
                    nsIndex = NS_INDEX_EMPTY;
9425
304k
                    nsuri = NULL;
9426
304k
                    uriHashValue = URI_HASH_EMPTY;
9427
304k
                } else if (aprefix == ctxt->str_xml) {
9428
53.9k
                    nsIndex = NS_INDEX_XML;
9429
53.9k
                    nsuri = ctxt->str_xml_ns;
9430
53.9k
                    uriHashValue = URI_HASH_XML;
9431
186k
                } else {
9432
186k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9433
186k
                    if ((nsIndex == INT_MAX) ||
9434
184k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9435
184k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9436
184k
                                 "Namespace prefix %s for %s on %s is not "
9437
184k
                                 "defined\n",
9438
184k
                                 aprefix, attname, localname);
9439
184k
                        nsIndex = NS_INDEX_EMPTY;
9440
184k
                        nsuri = NULL;
9441
184k
                        uriHashValue = URI_HASH_EMPTY;
9442
184k
                    } else {
9443
1.19k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9444
1.19k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9445
1.19k
                    }
9446
186k
                }
9447
9448
                /*
9449
                 * Check whether the attribute exists
9450
                 */
9451
544k
                if (maxAtts > 1) {
9452
519k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9453
519k
                                                   uriHashValue);
9454
519k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9455
519k
                                            hashValue, nbatts);
9456
519k
                    if (res < 0)
9457
0
                        continue;
9458
519k
                    if (res < INT_MAX) {
9459
14.4k
                        if (aprefix == atts[res+1])
9460
2.03k
                            continue;
9461
12.4k
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9462
12.4k
                                 "Namespaced Attribute %s in '%s' redefined\n",
9463
12.4k
                                 attname, nsuri, NULL);
9464
12.4k
                    }
9465
519k
                }
9466
9467
542k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9468
9469
542k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9470
8.52k
                    res = xmlCtxtGrowAttrs(ctxt);
9471
9472
8.52k
                    maxatts = ctxt->maxatts;
9473
8.52k
                    atts = ctxt->atts;
9474
9475
8.52k
                    if (res < 0) {
9476
0
                        localname = NULL;
9477
0
                        goto done;
9478
0
                    }
9479
8.52k
                }
9480
9481
542k
                atts[nbatts++] = attname;
9482
542k
                atts[nbatts++] = aprefix;
9483
542k
                atts[nbatts++] = XML_INT_TO_PTR(nsIndex);
9484
542k
                atts[nbatts++] = attr->value.name;
9485
542k
                atts[nbatts++] = attr->valueEnd;
9486
542k
                if ((ctxt->standalone == 1) && (attr->external != 0)) {
9487
0
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9488
0
                            "standalone: attribute %s on %s defaulted "
9489
0
                            "from external subset\n",
9490
0
                            attname, localname);
9491
0
                }
9492
542k
                nbdef++;
9493
542k
      }
9494
119k
  }
9495
172k
    }
9496
9497
    /*
9498
     * Using a single hash table for nsUri/localName pairs cannot
9499
     * detect duplicate QNames reliably. The following example will
9500
     * only result in two namespace errors.
9501
     *
9502
     * <doc xmlns:a="a" xmlns:b="a">
9503
     *   <elem a:a="" b:a="" b:a=""/>
9504
     * </doc>
9505
     *
9506
     * If we saw more than one namespace error but no duplicate QNames
9507
     * were found, we have to scan for duplicate QNames.
9508
     */
9509
38.6M
    if ((numDupErr == 0) && (numNsErr > 1)) {
9510
23
        memset(ctxt->attrHash, -1,
9511
23
               attrHashSize * sizeof(ctxt->attrHash[0]));
9512
9513
156
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9514
133
            unsigned hashValue, nameHashValue, prefixHashValue;
9515
133
            int res;
9516
9517
133
            aprefix = atts[i+1];
9518
133
            if (aprefix == NULL)
9519
30
                continue;
9520
9521
103
            attname = atts[i];
9522
            /* Hash values always have bit 31 set, see dict.c */
9523
103
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9524
103
            prefixHashValue = xmlDictComputeHash(ctxt->dict, aprefix);
9525
9526
103
            hashValue = xmlDictCombineHash(nameHashValue, prefixHashValue);
9527
103
            res = xmlAttrHashInsertQName(ctxt, attrHashSize, attname,
9528
103
                                         aprefix, hashValue, i);
9529
103
            if (res < INT_MAX)
9530
38
                xmlErrAttributeDup(ctxt, aprefix, attname);
9531
103
        }
9532
23
    }
9533
9534
    /*
9535
     * Reconstruct attribute pointers
9536
     */
9537
61.6M
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9538
        /* namespace URI */
9539
23.0M
        nsIndex = XML_PTR_TO_INT(atts[i+2]);
9540
23.0M
        if (nsIndex == INT_MAX)
9541
15.0M
            atts[i+2] = NULL;
9542
7.99M
        else if (nsIndex == INT_MAX - 1)
9543
159k
            atts[i+2] = ctxt->str_xml_ns;
9544
7.83M
        else
9545
7.83M
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9546
9547
23.0M
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9548
22.4M
            atts[i+3] = BASE_PTR + XML_PTR_TO_INT(atts[i+3]);  /* value */
9549
22.4M
            atts[i+4] = BASE_PTR + XML_PTR_TO_INT(atts[i+4]);  /* valuend */
9550
22.4M
        }
9551
23.0M
    }
9552
9553
38.6M
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9554
38.6M
    if ((prefix != NULL) && (uri == NULL)) {
9555
677k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9556
677k
           "Namespace prefix %s on %s is not defined\n",
9557
677k
     prefix, localname, NULL);
9558
677k
    }
9559
38.6M
    *pref = prefix;
9560
38.6M
    *URI = uri;
9561
9562
    /*
9563
     * SAX callback
9564
     */
9565
38.6M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9566
38.6M
  (!ctxt->disableSAX)) {
9567
38.4M
  if (nbNs > 0)
9568
555k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9569
555k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9570
555k
        nbatts / 5, nbdef, atts);
9571
37.9M
  else
9572
37.9M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9573
37.9M
                          0, NULL, nbatts / 5, nbdef, atts);
9574
38.4M
    }
9575
9576
38.6M
done:
9577
    /*
9578
     * Free allocated attribute values
9579
     */
9580
38.6M
    if (attval != 0) {
9581
446k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9582
345k
      if (ctxt->attallocs[j] & 0x80000000)
9583
113k
          xmlFree((xmlChar *) atts[i+3]);
9584
101k
    }
9585
9586
38.6M
    *nbNsPtr = nbNs;
9587
38.6M
    return(localname);
9588
38.6M
}
9589
9590
/**
9591
 * xmlParseEndTag2:
9592
 * @ctxt:  an XML parser context
9593
 * @line:  line of the start tag
9594
 * @nsNr:  number of namespaces on the start tag
9595
 *
9596
 * Parse an end tag. Always consumes '</'.
9597
 *
9598
 * [42] ETag ::= '</' Name S? '>'
9599
 *
9600
 * With namespace
9601
 *
9602
 * [NS 9] ETag ::= '</' QName S? '>'
9603
 */
9604
9605
static void
9606
8.71M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9607
8.71M
    const xmlChar *name;
9608
9609
8.71M
    GROW;
9610
8.71M
    if ((RAW != '<') || (NXT(1) != '/')) {
9611
5
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9612
5
  return;
9613
5
    }
9614
8.71M
    SKIP(2);
9615
9616
8.71M
    if (tag->prefix == NULL)
9617
2.66M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9618
6.05M
    else
9619
6.05M
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9620
9621
    /*
9622
     * We should definitely be at the ending "S? '>'" part
9623
     */
9624
8.71M
    GROW;
9625
8.71M
    SKIP_BLANKS;
9626
8.71M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9627
7.68k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9628
7.68k
    } else
9629
8.70M
  NEXT1;
9630
9631
    /*
9632
     * [ WFC: Element Type Match ]
9633
     * The Name in an element's end-tag must match the element type in the
9634
     * start-tag.
9635
     *
9636
     */
9637
8.71M
    if (name != (xmlChar*)1) {
9638
16.0k
        if (name == NULL) name = BAD_CAST "unparsable";
9639
16.0k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9640
16.0k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9641
16.0k
                    ctxt->name, tag->line, name);
9642
16.0k
    }
9643
9644
    /*
9645
     * SAX: End of Tag
9646
     */
9647
8.71M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9648
8.71M
  (!ctxt->disableSAX))
9649
8.69M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9650
8.69M
                                tag->URI);
9651
9652
8.71M
    spacePop(ctxt);
9653
8.71M
    if (tag->nsNr != 0)
9654
187k
  xmlParserNsPop(ctxt, tag->nsNr);
9655
8.71M
}
9656
9657
/**
9658
 * xmlParseCDSect:
9659
 * @ctxt:  an XML parser context
9660
 *
9661
 * DEPRECATED: Internal function, don't use.
9662
 *
9663
 * Parse escaped pure raw content. Always consumes '<!['.
9664
 *
9665
 * [18] CDSect ::= CDStart CData CDEnd
9666
 *
9667
 * [19] CDStart ::= '<![CDATA['
9668
 *
9669
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9670
 *
9671
 * [21] CDEnd ::= ']]>'
9672
 */
9673
void
9674
15.3k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9675
15.3k
    xmlChar *buf = NULL;
9676
15.3k
    int len = 0;
9677
15.3k
    int size = XML_PARSER_BUFFER_SIZE;
9678
15.3k
    int r, rl;
9679
15.3k
    int s, sl;
9680
15.3k
    int cur, l;
9681
15.3k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9682
15.3k
                    XML_MAX_HUGE_LENGTH :
9683
15.3k
                    XML_MAX_TEXT_LENGTH;
9684
9685
15.3k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9686
0
        return;
9687
15.3k
    SKIP(3);
9688
9689
15.3k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9690
0
        return;
9691
15.3k
    SKIP(6);
9692
9693
15.3k
    r = xmlCurrentCharRecover(ctxt, &rl);
9694
15.3k
    if (!IS_CHAR(r)) {
9695
20
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9696
20
        goto out;
9697
20
    }
9698
15.2k
    NEXTL(rl);
9699
15.2k
    s = xmlCurrentCharRecover(ctxt, &sl);
9700
15.2k
    if (!IS_CHAR(s)) {
9701
32
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9702
32
        goto out;
9703
32
    }
9704
15.2k
    NEXTL(sl);
9705
15.2k
    cur = xmlCurrentCharRecover(ctxt, &l);
9706
15.2k
    buf = xmlMalloc(size);
9707
15.2k
    if (buf == NULL) {
9708
0
  xmlErrMemory(ctxt);
9709
0
        goto out;
9710
0
    }
9711
60.2M
    while (IS_CHAR(cur) &&
9712
60.2M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9713
60.2M
  if (len + 5 >= size) {
9714
79.2k
      xmlChar *tmp;
9715
79.2k
            int newSize;
9716
9717
79.2k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9718
79.2k
            if (newSize < 0) {
9719
0
                xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9720
0
                               "CData section too big found\n");
9721
0
                goto out;
9722
0
            }
9723
79.2k
      tmp = xmlRealloc(buf, newSize);
9724
79.2k
      if (tmp == NULL) {
9725
0
    xmlErrMemory(ctxt);
9726
0
                goto out;
9727
0
      }
9728
79.2k
      buf = tmp;
9729
79.2k
      size = newSize;
9730
79.2k
  }
9731
60.2M
  COPY_BUF(buf, len, r);
9732
60.2M
  r = s;
9733
60.2M
  rl = sl;
9734
60.2M
  s = cur;
9735
60.2M
  sl = l;
9736
60.2M
  NEXTL(l);
9737
60.2M
  cur = xmlCurrentCharRecover(ctxt, &l);
9738
60.2M
    }
9739
15.2k
    buf[len] = 0;
9740
15.2k
    if (cur != '>') {
9741
401
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9742
401
                       "CData section not finished\n%.50s\n", buf);
9743
401
        goto out;
9744
401
    }
9745
14.8k
    NEXTL(l);
9746
9747
    /*
9748
     * OK the buffer is to be consumed as cdata.
9749
     */
9750
14.8k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9751
14.8k
        if ((ctxt->sax->cdataBlock != NULL) &&
9752
0
            ((ctxt->options & XML_PARSE_NOCDATA) == 0)) {
9753
0
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9754
14.8k
        } else if (ctxt->sax->characters != NULL) {
9755
14.8k
            ctxt->sax->characters(ctxt->userData, buf, len);
9756
14.8k
        }
9757
14.8k
    }
9758
9759
15.3k
out:
9760
15.3k
    xmlFree(buf);
9761
15.3k
}
9762
9763
/**
9764
 * xmlParseContentInternal:
9765
 * @ctxt:  an XML parser context
9766
 *
9767
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9768
 * unexpected EOF to the caller.
9769
 */
9770
9771
static void
9772
5.75k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9773
5.75k
    int oldNameNr = ctxt->nameNr;
9774
5.75k
    int oldSpaceNr = ctxt->spaceNr;
9775
5.75k
    int oldNodeNr = ctxt->nodeNr;
9776
9777
5.75k
    GROW;
9778
1.78M
    while ((ctxt->input->cur < ctxt->input->end) &&
9779
1.78M
     (PARSER_STOPPED(ctxt) == 0)) {
9780
1.78M
  const xmlChar *cur = ctxt->input->cur;
9781
9782
  /*
9783
   * First case : a Processing Instruction.
9784
   */
9785
1.78M
  if ((*cur == '<') && (cur[1] == '?')) {
9786
135
      xmlParsePI(ctxt);
9787
135
  }
9788
9789
  /*
9790
   * Second case : a CDSection
9791
   */
9792
  /* 2.6.0 test was *cur not RAW */
9793
1.78M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9794
0
      xmlParseCDSect(ctxt);
9795
0
  }
9796
9797
  /*
9798
   * Third case :  a comment
9799
   */
9800
1.78M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9801
0
     (NXT(2) == '-') && (NXT(3) == '-')) {
9802
0
      xmlParseComment(ctxt);
9803
0
  }
9804
9805
  /*
9806
   * Fourth case :  a sub-element.
9807
   */
9808
1.78M
  else if (*cur == '<') {
9809
1.43M
            if (NXT(1) == '/') {
9810
414k
                if (ctxt->nameNr <= oldNameNr)
9811
5.57k
                    break;
9812
408k
          xmlParseElementEnd(ctxt);
9813
1.01M
            } else {
9814
1.01M
          xmlParseElementStart(ctxt);
9815
1.01M
            }
9816
1.43M
  }
9817
9818
  /*
9819
   * Fifth case : a reference. If if has not been resolved,
9820
   *    parsing returns it's Name, create the node
9821
   */
9822
9823
358k
  else if (*cur == '&') {
9824
11
      xmlParseReference(ctxt);
9825
11
  }
9826
9827
  /*
9828
   * Last case, text. Note that References are handled directly.
9829
   */
9830
358k
  else {
9831
358k
      xmlParseCharDataInternal(ctxt, 0);
9832
358k
  }
9833
9834
1.78M
  SHRINK;
9835
1.78M
  GROW;
9836
1.78M
    }
9837
9838
5.75k
    if ((ctxt->nameNr > oldNameNr) &&
9839
175
        (ctxt->input->cur >= ctxt->input->end) &&
9840
170
        (ctxt->wellFormed)) {
9841
0
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9842
0
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9843
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9844
0
                "Premature end of data in tag %s line %d\n",
9845
0
                name, line, NULL);
9846
0
    }
9847
9848
    /*
9849
     * Clean up in error case
9850
     */
9851
9852
5.83k
    while (ctxt->nodeNr > oldNodeNr)
9853
71
        nodePop(ctxt);
9854
9855
17.3k
    while (ctxt->nameNr > oldNameNr) {
9856
11.5k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9857
9858
11.5k
        if (tag->nsNr != 0)
9859
18
            xmlParserNsPop(ctxt, tag->nsNr);
9860
9861
11.5k
        namePop(ctxt);
9862
11.5k
    }
9863
9864
17.3k
    while (ctxt->spaceNr > oldSpaceNr)
9865
11.5k
        spacePop(ctxt);
9866
5.75k
}
9867
9868
/**
9869
 * xmlParseContent:
9870
 * @ctxt:  an XML parser context
9871
 *
9872
 * Parse XML element content. This is useful if you're only interested
9873
 * in custom SAX callbacks. If you want a node list, use
9874
 * xmlCtxtParseContent.
9875
 */
9876
void
9877
0
xmlParseContent(xmlParserCtxtPtr ctxt) {
9878
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9879
0
        return;
9880
9881
0
    xmlCtxtInitializeLate(ctxt);
9882
9883
0
    xmlParseContentInternal(ctxt);
9884
9885
0
    xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
9886
0
}
9887
9888
/**
9889
 * xmlParseElement:
9890
 * @ctxt:  an XML parser context
9891
 *
9892
 * DEPRECATED: Internal function, don't use.
9893
 *
9894
 * parse an XML element
9895
 *
9896
 * [39] element ::= EmptyElemTag | STag content ETag
9897
 *
9898
 * [ WFC: Element Type Match ]
9899
 * The Name in an element's end-tag must match the element type in the
9900
 * start-tag.
9901
 *
9902
 */
9903
9904
void
9905
5.97k
xmlParseElement(xmlParserCtxtPtr ctxt) {
9906
5.97k
    if (xmlParseElementStart(ctxt) != 0)
9907
215
        return;
9908
9909
5.75k
    xmlParseContentInternal(ctxt);
9910
9911
5.75k
    if (ctxt->input->cur >= ctxt->input->end) {
9912
175
        if (ctxt->wellFormed) {
9913
0
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9914
0
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9915
0
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9916
0
                    "Premature end of data in tag %s line %d\n",
9917
0
                    name, line, NULL);
9918
0
        }
9919
175
        return;
9920
175
    }
9921
9922
5.58k
    xmlParseElementEnd(ctxt);
9923
5.58k
}
9924
9925
/**
9926
 * xmlParseElementStart:
9927
 * @ctxt:  an XML parser context
9928
 *
9929
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9930
 * opening tag was parsed, 1 if an empty element was parsed.
9931
 *
9932
 * Always consumes '<'.
9933
 */
9934
static int
9935
1.02M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9936
1.02M
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9937
1.02M
    const xmlChar *name;
9938
1.02M
    const xmlChar *prefix = NULL;
9939
1.02M
    const xmlChar *URI = NULL;
9940
1.02M
    xmlParserNodeInfo node_info;
9941
1.02M
    int line;
9942
1.02M
    xmlNodePtr cur;
9943
1.02M
    int nbNs = 0;
9944
9945
1.02M
    if (ctxt->nameNr > maxDepth) {
9946
5
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9947
5
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9948
5
                ctxt->nameNr);
9949
5
  xmlHaltParser(ctxt);
9950
5
  return(-1);
9951
5
    }
9952
9953
    /* Capture start position */
9954
1.02M
    if (ctxt->record_info) {
9955
0
        node_info.begin_pos = ctxt->input->consumed +
9956
0
                          (CUR_PTR - ctxt->input->base);
9957
0
  node_info.begin_line = ctxt->input->line;
9958
0
    }
9959
9960
1.02M
    if (ctxt->spaceNr == 0)
9961
5.97k
  spacePush(ctxt, -1);
9962
1.01M
    else if (*ctxt->space == -2)
9963
0
  spacePush(ctxt, -1);
9964
1.01M
    else
9965
1.01M
  spacePush(ctxt, *ctxt->space);
9966
9967
1.02M
    line = ctxt->input->line;
9968
1.02M
#ifdef LIBXML_SAX1_ENABLED
9969
1.02M
    if (ctxt->sax2)
9970
1.02M
#endif /* LIBXML_SAX1_ENABLED */
9971
1.02M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9972
0
#ifdef LIBXML_SAX1_ENABLED
9973
0
    else
9974
0
  name = xmlParseStartTag(ctxt);
9975
1.02M
#endif /* LIBXML_SAX1_ENABLED */
9976
1.02M
    if (name == NULL) {
9977
6.92k
  spacePop(ctxt);
9978
6.92k
        return(-1);
9979
6.92k
    }
9980
1.01M
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9981
1.01M
    cur = ctxt->node;
9982
9983
1.01M
#ifdef LIBXML_VALID_ENABLED
9984
    /*
9985
     * [ VC: Root Element Type ]
9986
     * The Name in the document type declaration must match the element
9987
     * type of the root element.
9988
     */
9989
1.01M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9990
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9991
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9992
1.01M
#endif /* LIBXML_VALID_ENABLED */
9993
9994
    /*
9995
     * Check for an Empty Element.
9996
     */
9997
1.01M
    if ((RAW == '/') && (NXT(1) == '>')) {
9998
545k
        SKIP(2);
9999
545k
  if (ctxt->sax2) {
10000
545k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10001
545k
    (!ctxt->disableSAX))
10002
531k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10003
545k
#ifdef LIBXML_SAX1_ENABLED
10004
545k
  } else {
10005
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10006
0
    (!ctxt->disableSAX))
10007
0
    ctxt->sax->endElement(ctxt->userData, name);
10008
0
#endif /* LIBXML_SAX1_ENABLED */
10009
0
  }
10010
545k
  namePop(ctxt);
10011
545k
  spacePop(ctxt);
10012
545k
  if (nbNs > 0)
10013
1.16k
      xmlParserNsPop(ctxt, nbNs);
10014
545k
  if (cur != NULL && ctxt->record_info) {
10015
0
            node_info.node = cur;
10016
0
            node_info.end_pos = ctxt->input->consumed +
10017
0
                                (CUR_PTR - ctxt->input->base);
10018
0
            node_info.end_line = ctxt->input->line;
10019
0
            xmlParserAddNodeInfo(ctxt, &node_info);
10020
0
  }
10021
545k
  return(1);
10022
545k
    }
10023
469k
    if (RAW == '>') {
10024
426k
        NEXT1;
10025
426k
        if (cur != NULL && ctxt->record_info) {
10026
0
            node_info.node = cur;
10027
0
            node_info.end_pos = 0;
10028
0
            node_info.end_line = 0;
10029
0
            xmlParserAddNodeInfo(ctxt, &node_info);
10030
0
        }
10031
426k
    } else {
10032
43.7k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10033
43.7k
         "Couldn't find end of Start Tag %s line %d\n",
10034
43.7k
                    name, line, NULL);
10035
10036
  /*
10037
   * end of parsing of this node.
10038
   */
10039
43.7k
  nodePop(ctxt);
10040
43.7k
  namePop(ctxt);
10041
43.7k
  spacePop(ctxt);
10042
43.7k
  if (nbNs > 0)
10043
125
      xmlParserNsPop(ctxt, nbNs);
10044
43.7k
  return(-1);
10045
43.7k
    }
10046
10047
426k
    return(0);
10048
469k
}
10049
10050
/**
10051
 * xmlParseElementEnd:
10052
 * @ctxt:  an XML parser context
10053
 *
10054
 * Parse the end of an XML element. Always consumes '</'.
10055
 */
10056
static void
10057
414k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10058
414k
    xmlNodePtr cur = ctxt->node;
10059
10060
414k
    if (ctxt->nameNr <= 0) {
10061
0
        if ((RAW == '<') && (NXT(1) == '/'))
10062
0
            SKIP(2);
10063
0
        return;
10064
0
    }
10065
10066
    /*
10067
     * parse the end of tag: '</' should be here.
10068
     */
10069
414k
    if (ctxt->sax2) {
10070
414k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10071
414k
  namePop(ctxt);
10072
414k
    }
10073
0
#ifdef LIBXML_SAX1_ENABLED
10074
0
    else
10075
0
  xmlParseEndTag1(ctxt, 0);
10076
414k
#endif /* LIBXML_SAX1_ENABLED */
10077
10078
    /*
10079
     * Capture end position
10080
     */
10081
414k
    if (cur != NULL && ctxt->record_info) {
10082
0
        xmlParserNodeInfoPtr node_info;
10083
10084
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10085
0
        if (node_info != NULL) {
10086
0
            node_info->end_pos = ctxt->input->consumed +
10087
0
                                 (CUR_PTR - ctxt->input->base);
10088
0
            node_info->end_line = ctxt->input->line;
10089
0
        }
10090
0
    }
10091
414k
}
10092
10093
/**
10094
 * xmlParseVersionNum:
10095
 * @ctxt:  an XML parser context
10096
 *
10097
 * DEPRECATED: Internal function, don't use.
10098
 *
10099
 * parse the XML version value.
10100
 *
10101
 * [26] VersionNum ::= '1.' [0-9]+
10102
 *
10103
 * In practice allow [0-9].[0-9]+ at that level
10104
 *
10105
 * Returns the string giving the XML version number, or NULL
10106
 */
10107
xmlChar *
10108
181k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10109
181k
    xmlChar *buf = NULL;
10110
181k
    int len = 0;
10111
181k
    int size = 10;
10112
181k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10113
175k
                    XML_MAX_TEXT_LENGTH :
10114
181k
                    XML_MAX_NAME_LENGTH;
10115
181k
    xmlChar cur;
10116
10117
181k
    buf = xmlMalloc(size);
10118
181k
    if (buf == NULL) {
10119
0
  xmlErrMemory(ctxt);
10120
0
  return(NULL);
10121
0
    }
10122
181k
    cur = CUR;
10123
181k
    if (!((cur >= '0') && (cur <= '9'))) {
10124
119
  xmlFree(buf);
10125
119
  return(NULL);
10126
119
    }
10127
181k
    buf[len++] = cur;
10128
181k
    NEXT;
10129
181k
    cur=CUR;
10130
181k
    if (cur != '.') {
10131
52
  xmlFree(buf);
10132
52
  return(NULL);
10133
52
    }
10134
180k
    buf[len++] = cur;
10135
180k
    NEXT;
10136
180k
    cur=CUR;
10137
369k
    while ((cur >= '0') && (cur <= '9')) {
10138
188k
  if (len + 1 >= size) {
10139
437
      xmlChar *tmp;
10140
437
            int newSize;
10141
10142
437
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10143
437
            if (newSize < 0) {
10144
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "VersionNum");
10145
0
                xmlFree(buf);
10146
0
                return(NULL);
10147
0
            }
10148
437
      tmp = xmlRealloc(buf, newSize);
10149
437
      if (tmp == NULL) {
10150
0
    xmlErrMemory(ctxt);
10151
0
          xmlFree(buf);
10152
0
    return(NULL);
10153
0
      }
10154
437
      buf = tmp;
10155
437
            size = newSize;
10156
437
  }
10157
188k
  buf[len++] = cur;
10158
188k
  NEXT;
10159
188k
  cur=CUR;
10160
188k
    }
10161
180k
    buf[len] = 0;
10162
180k
    return(buf);
10163
180k
}
10164
10165
/**
10166
 * xmlParseVersionInfo:
10167
 * @ctxt:  an XML parser context
10168
 *
10169
 * DEPRECATED: Internal function, don't use.
10170
 *
10171
 * parse the XML version.
10172
 *
10173
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10174
 *
10175
 * [25] Eq ::= S? '=' S?
10176
 *
10177
 * Returns the version string, e.g. "1.0"
10178
 */
10179
10180
xmlChar *
10181
182k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10182
182k
    xmlChar *version = NULL;
10183
10184
182k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10185
181k
  SKIP(7);
10186
181k
  SKIP_BLANKS;
10187
181k
  if (RAW != '=') {
10188
41
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10189
41
      return(NULL);
10190
41
        }
10191
181k
  NEXT;
10192
181k
  SKIP_BLANKS;
10193
181k
  if (RAW == '"') {
10194
180k
      NEXT;
10195
180k
      version = xmlParseVersionNum(ctxt);
10196
180k
      if (RAW != '"') {
10197
183
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10198
183
      } else
10199
180k
          NEXT;
10200
180k
  } else if (RAW == '\''){
10201
490
      NEXT;
10202
490
      version = xmlParseVersionNum(ctxt);
10203
490
      if (RAW != '\'') {
10204
10
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10205
10
      } else
10206
480
          NEXT;
10207
490
  } else {
10208
56
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10209
56
  }
10210
181k
    }
10211
182k
    return(version);
10212
182k
}
10213
10214
/**
10215
 * xmlParseEncName:
10216
 * @ctxt:  an XML parser context
10217
 *
10218
 * DEPRECATED: Internal function, don't use.
10219
 *
10220
 * parse the XML encoding name
10221
 *
10222
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10223
 *
10224
 * Returns the encoding name value or NULL
10225
 */
10226
xmlChar *
10227
150k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10228
150k
    xmlChar *buf = NULL;
10229
150k
    int len = 0;
10230
150k
    int size = 10;
10231
150k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10232
144k
                    XML_MAX_TEXT_LENGTH :
10233
150k
                    XML_MAX_NAME_LENGTH;
10234
150k
    xmlChar cur;
10235
10236
150k
    cur = CUR;
10237
150k
    if (((cur >= 'a') && (cur <= 'z')) ||
10238
150k
        ((cur >= 'A') && (cur <= 'Z'))) {
10239
150k
  buf = xmlMalloc(size);
10240
150k
  if (buf == NULL) {
10241
0
      xmlErrMemory(ctxt);
10242
0
      return(NULL);
10243
0
  }
10244
10245
150k
  buf[len++] = cur;
10246
150k
  NEXT;
10247
150k
  cur = CUR;
10248
801k
  while (((cur >= 'a') && (cur <= 'z')) ||
10249
760k
         ((cur >= 'A') && (cur <= 'Z')) ||
10250
460k
         ((cur >= '0') && (cur <= '9')) ||
10251
304k
         (cur == '.') || (cur == '_') ||
10252
651k
         (cur == '-')) {
10253
651k
      if (len + 1 >= size) {
10254
1.57k
          xmlChar *tmp;
10255
1.57k
                int newSize;
10256
10257
1.57k
                newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10258
1.57k
                if (newSize < 0) {
10259
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10260
0
                    xmlFree(buf);
10261
0
                    return(NULL);
10262
0
                }
10263
1.57k
    tmp = xmlRealloc(buf, newSize);
10264
1.57k
    if (tmp == NULL) {
10265
0
        xmlErrMemory(ctxt);
10266
0
        xmlFree(buf);
10267
0
        return(NULL);
10268
0
    }
10269
1.57k
    buf = tmp;
10270
1.57k
                size = newSize;
10271
1.57k
      }
10272
651k
      buf[len++] = cur;
10273
651k
      NEXT;
10274
651k
      cur = CUR;
10275
651k
        }
10276
150k
  buf[len] = 0;
10277
150k
    } else {
10278
57
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10279
57
    }
10280
150k
    return(buf);
10281
150k
}
10282
10283
/**
10284
 * xmlParseEncodingDecl:
10285
 * @ctxt:  an XML parser context
10286
 *
10287
 * DEPRECATED: Internal function, don't use.
10288
 *
10289
 * parse the XML encoding declaration
10290
 *
10291
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10292
 *
10293
 * this setups the conversion filters.
10294
 *
10295
 * Returns the encoding value or NULL
10296
 */
10297
10298
const xmlChar *
10299
152k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10300
152k
    xmlChar *encoding = NULL;
10301
10302
152k
    SKIP_BLANKS;
10303
152k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10304
2.59k
        return(NULL);
10305
10306
150k
    SKIP(8);
10307
150k
    SKIP_BLANKS;
10308
150k
    if (RAW != '=') {
10309
34
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10310
34
        return(NULL);
10311
34
    }
10312
150k
    NEXT;
10313
150k
    SKIP_BLANKS;
10314
150k
    if (RAW == '"') {
10315
150k
        NEXT;
10316
150k
        encoding = xmlParseEncName(ctxt);
10317
150k
        if (RAW != '"') {
10318
197
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10319
197
            xmlFree((xmlChar *) encoding);
10320
197
            return(NULL);
10321
197
        } else
10322
149k
            NEXT;
10323
150k
    } else if (RAW == '\''){
10324
39
        NEXT;
10325
39
        encoding = xmlParseEncName(ctxt);
10326
39
        if (RAW != '\'') {
10327
16
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10328
16
            xmlFree((xmlChar *) encoding);
10329
16
            return(NULL);
10330
16
        } else
10331
23
            NEXT;
10332
39
    } else {
10333
23
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10334
23
    }
10335
10336
149k
    if (encoding == NULL)
10337
31
        return(NULL);
10338
10339
149k
    xmlSetDeclaredEncoding(ctxt, encoding);
10340
10341
149k
    return(ctxt->encoding);
10342
149k
}
10343
10344
/**
10345
 * xmlParseSDDecl:
10346
 * @ctxt:  an XML parser context
10347
 *
10348
 * DEPRECATED: Internal function, don't use.
10349
 *
10350
 * parse the XML standalone declaration
10351
 *
10352
 * [32] SDDecl ::= S 'standalone' Eq
10353
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10354
 *
10355
 * [ VC: Standalone Document Declaration ]
10356
 * TODO The standalone document declaration must have the value "no"
10357
 * if any external markup declarations contain declarations of:
10358
 *  - attributes with default values, if elements to which these
10359
 *    attributes apply appear in the document without specifications
10360
 *    of values for these attributes, or
10361
 *  - entities (other than amp, lt, gt, apos, quot), if references
10362
 *    to those entities appear in the document, or
10363
 *  - attributes with values subject to normalization, where the
10364
 *    attribute appears in the document with a value which will change
10365
 *    as a result of normalization, or
10366
 *  - element types with element content, if white space occurs directly
10367
 *    within any instance of those types.
10368
 *
10369
 * Returns:
10370
 *   1 if standalone="yes"
10371
 *   0 if standalone="no"
10372
 *  -2 if standalone attribute is missing or invalid
10373
 *    (A standalone value of -2 means that the XML declaration was found,
10374
 *     but no value was specified for the standalone attribute).
10375
 */
10376
10377
int
10378
103k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10379
103k
    int standalone = -2;
10380
10381
103k
    SKIP_BLANKS;
10382
103k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10383
101k
  SKIP(10);
10384
101k
        SKIP_BLANKS;
10385
101k
  if (RAW != '=') {
10386
12
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10387
12
      return(standalone);
10388
12
        }
10389
101k
  NEXT;
10390
101k
  SKIP_BLANKS;
10391
101k
        if (RAW == '\''){
10392
54
      NEXT;
10393
54
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10394
38
          standalone = 0;
10395
38
                SKIP(2);
10396
38
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10397
6
                 (NXT(2) == 's')) {
10398
3
          standalone = 1;
10399
3
    SKIP(3);
10400
13
            } else {
10401
13
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10402
13
      }
10403
54
      if (RAW != '\'') {
10404
14
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10405
14
      } else
10406
40
          NEXT;
10407
101k
  } else if (RAW == '"'){
10408
101k
      NEXT;
10409
101k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10410
462
          standalone = 0;
10411
462
    SKIP(2);
10412
101k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10413
100k
                 (NXT(2) == 's')) {
10414
100k
          standalone = 1;
10415
100k
                SKIP(3);
10416
100k
            } else {
10417
46
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10418
46
      }
10419
101k
      if (RAW != '"') {
10420
61
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10421
61
      } else
10422
101k
          NEXT;
10423
101k
  } else {
10424
11
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10425
11
        }
10426
101k
    }
10427
103k
    return(standalone);
10428
103k
}
10429
10430
/**
10431
 * xmlParseXMLDecl:
10432
 * @ctxt:  an XML parser context
10433
 *
10434
 * DEPRECATED: Internal function, don't use.
10435
 *
10436
 * parse an XML declaration header
10437
 *
10438
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10439
 */
10440
10441
void
10442
182k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10443
182k
    xmlChar *version;
10444
10445
    /*
10446
     * This value for standalone indicates that the document has an
10447
     * XML declaration but it does not have a standalone attribute.
10448
     * It will be overwritten later if a standalone attribute is found.
10449
     */
10450
10451
182k
    ctxt->standalone = -2;
10452
10453
    /*
10454
     * We know that '<?xml' is here.
10455
     */
10456
182k
    SKIP(5);
10457
10458
182k
    if (!IS_BLANK_CH(RAW)) {
10459
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10460
0
                 "Blank needed after '<?xml'\n");
10461
0
    }
10462
182k
    SKIP_BLANKS;
10463
10464
    /*
10465
     * We must have the VersionInfo here.
10466
     */
10467
182k
    version = xmlParseVersionInfo(ctxt);
10468
182k
    if (version == NULL) {
10469
1.14k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10470
180k
    } else {
10471
180k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10472
      /*
10473
       * Changed here for XML-1.0 5th edition
10474
       */
10475
6.65k
      if (ctxt->options & XML_PARSE_OLD10) {
10476
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10477
0
                "Unsupported version '%s'\n",
10478
0
                version);
10479
6.65k
      } else {
10480
6.65k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10481
6.47k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10482
6.47k
                      "Unsupported version '%s'\n",
10483
6.47k
          version, NULL);
10484
6.47k
    } else {
10485
180
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10486
180
              "Unsupported version '%s'\n",
10487
180
              version);
10488
180
    }
10489
6.65k
      }
10490
6.65k
  }
10491
180k
  if (ctxt->version != NULL)
10492
0
      xmlFree((void *) ctxt->version);
10493
180k
  ctxt->version = version;
10494
180k
    }
10495
10496
    /*
10497
     * We may have the encoding declaration
10498
     */
10499
182k
    if (!IS_BLANK_CH(RAW)) {
10500
30.4k
        if ((RAW == '?') && (NXT(1) == '>')) {
10501
29.2k
      SKIP(2);
10502
29.2k
      return;
10503
29.2k
  }
10504
1.21k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10505
1.21k
    }
10506
152k
    xmlParseEncodingDecl(ctxt);
10507
10508
    /*
10509
     * We may have the standalone status.
10510
     */
10511
152k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10512
49.0k
        if ((RAW == '?') && (NXT(1) == '>')) {
10513
48.9k
      SKIP(2);
10514
48.9k
      return;
10515
48.9k
  }
10516
42
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10517
42
    }
10518
10519
    /*
10520
     * We can grow the input buffer freely at that point
10521
     */
10522
103k
    GROW;
10523
10524
103k
    SKIP_BLANKS;
10525
103k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10526
10527
103k
    SKIP_BLANKS;
10528
103k
    if ((RAW == '?') && (NXT(1) == '>')) {
10529
102k
        SKIP(2);
10530
102k
    } else if (RAW == '>') {
10531
        /* Deprecated old WD ... */
10532
59
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10533
59
  NEXT;
10534
1.75k
    } else {
10535
1.75k
        int c;
10536
10537
1.75k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10538
949k
        while ((PARSER_STOPPED(ctxt) == 0) &&
10539
949k
               ((c = CUR) != 0)) {
10540
948k
            NEXT;
10541
948k
            if (c == '>')
10542
1.22k
                break;
10543
948k
        }
10544
1.75k
    }
10545
103k
}
10546
10547
/**
10548
 * xmlCtxtGetVersion:
10549
 * @ctxt:  parser context
10550
 *
10551
 * Available since 2.14.0.
10552
 *
10553
 * Returns the version from the XML declaration.
10554
 */
10555
const xmlChar *
10556
0
xmlCtxtGetVersion(xmlParserCtxtPtr ctxt) {
10557
0
    if (ctxt == NULL)
10558
0
        return(NULL);
10559
10560
0
    return(ctxt->version);
10561
0
}
10562
10563
/**
10564
 * xmlCtxtGetStandalone:
10565
 * @ctxt:  parser context
10566
 *
10567
 * Available since 2.14.0.
10568
 *
10569
 * Returns the value from the standalone document declaration.
10570
 */
10571
int
10572
0
xmlCtxtGetStandalone(xmlParserCtxtPtr ctxt) {
10573
0
    if (ctxt == NULL)
10574
0
        return(0);
10575
10576
0
    return(ctxt->standalone);
10577
0
}
10578
10579
/**
10580
 * xmlParseMisc:
10581
 * @ctxt:  an XML parser context
10582
 *
10583
 * DEPRECATED: Internal function, don't use.
10584
 *
10585
 * parse an XML Misc* optional field.
10586
 *
10587
 * [27] Misc ::= Comment | PI |  S
10588
 */
10589
10590
void
10591
11.9k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10592
12.0k
    while (PARSER_STOPPED(ctxt) == 0) {
10593
12.0k
        SKIP_BLANKS;
10594
12.0k
        GROW;
10595
12.0k
        if ((RAW == '<') && (NXT(1) == '?')) {
10596
54
      xmlParsePI(ctxt);
10597
11.9k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10598
0
      xmlParseComment(ctxt);
10599
11.9k
        } else {
10600
11.9k
            break;
10601
11.9k
        }
10602
12.0k
    }
10603
11.9k
}
10604
10605
static void
10606
158k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10607
158k
    xmlDocPtr doc;
10608
10609
    /*
10610
     * SAX: end of the document processing.
10611
     */
10612
158k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10613
5.97k
        ctxt->sax->endDocument(ctxt->userData);
10614
10615
158k
    doc = ctxt->myDoc;
10616
158k
    if (doc != NULL) {
10617
5.99k
        if (ctxt->wellFormed) {
10618
5.69k
            doc->properties |= XML_DOC_WELLFORMED;
10619
5.69k
            if (ctxt->valid)
10620
5.69k
                doc->properties |= XML_DOC_DTDVALID;
10621
5.69k
            if (ctxt->nsWellFormed)
10622
5.69k
                doc->properties |= XML_DOC_NSVALID;
10623
5.69k
        }
10624
10625
5.99k
        if (ctxt->options & XML_PARSE_OLD10)
10626
0
            doc->properties |= XML_DOC_OLD10;
10627
10628
        /*
10629
         * Remove locally kept entity definitions if the tree was not built
10630
         */
10631
5.99k
  if (xmlStrEqual(doc->version, SAX_COMPAT_MODE)) {
10632
17
            xmlFreeDoc(doc);
10633
17
            ctxt->myDoc = NULL;
10634
17
        }
10635
5.99k
    }
10636
158k
}
10637
10638
/**
10639
 * xmlParseDocument:
10640
 * @ctxt:  an XML parser context
10641
 *
10642
 * Parse an XML document and invoke the SAX handlers. This is useful
10643
 * if you're only interested in custom SAX callbacks. If you want a
10644
 * document tree, use xmlCtxtParseDocument.
10645
 *
10646
 * Returns 0, -1 in case of error.
10647
 */
10648
10649
int
10650
6.01k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10651
6.01k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10652
0
        return(-1);
10653
10654
6.01k
    GROW;
10655
10656
    /*
10657
     * SAX: detecting the level.
10658
     */
10659
6.01k
    xmlCtxtInitializeLate(ctxt);
10660
10661
6.01k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10662
6.01k
        ctxt->sax->setDocumentLocator(ctxt->userData,
10663
6.01k
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10664
6.01k
    }
10665
10666
6.01k
    xmlDetectEncoding(ctxt);
10667
10668
6.01k
    if (CUR == 0) {
10669
36
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10670
36
  return(-1);
10671
36
    }
10672
10673
5.97k
    GROW;
10674
5.97k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10675
10676
  /*
10677
   * Note that we will switch encoding on the fly.
10678
   */
10679
5.69k
  xmlParseXMLDecl(ctxt);
10680
5.69k
  SKIP_BLANKS;
10681
5.69k
    } else {
10682
279
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10683
279
        if (ctxt->version == NULL) {
10684
0
            xmlErrMemory(ctxt);
10685
0
            return(-1);
10686
0
        }
10687
279
    }
10688
5.97k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10689
5.97k
        ctxt->sax->startDocument(ctxt->userData);
10690
5.97k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10691
5.97k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10692
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10693
0
    }
10694
10695
    /*
10696
     * The Misc part of the Prolog
10697
     */
10698
5.97k
    xmlParseMisc(ctxt);
10699
10700
    /*
10701
     * Then possibly doc type declaration(s) and more Misc
10702
     * (doctypedecl Misc*)?
10703
     */
10704
5.97k
    GROW;
10705
5.97k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10706
10707
0
  ctxt->inSubset = 1;
10708
0
  xmlParseDocTypeDecl(ctxt);
10709
0
  if (RAW == '[') {
10710
0
      xmlParseInternalSubset(ctxt);
10711
0
  } else if (RAW == '>') {
10712
0
            NEXT;
10713
0
        }
10714
10715
  /*
10716
   * Create and update the external subset.
10717
   */
10718
0
  ctxt->inSubset = 2;
10719
0
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10720
0
      (!ctxt->disableSAX))
10721
0
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10722
0
                                ctxt->extSubSystem, ctxt->extSubURI);
10723
0
  ctxt->inSubset = 0;
10724
10725
0
        xmlCleanSpecialAttr(ctxt);
10726
10727
0
  xmlParseMisc(ctxt);
10728
0
    }
10729
10730
    /*
10731
     * Time to start parsing the tree itself
10732
     */
10733
5.97k
    GROW;
10734
5.97k
    if (RAW != '<') {
10735
3
        if (ctxt->wellFormed)
10736
1
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10737
1
                           "Start tag expected, '<' not found\n");
10738
5.97k
    } else {
10739
5.97k
  xmlParseElement(ctxt);
10740
10741
  /*
10742
   * The Misc part at the end
10743
   */
10744
5.97k
  xmlParseMisc(ctxt);
10745
10746
5.97k
        xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
10747
5.97k
    }
10748
10749
5.97k
    ctxt->instate = XML_PARSER_EOF;
10750
5.97k
    xmlFinishDocument(ctxt);
10751
10752
5.97k
    if (! ctxt->wellFormed) {
10753
286
  ctxt->valid = 0;
10754
286
  return(-1);
10755
286
    }
10756
10757
5.69k
    return(0);
10758
5.97k
}
10759
10760
/**
10761
 * xmlParseExtParsedEnt:
10762
 * @ctxt:  an XML parser context
10763
 *
10764
 * DEPRECATED: Internal function, don't use.
10765
 *
10766
 * parse a general parsed entity
10767
 * An external general parsed entity is well-formed if it matches the
10768
 * production labeled extParsedEnt.
10769
 *
10770
 * [78] extParsedEnt ::= TextDecl? content
10771
 *
10772
 * Returns 0, -1 in case of error. the parser context is augmented
10773
 *                as a result of the parsing.
10774
 */
10775
10776
int
10777
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10778
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10779
0
        return(-1);
10780
10781
0
    xmlCtxtInitializeLate(ctxt);
10782
10783
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10784
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10785
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10786
0
    }
10787
10788
0
    xmlDetectEncoding(ctxt);
10789
10790
0
    if (CUR == 0) {
10791
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10792
0
    }
10793
10794
    /*
10795
     * Check for the XMLDecl in the Prolog.
10796
     */
10797
0
    GROW;
10798
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10799
10800
  /*
10801
   * Note that we will switch encoding on the fly.
10802
   */
10803
0
  xmlParseXMLDecl(ctxt);
10804
0
  SKIP_BLANKS;
10805
0
    } else {
10806
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10807
0
    }
10808
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10809
0
        ctxt->sax->startDocument(ctxt->userData);
10810
10811
    /*
10812
     * Doing validity checking on chunk doesn't make sense
10813
     */
10814
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10815
0
    ctxt->validate = 0;
10816
0
    ctxt->depth = 0;
10817
10818
0
    xmlParseContentInternal(ctxt);
10819
10820
0
    if (ctxt->input->cur < ctxt->input->end)
10821
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10822
10823
    /*
10824
     * SAX: end of the document processing.
10825
     */
10826
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10827
0
        ctxt->sax->endDocument(ctxt->userData);
10828
10829
0
    if (! ctxt->wellFormed) return(-1);
10830
0
    return(0);
10831
0
}
10832
10833
#ifdef LIBXML_PUSH_ENABLED
10834
/************************************************************************
10835
 *                  *
10836
 *    Progressive parsing interfaces        *
10837
 *                  *
10838
 ************************************************************************/
10839
10840
/**
10841
 * xmlParseLookupChar:
10842
 * @ctxt:  an XML parser context
10843
 * @c:  character
10844
 *
10845
 * Check whether the input buffer contains a character.
10846
 */
10847
static int
10848
4.21M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10849
4.21M
    const xmlChar *cur;
10850
10851
4.21M
    if (ctxt->checkIndex == 0) {
10852
4.21M
        cur = ctxt->input->cur + 1;
10853
4.21M
    } else {
10854
609
        cur = ctxt->input->cur + ctxt->checkIndex;
10855
609
    }
10856
10857
4.21M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10858
817
        size_t index = ctxt->input->end - ctxt->input->cur;
10859
10860
817
        if (index > LONG_MAX) {
10861
0
            ctxt->checkIndex = 0;
10862
0
            return(1);
10863
0
        }
10864
817
        ctxt->checkIndex = index;
10865
817
        return(0);
10866
4.21M
    } else {
10867
4.21M
        ctxt->checkIndex = 0;
10868
4.21M
        return(1);
10869
4.21M
    }
10870
4.21M
}
10871
10872
/**
10873
 * xmlParseLookupString:
10874
 * @ctxt:  an XML parser context
10875
 * @startDelta: delta to apply at the start
10876
 * @str:  string
10877
 * @strLen:  length of string
10878
 *
10879
 * Check whether the input buffer contains a string.
10880
 */
10881
static const xmlChar *
10882
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10883
109k
                     const char *str, size_t strLen) {
10884
109k
    const xmlChar *cur, *term;
10885
10886
109k
    if (ctxt->checkIndex == 0) {
10887
108k
        cur = ctxt->input->cur + startDelta;
10888
108k
    } else {
10889
1.01k
        cur = ctxt->input->cur + ctxt->checkIndex;
10890
1.01k
    }
10891
10892
109k
    term = BAD_CAST strstr((const char *) cur, str);
10893
109k
    if (term == NULL) {
10894
1.38k
        const xmlChar *end = ctxt->input->end;
10895
1.38k
        size_t index;
10896
10897
        /* Rescan (strLen - 1) characters. */
10898
1.38k
        if ((size_t) (end - cur) < strLen)
10899
33
            end = cur;
10900
1.35k
        else
10901
1.35k
            end -= strLen - 1;
10902
1.38k
        index = end - ctxt->input->cur;
10903
1.38k
        if (index > LONG_MAX) {
10904
0
            ctxt->checkIndex = 0;
10905
0
            return(ctxt->input->end - strLen);
10906
0
        }
10907
1.38k
        ctxt->checkIndex = index;
10908
108k
    } else {
10909
108k
        ctxt->checkIndex = 0;
10910
108k
    }
10911
10912
109k
    return(term);
10913
109k
}
10914
10915
/**
10916
 * xmlParseLookupCharData:
10917
 * @ctxt:  an XML parser context
10918
 *
10919
 * Check whether the input buffer contains terminated char data.
10920
 */
10921
static int
10922
86.1k
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10923
86.1k
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10924
86.1k
    const xmlChar *end = ctxt->input->end;
10925
86.1k
    size_t index;
10926
10927
1.07M
    while (cur < end) {
10928
1.07M
        if ((*cur == '<') || (*cur == '&')) {
10929
83.4k
            ctxt->checkIndex = 0;
10930
83.4k
            return(1);
10931
83.4k
        }
10932
990k
        cur++;
10933
990k
    }
10934
10935
2.65k
    index = cur - ctxt->input->cur;
10936
2.65k
    if (index > LONG_MAX) {
10937
0
        ctxt->checkIndex = 0;
10938
0
        return(1);
10939
0
    }
10940
2.65k
    ctxt->checkIndex = index;
10941
2.65k
    return(0);
10942
2.65k
}
10943
10944
/**
10945
 * xmlParseLookupGt:
10946
 * @ctxt:  an XML parser context
10947
 *
10948
 * Check whether there's enough data in the input buffer to finish parsing
10949
 * a start tag. This has to take quotes into account.
10950
 */
10951
static int
10952
22.3M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10953
22.3M
    const xmlChar *cur;
10954
22.3M
    const xmlChar *end = ctxt->input->end;
10955
22.3M
    int state = ctxt->endCheckState;
10956
22.3M
    size_t index;
10957
10958
22.3M
    if (ctxt->checkIndex == 0)
10959
22.3M
        cur = ctxt->input->cur + 1;
10960
8.40k
    else
10961
8.40k
        cur = ctxt->input->cur + ctxt->checkIndex;
10962
10963
553M
    while (cur < end) {
10964
553M
        if (state) {
10965
145M
            if (*cur == state)
10966
11.8M
                state = 0;
10967
408M
        } else if (*cur == '\'' || *cur == '"') {
10968
11.8M
            state = *cur;
10969
396M
        } else if (*cur == '>') {
10970
22.3M
            ctxt->checkIndex = 0;
10971
22.3M
            ctxt->endCheckState = 0;
10972
22.3M
            return(1);
10973
22.3M
        }
10974
531M
        cur++;
10975
531M
    }
10976
10977
14.0k
    index = cur - ctxt->input->cur;
10978
14.0k
    if (index > LONG_MAX) {
10979
0
        ctxt->checkIndex = 0;
10980
0
        ctxt->endCheckState = 0;
10981
0
        return(1);
10982
0
    }
10983
14.0k
    ctxt->checkIndex = index;
10984
14.0k
    ctxt->endCheckState = state;
10985
14.0k
    return(0);
10986
14.0k
}
10987
10988
/**
10989
 * xmlParseLookupInternalSubset:
10990
 * @ctxt:  an XML parser context
10991
 *
10992
 * Check whether there's enough data in the input buffer to finish parsing
10993
 * the internal subset.
10994
 */
10995
static int
10996
991
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10997
    /*
10998
     * Sorry, but progressive parsing of the internal subset is not
10999
     * supported. We first check that the full content of the internal
11000
     * subset is available and parsing is launched only at that point.
11001
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11002
     * not in a ']]>' sequence which are conditional sections.
11003
     */
11004
991
    const xmlChar *cur, *start;
11005
991
    const xmlChar *end = ctxt->input->end;
11006
991
    int state = ctxt->endCheckState;
11007
991
    size_t index;
11008
11009
991
    if (ctxt->checkIndex == 0) {
11010
764
        cur = ctxt->input->cur + 1;
11011
764
    } else {
11012
227
        cur = ctxt->input->cur + ctxt->checkIndex;
11013
227
    }
11014
991
    start = cur;
11015
11016
19.0M
    while (cur < end) {
11017
19.0M
        if (state == '-') {
11018
2.21M
            if ((*cur == '-') &&
11019
133k
                (cur[1] == '-') &&
11020
68.2k
                (cur[2] == '>')) {
11021
29.0k
                state = 0;
11022
29.0k
                cur += 3;
11023
29.0k
                start = cur;
11024
29.0k
                continue;
11025
29.0k
            }
11026
2.21M
        }
11027
16.8M
        else if (state == ']') {
11028
14.1k
            if (*cur == '>') {
11029
152
                ctxt->checkIndex = 0;
11030
152
                ctxt->endCheckState = 0;
11031
152
                return(1);
11032
152
            }
11033
14.0k
            if (IS_BLANK_CH(*cur)) {
11034
3.00k
                state = ' ';
11035
11.0k
            } else if (*cur != ']') {
11036
3.80k
                state = 0;
11037
3.80k
                start = cur;
11038
3.80k
                continue;
11039
3.80k
            }
11040
14.0k
        }
11041
16.7M
        else if (state == ' ') {
11042
27.1k
            if (*cur == '>') {
11043
5
                ctxt->checkIndex = 0;
11044
5
                ctxt->endCheckState = 0;
11045
5
                return(1);
11046
5
            }
11047
27.1k
            if (!IS_BLANK_CH(*cur)) {
11048
2.99k
                state = 0;
11049
2.99k
                start = cur;
11050
2.99k
                continue;
11051
2.99k
            }
11052
27.1k
        }
11053
16.7M
        else if (state != 0) {
11054
11.4M
            if (*cur == state) {
11055
98.3k
                state = 0;
11056
98.3k
                start = cur + 1;
11057
98.3k
            }
11058
11.4M
        }
11059
5.35M
        else if (*cur == '<') {
11060
167k
            if ((cur[1] == '!') &&
11061
78.3k
                (cur[2] == '-') &&
11062
29.3k
                (cur[3] == '-')) {
11063
29.1k
                state = '-';
11064
29.1k
                cur += 4;
11065
                /* Don't treat <!--> as comment */
11066
29.1k
                start = cur;
11067
29.1k
                continue;
11068
29.1k
            }
11069
167k
        }
11070
5.18M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11071
105k
            state = *cur;
11072
105k
        }
11073
11074
18.9M
        cur++;
11075
18.9M
    }
11076
11077
    /*
11078
     * Rescan the three last characters to detect "<!--" and "-->"
11079
     * split across chunks.
11080
     */
11081
834
    if ((state == 0) || (state == '-')) {
11082
370
        if (cur - start < 3)
11083
20
            cur = start;
11084
350
        else
11085
350
            cur -= 3;
11086
370
    }
11087
834
    index = cur - ctxt->input->cur;
11088
834
    if (index > LONG_MAX) {
11089
0
        ctxt->checkIndex = 0;
11090
0
        ctxt->endCheckState = 0;
11091
0
        return(1);
11092
0
    }
11093
834
    ctxt->checkIndex = index;
11094
834
    ctxt->endCheckState = state;
11095
834
    return(0);
11096
834
}
11097
11098
/**
11099
 * xmlParseTryOrFinish:
11100
 * @ctxt:  an XML parser context
11101
 * @terminate:  last chunk indicator
11102
 *
11103
 * Try to progress on parsing
11104
 *
11105
 * Returns zero if no parsing was possible
11106
 */
11107
static int
11108
271k
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11109
271k
    int ret = 0;
11110
271k
    size_t avail;
11111
271k
    xmlChar cur, next;
11112
11113
271k
    if (ctxt->input == NULL)
11114
0
        return(0);
11115
11116
271k
    if ((ctxt->input != NULL) &&
11117
271k
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11118
22.7k
        xmlParserShrink(ctxt);
11119
22.7k
    }
11120
11121
106M
    while (ctxt->disableSAX == 0) {
11122
106M
        avail = ctxt->input->end - ctxt->input->cur;
11123
106M
        if (avail < 1)
11124
142k
      goto done;
11125
106M
        switch (ctxt->instate) {
11126
1.54k
            case XML_PARSER_EOF:
11127
          /*
11128
     * Document parsing is done !
11129
     */
11130
1.54k
          goto done;
11131
244k
            case XML_PARSER_START:
11132
                /*
11133
                 * Very first chars read from the document flow.
11134
                 */
11135
244k
                if ((!terminate) && (avail < 4))
11136
0
                    goto done;
11137
11138
                /*
11139
                 * We need more bytes to detect EBCDIC code pages.
11140
                 * See xmlDetectEBCDIC.
11141
                 */
11142
244k
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
11143
6
                    (!terminate) && (avail < 200))
11144
0
                    goto done;
11145
11146
244k
                xmlDetectEncoding(ctxt);
11147
244k
                ctxt->instate = XML_PARSER_XML_DECL;
11148
244k
    break;
11149
11150
244k
            case XML_PARSER_XML_DECL:
11151
244k
    if ((!terminate) && (avail < 2))
11152
0
        goto done;
11153
244k
    cur = ctxt->input->cur[0];
11154
244k
    next = ctxt->input->cur[1];
11155
244k
          if ((cur == '<') && (next == '?')) {
11156
        /* PI or XML decl */
11157
198k
        if ((!terminate) &&
11158
24.1k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11159
165
      goto done;
11160
197k
        if ((ctxt->input->cur[2] == 'x') &&
11161
193k
      (ctxt->input->cur[3] == 'm') &&
11162
192k
      (ctxt->input->cur[4] == 'l') &&
11163
190k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11164
176k
      ret += 5;
11165
176k
      xmlParseXMLDecl(ctxt);
11166
176k
        } else {
11167
21.5k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11168
21.5k
                        if (ctxt->version == NULL) {
11169
0
                            xmlErrMemory(ctxt);
11170
0
                            break;
11171
0
                        }
11172
21.5k
        }
11173
197k
    } else {
11174
46.2k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11175
46.2k
        if (ctxt->version == NULL) {
11176
0
            xmlErrMemory(ctxt);
11177
0
      break;
11178
0
        }
11179
46.2k
    }
11180
244k
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
11181
0
                    ctxt->sax->setDocumentLocator(ctxt->userData,
11182
0
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
11183
0
                }
11184
244k
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11185
0
                    (!ctxt->disableSAX))
11186
0
                    ctxt->sax->startDocument(ctxt->userData);
11187
244k
                ctxt->instate = XML_PARSER_MISC;
11188
244k
    break;
11189
37.6M
            case XML_PARSER_START_TAG: {
11190
37.6M
          const xmlChar *name;
11191
37.6M
    const xmlChar *prefix = NULL;
11192
37.6M
    const xmlChar *URI = NULL;
11193
37.6M
                int line = ctxt->input->line;
11194
37.6M
    int nbNs = 0;
11195
11196
37.6M
    if ((!terminate) && (avail < 2))
11197
4
        goto done;
11198
37.6M
    cur = ctxt->input->cur[0];
11199
37.6M
          if (cur != '<') {
11200
436
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11201
436
                                   "Start tag expected, '<' not found");
11202
436
                    ctxt->instate = XML_PARSER_EOF;
11203
436
                    xmlFinishDocument(ctxt);
11204
436
        goto done;
11205
436
    }
11206
37.6M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11207
12.4k
                    goto done;
11208
37.6M
    if (ctxt->spaceNr == 0)
11209
0
        spacePush(ctxt, -1);
11210
37.6M
    else if (*ctxt->space == -2)
11211
10.1M
        spacePush(ctxt, -1);
11212
27.4M
    else
11213
27.4M
        spacePush(ctxt, *ctxt->space);
11214
37.6M
#ifdef LIBXML_SAX1_ENABLED
11215
37.6M
    if (ctxt->sax2)
11216
37.6M
#endif /* LIBXML_SAX1_ENABLED */
11217
37.6M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
11218
1.47k
#ifdef LIBXML_SAX1_ENABLED
11219
1.47k
    else
11220
1.47k
        name = xmlParseStartTag(ctxt);
11221
37.6M
#endif /* LIBXML_SAX1_ENABLED */
11222
37.6M
    if (name == NULL) {
11223
5.19k
        spacePop(ctxt);
11224
5.19k
                    ctxt->instate = XML_PARSER_EOF;
11225
5.19k
                    xmlFinishDocument(ctxt);
11226
5.19k
        goto done;
11227
5.19k
    }
11228
37.6M
#ifdef LIBXML_VALID_ENABLED
11229
    /*
11230
     * [ VC: Root Element Type ]
11231
     * The Name in the document type declaration must match
11232
     * the element type of the root element.
11233
     */
11234
37.6M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11235
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11236
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11237
37.6M
#endif /* LIBXML_VALID_ENABLED */
11238
11239
    /*
11240
     * Check for an Empty Element.
11241
     */
11242
37.6M
    if ((RAW == '/') && (NXT(1) == '>')) {
11243
9.16M
        SKIP(2);
11244
11245
9.16M
        if (ctxt->sax2) {
11246
9.16M
      if ((ctxt->sax != NULL) &&
11247
9.16M
          (ctxt->sax->endElementNs != NULL) &&
11248
9.16M
          (!ctxt->disableSAX))
11249
9.16M
          ctxt->sax->endElementNs(ctxt->userData, name,
11250
9.16M
                                  prefix, URI);
11251
9.16M
      if (nbNs > 0)
11252
193k
          xmlParserNsPop(ctxt, nbNs);
11253
9.16M
#ifdef LIBXML_SAX1_ENABLED
11254
9.16M
        } else {
11255
2
      if ((ctxt->sax != NULL) &&
11256
0
          (ctxt->sax->endElement != NULL) &&
11257
0
          (!ctxt->disableSAX))
11258
0
          ctxt->sax->endElement(ctxt->userData, name);
11259
2
#endif /* LIBXML_SAX1_ENABLED */
11260
2
        }
11261
9.16M
        spacePop(ctxt);
11262
28.4M
    } else if (RAW == '>') {
11263
28.3M
        NEXT;
11264
28.3M
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11265
28.3M
    } else {
11266
61.5k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11267
61.5k
           "Couldn't find end of Start Tag %s\n",
11268
61.5k
           name);
11269
61.5k
        nodePop(ctxt);
11270
61.5k
        spacePop(ctxt);
11271
61.5k
                    if (nbNs > 0)
11272
6.27k
                        xmlParserNsPop(ctxt, nbNs);
11273
61.5k
    }
11274
11275
37.6M
                if (ctxt->nameNr == 0)
11276
11.4k
                    ctxt->instate = XML_PARSER_EPILOG;
11277
37.6M
                else
11278
37.6M
                    ctxt->instate = XML_PARSER_CONTENT;
11279
37.6M
                break;
11280
37.6M
      }
11281
59.2M
            case XML_PARSER_CONTENT: {
11282
59.2M
    cur = ctxt->input->cur[0];
11283
11284
59.2M
    if (cur == '<') {
11285
45.7M
                    if ((!terminate) && (avail < 2))
11286
640
                        goto done;
11287
45.7M
        next = ctxt->input->cur[1];
11288
11289
45.7M
                    if (next == '/') {
11290
8.30M
                        ctxt->instate = XML_PARSER_END_TAG;
11291
8.30M
                        break;
11292
37.4M
                    } else if (next == '?') {
11293
17.6k
                        if ((!terminate) &&
11294
10.5k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11295
106
                            goto done;
11296
17.5k
                        xmlParsePI(ctxt);
11297
17.5k
                        ctxt->instate = XML_PARSER_CONTENT;
11298
17.5k
                        break;
11299
37.4M
                    } else if (next == '!') {
11300
78.7k
                        if ((!terminate) && (avail < 3))
11301
8
                            goto done;
11302
78.7k
                        next = ctxt->input->cur[2];
11303
11304
78.7k
                        if (next == '-') {
11305
62.2k
                            if ((!terminate) && (avail < 4))
11306
9
                                goto done;
11307
62.2k
                            if (ctxt->input->cur[3] == '-') {
11308
62.2k
                                if ((!terminate) &&
11309
42.8k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11310
111
                                    goto done;
11311
62.1k
                                xmlParseComment(ctxt);
11312
62.1k
                                ctxt->instate = XML_PARSER_CONTENT;
11313
62.1k
                                break;
11314
62.2k
                            }
11315
62.2k
                        } else if (next == '[') {
11316
16.2k
                            if ((!terminate) && (avail < 9))
11317
6
                                goto done;
11318
16.2k
                            if ((ctxt->input->cur[2] == '[') &&
11319
16.2k
                                (ctxt->input->cur[3] == 'C') &&
11320
16.2k
                                (ctxt->input->cur[4] == 'D') &&
11321
16.2k
                                (ctxt->input->cur[5] == 'A') &&
11322
16.2k
                                (ctxt->input->cur[6] == 'T') &&
11323
16.2k
                                (ctxt->input->cur[7] == 'A') &&
11324
16.1k
                                (ctxt->input->cur[8] == '[')) {
11325
16.1k
                                if ((!terminate) &&
11326
7.03k
                                    (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11327
877
                                    goto done;
11328
15.3k
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11329
15.3k
                                xmlParseCDSect(ctxt);
11330
15.3k
                                ctxt->instate = XML_PARSER_CONTENT;
11331
15.3k
                                break;
11332
16.1k
                            }
11333
16.2k
                        }
11334
78.7k
                    }
11335
45.7M
    } else if (cur == '&') {
11336
404k
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11337
91
      goto done;
11338
404k
        xmlParseReference(ctxt);
11339
404k
                    break;
11340
13.0M
    } else {
11341
        /* TODO Avoid the extra copy, handle directly !!! */
11342
        /*
11343
         * Goal of the following test is:
11344
         *  - minimize calls to the SAX 'character' callback
11345
         *    when they are mergeable
11346
         *  - handle an problem for isBlank when we only parse
11347
         *    a sequence of blank chars and the next one is
11348
         *    not available to check against '<' presence.
11349
         *  - tries to homogenize the differences in SAX
11350
         *    callbacks between the push and pull versions
11351
         *    of the parser.
11352
         */
11353
13.0M
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11354
478k
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11355
2.65k
          goto done;
11356
478k
                    }
11357
13.0M
                    ctxt->checkIndex = 0;
11358
13.0M
        xmlParseCharDataInternal(ctxt, !terminate);
11359
13.0M
                    break;
11360
13.0M
    }
11361
11362
37.3M
                ctxt->instate = XML_PARSER_START_TAG;
11363
37.3M
    break;
11364
59.2M
      }
11365
8.30M
            case XML_PARSER_END_TAG:
11366
8.30M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11367
726
        goto done;
11368
8.30M
    if (ctxt->sax2) {
11369
8.30M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11370
8.30M
        nameNsPop(ctxt);
11371
8.30M
    }
11372
21
#ifdef LIBXML_SAX1_ENABLED
11373
21
      else
11374
21
        xmlParseEndTag1(ctxt, 0);
11375
8.30M
#endif /* LIBXML_SAX1_ENABLED */
11376
8.30M
    if (ctxt->nameNr == 0) {
11377
135k
        ctxt->instate = XML_PARSER_EPILOG;
11378
8.16M
    } else {
11379
8.16M
        ctxt->instate = XML_PARSER_CONTENT;
11380
8.16M
    }
11381
8.30M
    break;
11382
293k
            case XML_PARSER_MISC:
11383
297k
            case XML_PARSER_PROLOG:
11384
313k
            case XML_PARSER_EPILOG:
11385
313k
    SKIP_BLANKS;
11386
313k
                avail = ctxt->input->end - ctxt->input->cur;
11387
313k
    if (avail < 1)
11388
10.9k
        goto done;
11389
302k
    if (ctxt->input->cur[0] == '<') {
11390
300k
                    if ((!terminate) && (avail < 2))
11391
10
                        goto done;
11392
300k
                    next = ctxt->input->cur[1];
11393
300k
                    if (next == '?') {
11394
52.0k
                        if ((!terminate) &&
11395
22.4k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11396
77
                            goto done;
11397
51.9k
                        xmlParsePI(ctxt);
11398
51.9k
                        break;
11399
248k
                    } else if (next == '!') {
11400
15.2k
                        if ((!terminate) && (avail < 3))
11401
5
                            goto done;
11402
11403
15.2k
                        if (ctxt->input->cur[2] == '-') {
11404
5.64k
                            if ((!terminate) && (avail < 4))
11405
1
                                goto done;
11406
5.64k
                            if (ctxt->input->cur[3] == '-') {
11407
5.62k
                                if ((!terminate) &&
11408
2.70k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11409
51
                                    goto done;
11410
5.57k
                                xmlParseComment(ctxt);
11411
5.57k
                                break;
11412
5.62k
                            }
11413
9.58k
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11414
9.56k
                            if ((!terminate) && (avail < 9))
11415
1
                                goto done;
11416
9.56k
                            if ((ctxt->input->cur[2] == 'D') &&
11417
9.55k
                                (ctxt->input->cur[3] == 'O') &&
11418
9.54k
                                (ctxt->input->cur[4] == 'C') &&
11419
9.53k
                                (ctxt->input->cur[5] == 'T') &&
11420
9.52k
                                (ctxt->input->cur[6] == 'Y') &&
11421
9.51k
                                (ctxt->input->cur[7] == 'P') &&
11422
9.50k
                                (ctxt->input->cur[8] == 'E')) {
11423
9.50k
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11424
105
                                    goto done;
11425
9.39k
                                ctxt->inSubset = 1;
11426
9.39k
                                xmlParseDocTypeDecl(ctxt);
11427
9.39k
                                if (RAW == '[') {
11428
8.83k
                                    ctxt->instate = XML_PARSER_DTD;
11429
8.83k
                                } else {
11430
560
                                    if (RAW == '>')
11431
314
                                        NEXT;
11432
                                    /*
11433
                                     * Create and update the external subset.
11434
                                     */
11435
560
                                    ctxt->inSubset = 2;
11436
560
                                    if ((ctxt->sax != NULL) &&
11437
560
                                        (!ctxt->disableSAX) &&
11438
307
                                        (ctxt->sax->externalSubset != NULL))
11439
0
                                        ctxt->sax->externalSubset(
11440
0
                                                ctxt->userData,
11441
0
                                                ctxt->intSubName,
11442
0
                                                ctxt->extSubSystem,
11443
0
                                                ctxt->extSubURI);
11444
560
                                    ctxt->inSubset = 0;
11445
560
                                    xmlCleanSpecialAttr(ctxt);
11446
560
                                    ctxt->instate = XML_PARSER_PROLOG;
11447
560
                                }
11448
9.39k
                                break;
11449
9.50k
                            }
11450
9.56k
                        }
11451
15.2k
                    }
11452
300k
                }
11453
11454
235k
                if (ctxt->instate == XML_PARSER_EPILOG) {
11455
1.54k
                    if (ctxt->errNo == XML_ERR_OK)
11456
218
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11457
1.54k
        ctxt->instate = XML_PARSER_EOF;
11458
1.54k
                    xmlFinishDocument(ctxt);
11459
233k
                } else {
11460
233k
        ctxt->instate = XML_PARSER_START_TAG;
11461
233k
    }
11462
235k
    break;
11463
9.58k
            case XML_PARSER_DTD: {
11464
9.58k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11465
834
                    goto done;
11466
8.75k
    xmlParseInternalSubset(ctxt);
11467
8.75k
    ctxt->inSubset = 2;
11468
8.75k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11469
3.19k
        (ctxt->sax->externalSubset != NULL))
11470
0
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11471
0
          ctxt->extSubSystem, ctxt->extSubURI);
11472
8.75k
    ctxt->inSubset = 0;
11473
8.75k
    xmlCleanSpecialAttr(ctxt);
11474
8.75k
    ctxt->instate = XML_PARSER_PROLOG;
11475
8.75k
                break;
11476
9.58k
      }
11477
0
            default:
11478
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11479
0
      "PP: internal error\n");
11480
0
    ctxt->instate = XML_PARSER_EOF;
11481
0
    break;
11482
106M
  }
11483
106M
    }
11484
271k
done:
11485
271k
    return(ret);
11486
271k
}
11487
11488
/**
11489
 * xmlParseChunk:
11490
 * @ctxt:  an XML parser context
11491
 * @chunk:  chunk of memory
11492
 * @size:  size of chunk in bytes
11493
 * @terminate:  last chunk indicator
11494
 *
11495
 * Parse a chunk of memory in push parser mode.
11496
 *
11497
 * Assumes that the parser context was initialized with
11498
 * xmlCreatePushParserCtxt.
11499
 *
11500
 * The last chunk, which will often be empty, must be marked with
11501
 * the @terminate flag. With the default SAX callbacks, the resulting
11502
 * document will be available in ctxt->myDoc. This pointer will not
11503
 * be freed when calling xmlFreeParserCtxt and must be freed by the
11504
 * caller. If the document isn't well-formed, it will still be returned
11505
 * in ctxt->myDoc.
11506
 *
11507
 * As an exception, xmlCtxtResetPush will free the document in
11508
 * ctxt->myDoc. So ctxt->myDoc should be set to NULL after extracting
11509
 * the document.
11510
 *
11511
 * Returns an xmlParserErrors code (0 on success).
11512
 */
11513
int
11514
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11515
271k
              int terminate) {
11516
271k
    size_t curBase;
11517
271k
    size_t maxLength;
11518
271k
    size_t pos;
11519
271k
    int end_in_lf = 0;
11520
271k
    int res;
11521
11522
271k
    if ((ctxt == NULL) || (size < 0))
11523
0
        return(XML_ERR_ARGUMENT);
11524
271k
    if ((chunk == NULL) && (size > 0))
11525
0
        return(XML_ERR_ARGUMENT);
11526
271k
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11527
0
        return(XML_ERR_ARGUMENT);
11528
271k
    if (ctxt->disableSAX != 0)
11529
0
        return(ctxt->errNo);
11530
11531
271k
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11532
271k
    if (ctxt->instate == XML_PARSER_START)
11533
244k
        xmlCtxtInitializeLate(ctxt);
11534
271k
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11535
40.9k
        (chunk[size - 1] == '\r')) {
11536
156
  end_in_lf = 1;
11537
156
  size--;
11538
156
    }
11539
11540
    /*
11541
     * Also push an empty chunk to make sure that the raw buffer
11542
     * will be flushed if there is an encoder.
11543
     */
11544
271k
    pos = ctxt->input->cur - ctxt->input->base;
11545
271k
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11546
271k
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11547
271k
    if (res < 0) {
11548
4
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11549
4
        xmlHaltParser(ctxt);
11550
4
        return(ctxt->errNo);
11551
4
    }
11552
11553
271k
    xmlParseTryOrFinish(ctxt, terminate);
11554
11555
271k
    curBase = ctxt->input->cur - ctxt->input->base;
11556
271k
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11557
271k
                XML_MAX_HUGE_LENGTH :
11558
271k
                XML_MAX_LOOKUP_LIMIT;
11559
271k
    if (curBase > maxLength) {
11560
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11561
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11562
0
        xmlHaltParser(ctxt);
11563
0
    }
11564
11565
271k
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11566
97.3k
        return(ctxt->errNo);
11567
11568
173k
    if (end_in_lf == 1) {
11569
135
  pos = ctxt->input->cur - ctxt->input->base;
11570
135
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11571
135
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11572
135
        if (res < 0) {
11573
0
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11574
0
            xmlHaltParser(ctxt);
11575
0
            return(ctxt->errNo);
11576
0
        }
11577
135
    }
11578
173k
    if (terminate) {
11579
  /*
11580
   * Check for termination
11581
   */
11582
146k
        if ((ctxt->instate != XML_PARSER_EOF) &&
11583
144k
            (ctxt->instate != XML_PARSER_EPILOG)) {
11584
10.0k
            if (ctxt->nameNr > 0) {
11585
9.83k
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11586
9.83k
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11587
9.83k
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11588
9.83k
                        "Premature end of data in tag %s line %d\n",
11589
9.83k
                        name, line, NULL);
11590
9.83k
            } else if (ctxt->instate == XML_PARSER_START) {
11591
0
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11592
179
            } else {
11593
179
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11594
179
                               "Start tag expected, '<' not found\n");
11595
179
            }
11596
136k
        } else {
11597
136k
            xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
11598
136k
        }
11599
146k
  if (ctxt->instate != XML_PARSER_EOF) {
11600
144k
            ctxt->instate = XML_PARSER_EOF;
11601
144k
            xmlFinishDocument(ctxt);
11602
144k
  }
11603
146k
    }
11604
173k
    if (ctxt->wellFormed == 0)
11605
10.0k
  return((xmlParserErrors) ctxt->errNo);
11606
163k
    else
11607
163k
        return(0);
11608
173k
}
11609
11610
/************************************************************************
11611
 *                  *
11612
 *    I/O front end functions to the parser     *
11613
 *                  *
11614
 ************************************************************************/
11615
11616
/**
11617
 * xmlCreatePushParserCtxt:
11618
 * @sax:  a SAX handler (optional)
11619
 * @user_data:  user data for SAX callbacks (optional)
11620
 * @chunk:  initial chunk (optional, deprecated)
11621
 * @size:  size of initial chunk in bytes
11622
 * @filename:  file name or URI (optional)
11623
 *
11624
 * Create a parser context for using the XML parser in push mode.
11625
 * See xmlParseChunk.
11626
 *
11627
 * Passing an initial chunk is useless and deprecated.
11628
 *
11629
 * The push parser doesn't support recovery mode or the
11630
 * XML_PARSE_NOBLANKS option.
11631
 *
11632
 * @filename is used as base URI to fetch external entities and for
11633
 * error reports.
11634
 *
11635
 * Returns the new parser context or NULL if a memory allocation
11636
 * failed.
11637
 */
11638
11639
xmlParserCtxtPtr
11640
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11641
244k
                        const char *chunk, int size, const char *filename) {
11642
244k
    xmlParserCtxtPtr ctxt;
11643
244k
    xmlParserInputPtr input;
11644
11645
244k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11646
244k
    if (ctxt == NULL)
11647
0
  return(NULL);
11648
11649
244k
    ctxt->options &= ~XML_PARSE_NODICT;
11650
244k
    ctxt->dictNames = 1;
11651
11652
244k
    input = xmlNewPushInput(filename, chunk, size);
11653
244k
    if (input == NULL) {
11654
0
  xmlFreeParserCtxt(ctxt);
11655
0
  return(NULL);
11656
0
    }
11657
244k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11658
0
        xmlFreeInputStream(input);
11659
0
        xmlFreeParserCtxt(ctxt);
11660
0
        return(NULL);
11661
0
    }
11662
11663
244k
    return(ctxt);
11664
244k
}
11665
#endif /* LIBXML_PUSH_ENABLED */
11666
11667
/**
11668
 * xmlStopParser:
11669
 * @ctxt:  an XML parser context
11670
 *
11671
 * Blocks further parser processing
11672
 */
11673
void
11674
0
xmlStopParser(xmlParserCtxtPtr ctxt) {
11675
0
    if (ctxt == NULL)
11676
0
        return;
11677
0
    xmlHaltParser(ctxt);
11678
0
    if (ctxt->errNo != XML_ERR_NO_MEMORY)
11679
0
        ctxt->errNo = XML_ERR_USER_STOP;
11680
0
}
11681
11682
/**
11683
 * xmlCreateIOParserCtxt:
11684
 * @sax:  a SAX handler (optional)
11685
 * @user_data:  user data for SAX callbacks (optional)
11686
 * @ioread:  an I/O read function
11687
 * @ioclose:  an I/O close function (optional)
11688
 * @ioctx:  an I/O handler
11689
 * @enc:  the charset encoding if known (deprecated)
11690
 *
11691
 * Create a parser context for using the XML parser with an existing
11692
 * I/O stream
11693
 *
11694
 * Returns the new parser context or NULL
11695
 */
11696
xmlParserCtxtPtr
11697
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11698
                      xmlInputReadCallback ioread,
11699
                      xmlInputCloseCallback ioclose,
11700
0
                      void *ioctx, xmlCharEncoding enc) {
11701
0
    xmlParserCtxtPtr ctxt;
11702
0
    xmlParserInputPtr input;
11703
0
    const char *encoding;
11704
11705
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11706
0
    if (ctxt == NULL)
11707
0
  return(NULL);
11708
11709
0
    encoding = xmlGetCharEncodingName(enc);
11710
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11711
0
                                  encoding, 0);
11712
0
    if (input == NULL) {
11713
0
  xmlFreeParserCtxt(ctxt);
11714
0
        return (NULL);
11715
0
    }
11716
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11717
0
        xmlFreeInputStream(input);
11718
0
        xmlFreeParserCtxt(ctxt);
11719
0
        return(NULL);
11720
0
    }
11721
11722
0
    return(ctxt);
11723
0
}
11724
11725
#ifdef LIBXML_VALID_ENABLED
11726
/************************************************************************
11727
 *                  *
11728
 *    Front ends when parsing a DTD       *
11729
 *                  *
11730
 ************************************************************************/
11731
11732
/**
11733
 * xmlCtxtParseDtd:
11734
 * @ctxt:  a parser context
11735
 * @input:  a parser input
11736
 * @publicId:  public ID of the DTD (optional)
11737
 * @systemId:  system ID of the DTD (optional)
11738
 *
11739
 * Parse a DTD.
11740
 *
11741
 * Option XML_PARSE_DTDLOAD should be enabled in the parser context
11742
 * to make external entities work.
11743
 *
11744
 * Availabe since 2.14.0.
11745
 *
11746
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11747
 * @input will be freed by the function in any case.
11748
 */
11749
xmlDtdPtr
11750
xmlCtxtParseDtd(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11751
0
                const xmlChar *publicId, const xmlChar *systemId) {
11752
0
    xmlDtdPtr ret = NULL;
11753
11754
0
    if ((ctxt == NULL) || (input == NULL)) {
11755
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
11756
0
        xmlFreeInputStream(input);
11757
0
        return(NULL);
11758
0
    }
11759
11760
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11761
0
        xmlFreeInputStream(input);
11762
0
        return(NULL);
11763
0
    }
11764
11765
0
    if (publicId == NULL)
11766
0
        publicId = BAD_CAST "none";
11767
0
    if (systemId == NULL)
11768
0
        systemId = BAD_CAST "none";
11769
11770
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11771
0
    if (ctxt->myDoc == NULL) {
11772
0
        xmlErrMemory(ctxt);
11773
0
        goto error;
11774
0
    }
11775
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11776
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11777
0
                                       publicId, systemId);
11778
0
    if (ctxt->myDoc->extSubset == NULL) {
11779
0
        xmlErrMemory(ctxt);
11780
0
        xmlFreeDoc(ctxt->myDoc);
11781
0
        goto error;
11782
0
    }
11783
11784
0
    xmlParseExternalSubset(ctxt, publicId, systemId);
11785
11786
0
    if (ctxt->wellFormed) {
11787
0
        ret = ctxt->myDoc->extSubset;
11788
0
        ctxt->myDoc->extSubset = NULL;
11789
0
        if (ret != NULL) {
11790
0
            xmlNodePtr tmp;
11791
11792
0
            ret->doc = NULL;
11793
0
            tmp = ret->children;
11794
0
            while (tmp != NULL) {
11795
0
                tmp->doc = NULL;
11796
0
                tmp = tmp->next;
11797
0
            }
11798
0
        }
11799
0
    } else {
11800
0
        ret = NULL;
11801
0
    }
11802
0
    xmlFreeDoc(ctxt->myDoc);
11803
0
    ctxt->myDoc = NULL;
11804
11805
0
error:
11806
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
11807
11808
0
    return(ret);
11809
0
}
11810
11811
/**
11812
 * xmlIOParseDTD:
11813
 * @sax:  the SAX handler block or NULL
11814
 * @input:  an Input Buffer
11815
 * @enc:  the charset encoding if known
11816
 *
11817
 * DEPRECATED: Use xmlCtxtParseDtd.
11818
 *
11819
 * Load and parse a DTD
11820
 *
11821
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11822
 * @input will be freed by the function in any case.
11823
 */
11824
11825
xmlDtdPtr
11826
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11827
0
        xmlCharEncoding enc) {
11828
0
    xmlDtdPtr ret = NULL;
11829
0
    xmlParserCtxtPtr ctxt;
11830
0
    xmlParserInputPtr pinput = NULL;
11831
11832
0
    if (input == NULL)
11833
0
  return(NULL);
11834
11835
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11836
0
    if (ctxt == NULL) {
11837
0
        xmlFreeParserInputBuffer(input);
11838
0
  return(NULL);
11839
0
    }
11840
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11841
11842
    /*
11843
     * generate a parser input from the I/O handler
11844
     */
11845
11846
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11847
0
    if (pinput == NULL) {
11848
0
        xmlFreeParserInputBuffer(input);
11849
0
  xmlFreeParserCtxt(ctxt);
11850
0
  return(NULL);
11851
0
    }
11852
11853
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11854
0
        xmlSwitchEncoding(ctxt, enc);
11855
0
    }
11856
11857
0
    ret = xmlCtxtParseDtd(ctxt, pinput, NULL, NULL);
11858
11859
0
    xmlFreeParserCtxt(ctxt);
11860
0
    return(ret);
11861
0
}
11862
11863
/**
11864
 * xmlSAXParseDTD:
11865
 * @sax:  the SAX handler block
11866
 * @ExternalID:  a NAME* containing the External ID of the DTD
11867
 * @SystemID:  a NAME* containing the URL to the DTD
11868
 *
11869
 * DEPRECATED: Use xmlCtxtParseDtd.
11870
 *
11871
 * Load and parse an external subset.
11872
 *
11873
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11874
 */
11875
11876
xmlDtdPtr
11877
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11878
0
                          const xmlChar *SystemID) {
11879
0
    xmlDtdPtr ret = NULL;
11880
0
    xmlParserCtxtPtr ctxt;
11881
0
    xmlParserInputPtr input = NULL;
11882
0
    xmlChar* systemIdCanonic;
11883
11884
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11885
11886
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11887
0
    if (ctxt == NULL) {
11888
0
  return(NULL);
11889
0
    }
11890
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11891
11892
    /*
11893
     * Canonicalise the system ID
11894
     */
11895
0
    systemIdCanonic = xmlCanonicPath(SystemID);
11896
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11897
0
  xmlFreeParserCtxt(ctxt);
11898
0
  return(NULL);
11899
0
    }
11900
11901
    /*
11902
     * Ask the Entity resolver to load the damn thing
11903
     */
11904
11905
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11906
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11907
0
                                   systemIdCanonic);
11908
0
    if (input == NULL) {
11909
0
  xmlFreeParserCtxt(ctxt);
11910
0
  if (systemIdCanonic != NULL)
11911
0
      xmlFree(systemIdCanonic);
11912
0
  return(NULL);
11913
0
    }
11914
11915
0
    if (input->filename == NULL)
11916
0
  input->filename = (char *) systemIdCanonic;
11917
0
    else
11918
0
  xmlFree(systemIdCanonic);
11919
11920
0
    ret = xmlCtxtParseDtd(ctxt, input, ExternalID, SystemID);
11921
11922
0
    xmlFreeParserCtxt(ctxt);
11923
0
    return(ret);
11924
0
}
11925
11926
11927
/**
11928
 * xmlParseDTD:
11929
 * @ExternalID:  a NAME* containing the External ID of the DTD
11930
 * @SystemID:  a NAME* containing the URL to the DTD
11931
 *
11932
 * Load and parse an external subset.
11933
 *
11934
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11935
 */
11936
11937
xmlDtdPtr
11938
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11939
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11940
0
}
11941
#endif /* LIBXML_VALID_ENABLED */
11942
11943
/************************************************************************
11944
 *                  *
11945
 *    Front ends when parsing an Entity     *
11946
 *                  *
11947
 ************************************************************************/
11948
11949
static xmlNodePtr
11950
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11951
0
                            int hasTextDecl, int buildTree) {
11952
0
    xmlNodePtr root = NULL;
11953
0
    xmlNodePtr list = NULL;
11954
0
    xmlChar *rootName = BAD_CAST "#root";
11955
0
    int result;
11956
11957
0
    if (buildTree) {
11958
0
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11959
0
        if (root == NULL) {
11960
0
            xmlErrMemory(ctxt);
11961
0
            goto error;
11962
0
        }
11963
0
    }
11964
11965
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
11966
0
        goto error;
11967
11968
0
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11969
0
    spacePush(ctxt, -1);
11970
11971
0
    if (buildTree)
11972
0
        nodePush(ctxt, root);
11973
11974
0
    if (hasTextDecl) {
11975
0
        xmlDetectEncoding(ctxt);
11976
11977
        /*
11978
         * Parse a possible text declaration first
11979
         */
11980
0
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11981
0
            (IS_BLANK_CH(NXT(5)))) {
11982
0
            xmlParseTextDecl(ctxt);
11983
            /*
11984
             * An XML-1.0 document can't reference an entity not XML-1.0
11985
             */
11986
0
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11987
0
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11988
0
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11989
0
                               "Version mismatch between document and "
11990
0
                               "entity\n");
11991
0
            }
11992
0
        }
11993
0
    }
11994
11995
0
    xmlParseContentInternal(ctxt);
11996
11997
0
    if (ctxt->input->cur < ctxt->input->end)
11998
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11999
12000
0
    if ((ctxt->wellFormed) ||
12001
0
        ((ctxt->recovery) && (!xmlCtxtIsCatastrophicError(ctxt)))) {
12002
0
        if (root != NULL) {
12003
0
            xmlNodePtr cur;
12004
12005
            /*
12006
             * Unlink newly created node list.
12007
             */
12008
0
            list = root->children;
12009
0
            root->children = NULL;
12010
0
            root->last = NULL;
12011
0
            for (cur = list; cur != NULL; cur = cur->next)
12012
0
                cur->parent = NULL;
12013
0
        }
12014
0
    }
12015
12016
    /*
12017
     * Read the rest of the stream in case of errors. We want
12018
     * to account for the whole entity size.
12019
     */
12020
0
    do {
12021
0
        ctxt->input->cur = ctxt->input->end;
12022
0
        xmlParserShrink(ctxt);
12023
0
        result = xmlParserGrow(ctxt);
12024
0
    } while (result > 0);
12025
12026
0
    if (buildTree)
12027
0
        nodePop(ctxt);
12028
12029
0
    namePop(ctxt);
12030
0
    spacePop(ctxt);
12031
12032
0
    xmlCtxtPopInput(ctxt);
12033
12034
0
error:
12035
0
    xmlFreeNode(root);
12036
12037
0
    return(list);
12038
0
}
12039
12040
static void
12041
0
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
12042
0
    xmlParserInputPtr input;
12043
0
    xmlNodePtr list;
12044
0
    unsigned long consumed;
12045
0
    int isExternal;
12046
0
    int buildTree;
12047
0
    int oldMinNsIndex;
12048
0
    int oldNodelen, oldNodemem;
12049
12050
0
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
12051
0
    buildTree = (ctxt->node != NULL);
12052
12053
    /*
12054
     * Recursion check
12055
     */
12056
0
    if (ent->flags & XML_ENT_EXPANDING) {
12057
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
12058
0
        xmlHaltParser(ctxt);
12059
0
        goto error;
12060
0
    }
12061
12062
    /*
12063
     * Load entity
12064
     */
12065
0
    input = xmlNewEntityInputStream(ctxt, ent);
12066
0
    if (input == NULL)
12067
0
        goto error;
12068
12069
    /*
12070
     * When building a tree, we need to limit the scope of namespace
12071
     * declarations, so that entities don't reference xmlNs structs
12072
     * from the parent of a reference.
12073
     */
12074
0
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
12075
0
    if (buildTree)
12076
0
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
12077
12078
0
    oldNodelen = ctxt->nodelen;
12079
0
    oldNodemem = ctxt->nodemem;
12080
0
    ctxt->nodelen = 0;
12081
0
    ctxt->nodemem = 0;
12082
12083
    /*
12084
     * Parse content
12085
     *
12086
     * This initiates a recursive call chain:
12087
     *
12088
     * - xmlCtxtParseContentInternal
12089
     * - xmlParseContentInternal
12090
     * - xmlParseReference
12091
     * - xmlCtxtParseEntity
12092
     *
12093
     * The nesting depth is limited by the maximum number of inputs,
12094
     * see xmlCtxtPushInput.
12095
     *
12096
     * It's possible to make this non-recursive (minNsIndex must be
12097
     * stored in the input struct) at the expense of code readability.
12098
     */
12099
12100
0
    ent->flags |= XML_ENT_EXPANDING;
12101
12102
0
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
12103
12104
0
    ent->flags &= ~XML_ENT_EXPANDING;
12105
12106
0
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
12107
0
    ctxt->nodelen = oldNodelen;
12108
0
    ctxt->nodemem = oldNodemem;
12109
12110
    /*
12111
     * Entity size accounting
12112
     */
12113
0
    consumed = input->consumed;
12114
0
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
12115
12116
0
    if ((ent->flags & XML_ENT_CHECKED) == 0)
12117
0
        xmlSaturatedAdd(&ent->expandedSize, consumed);
12118
12119
0
    if ((ent->flags & XML_ENT_PARSED) == 0) {
12120
0
        if (isExternal)
12121
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
12122
12123
0
        ent->children = list;
12124
12125
0
        while (list != NULL) {
12126
0
            list->parent = (xmlNodePtr) ent;
12127
12128
            /*
12129
             * Downstream code like the nginx xslt module can set
12130
             * ctxt->myDoc->extSubset to a separate DTD, so the entity
12131
             * might have a different or a NULL document.
12132
             */
12133
0
            if (list->doc != ent->doc)
12134
0
                xmlSetTreeDoc(list, ent->doc);
12135
12136
0
            if (list->next == NULL)
12137
0
                ent->last = list;
12138
0
            list = list->next;
12139
0
        }
12140
0
    } else {
12141
0
        xmlFreeNodeList(list);
12142
0
    }
12143
12144
0
    xmlFreeInputStream(input);
12145
12146
0
error:
12147
0
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
12148
0
}
12149
12150
/**
12151
 * xmlParseCtxtExternalEntity:
12152
 * @ctxt:  the existing parsing context
12153
 * @URL:  the URL for the entity to load
12154
 * @ID:  the System ID for the entity to load
12155
 * @listOut:  the return value for the set of parsed nodes
12156
 *
12157
 * Parse an external general entity within an existing parsing context
12158
 * An external general parsed entity is well-formed if it matches the
12159
 * production labeled extParsedEnt.
12160
 *
12161
 * [78] extParsedEnt ::= TextDecl? content
12162
 *
12163
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12164
 *    the parser error code otherwise
12165
 */
12166
12167
int
12168
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt, const xmlChar *URL,
12169
0
                           const xmlChar *ID, xmlNodePtr *listOut) {
12170
0
    xmlParserInputPtr input;
12171
0
    xmlNodePtr list;
12172
12173
0
    if (listOut != NULL)
12174
0
        *listOut = NULL;
12175
12176
0
    if (ctxt == NULL)
12177
0
        return(XML_ERR_ARGUMENT);
12178
12179
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12180
0
                            XML_RESOURCE_GENERAL_ENTITY);
12181
0
    if (input == NULL)
12182
0
        return(ctxt->errNo);
12183
12184
0
    xmlCtxtInitializeLate(ctxt);
12185
12186
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
12187
0
    if (listOut != NULL)
12188
0
        *listOut = list;
12189
0
    else
12190
0
        xmlFreeNodeList(list);
12191
12192
0
    xmlFreeInputStream(input);
12193
0
    return(ctxt->errNo);
12194
0
}
12195
12196
#ifdef LIBXML_SAX1_ENABLED
12197
/**
12198
 * xmlParseExternalEntity:
12199
 * @doc:  the document the chunk pertains to
12200
 * @sax:  the SAX handler block (possibly NULL)
12201
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12202
 * @depth:  Used for loop detection, use 0
12203
 * @URL:  the URL for the entity to load
12204
 * @ID:  the System ID for the entity to load
12205
 * @list:  the return value for the set of parsed nodes
12206
 *
12207
 * DEPRECATED: Use xmlParseCtxtExternalEntity.
12208
 *
12209
 * Parse an external general entity
12210
 * An external general parsed entity is well-formed if it matches the
12211
 * production labeled extParsedEnt.
12212
 *
12213
 * [78] extParsedEnt ::= TextDecl? content
12214
 *
12215
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12216
 *    the parser error code otherwise
12217
 */
12218
12219
int
12220
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12221
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
12222
0
    xmlParserCtxtPtr ctxt;
12223
0
    int ret;
12224
12225
0
    if (list != NULL)
12226
0
        *list = NULL;
12227
12228
0
    if (doc == NULL)
12229
0
        return(XML_ERR_ARGUMENT);
12230
12231
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12232
0
    if (ctxt == NULL)
12233
0
        return(XML_ERR_NO_MEMORY);
12234
12235
0
    ctxt->depth = depth;
12236
0
    ctxt->myDoc = doc;
12237
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
12238
12239
0
    xmlFreeParserCtxt(ctxt);
12240
0
    return(ret);
12241
0
}
12242
12243
/**
12244
 * xmlParseBalancedChunkMemory:
12245
 * @doc:  the document the chunk pertains to (must not be NULL)
12246
 * @sax:  the SAX handler block (possibly NULL)
12247
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12248
 * @depth:  Used for loop detection, use 0
12249
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12250
 * @lst:  the return value for the set of parsed nodes
12251
 *
12252
 * Parse a well-balanced chunk of an XML document
12253
 * called by the parser
12254
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12255
 * the content production in the XML grammar:
12256
 *
12257
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12258
 *
12259
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12260
 *    the parser error code otherwise
12261
 */
12262
12263
int
12264
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12265
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12266
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12267
0
                                                depth, string, lst, 0 );
12268
0
}
12269
#endif /* LIBXML_SAX1_ENABLED */
12270
12271
/**
12272
 * xmlCtxtParseContent:
12273
 * @ctxt:  parser context
12274
 * @input:  parser input
12275
 * @node:  target node or document
12276
 * @hasTextDecl:  whether to parse text declaration
12277
 *
12278
 * Parse a well-balanced chunk of XML matching the 'content' production.
12279
 *
12280
 * Namespaces in scope of @node and entities of @node's document are
12281
 * recognized. When validating, the DTD of @node's document is used.
12282
 *
12283
 * Always consumes @input even in error case.
12284
 *
12285
 * Available since 2.14.0.
12286
 *
12287
 * Returns a node list or NULL in case of error.
12288
 */
12289
xmlNodePtr
12290
xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
12291
0
                    xmlNodePtr node, int hasTextDecl) {
12292
0
    xmlDocPtr doc;
12293
0
    xmlNodePtr cur, list = NULL;
12294
0
    int nsnr = 0;
12295
0
    xmlDictPtr oldDict;
12296
0
    int oldOptions, oldDictNames, oldLoadSubset;
12297
12298
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12299
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12300
0
        goto exit;
12301
0
    }
12302
12303
0
    doc = node->doc;
12304
0
    if (doc == NULL) {
12305
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12306
0
        goto exit;
12307
0
    }
12308
12309
0
    switch (node->type) {
12310
0
        case XML_ELEMENT_NODE:
12311
0
        case XML_DOCUMENT_NODE:
12312
0
        case XML_HTML_DOCUMENT_NODE:
12313
0
            break;
12314
12315
0
        case XML_ATTRIBUTE_NODE:
12316
0
        case XML_TEXT_NODE:
12317
0
        case XML_CDATA_SECTION_NODE:
12318
0
        case XML_ENTITY_REF_NODE:
12319
0
        case XML_PI_NODE:
12320
0
        case XML_COMMENT_NODE:
12321
0
            for (cur = node->parent; cur != NULL; cur = node->parent) {
12322
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12323
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12324
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12325
0
                    node = cur;
12326
0
                    break;
12327
0
                }
12328
0
            }
12329
0
            break;
12330
12331
0
        default:
12332
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12333
0
            goto exit;
12334
0
    }
12335
12336
0
#ifdef LIBXML_HTML_ENABLED
12337
0
    if (ctxt->html)
12338
0
        htmlCtxtReset(ctxt);
12339
0
    else
12340
0
#endif
12341
0
        xmlCtxtReset(ctxt);
12342
12343
0
    oldDict = ctxt->dict;
12344
0
    oldOptions = ctxt->options;
12345
0
    oldDictNames = ctxt->dictNames;
12346
0
    oldLoadSubset = ctxt->loadsubset;
12347
12348
    /*
12349
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12350
     */
12351
0
    if (doc->dict != NULL) {
12352
0
        ctxt->dict = doc->dict;
12353
0
    } else {
12354
0
        ctxt->options |= XML_PARSE_NODICT;
12355
0
        ctxt->dictNames = 0;
12356
0
    }
12357
12358
    /*
12359
     * Disable IDs
12360
     */
12361
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12362
12363
0
    ctxt->myDoc = doc;
12364
12365
0
#ifdef LIBXML_HTML_ENABLED
12366
0
    if (ctxt->html) {
12367
        /*
12368
         * When parsing in context, it makes no sense to add implied
12369
         * elements like html/body/etc...
12370
         */
12371
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12372
12373
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12374
0
    } else
12375
0
#endif
12376
0
    {
12377
0
        xmlCtxtInitializeLate(ctxt);
12378
12379
        /*
12380
         * initialize the SAX2 namespaces stack
12381
         */
12382
0
        cur = node;
12383
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12384
0
            xmlNsPtr ns = cur->nsDef;
12385
0
            xmlHashedString hprefix, huri;
12386
12387
0
            while (ns != NULL) {
12388
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12389
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12390
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12391
0
                    nsnr++;
12392
0
                ns = ns->next;
12393
0
            }
12394
0
            cur = cur->parent;
12395
0
        }
12396
12397
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12398
12399
0
        if (nsnr > 0)
12400
0
            xmlParserNsPop(ctxt, nsnr);
12401
0
    }
12402
12403
0
    ctxt->dict = oldDict;
12404
0
    ctxt->options = oldOptions;
12405
0
    ctxt->dictNames = oldDictNames;
12406
0
    ctxt->loadsubset = oldLoadSubset;
12407
0
    ctxt->myDoc = NULL;
12408
0
    ctxt->node = NULL;
12409
12410
0
exit:
12411
0
    xmlFreeInputStream(input);
12412
0
    return(list);
12413
0
}
12414
12415
/**
12416
 * xmlParseInNodeContext:
12417
 * @node:  the context node
12418
 * @data:  the input string
12419
 * @datalen:  the input string length in bytes
12420
 * @options:  a combination of xmlParserOption
12421
 * @listOut:  the return value for the set of parsed nodes
12422
 *
12423
 * Parse a well-balanced chunk of an XML document
12424
 * within the context (DTD, namespaces, etc ...) of the given node.
12425
 *
12426
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12427
 * the content production in the XML grammar:
12428
 *
12429
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12430
 *
12431
 * This function assumes the encoding of @node's document which is
12432
 * typically not what you want. A better alternative is
12433
 * xmlCtxtParseContent.
12434
 *
12435
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12436
 * error code otherwise
12437
 */
12438
xmlParserErrors
12439
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12440
0
                      int options, xmlNodePtr *listOut) {
12441
0
    xmlParserCtxtPtr ctxt;
12442
0
    xmlParserInputPtr input;
12443
0
    xmlDocPtr doc;
12444
0
    xmlNodePtr list;
12445
0
    xmlParserErrors ret;
12446
12447
0
    if (listOut == NULL)
12448
0
        return(XML_ERR_INTERNAL_ERROR);
12449
0
    *listOut = NULL;
12450
12451
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12452
0
        return(XML_ERR_INTERNAL_ERROR);
12453
12454
0
    doc = node->doc;
12455
0
    if (doc == NULL)
12456
0
        return(XML_ERR_INTERNAL_ERROR);
12457
12458
0
#ifdef LIBXML_HTML_ENABLED
12459
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12460
0
        ctxt = htmlNewParserCtxt();
12461
0
    }
12462
0
    else
12463
0
#endif
12464
0
        ctxt = xmlNewParserCtxt();
12465
12466
0
    if (ctxt == NULL)
12467
0
        return(XML_ERR_NO_MEMORY);
12468
12469
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12470
0
                                      (const char *) doc->encoding,
12471
0
                                      XML_INPUT_BUF_STATIC);
12472
0
    if (input == NULL) {
12473
0
        xmlFreeParserCtxt(ctxt);
12474
0
        return(XML_ERR_NO_MEMORY);
12475
0
    }
12476
12477
0
    xmlCtxtUseOptions(ctxt, options);
12478
12479
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12480
12481
0
    if (list == NULL) {
12482
0
        ret = ctxt->errNo;
12483
0
        if (ret == XML_ERR_ARGUMENT)
12484
0
            ret = XML_ERR_INTERNAL_ERROR;
12485
0
    } else {
12486
0
        ret = XML_ERR_OK;
12487
0
        *listOut = list;
12488
0
    }
12489
12490
0
    xmlFreeParserCtxt(ctxt);
12491
12492
0
    return(ret);
12493
0
}
12494
12495
#ifdef LIBXML_SAX1_ENABLED
12496
/**
12497
 * xmlParseBalancedChunkMemoryRecover:
12498
 * @doc:  the document the chunk pertains to (must not be NULL)
12499
 * @sax:  the SAX handler block (possibly NULL)
12500
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12501
 * @depth:  Used for loop detection, use 0
12502
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12503
 * @listOut:  the return value for the set of parsed nodes
12504
 * @recover: return nodes even if the data is broken (use 0)
12505
 *
12506
 * Parse a well-balanced chunk of an XML document
12507
 *
12508
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12509
 * the content production in the XML grammar:
12510
 *
12511
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12512
 *
12513
 * Returns 0 if the chunk is well balanced, or thehe parser error code
12514
 * otherwise.
12515
 *
12516
 * In case recover is set to 1, the nodelist will not be empty even if
12517
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12518
 * some extent.
12519
 */
12520
int
12521
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12522
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *listOut,
12523
0
     int recover) {
12524
0
    xmlParserCtxtPtr ctxt;
12525
0
    xmlParserInputPtr input;
12526
0
    xmlNodePtr list;
12527
0
    int ret;
12528
12529
0
    if (listOut != NULL)
12530
0
        *listOut = NULL;
12531
12532
0
    if (string == NULL)
12533
0
        return(XML_ERR_ARGUMENT);
12534
12535
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12536
0
    if (ctxt == NULL)
12537
0
        return(XML_ERR_NO_MEMORY);
12538
12539
0
    xmlCtxtInitializeLate(ctxt);
12540
12541
0
    ctxt->depth = depth;
12542
0
    ctxt->myDoc = doc;
12543
0
    if (recover) {
12544
0
        ctxt->options |= XML_PARSE_RECOVER;
12545
0
        ctxt->recovery = 1;
12546
0
    }
12547
12548
0
    input = xmlNewStringInputStream(ctxt, string);
12549
0
    if (input == NULL) {
12550
0
        ret = ctxt->errNo;
12551
0
        goto error;
12552
0
    }
12553
12554
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12555
0
    if (listOut != NULL)
12556
0
        *listOut = list;
12557
0
    else
12558
0
        xmlFreeNodeList(list);
12559
12560
0
    if (!ctxt->wellFormed)
12561
0
        ret = ctxt->errNo;
12562
0
    else
12563
0
        ret = XML_ERR_OK;
12564
12565
0
error:
12566
0
    xmlFreeInputStream(input);
12567
0
    xmlFreeParserCtxt(ctxt);
12568
0
    return(ret);
12569
0
}
12570
12571
/**
12572
 * xmlSAXParseEntity:
12573
 * @sax:  the SAX handler block
12574
 * @filename:  the filename
12575
 *
12576
 * DEPRECATED: Don't use.
12577
 *
12578
 * parse an XML external entity out of context and build a tree.
12579
 * It use the given SAX function block to handle the parsing callback.
12580
 * If sax is NULL, fallback to the default DOM tree building routines.
12581
 *
12582
 * [78] extParsedEnt ::= TextDecl? content
12583
 *
12584
 * This correspond to a "Well Balanced" chunk
12585
 *
12586
 * Returns the resulting document tree
12587
 */
12588
12589
xmlDocPtr
12590
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12591
0
    xmlDocPtr ret;
12592
0
    xmlParserCtxtPtr ctxt;
12593
12594
0
    ctxt = xmlCreateFileParserCtxt(filename);
12595
0
    if (ctxt == NULL) {
12596
0
  return(NULL);
12597
0
    }
12598
0
    if (sax != NULL) {
12599
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12600
0
            *ctxt->sax = *sax;
12601
0
        } else {
12602
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12603
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12604
0
        }
12605
0
        ctxt->userData = NULL;
12606
0
    }
12607
12608
0
    xmlParseExtParsedEnt(ctxt);
12609
12610
0
    if (ctxt->wellFormed) {
12611
0
  ret = ctxt->myDoc;
12612
0
    } else {
12613
0
        ret = NULL;
12614
0
        xmlFreeDoc(ctxt->myDoc);
12615
0
    }
12616
12617
0
    xmlFreeParserCtxt(ctxt);
12618
12619
0
    return(ret);
12620
0
}
12621
12622
/**
12623
 * xmlParseEntity:
12624
 * @filename:  the filename
12625
 *
12626
 * parse an XML external entity out of context and build a tree.
12627
 *
12628
 * [78] extParsedEnt ::= TextDecl? content
12629
 *
12630
 * This correspond to a "Well Balanced" chunk
12631
 *
12632
 * Returns the resulting document tree
12633
 */
12634
12635
xmlDocPtr
12636
0
xmlParseEntity(const char *filename) {
12637
0
    return(xmlSAXParseEntity(NULL, filename));
12638
0
}
12639
#endif /* LIBXML_SAX1_ENABLED */
12640
12641
/**
12642
 * xmlCreateEntityParserCtxt:
12643
 * @URL:  the entity URL
12644
 * @ID:  the entity PUBLIC ID
12645
 * @base:  a possible base for the target URI
12646
 *
12647
 * DEPRECATED: Don't use.
12648
 *
12649
 * Create a parser context for an external entity
12650
 * Automatic support for ZLIB/Compress compressed document is provided
12651
 * by default if found at compile-time.
12652
 *
12653
 * Returns the new parser context or NULL
12654
 */
12655
xmlParserCtxtPtr
12656
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12657
0
                    const xmlChar *base) {
12658
0
    xmlParserCtxtPtr ctxt;
12659
0
    xmlParserInputPtr input;
12660
0
    xmlChar *uri = NULL;
12661
12662
0
    ctxt = xmlNewParserCtxt();
12663
0
    if (ctxt == NULL)
12664
0
  return(NULL);
12665
12666
0
    if (base != NULL) {
12667
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12668
0
            goto error;
12669
0
        if (uri != NULL)
12670
0
            URL = uri;
12671
0
    }
12672
12673
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12674
0
                            XML_RESOURCE_UNKNOWN);
12675
0
    if (input == NULL)
12676
0
        goto error;
12677
12678
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12679
0
        xmlFreeInputStream(input);
12680
0
        goto error;
12681
0
    }
12682
12683
0
    xmlFree(uri);
12684
0
    return(ctxt);
12685
12686
0
error:
12687
0
    xmlFree(uri);
12688
0
    xmlFreeParserCtxt(ctxt);
12689
0
    return(NULL);
12690
0
}
12691
12692
/************************************************************************
12693
 *                  *
12694
 *    Front ends when parsing from a file     *
12695
 *                  *
12696
 ************************************************************************/
12697
12698
/**
12699
 * xmlCreateURLParserCtxt:
12700
 * @filename:  the filename or URL
12701
 * @options:  a combination of xmlParserOption
12702
 *
12703
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12704
 *
12705
 * Create a parser context for a file or URL content.
12706
 * Automatic support for ZLIB/Compress compressed document is provided
12707
 * by default if found at compile-time and for file accesses
12708
 *
12709
 * Returns the new parser context or NULL
12710
 */
12711
xmlParserCtxtPtr
12712
xmlCreateURLParserCtxt(const char *filename, int options)
12713
0
{
12714
0
    xmlParserCtxtPtr ctxt;
12715
0
    xmlParserInputPtr input;
12716
12717
0
    ctxt = xmlNewParserCtxt();
12718
0
    if (ctxt == NULL)
12719
0
  return(NULL);
12720
12721
0
    options |= XML_PARSE_UNZIP;
12722
12723
0
    xmlCtxtUseOptions(ctxt, options);
12724
0
    ctxt->linenumbers = 1;
12725
12726
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12727
0
    if (input == NULL) {
12728
0
  xmlFreeParserCtxt(ctxt);
12729
0
  return(NULL);
12730
0
    }
12731
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12732
0
        xmlFreeInputStream(input);
12733
0
        xmlFreeParserCtxt(ctxt);
12734
0
        return(NULL);
12735
0
    }
12736
12737
0
    return(ctxt);
12738
0
}
12739
12740
/**
12741
 * xmlCreateFileParserCtxt:
12742
 * @filename:  the filename
12743
 *
12744
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12745
 *
12746
 * Create a parser context for a file content.
12747
 * Automatic support for ZLIB/Compress compressed document is provided
12748
 * by default if found at compile-time.
12749
 *
12750
 * Returns the new parser context or NULL
12751
 */
12752
xmlParserCtxtPtr
12753
xmlCreateFileParserCtxt(const char *filename)
12754
0
{
12755
0
    return(xmlCreateURLParserCtxt(filename, 0));
12756
0
}
12757
12758
#ifdef LIBXML_SAX1_ENABLED
12759
/**
12760
 * xmlSAXParseFileWithData:
12761
 * @sax:  the SAX handler block
12762
 * @filename:  the filename
12763
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12764
 *             documents
12765
 * @data:  the userdata
12766
 *
12767
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12768
 *
12769
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12770
 * compressed document is provided by default if found at compile-time.
12771
 * It use the given SAX function block to handle the parsing callback.
12772
 * If sax is NULL, fallback to the default DOM tree building routines.
12773
 *
12774
 * User data (void *) is stored within the parser context in the
12775
 * context's _private member, so it is available nearly everywhere in libxml
12776
 *
12777
 * Returns the resulting document tree
12778
 */
12779
12780
xmlDocPtr
12781
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12782
0
                        int recovery, void *data) {
12783
0
    xmlDocPtr ret = NULL;
12784
0
    xmlParserCtxtPtr ctxt;
12785
0
    xmlParserInputPtr input;
12786
12787
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12788
0
    if (ctxt == NULL)
12789
0
  return(NULL);
12790
12791
0
    if (data != NULL)
12792
0
  ctxt->_private = data;
12793
12794
0
    if (recovery) {
12795
0
        ctxt->options |= XML_PARSE_RECOVER;
12796
0
        ctxt->recovery = 1;
12797
0
    }
12798
12799
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12800
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12801
0
    else
12802
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12803
12804
0
    if (input != NULL)
12805
0
        ret = xmlCtxtParseDocument(ctxt, input);
12806
12807
0
    xmlFreeParserCtxt(ctxt);
12808
0
    return(ret);
12809
0
}
12810
12811
/**
12812
 * xmlSAXParseFile:
12813
 * @sax:  the SAX handler block
12814
 * @filename:  the filename
12815
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12816
 *             documents
12817
 *
12818
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12819
 *
12820
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12821
 * compressed document is provided by default if found at compile-time.
12822
 * It use the given SAX function block to handle the parsing callback.
12823
 * If sax is NULL, fallback to the default DOM tree building routines.
12824
 *
12825
 * Returns the resulting document tree
12826
 */
12827
12828
xmlDocPtr
12829
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12830
0
                          int recovery) {
12831
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12832
0
}
12833
12834
/**
12835
 * xmlRecoverDoc:
12836
 * @cur:  a pointer to an array of xmlChar
12837
 *
12838
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
12839
 *
12840
 * parse an XML in-memory document and build a tree.
12841
 * In the case the document is not Well Formed, a attempt to build a
12842
 * tree is tried anyway
12843
 *
12844
 * Returns the resulting document tree or NULL in case of failure
12845
 */
12846
12847
xmlDocPtr
12848
0
xmlRecoverDoc(const xmlChar *cur) {
12849
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12850
0
}
12851
12852
/**
12853
 * xmlParseFile:
12854
 * @filename:  the filename
12855
 *
12856
 * DEPRECATED: Use xmlReadFile.
12857
 *
12858
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12859
 * compressed document is provided by default if found at compile-time.
12860
 *
12861
 * Returns the resulting document tree if the file was wellformed,
12862
 * NULL otherwise.
12863
 */
12864
12865
xmlDocPtr
12866
0
xmlParseFile(const char *filename) {
12867
0
    return(xmlSAXParseFile(NULL, filename, 0));
12868
0
}
12869
12870
/**
12871
 * xmlRecoverFile:
12872
 * @filename:  the filename
12873
 *
12874
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
12875
 *
12876
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12877
 * compressed document is provided by default if found at compile-time.
12878
 * In the case the document is not Well Formed, it attempts to build
12879
 * a tree anyway
12880
 *
12881
 * Returns the resulting document tree or NULL in case of failure
12882
 */
12883
12884
xmlDocPtr
12885
0
xmlRecoverFile(const char *filename) {
12886
0
    return(xmlSAXParseFile(NULL, filename, 1));
12887
0
}
12888
12889
12890
/**
12891
 * xmlSetupParserForBuffer:
12892
 * @ctxt:  an XML parser context
12893
 * @buffer:  a xmlChar * buffer
12894
 * @filename:  a file name
12895
 *
12896
 * DEPRECATED: Don't use.
12897
 *
12898
 * Setup the parser context to parse a new buffer; Clears any prior
12899
 * contents from the parser context. The buffer parameter must not be
12900
 * NULL, but the filename parameter can be
12901
 */
12902
void
12903
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12904
                             const char* filename)
12905
0
{
12906
0
    xmlParserInputPtr input;
12907
12908
0
    if ((ctxt == NULL) || (buffer == NULL))
12909
0
        return;
12910
12911
0
    xmlClearParserCtxt(ctxt);
12912
12913
0
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12914
0
                                      NULL, 0);
12915
0
    if (input == NULL)
12916
0
        return;
12917
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
12918
0
        xmlFreeInputStream(input);
12919
0
}
12920
12921
/**
12922
 * xmlSAXUserParseFile:
12923
 * @sax:  a SAX handler
12924
 * @user_data:  The user data returned on SAX callbacks
12925
 * @filename:  a file name
12926
 *
12927
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12928
 *
12929
 * parse an XML file and call the given SAX handler routines.
12930
 * Automatic support for ZLIB/Compress compressed document is provided
12931
 *
12932
 * Returns 0 in case of success or a error number otherwise
12933
 */
12934
int
12935
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12936
0
                    const char *filename) {
12937
0
    int ret = 0;
12938
0
    xmlParserCtxtPtr ctxt;
12939
12940
0
    ctxt = xmlCreateFileParserCtxt(filename);
12941
0
    if (ctxt == NULL) return -1;
12942
0
    if (sax != NULL) {
12943
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12944
0
            *ctxt->sax = *sax;
12945
0
        } else {
12946
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12947
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12948
0
        }
12949
0
  ctxt->userData = user_data;
12950
0
    }
12951
12952
0
    xmlParseDocument(ctxt);
12953
12954
0
    if (ctxt->wellFormed)
12955
0
  ret = 0;
12956
0
    else {
12957
0
        if (ctxt->errNo != 0)
12958
0
      ret = ctxt->errNo;
12959
0
  else
12960
0
      ret = -1;
12961
0
    }
12962
0
    if (ctxt->myDoc != NULL) {
12963
0
        xmlFreeDoc(ctxt->myDoc);
12964
0
  ctxt->myDoc = NULL;
12965
0
    }
12966
0
    xmlFreeParserCtxt(ctxt);
12967
12968
0
    return ret;
12969
0
}
12970
#endif /* LIBXML_SAX1_ENABLED */
12971
12972
/************************************************************************
12973
 *                  *
12974
 *    Front ends when parsing from memory     *
12975
 *                  *
12976
 ************************************************************************/
12977
12978
/**
12979
 * xmlCreateMemoryParserCtxt:
12980
 * @buffer:  a pointer to a char array
12981
 * @size:  the size of the array
12982
 *
12983
 * Create a parser context for an XML in-memory document. The input buffer
12984
 * must not contain a terminating null byte.
12985
 *
12986
 * Returns the new parser context or NULL
12987
 */
12988
xmlParserCtxtPtr
12989
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12990
0
    xmlParserCtxtPtr ctxt;
12991
0
    xmlParserInputPtr input;
12992
12993
0
    if (size < 0)
12994
0
  return(NULL);
12995
12996
0
    ctxt = xmlNewParserCtxt();
12997
0
    if (ctxt == NULL)
12998
0
  return(NULL);
12999
13000
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
13001
0
    if (input == NULL) {
13002
0
  xmlFreeParserCtxt(ctxt);
13003
0
  return(NULL);
13004
0
    }
13005
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13006
0
        xmlFreeInputStream(input);
13007
0
        xmlFreeParserCtxt(ctxt);
13008
0
        return(NULL);
13009
0
    }
13010
13011
0
    return(ctxt);
13012
0
}
13013
13014
#ifdef LIBXML_SAX1_ENABLED
13015
/**
13016
 * xmlSAXParseMemoryWithData:
13017
 * @sax:  the SAX handler block
13018
 * @buffer:  an pointer to a char array
13019
 * @size:  the size of the array
13020
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13021
 *             documents
13022
 * @data:  the userdata
13023
 *
13024
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13025
 *
13026
 * parse an XML in-memory block and use the given SAX function block
13027
 * to handle the parsing callback. If sax is NULL, fallback to the default
13028
 * DOM tree building routines.
13029
 *
13030
 * User data (void *) is stored within the parser context in the
13031
 * context's _private member, so it is available nearly everywhere in libxml
13032
 *
13033
 * Returns the resulting document tree
13034
 */
13035
13036
xmlDocPtr
13037
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13038
0
                          int size, int recovery, void *data) {
13039
0
    xmlDocPtr ret = NULL;
13040
0
    xmlParserCtxtPtr ctxt;
13041
0
    xmlParserInputPtr input;
13042
13043
0
    if (size < 0)
13044
0
        return(NULL);
13045
13046
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
13047
0
    if (ctxt == NULL)
13048
0
        return(NULL);
13049
13050
0
    if (data != NULL)
13051
0
  ctxt->_private=data;
13052
13053
0
    if (recovery) {
13054
0
        ctxt->options |= XML_PARSE_RECOVER;
13055
0
        ctxt->recovery = 1;
13056
0
    }
13057
13058
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
13059
0
                                      XML_INPUT_BUF_STATIC);
13060
13061
0
    if (input != NULL)
13062
0
        ret = xmlCtxtParseDocument(ctxt, input);
13063
13064
0
    xmlFreeParserCtxt(ctxt);
13065
0
    return(ret);
13066
0
}
13067
13068
/**
13069
 * xmlSAXParseMemory:
13070
 * @sax:  the SAX handler block
13071
 * @buffer:  an pointer to a char array
13072
 * @size:  the size of the array
13073
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13074
 *             documents
13075
 *
13076
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13077
 *
13078
 * parse an XML in-memory block and use the given SAX function block
13079
 * to handle the parsing callback. If sax is NULL, fallback to the default
13080
 * DOM tree building routines.
13081
 *
13082
 * Returns the resulting document tree
13083
 */
13084
xmlDocPtr
13085
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13086
0
            int size, int recovery) {
13087
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13088
0
}
13089
13090
/**
13091
 * xmlParseMemory:
13092
 * @buffer:  an pointer to a char array
13093
 * @size:  the size of the array
13094
 *
13095
 * DEPRECATED: Use xmlReadMemory.
13096
 *
13097
 * parse an XML in-memory block and build a tree.
13098
 *
13099
 * Returns the resulting document tree
13100
 */
13101
13102
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13103
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
13104
0
}
13105
13106
/**
13107
 * xmlRecoverMemory:
13108
 * @buffer:  an pointer to a char array
13109
 * @size:  the size of the array
13110
 *
13111
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13112
 *
13113
 * parse an XML in-memory block and build a tree.
13114
 * In the case the document is not Well Formed, an attempt to
13115
 * build a tree is tried anyway
13116
 *
13117
 * Returns the resulting document tree or NULL in case of error
13118
 */
13119
13120
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13121
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
13122
0
}
13123
13124
/**
13125
 * xmlSAXUserParseMemory:
13126
 * @sax:  a SAX handler
13127
 * @user_data:  The user data returned on SAX callbacks
13128
 * @buffer:  an in-memory XML document input
13129
 * @size:  the length of the XML document in bytes
13130
 *
13131
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13132
 *
13133
 * parse an XML in-memory buffer and call the given SAX handler routines.
13134
 *
13135
 * Returns 0 in case of success or a error number otherwise
13136
 */
13137
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13138
0
        const char *buffer, int size) {
13139
0
    int ret = 0;
13140
0
    xmlParserCtxtPtr ctxt;
13141
13142
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13143
0
    if (ctxt == NULL) return -1;
13144
0
    if (sax != NULL) {
13145
0
        if (sax->initialized == XML_SAX2_MAGIC) {
13146
0
            *ctxt->sax = *sax;
13147
0
        } else {
13148
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
13149
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
13150
0
        }
13151
0
  ctxt->userData = user_data;
13152
0
    }
13153
13154
0
    xmlParseDocument(ctxt);
13155
13156
0
    if (ctxt->wellFormed)
13157
0
  ret = 0;
13158
0
    else {
13159
0
        if (ctxt->errNo != 0)
13160
0
      ret = ctxt->errNo;
13161
0
  else
13162
0
      ret = -1;
13163
0
    }
13164
0
    if (ctxt->myDoc != NULL) {
13165
0
        xmlFreeDoc(ctxt->myDoc);
13166
0
  ctxt->myDoc = NULL;
13167
0
    }
13168
0
    xmlFreeParserCtxt(ctxt);
13169
13170
0
    return ret;
13171
0
}
13172
#endif /* LIBXML_SAX1_ENABLED */
13173
13174
/**
13175
 * xmlCreateDocParserCtxt:
13176
 * @str:  a pointer to an array of xmlChar
13177
 *
13178
 * Creates a parser context for an XML in-memory document.
13179
 *
13180
 * Returns the new parser context or NULL
13181
 */
13182
xmlParserCtxtPtr
13183
0
xmlCreateDocParserCtxt(const xmlChar *str) {
13184
0
    xmlParserCtxtPtr ctxt;
13185
0
    xmlParserInputPtr input;
13186
13187
0
    ctxt = xmlNewParserCtxt();
13188
0
    if (ctxt == NULL)
13189
0
  return(NULL);
13190
13191
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
13192
0
    if (input == NULL) {
13193
0
  xmlFreeParserCtxt(ctxt);
13194
0
  return(NULL);
13195
0
    }
13196
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13197
0
        xmlFreeInputStream(input);
13198
0
        xmlFreeParserCtxt(ctxt);
13199
0
        return(NULL);
13200
0
    }
13201
13202
0
    return(ctxt);
13203
0
}
13204
13205
#ifdef LIBXML_SAX1_ENABLED
13206
/**
13207
 * xmlSAXParseDoc:
13208
 * @sax:  the SAX handler block
13209
 * @cur:  a pointer to an array of xmlChar
13210
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13211
 *             documents
13212
 *
13213
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13214
 *
13215
 * parse an XML in-memory document and build a tree.
13216
 * It use the given SAX function block to handle the parsing callback.
13217
 * If sax is NULL, fallback to the default DOM tree building routines.
13218
 *
13219
 * Returns the resulting document tree
13220
 */
13221
13222
xmlDocPtr
13223
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13224
0
    xmlDocPtr ret;
13225
0
    xmlParserCtxtPtr ctxt;
13226
0
    xmlSAXHandlerPtr oldsax = NULL;
13227
13228
0
    if (cur == NULL) return(NULL);
13229
13230
13231
0
    ctxt = xmlCreateDocParserCtxt(cur);
13232
0
    if (ctxt == NULL) return(NULL);
13233
0
    if (sax != NULL) {
13234
0
        oldsax = ctxt->sax;
13235
0
        ctxt->sax = sax;
13236
0
        ctxt->userData = NULL;
13237
0
    }
13238
13239
0
    xmlParseDocument(ctxt);
13240
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13241
0
    else {
13242
0
       ret = NULL;
13243
0
       xmlFreeDoc(ctxt->myDoc);
13244
0
       ctxt->myDoc = NULL;
13245
0
    }
13246
0
    if (sax != NULL)
13247
0
  ctxt->sax = oldsax;
13248
0
    xmlFreeParserCtxt(ctxt);
13249
13250
0
    return(ret);
13251
0
}
13252
13253
/**
13254
 * xmlParseDoc:
13255
 * @cur:  a pointer to an array of xmlChar
13256
 *
13257
 * DEPRECATED: Use xmlReadDoc.
13258
 *
13259
 * parse an XML in-memory document and build a tree.
13260
 *
13261
 * Returns the resulting document tree
13262
 */
13263
13264
xmlDocPtr
13265
0
xmlParseDoc(const xmlChar *cur) {
13266
0
    return(xmlSAXParseDoc(NULL, cur, 0));
13267
0
}
13268
#endif /* LIBXML_SAX1_ENABLED */
13269
13270
/************************************************************************
13271
 *                  *
13272
 *  New set (2.6.0) of simpler and more flexible APIs   *
13273
 *                  *
13274
 ************************************************************************/
13275
13276
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string with xmlFree unless it is NULL or owned by the "dict"
 * dictionary in the current scope. When "dict" is NULL, every non-NULL
 * string is freed.
 *
 * The body is wrapped in do/while(0) so that the expansion is exactly
 * one statement: "DICT_FREE(x);" can be used as the unbraced body of an
 * if/else without capturing the else branch or leaving a stray empty
 * statement. Note that @str is evaluated more than once.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
13287
13288
/**
13289
 * xmlCtxtReset:
13290
 * @ctxt: an XML parser context
13291
 *
13292
 * Reset a parser context
13293
 */
13294
void
13295
xmlCtxtReset(xmlParserCtxtPtr ctxt)
13296
6.01k
{
13297
6.01k
    xmlParserInputPtr input;
13298
6.01k
    xmlDictPtr dict;
13299
13300
6.01k
    if (ctxt == NULL)
13301
0
        return;
13302
13303
6.01k
    dict = ctxt->dict;
13304
13305
6.01k
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
13306
0
        xmlFreeInputStream(input);
13307
0
    }
13308
6.01k
    ctxt->inputNr = 0;
13309
6.01k
    ctxt->input = NULL;
13310
13311
6.01k
    ctxt->spaceNr = 0;
13312
6.01k
    if (ctxt->spaceTab != NULL) {
13313
6.01k
  ctxt->spaceTab[0] = -1;
13314
6.01k
  ctxt->space = &ctxt->spaceTab[0];
13315
6.01k
    } else {
13316
0
        ctxt->space = NULL;
13317
0
    }
13318
13319
13320
6.01k
    ctxt->nodeNr = 0;
13321
6.01k
    ctxt->node = NULL;
13322
13323
6.01k
    ctxt->nameNr = 0;
13324
6.01k
    ctxt->name = NULL;
13325
13326
6.01k
    ctxt->nsNr = 0;
13327
6.01k
    xmlParserNsReset(ctxt->nsdb);
13328
13329
6.01k
    DICT_FREE(ctxt->version);
13330
6.01k
    ctxt->version = NULL;
13331
6.01k
    DICT_FREE(ctxt->encoding);
13332
6.01k
    ctxt->encoding = NULL;
13333
6.01k
    DICT_FREE(ctxt->extSubURI);
13334
6.01k
    ctxt->extSubURI = NULL;
13335
6.01k
    DICT_FREE(ctxt->extSubSystem);
13336
6.01k
    ctxt->extSubSystem = NULL;
13337
13338
6.01k
    if (ctxt->directory != NULL) {
13339
0
        xmlFree(ctxt->directory);
13340
0
        ctxt->directory = NULL;
13341
0
    }
13342
13343
6.01k
    if (ctxt->myDoc != NULL)
13344
0
        xmlFreeDoc(ctxt->myDoc);
13345
6.01k
    ctxt->myDoc = NULL;
13346
13347
6.01k
    ctxt->standalone = -1;
13348
6.01k
    ctxt->hasExternalSubset = 0;
13349
6.01k
    ctxt->hasPErefs = 0;
13350
6.01k
    ctxt->html = 0;
13351
6.01k
    ctxt->instate = XML_PARSER_START;
13352
13353
6.01k
    ctxt->wellFormed = 1;
13354
6.01k
    ctxt->nsWellFormed = 1;
13355
6.01k
    ctxt->disableSAX = 0;
13356
6.01k
    ctxt->valid = 1;
13357
6.01k
    ctxt->record_info = 0;
13358
6.01k
    ctxt->checkIndex = 0;
13359
6.01k
    ctxt->endCheckState = 0;
13360
6.01k
    ctxt->inSubset = 0;
13361
6.01k
    ctxt->errNo = XML_ERR_OK;
13362
6.01k
    ctxt->depth = 0;
13363
6.01k
    ctxt->catalogs = NULL;
13364
6.01k
    ctxt->sizeentities = 0;
13365
6.01k
    ctxt->sizeentcopy = 0;
13366
6.01k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13367
13368
6.01k
    if (ctxt->attsDefault != NULL) {
13369
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13370
0
        ctxt->attsDefault = NULL;
13371
0
    }
13372
6.01k
    if (ctxt->attsSpecial != NULL) {
13373
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13374
0
        ctxt->attsSpecial = NULL;
13375
0
    }
13376
13377
6.01k
#ifdef LIBXML_CATALOG_ENABLED
13378
6.01k
    if (ctxt->catalogs != NULL)
13379
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13380
6.01k
#endif
13381
6.01k
    ctxt->nbErrors = 0;
13382
6.01k
    ctxt->nbWarnings = 0;
13383
6.01k
    if (ctxt->lastError.code != XML_ERR_OK)
13384
0
        xmlResetError(&ctxt->lastError);
13385
6.01k
}
13386
13387
/**
13388
 * xmlCtxtResetPush:
13389
 * @ctxt: an XML parser context
13390
 * @chunk:  a pointer to an array of chars
13391
 * @size:  number of chars in the array
13392
 * @filename:  an optional file name or URI
13393
 * @encoding:  the document encoding, or NULL
13394
 *
13395
 * Reset a push parser context
13396
 *
13397
 * Returns 0 in case of success and 1 in case of error
13398
 */
13399
int
13400
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13401
                 int size, const char *filename, const char *encoding)
13402
0
{
13403
0
    xmlParserInputPtr input;
13404
13405
0
    if (ctxt == NULL)
13406
0
        return(1);
13407
13408
0
    xmlCtxtReset(ctxt);
13409
13410
0
    input = xmlNewPushInput(filename, chunk, size);
13411
0
    if (input == NULL)
13412
0
        return(1);
13413
13414
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13415
0
        xmlFreeInputStream(input);
13416
0
        return(1);
13417
0
    }
13418
13419
0
    if (encoding != NULL)
13420
0
        xmlSwitchEncodingName(ctxt, encoding);
13421
13422
0
    return(0);
13423
0
}
13424
13425
static int
13426
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13427
250k
{
13428
250k
    int allMask;
13429
13430
250k
    if (ctxt == NULL)
13431
0
        return(-1);
13432
13433
    /*
13434
     * XInclude options aren't handled by the parser.
13435
     *
13436
     * XML_PARSE_XINCLUDE
13437
     * XML_PARSE_NOXINCNODE
13438
     * XML_PARSE_NOBASEFIX
13439
     */
13440
250k
    allMask = XML_PARSE_RECOVER |
13441
250k
              XML_PARSE_NOENT |
13442
250k
              XML_PARSE_DTDLOAD |
13443
250k
              XML_PARSE_DTDATTR |
13444
250k
              XML_PARSE_DTDVALID |
13445
250k
              XML_PARSE_NOERROR |
13446
250k
              XML_PARSE_NOWARNING |
13447
250k
              XML_PARSE_PEDANTIC |
13448
250k
              XML_PARSE_NOBLANKS |
13449
250k
#ifdef LIBXML_SAX1_ENABLED
13450
250k
              XML_PARSE_SAX1 |
13451
250k
#endif
13452
250k
              XML_PARSE_NONET |
13453
250k
              XML_PARSE_NODICT |
13454
250k
              XML_PARSE_NSCLEAN |
13455
250k
              XML_PARSE_NOCDATA |
13456
250k
              XML_PARSE_COMPACT |
13457
250k
              XML_PARSE_OLD10 |
13458
250k
              XML_PARSE_HUGE |
13459
250k
              XML_PARSE_OLDSAX |
13460
250k
              XML_PARSE_IGNORE_ENC |
13461
250k
              XML_PARSE_BIG_LINES |
13462
250k
              XML_PARSE_NO_XXE |
13463
250k
              XML_PARSE_UNZIP |
13464
250k
              XML_PARSE_NO_SYS_CATALOG |
13465
250k
              XML_PARSE_CATALOG_PI;
13466
13467
250k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13468
13469
    /*
13470
     * For some options, struct members are historically the source
13471
     * of truth. The values are initalized from global variables and
13472
     * old code could also modify them directly. Several older API
13473
     * functions that don't take an options argument rely on these
13474
     * deprecated mechanisms.
13475
     *
13476
     * Once public access to struct members and the globals are
13477
     * disabled, we can use the options bitmask as source of
13478
     * truth, making all these struct members obsolete.
13479
     *
13480
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13481
     * loading of the external subset.
13482
     */
13483
250k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13484
250k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13485
250k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13486
250k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13487
250k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13488
250k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13489
250k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13490
250k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13491
13492
250k
    if (options & XML_PARSE_HUGE) {
13493
244k
        if (ctxt->dict != NULL)
13494
244k
            xmlDictSetLimit(ctxt->dict, 0);
13495
244k
    }
13496
13497
250k
    ctxt->linenumbers = 1;
13498
13499
250k
    return(options & ~allMask);
13500
250k
}
13501
13502
/**
13503
 * xmlCtxtSetOptions:
13504
 * @ctxt: an XML parser context
13505
 * @options:  a bitmask of xmlParserOption values
13506
 *
13507
 * Applies the options to the parser context. Unset options are
13508
 * cleared.
13509
 *
13510
 * Available since 2.13.0. With older versions, you can use
13511
 * xmlCtxtUseOptions.
13512
 *
13513
 * XML_PARSE_RECOVER
13514
 *
13515
 * Enable "recovery" mode which allows non-wellformed documents.
13516
 * How this mode behaves exactly is unspecified and may change
13517
 * without further notice. Use of this feature is DISCOURAGED.
13518
 *
13519
 * Not supported by the push parser.
13520
 *
13521
 * XML_PARSE_NOENT
13522
 *
13523
 * Despite the confusing name, this option enables substitution
13524
 * of entities. The resulting tree won't contain any entity
13525
 * reference nodes.
13526
 *
13527
 * This option also enables loading of external entities (both
13528
 * general and parameter entities) which is dangerous. If you
13529
 * process untrusted data, it's recommended to set the
13530
 * XML_PARSE_NO_XXE option to disable loading of external
13531
 * entities.
13532
 *
13533
 * XML_PARSE_DTDLOAD
13534
 *
13535
 * Enables loading of an external DTD and the loading and
13536
 * substitution of external parameter entities. Has no effect
13537
 * if XML_PARSE_NO_XXE is set.
13538
 *
13539
 * XML_PARSE_DTDATTR
13540
 *
13541
 * Adds default attributes from the DTD to the result document.
13542
 *
13543
 * Implies XML_PARSE_DTDLOAD, but loading of external content
13544
 * can be disabled with XML_PARSE_NO_XXE.
13545
 *
13546
 * XML_PARSE_DTDVALID
13547
 *
13548
 * This option enables DTD validation which requires loading
13549
 * external DTDs and external entities (both general and
13550
 * parameter entities) unless XML_PARSE_NO_XXE was set.
13551
 *
13552
 * XML_PARSE_NO_XXE
13553
 *
13554
 * Disables loading of external DTDs or entities.
13555
 *
13556
 * Available since 2.13.0.
13557
 *
13558
 * XML_PARSE_NOERROR
13559
 *
13560
 * Disable error and warning reports to the error handlers.
13561
 * Errors are still accessible with xmlCtxtGetLastError.
13562
 *
13563
 * XML_PARSE_NOWARNING
13564
 *
13565
 * Disable warning reports.
13566
 *
13567
 * XML_PARSE_PEDANTIC
13568
 *
13569
 * Enable some pedantic warnings.
13570
 *
13571
 * XML_PARSE_NOBLANKS
13572
 *
13573
 * Remove some whitespace from the result document. Where to
13574
 * remove whitespace depends on DTD element declarations or a
13575
 * broken heuristic with unfixable bugs. Use of this option is
13576
 * DISCOURAGED.
13577
 *
13578
 * Not supported by the push parser.
13579
 *
13580
 * XML_PARSE_SAX1
13581
 *
13582
 * Always invoke the deprecated SAX1 startElement and endElement
13583
 * handlers. This option is DEPRECATED.
13584
 *
13585
 * XML_PARSE_NONET
13586
 *
13587
 * Disable network access with the builtin HTTP client.
13588
 *
13589
 * XML_PARSE_NODICT
13590
 *
13591
 * Create a document without interned strings, making all
13592
 * strings separate memory allocations.
13593
 *
13594
 * XML_PARSE_NSCLEAN
13595
 *
13596
 * Remove redundant namespace declarations from the result
13597
 * document.
13598
 *
13599
 * XML_PARSE_NOCDATA
13600
 *
13601
 * Output normal text nodes instead of CDATA nodes.
13602
 *
13603
 * XML_PARSE_COMPACT
13604
 *
13605
 * Store small strings directly in the node struct to save
13606
 * memory.
13607
 *
13608
 * XML_PARSE_OLD10
13609
 *
13610
 * Use old Name productions from before XML 1.0 Fifth Edition.
13611
 * This option is DEPRECATED.
13612
 *
13613
 * XML_PARSE_HUGE
13614
 *
13615
 * Relax some internal limits.
13616
 *
13617
 * Maximum size of text nodes, tags, comments, processing instructions,
13618
 * CDATA sections, entity values
13619
 *
13620
 * normal: 10M
13621
 * huge:    1B
13622
 *
13623
 * Maximum size of names, system literals, pubid literals
13624
 *
13625
 * normal: 50K
13626
 * huge:   10M
13627
 *
13628
 * Maximum nesting depth of elements
13629
 *
13630
 * normal:  256
13631
 * huge:   2048
13632
 *
13633
 * Maximum nesting depth of entities
13634
 *
13635
 * normal: 20
13636
 * huge:   40
13637
 *
13638
 * XML_PARSE_OLDSAX
13639
 *
13640
 * Enable an unspecified legacy mode for SAX parsers. This
13641
 * option is DEPRECATED.
13642
 *
13643
 * XML_PARSE_IGNORE_ENC
13644
 *
13645
 * Ignore the encoding in the XML declaration. This option is
13646
 * mostly unneeded these days. The only effect is to enforce
13647
 * UTF-8 decoding of ASCII-like data.
13648
 *
13649
 * XML_PARSE_BIG_LINES
13650
 *
13651
 * Enable reporting of line numbers larger than 65535.
13652
 *
13653
 * XML_PARSE_UNZIP
13654
 *
13655
 * Enable input decompression. Setting this option is discouraged
13656
 * to avoid zip bombs.
13657
 *
13658
 * Available since 2.14.0.
13659
 *
13660
 * XML_PARSE_NO_SYS_CATALOG
13661
 *
13662
 * Disables the global system XML catalog.
13663
 *
13664
 * Available since 2.14.0.
13665
 *
13666
 * XML_PARSE_CATALOG_PI
13667
 *
13668
 * Enable XML catalog processing instructions.
13669
 *
13670
 * Available since 2.14.0.
13671
 *
13672
 * Returns 0 in case of success, the set of unknown or unimplemented options
13673
 *         in case of error.
13674
 */
13675
int
13676
xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
13677
0
{
13678
0
#ifdef LIBXML_HTML_ENABLED
13679
0
    if ((ctxt != NULL) && (ctxt->html))
13680
0
        return(htmlCtxtSetOptions(ctxt, options));
13681
0
#endif
13682
13683
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13684
0
}
13685
13686
/**
13687
 * xmlCtxtGetOptions:
13688
 * @ctxt: an XML parser context
13689
 *
13690
 * Get the current options of the parser context.
13691
 *
13692
 * Available since 2.14.0.
13693
 *
13694
 * Returns the current options set in the parser context, or -1 if ctxt is NULL.
13695
 */
13696
int
13697
xmlCtxtGetOptions(xmlParserCtxtPtr ctxt)
13698
0
{
13699
0
    if (ctxt == NULL)
13700
0
        return(-1);
13701
13702
0
    return(ctxt->options);
13703
0
}
13704
13705
/**
13706
 * xmlCtxtUseOptions:
13707
 * @ctxt: an XML parser context
13708
 * @options:  a combination of xmlParserOption
13709
 *
13710
 * DEPRECATED: Use xmlCtxtSetOptions.
13711
 *
13712
 * Applies the options to the parser context. The following options
13713
 * are never cleared and can only be enabled:
13714
 *
13715
 * XML_PARSE_NOERROR
13716
 * XML_PARSE_NOWARNING
13717
 * XML_PARSE_NONET
13718
 * XML_PARSE_NSCLEAN
13719
 * XML_PARSE_NOCDATA
13720
 * XML_PARSE_COMPACT
13721
 * XML_PARSE_OLD10
13722
 * XML_PARSE_HUGE
13723
 * XML_PARSE_OLDSAX
13724
 * XML_PARSE_IGNORE_ENC
13725
 * XML_PARSE_BIG_LINES
13726
 *
13727
 * Returns 0 in case of success, the set of unknown or unimplemented options
13728
 *         in case of error.
13729
 */
13730
int
13731
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13732
250k
{
13733
250k
    int keepMask;
13734
13735
250k
#ifdef LIBXML_HTML_ENABLED
13736
250k
    if ((ctxt != NULL) && (ctxt->html))
13737
0
        return(htmlCtxtUseOptions(ctxt, options));
13738
250k
#endif
13739
13740
    /*
13741
     * For historic reasons, some options can only be enabled.
13742
     */
13743
250k
    keepMask = XML_PARSE_NOERROR |
13744
250k
               XML_PARSE_NOWARNING |
13745
250k
               XML_PARSE_NONET |
13746
250k
               XML_PARSE_NSCLEAN |
13747
250k
               XML_PARSE_NOCDATA |
13748
250k
               XML_PARSE_COMPACT |
13749
250k
               XML_PARSE_OLD10 |
13750
250k
               XML_PARSE_HUGE |
13751
250k
               XML_PARSE_OLDSAX |
13752
250k
               XML_PARSE_IGNORE_ENC |
13753
250k
               XML_PARSE_BIG_LINES;
13754
13755
250k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13756
250k
}
13757
13758
/**
13759
 * xmlCtxtSetMaxAmplification:
13760
 * @ctxt: an XML parser context
13761
 * @maxAmpl:  maximum amplification factor
13762
 *
13763
 * To protect against exponential entity expansion ("billion laughs"), the
13764
 * size of serialized output is (roughly) limited to the input size
13765
 * multiplied by this factor. The default value is 5.
13766
 *
13767
 * When working with documents making heavy use of entity expansion, it can
13768
 * be necessary to increase the value. For security reasons, this should only
13769
 * be considered when processing trusted input.
13770
 */
13771
void
13772
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
13773
0
{
13774
0
    ctxt->maxAmpl = maxAmpl;
13775
0
}
13776
13777
/**
13778
 * xmlCtxtParseDocument:
13779
 * @ctxt:  an XML parser context
13780
 * @input:  parser input
13781
 *
13782
 * Parse an XML document and return the resulting document tree.
13783
 * Takes ownership of the input object.
13784
 *
13785
 * Available since 2.13.0.
13786
 *
13787
 * Returns the resulting document tree or NULL
13788
 */
13789
xmlDocPtr
13790
xmlCtxtParseDocument(xmlParserCtxtPtr ctxt, xmlParserInputPtr input)
13791
6.01k
{
13792
6.01k
    xmlDocPtr ret = NULL;
13793
13794
6.01k
    if ((ctxt == NULL) || (input == NULL)) {
13795
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
13796
0
        xmlFreeInputStream(input);
13797
0
        return(NULL);
13798
0
    }
13799
13800
    /* assert(ctxt->inputNr == 0); */
13801
6.01k
    while (ctxt->inputNr > 0)
13802
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13803
13804
6.01k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13805
0
        xmlFreeInputStream(input);
13806
0
        return(NULL);
13807
0
    }
13808
13809
6.01k
    xmlParseDocument(ctxt);
13810
13811
6.01k
    ret = xmlCtxtGetDocument(ctxt);
13812
13813
    /* assert(ctxt->inputNr == 1); */
13814
12.0k
    while (ctxt->inputNr > 0)
13815
6.01k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13816
13817
6.01k
    return(ret);
13818
6.01k
}
13819
13820
/**
13821
 * xmlReadDoc:
13822
 * @cur:  a pointer to a zero terminated string
13823
 * @URL:  base URL (optional)
13824
 * @encoding:  the document encoding (optional)
13825
 * @options:  a combination of xmlParserOption
13826
 *
13827
 * Convenience function to parse an XML document from a
13828
 * zero-terminated string.
13829
 *
13830
 * See xmlCtxtReadDoc for details.
13831
 *
13832
 * Returns the resulting document tree
13833
 */
13834
xmlDocPtr
13835
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13836
           int options)
13837
0
{
13838
0
    xmlParserCtxtPtr ctxt;
13839
0
    xmlParserInputPtr input;
13840
0
    xmlDocPtr doc = NULL;
13841
13842
0
    ctxt = xmlNewParserCtxt();
13843
0
    if (ctxt == NULL)
13844
0
        return(NULL);
13845
13846
0
    xmlCtxtUseOptions(ctxt, options);
13847
13848
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13849
0
                                      XML_INPUT_BUF_STATIC);
13850
13851
0
    if (input != NULL)
13852
0
        doc = xmlCtxtParseDocument(ctxt, input);
13853
13854
0
    xmlFreeParserCtxt(ctxt);
13855
0
    return(doc);
13856
0
}
13857
13858
/**
13859
 * xmlReadFile:
13860
 * @filename:  a file or URL
13861
 * @encoding:  the document encoding (optional)
13862
 * @options:  a combination of xmlParserOption
13863
 *
13864
 * Convenience function to parse an XML file from the filesystem,
13865
 * the network or a global user-define resource loader.
13866
 *
13867
 * This function always enables the XML_PARSE_UNZIP option for
13868
 * backward compatibility. If a "-" filename is passed, it will
13869
 * read from stdin. Both of these features are potentially
13870
 * insecure and might be removed from later versions.
13871
 *
13872
 * See xmlCtxtReadFile for details.
13873
 *
13874
 * Returns the resulting document tree
13875
 */
13876
xmlDocPtr
13877
xmlReadFile(const char *filename, const char *encoding, int options)
13878
0
{
13879
0
    xmlParserCtxtPtr ctxt;
13880
0
    xmlParserInputPtr input;
13881
0
    xmlDocPtr doc = NULL;
13882
13883
0
    ctxt = xmlNewParserCtxt();
13884
0
    if (ctxt == NULL)
13885
0
        return(NULL);
13886
13887
0
    options |= XML_PARSE_UNZIP;
13888
13889
0
    xmlCtxtUseOptions(ctxt, options);
13890
13891
    /*
13892
     * Backward compatibility for users of command line utilities like
13893
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13894
     * should be removed at some point.
13895
     */
13896
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13897
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13898
0
                                      encoding, 0);
13899
0
    else
13900
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13901
13902
0
    if (input != NULL)
13903
0
        doc = xmlCtxtParseDocument(ctxt, input);
13904
13905
0
    xmlFreeParserCtxt(ctxt);
13906
0
    return(doc);
13907
0
}
13908
13909
/**
13910
 * xmlReadMemory:
13911
 * @buffer:  a pointer to a char array
13912
 * @size:  the size of the array
13913
 * @url:  base URL (optional)
13914
 * @encoding:  the document encoding (optional)
13915
 * @options:  a combination of xmlParserOption
13916
 *
13917
 * Parse an XML in-memory document and build a tree. The input buffer must
13918
 * not contain a terminating null byte.
13919
 *
13920
 * See xmlCtxtReadMemory for details.
13921
 *
13922
 * Returns the resulting document tree
13923
 */
13924
xmlDocPtr
13925
xmlReadMemory(const char *buffer, int size, const char *url,
13926
              const char *encoding, int options)
13927
0
{
13928
0
    xmlParserCtxtPtr ctxt;
13929
0
    xmlParserInputPtr input;
13930
0
    xmlDocPtr doc = NULL;
13931
13932
0
    if (size < 0)
13933
0
  return(NULL);
13934
13935
0
    ctxt = xmlNewParserCtxt();
13936
0
    if (ctxt == NULL)
13937
0
        return(NULL);
13938
13939
0
    xmlCtxtUseOptions(ctxt, options);
13940
13941
0
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13942
0
                                      XML_INPUT_BUF_STATIC);
13943
13944
0
    if (input != NULL)
13945
0
        doc = xmlCtxtParseDocument(ctxt, input);
13946
13947
0
    xmlFreeParserCtxt(ctxt);
13948
0
    return(doc);
13949
0
}
13950
13951
/**
13952
 * xmlReadFd:
13953
 * @fd:  an open file descriptor
13954
 * @URL:  base URL (optional)
13955
 * @encoding:  the document encoding (optional)
13956
 * @options:  a combination of xmlParserOption
13957
 *
13958
 * Parse an XML from a file descriptor and build a tree.
13959
 *
13960
 * See xmlCtxtReadFd for details.
13961
 *
13962
 * NOTE that the file descriptor will not be closed when the
13963
 * context is freed or reset.
13964
 *
13965
 * Returns the resulting document tree
13966
 */
13967
xmlDocPtr
13968
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13969
0
{
13970
0
    xmlParserCtxtPtr ctxt;
13971
0
    xmlParserInputPtr input;
13972
0
    xmlDocPtr doc = NULL;
13973
13974
0
    ctxt = xmlNewParserCtxt();
13975
0
    if (ctxt == NULL)
13976
0
        return(NULL);
13977
13978
0
    xmlCtxtUseOptions(ctxt, options);
13979
13980
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13981
13982
0
    if (input != NULL)
13983
0
        doc = xmlCtxtParseDocument(ctxt, input);
13984
13985
0
    xmlFreeParserCtxt(ctxt);
13986
0
    return(doc);
13987
0
}
13988
13989
/**
13990
 * xmlReadIO:
13991
 * @ioread:  an I/O read function
13992
 * @ioclose:  an I/O close function (optional)
13993
 * @ioctx:  an I/O handler
13994
 * @URL:  base URL (optional)
13995
 * @encoding:  the document encoding (optional)
13996
 * @options:  a combination of xmlParserOption
13997
 *
13998
 * Parse an XML document from I/O functions and context and build a tree.
13999
 *
14000
 * See xmlCtxtReadIO for details.
14001
 *
14002
 * Returns the resulting document tree
14003
 */
14004
xmlDocPtr
14005
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14006
          void *ioctx, const char *URL, const char *encoding, int options)
14007
0
{
14008
0
    xmlParserCtxtPtr ctxt;
14009
0
    xmlParserInputPtr input;
14010
0
    xmlDocPtr doc = NULL;
14011
14012
0
    ctxt = xmlNewParserCtxt();
14013
0
    if (ctxt == NULL)
14014
0
        return(NULL);
14015
14016
0
    xmlCtxtUseOptions(ctxt, options);
14017
14018
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
14019
0
                                  encoding, 0);
14020
14021
0
    if (input != NULL)
14022
0
        doc = xmlCtxtParseDocument(ctxt, input);
14023
14024
0
    xmlFreeParserCtxt(ctxt);
14025
0
    return(doc);
14026
0
}
14027
14028
/**
14029
 * xmlCtxtReadDoc:
14030
 * @ctxt:  an XML parser context
14031
 * @str:  a pointer to a zero terminated string
14032
 * @URL:  base URL (optional)
14033
 * @encoding:  the document encoding (optional)
14034
 * @options:  a combination of xmlParserOption
14035
 *
14036
 * Parse an XML in-memory document and build a tree.
14037
 *
14038
 * @URL is used as base to resolve external entities and for error
14039
 * reporting.
14040
 *
14041
 * See xmlCtxtUseOptions for details.
14042
 *
14043
 * Returns the resulting document tree
14044
 */
14045
xmlDocPtr
14046
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
14047
               const char *URL, const char *encoding, int options)
14048
0
{
14049
0
    xmlParserInputPtr input;
14050
14051
0
    if (ctxt == NULL)
14052
0
        return(NULL);
14053
14054
0
    xmlCtxtReset(ctxt);
14055
0
    xmlCtxtUseOptions(ctxt, options);
14056
14057
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
14058
0
                                      XML_INPUT_BUF_STATIC);
14059
0
    if (input == NULL)
14060
0
        return(NULL);
14061
14062
0
    return(xmlCtxtParseDocument(ctxt, input));
14063
0
}
14064
14065
/**
14066
 * xmlCtxtReadFile:
14067
 * @ctxt:  an XML parser context
14068
 * @filename:  a file or URL
14069
 * @encoding:  the document encoding (optional)
14070
 * @options:  a combination of xmlParserOption
14071
 *
14072
 * Parse an XML file from the filesystem, the network or a user-defined
14073
 * resource loader.
14074
 *
14075
 * This function always enables the XML_PARSE_UNZIP option for
14076
 * backward compatibility. This feature is potentially insecure
14077
 * and might be removed from later versions.
14078
 *
14079
 * Returns the resulting document tree
14080
 */
14081
xmlDocPtr
14082
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14083
                const char *encoding, int options)
14084
0
{
14085
0
    xmlParserInputPtr input;
14086
14087
0
    if (ctxt == NULL)
14088
0
        return(NULL);
14089
14090
0
    options |= XML_PARSE_UNZIP;
14091
14092
0
    xmlCtxtReset(ctxt);
14093
0
    xmlCtxtUseOptions(ctxt, options);
14094
14095
0
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
14096
0
    if (input == NULL)
14097
0
        return(NULL);
14098
14099
0
    return(xmlCtxtParseDocument(ctxt, input));
14100
0
}
14101
14102
/**
14103
 * xmlCtxtReadMemory:
14104
 * @ctxt:  an XML parser context
14105
 * @buffer:  a pointer to a char array
14106
 * @size:  the size of the array
14107
 * @URL:  base URL (optional)
14108
 * @encoding:  the document encoding (optional)
14109
 * @options:  a combination of xmlParserOption
14110
 *
14111
 * Parse an XML in-memory document and build a tree. The input buffer must
14112
 * not contain a terminating null byte.
14113
 *
14114
 * @URL is used as base to resolve external entities and for error
14115
 * reporting.
14116
 *
14117
 * See xmlCtxtUseOptions for details.
14118
 *
14119
 * Returns the resulting document tree
14120
 */
14121
xmlDocPtr
14122
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14123
                  const char *URL, const char *encoding, int options)
14124
0
{
14125
0
    xmlParserInputPtr input;
14126
14127
0
    if ((ctxt == NULL) || (size < 0))
14128
0
        return(NULL);
14129
14130
0
    xmlCtxtReset(ctxt);
14131
0
    xmlCtxtUseOptions(ctxt, options);
14132
14133
0
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
14134
0
                                      XML_INPUT_BUF_STATIC);
14135
0
    if (input == NULL)
14136
0
        return(NULL);
14137
14138
0
    return(xmlCtxtParseDocument(ctxt, input));
14139
0
}
14140
14141
/**
14142
 * xmlCtxtReadFd:
14143
 * @ctxt:  an XML parser context
14144
 * @fd:  an open file descriptor
14145
 * @URL:  base URL (optional)
14146
 * @encoding:  the document encoding (optional)
14147
 * @options:  a combination of xmlParserOption
14148
 *
14149
 * Parse an XML document from a file descriptor and build a tree.
14150
 *
14151
 * NOTE that the file descriptor will not be closed when the
14152
 * context is freed or reset.
14153
 *
14154
 * @URL is used as base to resolve external entities and for error
14155
 * reporting.
14156
 *
14157
 * See xmlCtxtUseOptions for details.
14158
 *
14159
 * Returns the resulting document tree
14160
 */
14161
xmlDocPtr
14162
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14163
              const char *URL, const char *encoding, int options)
14164
0
{
14165
0
    xmlParserInputPtr input;
14166
14167
0
    if (ctxt == NULL)
14168
0
        return(NULL);
14169
14170
0
    xmlCtxtReset(ctxt);
14171
0
    xmlCtxtUseOptions(ctxt, options);
14172
14173
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
14174
0
    if (input == NULL)
14175
0
        return(NULL);
14176
14177
0
    return(xmlCtxtParseDocument(ctxt, input));
14178
0
}
14179
14180
/**
14181
 * xmlCtxtReadIO:
14182
 * @ctxt:  an XML parser context
14183
 * @ioread:  an I/O read function
14184
 * @ioclose:  an I/O close function
14185
 * @ioctx:  an I/O handler
14186
 * @URL:  the base URL to use for the document
14187
 * @encoding:  the document encoding, or NULL
14188
 * @options:  a combination of xmlParserOption
14189
 *
14190
 * parse an XML document from I/O functions and source and build a tree.
14191
 * This reuses the existing @ctxt parser context
14192
 *
14193
 * @URL is used as base to resolve external entities and for error
14194
 * reporting.
14195
 *
14196
 * See xmlCtxtUseOptions for details.
14197
 *
14198
 * Returns the resulting document tree
14199
 */
14200
xmlDocPtr
14201
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14202
              xmlInputCloseCallback ioclose, void *ioctx,
14203
        const char *URL,
14204
              const char *encoding, int options)
14205
6.01k
{
14206
6.01k
    xmlParserInputPtr input;
14207
14208
6.01k
    if (ctxt == NULL)
14209
0
        return(NULL);
14210
14211
6.01k
    xmlCtxtReset(ctxt);
14212
6.01k
    xmlCtxtUseOptions(ctxt, options);
14213
14214
6.01k
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
14215
6.01k
                                  encoding, 0);
14216
6.01k
    if (input == NULL)
14217
0
        return(NULL);
14218
14219
6.01k
    return(xmlCtxtParseDocument(ctxt, input));
14220
6.01k
}
14221