Coverage Report

Created: 2026-06-13 06:44

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxml2-2.14.6/parser.c
Line
Count
Source
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#include <libxml/HTMLparser.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
70
#include "private/buf.h"
71
#include "private/dict.h"
72
#include "private/entities.h"
73
#include "private/error.h"
74
#include "private/html.h"
75
#include "private/io.h"
76
#include "private/memory.h"
77
#include "private/parser.h"
78
79
9.18M
#define NS_INDEX_EMPTY  INT_MAX
80
325k
#define NS_INDEX_XML    (INT_MAX - 1)
81
3.76M
#define URI_HASH_EMPTY  0xD943A04E
82
44.8k
#define URI_HASH_XML    0xF0451F02
83
84
#ifndef STDIN_FILENO
85
0
  #define STDIN_FILENO 0
86
#endif
87
88
#ifndef SIZE_MAX
89
  #define SIZE_MAX ((size_t) -1)
90
#endif
91
92
299k
#define XML_MAX_ATTRS 100000000 /* 100 million */
93
94
struct _xmlStartTag {
95
    const xmlChar *prefix;
96
    const xmlChar *URI;
97
    int line;
98
    int nsNr;
99
};
100
101
/*
 * Extra per-namespace bookkeeping stored alongside the namespace table.
 * NOTE(review): field meanings are inferred from the names; confirm
 * against the namespace push/pop code.
 */
typedef struct {
    void *saxData;                  /* opaque data, presumably owned by SAX */
    unsigned int prefixHashValue;   /* presumably the hash of the prefix */
    unsigned int uriHashValue;      /* presumably the hash of the URI */
    unsigned int elementId;         /* presumably the declaring element's id */
    int oldIndex;                   /* presumably the shadowed binding index */
} xmlParserNsExtra;
108
109
/*
 * One slot of the namespace hash table: a hash value and an index.
 * NOTE(review): the index presumably refers to the namespace table and
 * may hold a sentinel such as NS_INDEX_EMPTY; confirm with the lookup code.
 */
typedef struct {
    unsigned int hashValue;
    int index;
} xmlParserNsBucket;
113
114
struct _xmlParserNsData {
115
    xmlParserNsExtra *extra;
116
117
    unsigned hashSize;
118
    unsigned hashElems;
119
    xmlParserNsBucket *hash;
120
121
    unsigned elementId;
122
    int defaultNsIndex;
123
    int minNsIndex;
124
};
125
126
static int
127
xmlParseElementStart(xmlParserCtxtPtr ctxt);
128
129
static void
130
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
131
132
static xmlEntityPtr
133
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
134
135
static const xmlChar *
136
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
137
138
/************************************************************************
139
 *                  *
140
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
141
 *                  *
142
 ************************************************************************/
143
144
#define XML_PARSER_BIG_ENTITY 1000
145
#define XML_PARSER_LOT_ENTITY 5000
146
147
/*
148
 * Constants for protection against abusive entity expansion
149
 * ("billion laughs").
150
 */
151
152
/*
153
 * A certain amount of entity expansion which is always allowed.
154
 */
155
799k
#define XML_PARSER_ALLOWED_EXPANSION 1000000
156
157
/*
158
 * Fixed cost for each entity reference. This crudely models processing time
159
 * as well to protect, for example, against exponential expansion of empty
160
 * or very short entities.
161
 */
162
809k
#define XML_ENT_FIXED_COST 20
163
164
87.0M
#define XML_PARSER_BIG_BUFFER_SIZE 300
165
1.33M
#define XML_PARSER_BUFFER_SIZE 100
166
206k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
167
168
/**
169
 * XML_PARSER_CHUNK_SIZE
170
 *
171
 * When calling GROW that's the minimal amount of data
172
 * the parser expects to have received. It is not a hard
173
 * limit but an optimization when reading strings like Names
174
 * It is not strictly needed as long as the input's available characters
175
 * are followed by 0, which should be provided by the I/O level
176
 */
177
#define XML_PARSER_CHUNK_SIZE 100
178
179
/**
180
 * xmlParserVersion:
181
 *
182
 * Constant string describing the internal version of the library
183
 */
184
const char *const
185
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
186
187
/*
188
 * List of XML prefixed PI allowed by W3C specs
189
 */
190
191
static const char* const xmlW3CPIs[] = {
192
    "xml-stylesheet",
193
    "xml-model",
194
    NULL
195
};
196
197
198
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
199
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
200
                                              const xmlChar **str);
201
202
static void
203
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
204
205
static int
206
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
207
208
/************************************************************************
209
 *                  *
210
 *    Some factorized error routines        *
211
 *                  *
212
 ************************************************************************/
213
214
static void
215
0
xmlErrMemory(xmlParserCtxtPtr ctxt) {
216
0
    xmlCtxtErrMemory(ctxt);
217
0
}
218
219
/**
220
 * xmlErrAttributeDup:
221
 * @ctxt:  an XML parser context
222
 * @prefix:  the attribute prefix
223
 * @localname:  the attribute localname
224
 *
225
 * Handle a redefinition of attribute error
226
 */
227
static void
228
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
229
                   const xmlChar * localname)
230
105k
{
231
105k
    if (prefix == NULL)
232
97.6k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
233
97.6k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
234
97.6k
                   "Attribute %s redefined\n", localname);
235
7.89k
    else
236
7.89k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
237
7.89k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
238
7.89k
                   "Attribute %s:%s redefined\n", prefix, localname);
239
105k
}
240
241
/**
242
 * xmlFatalErrMsg:
243
 * @ctxt:  an XML parser context
244
 * @error:  the error number
245
 * @msg:  the error message
246
 *
247
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
248
 */
249
static void LIBXML_ATTR_FORMAT(3,0)
250
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
251
               const char *msg)
252
6.94M
{
253
6.94M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
254
6.94M
               NULL, NULL, NULL, 0, "%s", msg);
255
6.94M
}
256
257
/**
258
 * xmlWarningMsg:
259
 * @ctxt:  an XML parser context
260
 * @error:  the error number
261
 * @msg:  the error message
262
 * @str1:  extra data
263
 * @str2:  extra data
264
 *
265
 * Handle a warning.
266
 */
267
void LIBXML_ATTR_FORMAT(3,0)
268
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
269
              const char *msg, const xmlChar *str1, const xmlChar *str2)
270
44.7k
{
271
44.7k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
272
44.7k
               str1, str2, NULL, 0, msg, str1, str2);
273
44.7k
}
274
275
/**
276
 * xmlValidityError:
277
 * @ctxt:  an XML parser context
278
 * @error:  the error number
279
 * @msg:  the error message
280
 * @str1:  extra data
281
 *
282
 * Handle a validity error.
283
 */
284
static void LIBXML_ATTR_FORMAT(3,0)
285
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
286
              const char *msg, const xmlChar *str1, const xmlChar *str2)
287
0
{
288
0
    ctxt->valid = 0;
289
290
0
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
291
0
               str1, str2, NULL, 0, msg, str1, str2);
292
0
}
293
294
/**
295
 * xmlFatalErrMsgInt:
296
 * @ctxt:  an XML parser context
297
 * @error:  the error number
298
 * @msg:  the error message
299
 * @val:  an integer value
300
 *
301
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
302
 */
303
static void LIBXML_ATTR_FORMAT(3,0)
304
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
305
                  const char *msg, int val)
306
3.86M
{
307
3.86M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
308
3.86M
               NULL, NULL, NULL, val, msg, val);
309
3.86M
}
310
311
/**
312
 * xmlFatalErrMsgStrIntStr:
313
 * @ctxt:  an XML parser context
314
 * @error:  the error number
315
 * @msg:  the error message
316
 * @str1:  an string info
317
 * @val:  an integer value
318
 * @str2:  an string info
319
 *
320
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
321
 */
322
static void LIBXML_ATTR_FORMAT(3,0)
323
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
324
                  const char *msg, const xmlChar *str1, int val,
325
      const xmlChar *str2)
326
551k
{
327
551k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
328
551k
               str1, str2, NULL, val, msg, str1, val, str2);
329
551k
}
330
331
/**
332
 * xmlFatalErrMsgStr:
333
 * @ctxt:  an XML parser context
334
 * @error:  the error number
335
 * @msg:  the error message
336
 * @val:  a string value
337
 *
338
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
339
 */
340
static void LIBXML_ATTR_FORMAT(3,0)
341
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
342
                  const char *msg, const xmlChar * val)
343
2.41M
{
344
2.41M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
345
2.41M
               val, NULL, NULL, 0, msg, val);
346
2.41M
}
347
348
/**
349
 * xmlErrMsgStr:
350
 * @ctxt:  an XML parser context
351
 * @error:  the error number
352
 * @msg:  the error message
353
 * @val:  a string value
354
 *
355
 * Handle a non fatal parser error
356
 */
357
static void LIBXML_ATTR_FORMAT(3,0)
358
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
359
                  const char *msg, const xmlChar * val)
360
0
{
361
0
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
362
0
               val, NULL, NULL, 0, msg, val);
363
0
}
364
365
/**
366
 * xmlNsErr:
367
 * @ctxt:  an XML parser context
368
 * @error:  the error number
369
 * @msg:  the message
370
 * @info1:  extra information string
371
 * @info2:  extra information string
372
 *
373
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
374
 */
375
static void LIBXML_ATTR_FORMAT(3,0)
376
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
377
         const char *msg,
378
         const xmlChar * info1, const xmlChar * info2,
379
         const xmlChar * info3)
380
645k
{
381
645k
    ctxt->nsWellFormed = 0;
382
383
645k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
384
645k
               info1, info2, info3, 0, msg, info1, info2, info3);
385
645k
}
386
387
/**
388
 * xmlNsWarn
389
 * @ctxt:  an XML parser context
390
 * @error:  the error number
391
 * @msg:  the message
392
 * @info1:  extra information string
393
 * @info2:  extra information string
394
 *
395
 * Handle a namespace warning error
396
 */
397
static void LIBXML_ATTR_FORMAT(3,0)
398
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
399
         const char *msg,
400
         const xmlChar * info1, const xmlChar * info2,
401
         const xmlChar * info3)
402
12.8k
{
403
12.8k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
404
12.8k
               info1, info2, info3, 0, msg, info1, info2, info3);
405
12.8k
}
406
407
/**
 * xmlSaturatedAdd:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst without wrapping around: if the sum would exceed
 * ULONG_MAX the accumulator is clamped to ULONG_MAX.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val)
{
    /* How much can still be added before overflowing. */
    const unsigned long headroom = ULONG_MAX - *dst;

    *dst = (val > headroom) ? ULONG_MAX : (*dst + val);
}
414
415
/**
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator, updated in place
 * @val:  amount to add, as a size_t
 *
 * Same as xmlSaturatedAdd() but for a size_t addend: *@dst is clamped to
 * ULONG_MAX instead of wrapping around.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val)
{
    /* How much can still be added before overflowing. */
    const unsigned long headroom = ULONG_MAX - *dst;

    if (val <= headroom) {
        *dst += val;
        return;
    }

    *dst = ULONG_MAX;
}
422
423
/**
424
 * xmlParserEntityCheck:
425
 * @ctxt:  parser context
426
 * @extra:  sum of unexpanded entity sizes
427
 *
428
 * Check for non-linear entity expansion behaviour.
429
 *
430
 * In some cases like xmlExpandEntityInAttValue, this function is called
431
 * for each, possibly nested entity and its unexpanded content length.
432
 *
433
 * In other cases like xmlParseReference, it's only called for each
434
 * top-level entity with its unexpanded content length plus the sum of
435
 * the unexpanded content lengths (plus fixed cost) of all nested
436
 * entities.
437
 *
438
 * Summing the unexpanded lengths also adds the length of the reference.
439
 * This is by design. Taking the length of the entity name into account
440
 * discourages attacks that try to waste CPU time with abusively long
441
 * entity names. See test/recurse/lol6.xml for example. Each call also
442
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
443
 * short entities.
444
 *
445
 * Returns 1 on error, 0 on success.
446
 */
447
static int
448
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
449
799k
{
450
799k
    unsigned long consumed;
451
799k
    unsigned long *expandedSize;
452
799k
    xmlParserInputPtr input = ctxt->input;
453
799k
    xmlEntityPtr entity = input->entity;
454
455
799k
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
456
175
        return(0);
457
458
    /*
459
     * Compute total consumed bytes so far, including input streams of
460
     * external entities.
461
     */
462
799k
    consumed = input->consumed;
463
799k
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
464
799k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
465
466
799k
    if (entity)
467
1.20k
        expandedSize = &entity->expandedSize;
468
798k
    else
469
798k
        expandedSize = &ctxt->sizeentcopy;
470
471
    /*
472
     * Add extra cost and some fixed cost.
473
     */
474
799k
    xmlSaturatedAdd(expandedSize, extra);
475
799k
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
476
477
    /*
478
     * It's important to always use saturation arithmetic when tracking
479
     * entity sizes to make the size checks reliable. If "sizeentcopy"
480
     * overflows, we have to abort.
481
     */
482
799k
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
483
12.2k
        ((*expandedSize >= ULONG_MAX) ||
484
12.2k
         (*expandedSize / ctxt->maxAmpl > consumed))) {
485
288
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
486
288
                       "Maximum entity amplification factor exceeded, see "
487
288
                       "xmlCtxtSetMaxAmplification.\n");
488
288
        xmlHaltParser(ctxt);
489
288
        return(1);
490
288
    }
491
492
799k
    return(0);
493
799k
}
494
495
/************************************************************************
496
 *                  *
497
 *    Library wide options          *
498
 *                  *
499
 ************************************************************************/
500
501
/**
502
  * xmlHasFeature:
503
  * @feature: the feature to be examined
504
  *
505
  * Examines if the library has been compiled with a given feature.
506
  *
507
  * Returns a non-zero value if the feature exist, otherwise zero.
508
  * Returns zero (0) if the feature does not exist or an unknown
509
  * unknown feature is requested, non-zero otherwise.
510
  */
511
int
512
xmlHasFeature(xmlFeature feature)
513
0
{
514
0
    switch (feature) {
515
0
  case XML_WITH_THREAD:
516
0
#ifdef LIBXML_THREAD_ENABLED
517
0
      return(1);
518
#else
519
      return(0);
520
#endif
521
0
        case XML_WITH_TREE:
522
0
            return(1);
523
0
        case XML_WITH_OUTPUT:
524
0
#ifdef LIBXML_OUTPUT_ENABLED
525
0
            return(1);
526
#else
527
            return(0);
528
#endif
529
0
        case XML_WITH_PUSH:
530
0
#ifdef LIBXML_PUSH_ENABLED
531
0
            return(1);
532
#else
533
            return(0);
534
#endif
535
0
        case XML_WITH_READER:
536
0
#ifdef LIBXML_READER_ENABLED
537
0
            return(1);
538
#else
539
            return(0);
540
#endif
541
0
        case XML_WITH_PATTERN:
542
0
#ifdef LIBXML_PATTERN_ENABLED
543
0
            return(1);
544
#else
545
            return(0);
546
#endif
547
0
        case XML_WITH_WRITER:
548
0
#ifdef LIBXML_WRITER_ENABLED
549
0
            return(1);
550
#else
551
            return(0);
552
#endif
553
0
        case XML_WITH_SAX1:
554
0
#ifdef LIBXML_SAX1_ENABLED
555
0
            return(1);
556
#else
557
            return(0);
558
#endif
559
0
        case XML_WITH_HTTP:
560
#ifdef LIBXML_HTTP_ENABLED
561
            return(1);
562
#else
563
0
            return(0);
564
0
#endif
565
0
        case XML_WITH_VALID:
566
0
#ifdef LIBXML_VALID_ENABLED
567
0
            return(1);
568
#else
569
            return(0);
570
#endif
571
0
        case XML_WITH_HTML:
572
0
#ifdef LIBXML_HTML_ENABLED
573
0
            return(1);
574
#else
575
            return(0);
576
#endif
577
0
        case XML_WITH_LEGACY:
578
0
            return(0);
579
0
        case XML_WITH_C14N:
580
0
#ifdef LIBXML_C14N_ENABLED
581
0
            return(1);
582
#else
583
            return(0);
584
#endif
585
0
        case XML_WITH_CATALOG:
586
0
#ifdef LIBXML_CATALOG_ENABLED
587
0
            return(1);
588
#else
589
            return(0);
590
#endif
591
0
        case XML_WITH_XPATH:
592
0
#ifdef LIBXML_XPATH_ENABLED
593
0
            return(1);
594
#else
595
            return(0);
596
#endif
597
0
        case XML_WITH_XPTR:
598
0
#ifdef LIBXML_XPTR_ENABLED
599
0
            return(1);
600
#else
601
            return(0);
602
#endif
603
0
        case XML_WITH_XINCLUDE:
604
0
#ifdef LIBXML_XINCLUDE_ENABLED
605
0
            return(1);
606
#else
607
            return(0);
608
#endif
609
0
        case XML_WITH_ICONV:
610
0
#ifdef LIBXML_ICONV_ENABLED
611
0
            return(1);
612
#else
613
            return(0);
614
#endif
615
0
        case XML_WITH_ISO8859X:
616
0
#ifdef LIBXML_ISO8859X_ENABLED
617
0
            return(1);
618
#else
619
            return(0);
620
#endif
621
0
        case XML_WITH_UNICODE:
622
0
            return(0);
623
0
        case XML_WITH_REGEXP:
624
0
#ifdef LIBXML_REGEXP_ENABLED
625
0
            return(1);
626
#else
627
            return(0);
628
#endif
629
0
        case XML_WITH_AUTOMATA:
630
0
#ifdef LIBXML_REGEXP_ENABLED
631
0
            return(1);
632
#else
633
            return(0);
634
#endif
635
0
        case XML_WITH_EXPR:
636
#ifdef LIBXML_EXPR_ENABLED
637
            return(1);
638
#else
639
0
            return(0);
640
0
#endif
641
0
        case XML_WITH_RELAXNG:
642
0
#ifdef LIBXML_RELAXNG_ENABLED
643
0
            return(1);
644
#else
645
            return(0);
646
#endif
647
0
        case XML_WITH_SCHEMAS:
648
0
#ifdef LIBXML_SCHEMAS_ENABLED
649
0
            return(1);
650
#else
651
            return(0);
652
#endif
653
0
        case XML_WITH_SCHEMATRON:
654
0
#ifdef LIBXML_SCHEMATRON_ENABLED
655
0
            return(1);
656
#else
657
            return(0);
658
#endif
659
0
        case XML_WITH_MODULES:
660
0
#ifdef LIBXML_MODULES_ENABLED
661
0
            return(1);
662
#else
663
            return(0);
664
#endif
665
0
        case XML_WITH_DEBUG:
666
0
#ifdef LIBXML_DEBUG_ENABLED
667
0
            return(1);
668
#else
669
            return(0);
670
#endif
671
0
        case XML_WITH_DEBUG_MEM:
672
0
            return(0);
673
0
        case XML_WITH_ZLIB:
674
#ifdef LIBXML_ZLIB_ENABLED
675
            return(1);
676
#else
677
0
            return(0);
678
0
#endif
679
0
        case XML_WITH_LZMA:
680
#ifdef LIBXML_LZMA_ENABLED
681
            return(1);
682
#else
683
0
            return(0);
684
0
#endif
685
0
        case XML_WITH_ICU:
686
#ifdef LIBXML_ICU_ENABLED
687
            return(1);
688
#else
689
0
            return(0);
690
0
#endif
691
0
        default:
692
0
      break;
693
0
     }
694
0
     return(0);
695
0
}
696
697
/************************************************************************
698
 *                  *
699
 *      Simple string buffer        *
700
 *                  *
701
 ************************************************************************/
702
703
typedef struct {
704
    xmlChar *mem;
705
    unsigned size;
706
    unsigned cap; /* size < cap */
707
    unsigned max; /* size <= max */
708
    xmlParserErrors code;
709
} xmlSBuf;
710
711
static void
712
5.94M
xmlSBufInit(xmlSBuf *buf, unsigned max) {
713
5.94M
    buf->mem = NULL;
714
5.94M
    buf->size = 0;
715
5.94M
    buf->cap = 0;
716
5.94M
    buf->max = max;
717
5.94M
    buf->code = XML_ERR_OK;
718
5.94M
}
719
720
static int
721
523k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
722
523k
    xmlChar *mem;
723
523k
    unsigned cap;
724
725
523k
    if (len >= UINT_MAX / 2 - buf->size) {
726
0
        if (buf->code == XML_ERR_OK)
727
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
728
0
        return(-1);
729
0
    }
730
731
523k
    cap = (buf->size + len) * 2;
732
523k
    if (cap < 240)
733
420k
        cap = 240;
734
735
523k
    mem = xmlRealloc(buf->mem, cap);
736
523k
    if (mem == NULL) {
737
0
        buf->code = XML_ERR_NO_MEMORY;
738
0
        return(-1);
739
0
    }
740
741
523k
    buf->mem = mem;
742
523k
    buf->cap = cap;
743
744
523k
    return(0);
745
523k
}
746
747
static void
748
49.8M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
749
49.8M
    if (buf->max - buf->size < len) {
750
0
        if (buf->code == XML_ERR_OK)
751
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
752
0
        return;
753
0
    }
754
755
49.8M
    if (buf->cap - buf->size <= len) {
756
473k
        if (xmlSBufGrow(buf, len) < 0)
757
0
            return;
758
473k
    }
759
760
49.8M
    if (len > 0)
761
49.8M
        memcpy(buf->mem + buf->size, str, len);
762
49.8M
    buf->size += len;
763
49.8M
}
764
765
static void
766
45.8M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
767
45.8M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
768
45.8M
}
769
770
static void
771
226k
xmlSBufAddChar(xmlSBuf *buf, int c) {
772
226k
    xmlChar *end;
773
774
226k
    if (buf->max - buf->size < 4) {
775
0
        if (buf->code == XML_ERR_OK)
776
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
777
0
        return;
778
0
    }
779
780
226k
    if (buf->cap - buf->size <= 4) {
781
49.2k
        if (xmlSBufGrow(buf, 4) < 0)
782
0
            return;
783
49.2k
    }
784
785
226k
    end = buf->mem + buf->size;
786
787
226k
    if (c < 0x80) {
788
204k
        *end = (xmlChar) c;
789
204k
        buf->size += 1;
790
204k
    } else {
791
21.7k
        buf->size += xmlCopyCharMultiByte(end, c);
792
21.7k
    }
793
226k
}
794
795
static void
796
27.5M
xmlSBufAddReplChar(xmlSBuf *buf) {
797
27.5M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
798
27.5M
}
799
800
static void
801
0
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
802
0
    if (buf->code == XML_ERR_NO_MEMORY)
803
0
        xmlCtxtErrMemory(ctxt);
804
0
    else
805
0
        xmlFatalErr(ctxt, buf->code, errMsg);
806
0
}
807
808
static xmlChar *
809
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
810
423k
              const char *errMsg) {
811
423k
    if (buf->mem == NULL) {
812
13.6k
        buf->mem = xmlMalloc(1);
813
13.6k
        if (buf->mem == NULL) {
814
0
            buf->code = XML_ERR_NO_MEMORY;
815
13.6k
        } else {
816
13.6k
            buf->mem[0] = 0;
817
13.6k
        }
818
409k
    } else {
819
409k
        buf->mem[buf->size] = 0;
820
409k
    }
821
822
423k
    if (buf->code == XML_ERR_OK) {
823
423k
        if (sizeOut != NULL)
824
318k
            *sizeOut = buf->size;
825
423k
        return(buf->mem);
826
423k
    }
827
828
0
    xmlSBufReportError(buf, ctxt, errMsg);
829
830
0
    xmlFree(buf->mem);
831
832
0
    if (sizeOut != NULL)
833
0
        *sizeOut = 0;
834
0
    return(NULL);
835
423k
}
836
837
static void
838
5.43M
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
839
5.43M
    if (buf->code != XML_ERR_OK)
840
0
        xmlSBufReportError(buf, ctxt, errMsg);
841
842
5.43M
    xmlFree(buf->mem);
843
5.43M
}
844
845
static int
846
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
847
27.0M
                    const char *errMsg) {
848
27.0M
    int c = str[0];
849
27.0M
    int c1 = str[1];
850
851
27.0M
    if ((c1 & 0xC0) != 0x80)
852
5.37M
        goto encoding_error;
853
854
21.7M
    if (c < 0xE0) {
855
        /* 2-byte sequence */
856
20.9M
        if (c < 0xC2)
857
17.4M
            goto encoding_error;
858
859
3.49M
        return(2);
860
20.9M
    } else {
861
754k
        int c2 = str[2];
862
863
754k
        if ((c2 & 0xC0) != 0x80)
864
20.3k
            goto encoding_error;
865
866
734k
        if (c < 0xF0) {
867
            /* 3-byte sequence */
868
683k
            if (c == 0xE0) {
869
                /* overlong */
870
9.66k
                if (c1 < 0xA0)
871
2.53k
                    goto encoding_error;
872
673k
            } else if (c == 0xED) {
873
                /* surrogate */
874
4.41k
                if (c1 >= 0xA0)
875
2.37k
                    goto encoding_error;
876
669k
            } else if (c == 0xEF) {
877
                /* U+FFFE and U+FFFF are invalid Chars */
878
148k
                if ((c1 == 0xBF) && (c2 >= 0xBE))
879
1.34k
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
880
148k
            }
881
882
678k
            return(3);
883
683k
        } else {
884
            /* 4-byte sequence */
885
50.7k
            if ((str[3] & 0xC0) != 0x80)
886
1.80k
                goto encoding_error;
887
48.9k
            if (c == 0xF0) {
888
                /* overlong */
889
9.06k
                if (c1 < 0x90)
890
6.67k
                    goto encoding_error;
891
39.8k
            } else if (c >= 0xF4) {
892
                /* greater than 0x10FFFF */
893
8.85k
                if ((c > 0xF4) || (c1 >= 0x90))
894
8.36k
                    goto encoding_error;
895
8.85k
            }
896
897
33.9k
            return(4);
898
48.9k
        }
899
734k
    }
900
901
22.8M
encoding_error:
902
    /* Only report the first error */
903
22.8M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
904
18.7k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
905
18.7k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
906
18.7k
    }
907
908
22.8M
    return(0);
909
21.7M
}
910
911
/************************************************************************
912
 *                  *
913
 *    SAX2 defaulted attributes handling      *
914
 *                  *
915
 ************************************************************************/
916
917
/**
918
 * xmlCtxtInitializeLate:
919
 * @ctxt:  an XML parser context
920
 *
921
 * Final initialization of the parser context before starting to parse.
922
 *
923
 * This accounts for users modifying struct members of parser context
924
 * directly.
925
 */
926
static void
927
218k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
928
218k
    xmlSAXHandlerPtr sax;
929
930
    /* Avoid unused variable warning if features are disabled. */
931
218k
    (void) sax;
932
933
    /*
934
     * Changing the SAX struct directly is still widespread practice
935
     * in internal and external code.
936
     */
937
218k
    if (ctxt == NULL) return;
938
218k
    sax = ctxt->sax;
939
218k
#ifdef LIBXML_SAX1_ENABLED
940
    /*
941
     * Only enable SAX2 if there SAX2 element handlers, except when there
942
     * are no element handlers at all.
943
     */
944
218k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
945
218k
        (sax) &&
946
218k
        (sax->initialized == XML_SAX2_MAGIC) &&
947
218k
        ((sax->startElementNs != NULL) ||
948
0
         (sax->endElementNs != NULL) ||
949
0
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
950
218k
        ctxt->sax2 = 1;
951
#else
952
    ctxt->sax2 = 1;
953
#endif /* LIBXML_SAX1_ENABLED */
954
955
    /*
956
     * Some users replace the dictionary directly in the context struct.
957
     * We really need an API function to do that cleanly.
958
     */
959
218k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
960
218k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
961
218k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
962
218k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
963
218k
    (ctxt->str_xml_ns == NULL)) {
964
0
        xmlErrMemory(ctxt);
965
0
    }
966
967
218k
    xmlDictSetLimit(ctxt->dict,
968
218k
                    (ctxt->options & XML_PARSE_HUGE) ?
969
0
                        0 :
970
218k
                        XML_MAX_DICTIONARY_LIMIT);
971
218k
}
972
973
typedef struct {
974
    xmlHashedString prefix;
975
    xmlHashedString name;
976
    xmlHashedString value;
977
    const xmlChar *valueEnd;
978
    int external;
979
    int expandedSize;
980
} xmlDefAttr;
981
982
typedef struct _xmlDefAttrs xmlDefAttrs;
983
typedef xmlDefAttrs *xmlDefAttrsPtr;
984
struct _xmlDefAttrs {
985
    int nbAttrs;  /* number of defaulted attributes on that element */
986
    int maxAttrs;       /* the size of the array */
987
#if __STDC_VERSION__ >= 199901L
988
    /* Using a C99 flexible array member avoids UBSan errors. */
989
    xmlDefAttr attrs[] ATTRIBUTE_COUNTED_BY(maxAttrs);
990
#else
991
    xmlDefAttr attrs[1];
992
#endif
993
};
994
995
/**
996
 * xmlAttrNormalizeSpace:
997
 * @src: the source string
998
 * @dst: the target string
999
 *
1000
 * Normalize the space in non CDATA attribute values:
1001
 * If the attribute type is not CDATA, then the XML processor MUST further
1002
 * process the normalized attribute value by discarding any leading and
1003
 * trailing space (#x20) characters, and by replacing sequences of space
1004
 * (#x20) characters by a single space (#x20) character.
1005
 * Note that the size of dst need to be at least src, and if one doesn't need
1006
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1007
 * passing src as dst is just fine.
1008
 *
1009
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1010
 *         is needed.
1011
 */
1012
static xmlChar *
1013
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1014
43.2k
{
1015
43.2k
    if ((src == NULL) || (dst == NULL))
1016
0
        return(NULL);
1017
1018
46.6k
    while (*src == 0x20) src++;
1019
5.06M
    while (*src != 0) {
1020
5.02M
  if (*src == 0x20) {
1021
538k
      while (*src == 0x20) src++;
1022
138k
      if (*src != 0)
1023
127k
    *dst++ = 0x20;
1024
4.88M
  } else {
1025
4.88M
      *dst++ = *src++;
1026
4.88M
  }
1027
5.02M
    }
1028
43.2k
    *dst = 0;
1029
43.2k
    if (dst == src)
1030
27.7k
       return(NULL);
1031
15.5k
    return(dst);
1032
43.2k
}
1033
1034
/**
1035
 * xmlAddDefAttrs:
1036
 * @ctxt:  an XML parser context
1037
 * @fullname:  the element fullname
1038
 * @fullattr:  the attribute fullname
1039
 * @value:  the attribute value
1040
 *
1041
 * Add a defaulted attribute for an element
1042
 */
1043
static void
1044
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1045
               const xmlChar *fullname,
1046
               const xmlChar *fullattr,
1047
43.4k
               const xmlChar *value) {
1048
43.4k
    xmlDefAttrsPtr defaults;
1049
43.4k
    xmlDefAttr *attr;
1050
43.4k
    int len, expandedSize;
1051
43.4k
    xmlHashedString name;
1052
43.4k
    xmlHashedString prefix;
1053
43.4k
    xmlHashedString hvalue;
1054
43.4k
    const xmlChar *localname;
1055
1056
    /*
1057
     * Allows to detect attribute redefinitions
1058
     */
1059
43.4k
    if (ctxt->attsSpecial != NULL) {
1060
35.3k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1061
20.7k
      return;
1062
35.3k
    }
1063
1064
22.6k
    if (ctxt->attsDefault == NULL) {
1065
8.14k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1066
8.14k
  if (ctxt->attsDefault == NULL)
1067
0
      goto mem_error;
1068
8.14k
    }
1069
1070
    /*
1071
     * split the element name into prefix:localname , the string found
1072
     * are within the DTD and then not associated to namespace names.
1073
     */
1074
22.6k
    localname = xmlSplitQName3(fullname, &len);
1075
22.6k
    if (localname == NULL) {
1076
19.6k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1077
19.6k
  prefix.name = NULL;
1078
19.6k
    } else {
1079
2.99k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1080
2.99k
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1081
2.99k
        if (prefix.name == NULL)
1082
0
            goto mem_error;
1083
2.99k
    }
1084
22.6k
    if (name.name == NULL)
1085
0
        goto mem_error;
1086
1087
    /*
1088
     * make sure there is some storage
1089
     */
1090
22.6k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1091
22.6k
    if ((defaults == NULL) ||
1092
13.9k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1093
10.7k
        xmlDefAttrsPtr temp;
1094
10.7k
        int newSize;
1095
1096
10.7k
        if (defaults == NULL) {
1097
8.71k
            newSize = 4;
1098
8.71k
        } else {
1099
1.99k
            if ((defaults->maxAttrs >= XML_MAX_ATTRS) ||
1100
1.99k
                ((size_t) defaults->maxAttrs >
1101
1.99k
                     SIZE_MAX / 2 / sizeof(temp[0]) - sizeof(*defaults)))
1102
0
                goto mem_error;
1103
1104
1.99k
            if (defaults->maxAttrs > XML_MAX_ATTRS / 2)
1105
0
                newSize = XML_MAX_ATTRS;
1106
1.99k
            else
1107
1.99k
                newSize = defaults->maxAttrs * 2;
1108
1.99k
        }
1109
10.7k
        temp = xmlRealloc(defaults,
1110
10.7k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1111
10.7k
  if (temp == NULL)
1112
0
      goto mem_error;
1113
10.7k
        if (defaults == NULL)
1114
8.71k
            temp->nbAttrs = 0;
1115
10.7k
  temp->maxAttrs = newSize;
1116
10.7k
        defaults = temp;
1117
10.7k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1118
10.7k
                          defaults, NULL) < 0) {
1119
0
      xmlFree(defaults);
1120
0
      goto mem_error;
1121
0
  }
1122
10.7k
    }
1123
1124
    /*
1125
     * Split the attribute name into prefix:localname , the string found
1126
     * are within the DTD and hen not associated to namespace names.
1127
     */
1128
22.6k
    localname = xmlSplitQName3(fullattr, &len);
1129
22.6k
    if (localname == NULL) {
1130
14.1k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1131
14.1k
  prefix.name = NULL;
1132
14.1k
    } else {
1133
8.54k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1134
8.54k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1135
8.54k
        if (prefix.name == NULL)
1136
0
            goto mem_error;
1137
8.54k
    }
1138
22.6k
    if (name.name == NULL)
1139
0
        goto mem_error;
1140
1141
    /* intern the string and precompute the end */
1142
22.6k
    len = strlen((const char *) value);
1143
22.6k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1144
22.6k
    if (hvalue.name == NULL)
1145
0
        goto mem_error;
1146
1147
22.6k
    expandedSize = strlen((const char *) name.name);
1148
22.6k
    if (prefix.name != NULL)
1149
8.54k
        expandedSize += strlen((const char *) prefix.name);
1150
22.6k
    expandedSize += len;
1151
1152
22.6k
    attr = &defaults->attrs[defaults->nbAttrs++];
1153
22.6k
    attr->name = name;
1154
22.6k
    attr->prefix = prefix;
1155
22.6k
    attr->value = hvalue;
1156
22.6k
    attr->valueEnd = hvalue.name + len;
1157
22.6k
    attr->external = PARSER_EXTERNAL(ctxt);
1158
22.6k
    attr->expandedSize = expandedSize;
1159
1160
22.6k
    return;
1161
1162
0
mem_error:
1163
0
    xmlErrMemory(ctxt);
1164
0
}
1165
1166
/**
1167
 * xmlAddSpecialAttr:
1168
 * @ctxt:  an XML parser context
1169
 * @fullname:  the element fullname
1170
 * @fullattr:  the attribute fullname
1171
 * @type:  the attribute type
1172
 *
1173
 * Register this attribute type
1174
 */
1175
static void
1176
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1177
      const xmlChar *fullname,
1178
      const xmlChar *fullattr,
1179
      int type)
1180
64.4k
{
1181
64.4k
    if (ctxt->attsSpecial == NULL) {
1182
9.61k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1183
9.61k
  if (ctxt->attsSpecial == NULL)
1184
0
      goto mem_error;
1185
9.61k
    }
1186
1187
64.4k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1188
64.4k
                    XML_INT_TO_PTR(type)) < 0)
1189
0
        goto mem_error;
1190
64.4k
    return;
1191
1192
64.4k
mem_error:
1193
0
    xmlErrMemory(ctxt);
1194
0
}
1195
1196
/**
1197
 * xmlCleanSpecialAttrCallback:
1198
 *
1199
 * Removes CDATA attributes from the special attribute table
1200
 */
1201
static void
1202
xmlCleanSpecialAttrCallback(void *payload, void *data,
1203
                            const xmlChar *fullname, const xmlChar *fullattr,
1204
27.6k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1205
27.6k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1206
1207
27.6k
    if (XML_PTR_TO_INT(payload) == XML_ATTRIBUTE_CDATA) {
1208
978
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1209
978
    }
1210
27.6k
}
1211
1212
/**
1213
 * xmlCleanSpecialAttr:
1214
 * @ctxt:  an XML parser context
1215
 *
1216
 * Trim the list of attributes defined to remove all those of type
1217
 * CDATA as they are not special. This call should be done when finishing
1218
 * to parse the DTD and before starting to parse the document root.
1219
 */
1220
static void
1221
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1222
51.1k
{
1223
51.1k
    if (ctxt->attsSpecial == NULL)
1224
41.5k
        return;
1225
1226
9.61k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1227
1228
9.61k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1229
88
        xmlHashFree(ctxt->attsSpecial, NULL);
1230
88
        ctxt->attsSpecial = NULL;
1231
88
    }
1232
9.61k
}
1233
1234
/**
1235
 * xmlCheckLanguageID:
1236
 * @lang:  pointer to the string value
1237
 *
1238
 * DEPRECATED: Internal function, do not use.
1239
 *
1240
 * Checks that the value conforms to the LanguageID production:
1241
 *
1242
 * NOTE: this is somewhat deprecated, those productions were removed from
1243
 *       the XML Second edition.
1244
 *
1245
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1246
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1247
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1248
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1249
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1250
 * [38] Subcode ::= ([a-z] | [A-Z])+
1251
 *
1252
 * The current REC reference the successors of RFC 1766, currently 5646
1253
 *
1254
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1255
 * langtag       = language
1256
 *                 ["-" script]
1257
 *                 ["-" region]
1258
 *                 *("-" variant)
1259
 *                 *("-" extension)
1260
 *                 ["-" privateuse]
1261
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1262
 *                 ["-" extlang]       ; sometimes followed by
1263
 *                                     ; extended language subtags
1264
 *               / 4ALPHA              ; or reserved for future use
1265
 *               / 5*8ALPHA            ; or registered language subtag
1266
 *
1267
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1268
 *                 *2("-" 3ALPHA)      ; permanently reserved
1269
 *
1270
 * script        = 4ALPHA              ; ISO 15924 code
1271
 *
1272
 * region        = 2ALPHA              ; ISO 3166-1 code
1273
 *               / 3DIGIT              ; UN M.49 code
1274
 *
1275
 * variant       = 5*8alphanum         ; registered variants
1276
 *               / (DIGIT 3alphanum)
1277
 *
1278
 * extension     = singleton 1*("-" (2*8alphanum))
1279
 *
1280
 *                                     ; Single alphanumerics
1281
 *                                     ; "x" reserved for private use
1282
 * singleton     = DIGIT               ; 0 - 9
1283
 *               / %x41-57             ; A - W
1284
 *               / %x59-5A             ; Y - Z
1285
 *               / %x61-77             ; a - w
1286
 *               / %x79-7A             ; y - z
1287
 *
1288
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1289
 * The parser below doesn't try to cope with extension or privateuse
1290
 * that could be added but that's not interoperable anyway
1291
 *
1292
 * Returns 1 if correct 0 otherwise
1293
 **/
1294
int
1295
xmlCheckLanguageID(const xmlChar * lang)
1296
0
{
1297
0
    const xmlChar *cur = lang, *nxt;
1298
1299
0
    if (cur == NULL)
1300
0
        return (0);
1301
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1302
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1303
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1304
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1305
        /*
1306
         * Still allow IANA code and user code which were coming
1307
         * from the previous version of the XML-1.0 specification
1308
         * it's deprecated but we should not fail
1309
         */
1310
0
        cur += 2;
1311
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1312
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1313
0
            cur++;
1314
0
        return(cur[0] == 0);
1315
0
    }
1316
0
    nxt = cur;
1317
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1318
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1319
0
           nxt++;
1320
0
    if (nxt - cur >= 4) {
1321
        /*
1322
         * Reserved
1323
         */
1324
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1325
0
            return(0);
1326
0
        return(1);
1327
0
    }
1328
0
    if (nxt - cur < 2)
1329
0
        return(0);
1330
    /* we got an ISO 639 code */
1331
0
    if (nxt[0] == 0)
1332
0
        return(1);
1333
0
    if (nxt[0] != '-')
1334
0
        return(0);
1335
1336
0
    nxt++;
1337
0
    cur = nxt;
1338
    /* now we can have extlang or script or region or variant */
1339
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1340
0
        goto region_m49;
1341
1342
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1343
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1344
0
           nxt++;
1345
0
    if (nxt - cur == 4)
1346
0
        goto script;
1347
0
    if (nxt - cur == 2)
1348
0
        goto region;
1349
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1350
0
        goto variant;
1351
0
    if (nxt - cur != 3)
1352
0
        return(0);
1353
    /* we parsed an extlang */
1354
0
    if (nxt[0] == 0)
1355
0
        return(1);
1356
0
    if (nxt[0] != '-')
1357
0
        return(0);
1358
1359
0
    nxt++;
1360
0
    cur = nxt;
1361
    /* now we can have script or region or variant */
1362
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1363
0
        goto region_m49;
1364
1365
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1366
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1367
0
           nxt++;
1368
0
    if (nxt - cur == 2)
1369
0
        goto region;
1370
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1371
0
        goto variant;
1372
0
    if (nxt - cur != 4)
1373
0
        return(0);
1374
    /* we parsed a script */
1375
0
script:
1376
0
    if (nxt[0] == 0)
1377
0
        return(1);
1378
0
    if (nxt[0] != '-')
1379
0
        return(0);
1380
1381
0
    nxt++;
1382
0
    cur = nxt;
1383
    /* now we can have region or variant */
1384
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1385
0
        goto region_m49;
1386
1387
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1388
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1389
0
           nxt++;
1390
1391
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1392
0
        goto variant;
1393
0
    if (nxt - cur != 2)
1394
0
        return(0);
1395
    /* we parsed a region */
1396
0
region:
1397
0
    if (nxt[0] == 0)
1398
0
        return(1);
1399
0
    if (nxt[0] != '-')
1400
0
        return(0);
1401
1402
0
    nxt++;
1403
0
    cur = nxt;
1404
    /* now we can just have a variant */
1405
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1406
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1407
0
           nxt++;
1408
1409
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1410
0
        return(0);
1411
1412
    /* we parsed a variant */
1413
0
variant:
1414
0
    if (nxt[0] == 0)
1415
0
        return(1);
1416
0
    if (nxt[0] != '-')
1417
0
        return(0);
1418
    /* extensions and private use subtags not checked */
1419
0
    return (1);
1420
1421
0
region_m49:
1422
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1423
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1424
0
        nxt += 3;
1425
0
        goto region;
1426
0
    }
1427
0
    return(0);
1428
0
}
1429
1430
/************************************************************************
1431
 *                  *
1432
 *    Parser stacks related functions and macros    *
1433
 *                  *
1434
 ************************************************************************/
1435
1436
static xmlChar *
1437
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1438
1439
/**
1440
 * xmlParserNsCreate:
1441
 *
1442
 * Create a new namespace database.
1443
 *
1444
 * Returns the new obejct.
1445
 */
1446
xmlParserNsData *
1447
217k
xmlParserNsCreate(void) {
1448
217k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1449
1450
217k
    if (nsdb == NULL)
1451
0
        return(NULL);
1452
217k
    memset(nsdb, 0, sizeof(*nsdb));
1453
217k
    nsdb->defaultNsIndex = INT_MAX;
1454
1455
217k
    return(nsdb);
1456
217k
}
1457
1458
/**
1459
 * xmlParserNsFree:
1460
 * @nsdb: namespace database
1461
 *
1462
 * Free a namespace database.
1463
 */
1464
void
1465
217k
xmlParserNsFree(xmlParserNsData *nsdb) {
1466
217k
    if (nsdb == NULL)
1467
0
        return;
1468
1469
217k
    xmlFree(nsdb->extra);
1470
217k
    xmlFree(nsdb->hash);
1471
217k
    xmlFree(nsdb);
1472
217k
}
1473
1474
/**
1475
 * xmlParserNsReset:
1476
 * @nsdb: namespace database
1477
 *
1478
 * Reset a namespace database.
1479
 */
1480
static void
1481
0
xmlParserNsReset(xmlParserNsData *nsdb) {
1482
0
    if (nsdb == NULL)
1483
0
        return;
1484
1485
0
    nsdb->hashElems = 0;
1486
0
    nsdb->elementId = 0;
1487
0
    nsdb->defaultNsIndex = INT_MAX;
1488
1489
0
    if (nsdb->hash)
1490
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1491
0
}
1492
1493
/**
1494
 * xmlParserStartElement:
1495
 * @nsdb: namespace database
1496
 *
1497
 * Signal that a new element has started.
1498
 *
1499
 * Returns 0 on success, -1 if the element counter overflowed.
1500
 */
1501
static int
1502
10.3M
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1503
10.3M
    if (nsdb->elementId == UINT_MAX)
1504
0
        return(-1);
1505
10.3M
    nsdb->elementId++;
1506
1507
10.3M
    return(0);
1508
10.3M
}
1509
1510
/**
1511
 * xmlParserNsLookup:
1512
 * @ctxt: parser context
1513
 * @prefix: namespace prefix
1514
 * @bucketPtr: optional bucket (return value)
1515
 *
1516
 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1517
 * be set to the matching bucket, or the first empty bucket if no match
1518
 * was found.
1519
 *
1520
 * Returns the namespace index on success, INT_MAX if no namespace was
1521
 * found.
1522
 */
1523
static int
1524
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1525
17.8M
                  xmlParserNsBucket **bucketPtr) {
1526
17.8M
    xmlParserNsBucket *bucket, *tombstone;
1527
17.8M
    unsigned index, hashValue;
1528
1529
17.8M
    if (prefix->name == NULL)
1530
16.6M
        return(ctxt->nsdb->defaultNsIndex);
1531
1532
1.11M
    if (ctxt->nsdb->hashSize == 0)
1533
134k
        return(INT_MAX);
1534
1535
981k
    hashValue = prefix->hashValue;
1536
981k
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1537
981k
    bucket = &ctxt->nsdb->hash[index];
1538
981k
    tombstone = NULL;
1539
1540
1.05M
    while (bucket->hashValue) {
1541
932k
        if (bucket->index == INT_MAX) {
1542
45.1k
            if (tombstone == NULL)
1543
44.2k
                tombstone = bucket;
1544
887k
        } else if (bucket->hashValue == hashValue) {
1545
854k
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1546
854k
                if (bucketPtr != NULL)
1547
130k
                    *bucketPtr = bucket;
1548
854k
                return(bucket->index);
1549
854k
            }
1550
854k
        }
1551
1552
78.3k
        index++;
1553
78.3k
        bucket++;
1554
78.3k
        if (index == ctxt->nsdb->hashSize) {
1555
4.82k
            index = 0;
1556
4.82k
            bucket = ctxt->nsdb->hash;
1557
4.82k
        }
1558
78.3k
    }
1559
1560
127k
    if (bucketPtr != NULL)
1561
49.2k
        *bucketPtr = tombstone ? tombstone : bucket;
1562
127k
    return(INT_MAX);
1563
981k
}
1564
1565
/**
1566
 * xmlParserNsLookupUri:
1567
 * @ctxt: parser context
1568
 * @prefix: namespace prefix
1569
 *
1570
 * Lookup namespace URI with given prefix.
1571
 *
1572
 * Returns the namespace URI on success, NULL if no namespace was found.
1573
 */
1574
static const xmlChar *
1575
10.3M
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1576
10.3M
    const xmlChar *ret;
1577
10.3M
    int nsIndex;
1578
1579
10.3M
    if (prefix->name == ctxt->str_xml)
1580
6.54k
        return(ctxt->str_xml_ns);
1581
1582
    /*
1583
     * minNsIndex is used when building an entity tree. We must
1584
     * ignore namespaces declared outside the entity.
1585
     */
1586
10.3M
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1587
10.3M
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1588
3.48M
        return(NULL);
1589
1590
6.81M
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1591
6.81M
    if (ret[0] == 0)
1592
5.42k
        ret = NULL;
1593
6.81M
    return(ret);
1594
10.3M
}
1595
1596
/**
1597
 * xmlParserNsLookupSax:
1598
 * @ctxt: parser context
1599
 * @prefix: namespace prefix
1600
 *
1601
 * Lookup extra data for the given prefix. This returns data stored
1602
 * with xmlParserNsUdpateSax.
1603
 *
1604
 * Returns the data on success, NULL if no namespace was found.
1605
 */
1606
void *
1607
6.90M
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1608
6.90M
    xmlHashedString hprefix;
1609
6.90M
    int nsIndex;
1610
1611
6.90M
    if (prefix == ctxt->str_xml)
1612
161k
        return(NULL);
1613
1614
6.74M
    hprefix.name = prefix;
1615
6.74M
    if (prefix != NULL)
1616
297k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1617
6.44M
    else
1618
6.44M
        hprefix.hashValue = 0;
1619
6.74M
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1620
6.74M
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1621
0
        return(NULL);
1622
1623
6.74M
    return(ctxt->nsdb->extra[nsIndex].saxData);
1624
6.74M
}
1625
1626
/**
1627
 * xmlParserNsUpdateSax:
1628
 * @ctxt: parser context
1629
 * @prefix: namespace prefix
1630
 * @saxData: extra data for SAX handler
1631
 *
1632
 * Sets or updates extra data for the given prefix. This value will be
1633
 * returned by xmlParserNsLookupSax as long as the namespace with the
1634
 * given prefix is in scope.
1635
 *
1636
 * Returns the data on success, NULL if no namespace was found.
1637
 */
1638
int
1639
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1640
320k
                     void *saxData) {
1641
320k
    xmlHashedString hprefix;
1642
320k
    int nsIndex;
1643
1644
320k
    if (prefix == ctxt->str_xml)
1645
0
        return(-1);
1646
1647
320k
    hprefix.name = prefix;
1648
320k
    if (prefix != NULL)
1649
115k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1650
205k
    else
1651
205k
        hprefix.hashValue = 0;
1652
320k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1653
320k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1654
0
        return(-1);
1655
1656
320k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1657
320k
    return(0);
1658
320k
}
1659
1660
/**
1661
 * xmlParserNsGrow:
1662
 * @ctxt: parser context
1663
 *
1664
 * Grows the namespace tables.
1665
 *
1666
 * Returns 0 on success, -1 if a memory allocation failed.
1667
 */
1668
static int
1669
95.3k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1670
95.3k
    const xmlChar **table;
1671
95.3k
    xmlParserNsExtra *extra;
1672
95.3k
    int newSize;
1673
1674
95.3k
    newSize = xmlGrowCapacity(ctxt->nsMax,
1675
95.3k
                              sizeof(table[0]) + sizeof(extra[0]),
1676
95.3k
                              16, XML_MAX_ITEMS);
1677
95.3k
    if (newSize < 0)
1678
0
        goto error;
1679
1680
95.3k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1681
95.3k
    if (table == NULL)
1682
0
        goto error;
1683
95.3k
    ctxt->nsTab = table;
1684
1685
95.3k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1686
95.3k
    if (extra == NULL)
1687
0
        goto error;
1688
95.3k
    ctxt->nsdb->extra = extra;
1689
1690
95.3k
    ctxt->nsMax = newSize;
1691
95.3k
    return(0);
1692
1693
0
error:
1694
0
    xmlErrMemory(ctxt);
1695
0
    return(-1);
1696
95.3k
}
1697
1698
/**
1699
 * xmlParserNsPush:
1700
 * @ctxt: parser context
1701
 * @prefix: prefix with hash value
1702
 * @uri: uri with hash value
1703
 * @saxData: extra data for SAX handler
1704
 * @defAttr: whether the namespace comes from a default attribute
1705
 *
1706
 * Push a new namespace on the table.
1707
 *
1708
 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1709
 * -1 if a memory allocation failed.
1710
 */
1711
static int
1712
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1713
332k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1714
332k
    xmlParserNsBucket *bucket = NULL;
1715
332k
    xmlParserNsExtra *extra;
1716
332k
    const xmlChar **ns;
1717
332k
    unsigned hashValue, nsIndex, oldIndex;
1718
1719
332k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1720
448
        return(0);
1721
1722
332k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1723
0
        xmlErrMemory(ctxt);
1724
0
        return(-1);
1725
0
    }
1726
1727
    /*
1728
     * Default namespace and 'xml' namespace
1729
     */
1730
332k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1731
214k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1732
1733
214k
        if (oldIndex != INT_MAX) {
1734
92.7k
            extra = &ctxt->nsdb->extra[oldIndex];
1735
1736
92.7k
            if (extra->elementId == ctxt->nsdb->elementId) {
1737
8.78k
                if (defAttr == 0)
1738
8.65k
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1739
8.78k
                return(0);
1740
8.78k
            }
1741
1742
83.9k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1743
0
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1744
0
                return(0);
1745
83.9k
        }
1746
1747
205k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1748
205k
        goto populate_entry;
1749
214k
    }
1750
1751
    /*
1752
     * Hash table lookup
1753
     */
1754
117k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1755
117k
    if (oldIndex != INT_MAX) {
1756
45.9k
        extra = &ctxt->nsdb->extra[oldIndex];
1757
1758
        /*
1759
         * Check for duplicate definitions on the same element.
1760
         */
1761
45.9k
        if (extra->elementId == ctxt->nsdb->elementId) {
1762
2.01k
            if (defAttr == 0)
1763
2.01k
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1764
2.01k
            return(0);
1765
2.01k
        }
1766
1767
43.9k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1768
0
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1769
0
            return(0);
1770
1771
43.9k
        bucket->index = ctxt->nsNr;
1772
43.9k
        goto populate_entry;
1773
43.9k
    }
1774
1775
    /*
1776
     * Insert new bucket
1777
     */
1778
1779
71.6k
    hashValue = prefix->hashValue;
1780
1781
    /*
1782
     * Grow hash table, 50% fill factor
1783
     */
1784
71.6k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1785
23.8k
        xmlParserNsBucket *newHash;
1786
23.8k
        unsigned newSize, i, index;
1787
1788
23.8k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1789
0
            xmlErrMemory(ctxt);
1790
0
            return(-1);
1791
0
        }
1792
23.8k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1793
23.8k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1794
23.8k
        if (newHash == NULL) {
1795
0
            xmlErrMemory(ctxt);
1796
0
            return(-1);
1797
0
        }
1798
23.8k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1799
1800
134k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1801
110k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1802
110k
            unsigned newIndex;
1803
1804
110k
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1805
109k
                continue;
1806
1.41k
            newIndex = hv & (newSize - 1);
1807
1808
1.88k
            while (newHash[newIndex].hashValue != 0) {
1809
471
                newIndex++;
1810
471
                if (newIndex == newSize)
1811
35
                    newIndex = 0;
1812
471
            }
1813
1814
1.41k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1815
1.41k
        }
1816
1817
23.8k
        xmlFree(ctxt->nsdb->hash);
1818
23.8k
        ctxt->nsdb->hash = newHash;
1819
23.8k
        ctxt->nsdb->hashSize = newSize;
1820
1821
        /*
1822
         * Relookup
1823
         */
1824
23.8k
        index = hashValue & (newSize - 1);
1825
1826
23.9k
        while (newHash[index].hashValue != 0) {
1827
129
            index++;
1828
129
            if (index == newSize)
1829
14
                index = 0;
1830
129
        }
1831
1832
23.8k
        bucket = &newHash[index];
1833
23.8k
    }
1834
1835
71.6k
    bucket->hashValue = hashValue;
1836
71.6k
    bucket->index = ctxt->nsNr;
1837
71.6k
    ctxt->nsdb->hashElems++;
1838
71.6k
    oldIndex = INT_MAX;
1839
1840
321k
populate_entry:
1841
321k
    nsIndex = ctxt->nsNr;
1842
1843
321k
    ns = &ctxt->nsTab[nsIndex * 2];
1844
321k
    ns[0] = prefix ? prefix->name : NULL;
1845
321k
    ns[1] = uri->name;
1846
1847
321k
    extra = &ctxt->nsdb->extra[nsIndex];
1848
321k
    extra->saxData = saxData;
1849
321k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1850
321k
    extra->uriHashValue = uri->hashValue;
1851
321k
    extra->elementId = ctxt->nsdb->elementId;
1852
321k
    extra->oldIndex = oldIndex;
1853
1854
321k
    ctxt->nsNr++;
1855
1856
321k
    return(1);
1857
71.6k
}
1858
1859
/**
1860
 * xmlParserNsPop:
1861
 * @ctxt: an XML parser context
1862
 * @nr:  the number to pop
1863
 *
1864
 * Pops the top @nr namespaces and restores the hash table.
1865
 *
1866
 * Returns the number of namespaces popped.
1867
 */
1868
static int
1869
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1870
214k
{
1871
214k
    int i;
1872
1873
    /* assert(nr <= ctxt->nsNr); */
1874
1875
459k
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1876
244k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1877
244k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1878
1879
244k
        if (prefix == NULL) {
1880
159k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1881
159k
        } else {
1882
85.0k
            xmlHashedString hprefix;
1883
85.0k
            xmlParserNsBucket *bucket = NULL;
1884
1885
85.0k
            hprefix.name = prefix;
1886
85.0k
            hprefix.hashValue = extra->prefixHashValue;
1887
85.0k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1888
            /* assert(bucket && bucket->hashValue); */
1889
85.0k
            bucket->index = extra->oldIndex;
1890
85.0k
        }
1891
244k
    }
1892
1893
214k
    ctxt->nsNr -= nr;
1894
214k
    return(nr);
1895
214k
}
1896
1897
static int
1898
177k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt) {
1899
177k
    const xmlChar **atts;
1900
177k
    unsigned *attallocs;
1901
177k
    int newSize;
1902
1903
177k
    newSize = xmlGrowCapacity(ctxt->maxatts / 5,
1904
177k
                              sizeof(atts[0]) * 5 + sizeof(attallocs[0]),
1905
177k
                              10, XML_MAX_ATTRS);
1906
177k
    if (newSize < 0) {
1907
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
1908
0
                    "Maximum number of attributes exceeded");
1909
0
        return(-1);
1910
0
    }
1911
1912
177k
    atts = xmlRealloc(ctxt->atts, newSize * sizeof(atts[0]) * 5);
1913
177k
    if (atts == NULL)
1914
0
        goto mem_error;
1915
177k
    ctxt->atts = atts;
1916
1917
177k
    attallocs = xmlRealloc(ctxt->attallocs,
1918
177k
                           newSize * sizeof(attallocs[0]));
1919
177k
    if (attallocs == NULL)
1920
0
        goto mem_error;
1921
177k
    ctxt->attallocs = attallocs;
1922
1923
177k
    ctxt->maxatts = newSize * 5;
1924
1925
177k
    return(0);
1926
1927
0
mem_error:
1928
0
    xmlErrMemory(ctxt);
1929
0
    return(-1);
1930
177k
}
1931
1932
/**
1933
 * xmlCtxtPushInput:
1934
 * @ctxt:  an XML parser context
1935
 * @value:  the parser input
1936
 *
1937
 * Pushes a new parser input on top of the input stack
1938
 *
1939
 * Returns -1 in case of error, the index in the stack otherwise
1940
 */
1941
int
1942
xmlCtxtPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1943
245k
{
1944
245k
    char *directory = NULL;
1945
245k
    int maxDepth;
1946
1947
245k
    if ((ctxt == NULL) || (value == NULL))
1948
0
        return(-1);
1949
1950
245k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
1951
1952
245k
    if (ctxt->inputNr >= ctxt->inputMax) {
1953
9.70k
        xmlParserInputPtr *tmp;
1954
9.70k
        int newSize;
1955
1956
9.70k
        newSize = xmlGrowCapacity(ctxt->inputMax, sizeof(tmp[0]),
1957
9.70k
                                  5, maxDepth);
1958
9.70k
        if (newSize < 0) {
1959
0
            xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
1960
0
                           "Maximum entity nesting depth exceeded");
1961
0
            xmlHaltParser(ctxt);
1962
0
            return(-1);
1963
0
        }
1964
9.70k
        tmp = xmlRealloc(ctxt->inputTab, newSize * sizeof(tmp[0]));
1965
9.70k
        if (tmp == NULL) {
1966
0
            xmlErrMemory(ctxt);
1967
0
            return(-1);
1968
0
        }
1969
9.70k
        ctxt->inputTab = tmp;
1970
9.70k
        ctxt->inputMax = newSize;
1971
9.70k
    }
1972
1973
245k
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1974
81.4k
        directory = xmlParserGetDirectory(value->filename);
1975
81.4k
        if (directory == NULL) {
1976
0
            xmlErrMemory(ctxt);
1977
0
            return(-1);
1978
0
        }
1979
81.4k
    }
1980
1981
245k
    if (ctxt->input_id >= INT_MAX) {
1982
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1983
0
        return(-1);
1984
0
    }
1985
1986
245k
    ctxt->inputTab[ctxt->inputNr] = value;
1987
245k
    ctxt->input = value;
1988
1989
245k
    if (ctxt->inputNr == 0) {
1990
217k
        xmlFree(ctxt->directory);
1991
217k
        ctxt->directory = directory;
1992
217k
    }
1993
1994
    /*
1995
     * Internally, the input ID is only used to detect parameter entity
1996
     * boundaries. But there are entity loaders in downstream code that
1997
     * detect the main document by checking for "input_id == 1".
1998
     */
1999
245k
    value->id = ctxt->input_id++;
2000
2001
245k
    return(ctxt->inputNr++);
2002
245k
}
2003
2004
/**
2005
 * xmlCtxtPopInput:
2006
 * @ctxt: an XML parser context
2007
 *
2008
 * Pops the top parser input from the input stack
2009
 *
2010
 * Returns the input just removed
2011
 */
2012
xmlParserInputPtr
2013
xmlCtxtPopInput(xmlParserCtxtPtr ctxt)
2014
680k
{
2015
680k
    xmlParserInputPtr ret;
2016
2017
680k
    if (ctxt == NULL)
2018
0
        return(NULL);
2019
680k
    if (ctxt->inputNr <= 0)
2020
434k
        return (NULL);
2021
245k
    ctxt->inputNr--;
2022
245k
    if (ctxt->inputNr > 0)
2023
28.4k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
2024
217k
    else
2025
217k
        ctxt->input = NULL;
2026
245k
    ret = ctxt->inputTab[ctxt->inputNr];
2027
245k
    ctxt->inputTab[ctxt->inputNr] = NULL;
2028
245k
    return (ret);
2029
680k
}
2030
2031
/**
2032
 * nodePush:
2033
 * @ctxt:  an XML parser context
2034
 * @value:  the element node
2035
 *
2036
 * DEPRECATED: Internal function, do not use.
2037
 *
2038
 * Pushes a new element node on top of the node stack
2039
 *
2040
 * Returns -1 in case of error, the index in the stack otherwise
2041
 */
2042
int
2043
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
2044
10.3M
{
2045
10.3M
    if (ctxt == NULL)
2046
0
        return(0);
2047
2048
10.3M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
2049
393k
        int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2050
393k
        xmlNodePtr *tmp;
2051
393k
        int newSize;
2052
2053
393k
        newSize = xmlGrowCapacity(ctxt->nodeMax, sizeof(tmp[0]),
2054
393k
                                  10, maxDepth);
2055
393k
        if (newSize < 0) {
2056
277
            xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2057
277
                    "Excessive depth in document: %d,"
2058
277
                    " use XML_PARSE_HUGE option\n",
2059
277
                    ctxt->nodeNr);
2060
277
            xmlHaltParser(ctxt);
2061
277
            return(-1);
2062
277
        }
2063
2064
393k
  tmp = xmlRealloc(ctxt->nodeTab, newSize * sizeof(tmp[0]));
2065
393k
        if (tmp == NULL) {
2066
0
            xmlErrMemory(ctxt);
2067
0
            return (-1);
2068
0
        }
2069
393k
        ctxt->nodeTab = tmp;
2070
393k
  ctxt->nodeMax = newSize;
2071
393k
    }
2072
2073
10.3M
    ctxt->nodeTab[ctxt->nodeNr] = value;
2074
10.3M
    ctxt->node = value;
2075
10.3M
    return (ctxt->nodeNr++);
2076
10.3M
}
2077
2078
/**
2079
 * nodePop:
2080
 * @ctxt: an XML parser context
2081
 *
2082
 * DEPRECATED: Internal function, do not use.
2083
 *
2084
 * Pops the top element node from the node stack
2085
 *
2086
 * Returns the node just removed
2087
 */
2088
xmlNodePtr
2089
nodePop(xmlParserCtxtPtr ctxt)
2090
9.27M
{
2091
9.27M
    xmlNodePtr ret;
2092
2093
9.27M
    if (ctxt == NULL) return(NULL);
2094
9.27M
    if (ctxt->nodeNr <= 0)
2095
388
        return (NULL);
2096
9.27M
    ctxt->nodeNr--;
2097
9.27M
    if (ctxt->nodeNr > 0)
2098
9.20M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2099
68.8k
    else
2100
68.8k
        ctxt->node = NULL;
2101
9.27M
    ret = ctxt->nodeTab[ctxt->nodeNr];
2102
9.27M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2103
9.27M
    return (ret);
2104
9.27M
}
2105
2106
/**
2107
 * nameNsPush:
2108
 * @ctxt:  an XML parser context
2109
 * @value:  the element name
2110
 * @prefix:  the element prefix
2111
 * @URI:  the element namespace name
2112
 * @line:  the current line number for error messages
2113
 * @nsNr:  the number of namespaces pushed on the namespace table
2114
 *
2115
 * Pushes a new element name/prefix/URL on top of the name stack
2116
 *
2117
 * Returns -1 in case of error, the index in the stack otherwise
2118
 */
2119
static int
2120
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2121
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2122
4.58M
{
2123
4.58M
    xmlStartTag *tag;
2124
2125
4.58M
    if (ctxt->nameNr >= ctxt->nameMax) {
2126
358k
        const xmlChar **tmp;
2127
358k
        xmlStartTag *tmp2;
2128
358k
        int newSize;
2129
2130
358k
        newSize = xmlGrowCapacity(ctxt->nameMax,
2131
358k
                                  sizeof(tmp[0]) + sizeof(tmp2[0]),
2132
358k
                                  10, XML_MAX_ITEMS);
2133
358k
        if (newSize < 0)
2134
0
            goto mem_error;
2135
2136
358k
        tmp = xmlRealloc(ctxt->nameTab, newSize * sizeof(tmp[0]));
2137
358k
        if (tmp == NULL)
2138
0
      goto mem_error;
2139
358k
  ctxt->nameTab = tmp;
2140
2141
358k
        tmp2 = xmlRealloc(ctxt->pushTab, newSize * sizeof(tmp2[0]));
2142
358k
        if (tmp2 == NULL)
2143
0
      goto mem_error;
2144
358k
  ctxt->pushTab = tmp2;
2145
2146
358k
        ctxt->nameMax = newSize;
2147
4.22M
    } else if (ctxt->pushTab == NULL) {
2148
126k
        ctxt->pushTab = xmlMalloc(ctxt->nameMax * sizeof(ctxt->pushTab[0]));
2149
126k
        if (ctxt->pushTab == NULL)
2150
0
            goto mem_error;
2151
126k
    }
2152
4.58M
    ctxt->nameTab[ctxt->nameNr] = value;
2153
4.58M
    ctxt->name = value;
2154
4.58M
    tag = &ctxt->pushTab[ctxt->nameNr];
2155
4.58M
    tag->prefix = prefix;
2156
4.58M
    tag->URI = URI;
2157
4.58M
    tag->line = line;
2158
4.58M
    tag->nsNr = nsNr;
2159
4.58M
    return (ctxt->nameNr++);
2160
0
mem_error:
2161
0
    xmlErrMemory(ctxt);
2162
0
    return (-1);
2163
4.58M
}
2164
#ifdef LIBXML_PUSH_ENABLED
2165
/**
2166
 * nameNsPop:
2167
 * @ctxt: an XML parser context
2168
 *
2169
 * Pops the top element/prefix/URI name from the name stack
2170
 *
2171
 * Returns the name just removed
2172
 */
2173
static const xmlChar *
2174
nameNsPop(xmlParserCtxtPtr ctxt)
2175
3.40M
{
2176
3.40M
    const xmlChar *ret;
2177
2178
3.40M
    if (ctxt->nameNr <= 0)
2179
0
        return (NULL);
2180
3.40M
    ctxt->nameNr--;
2181
3.40M
    if (ctxt->nameNr > 0)
2182
3.36M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2183
39.3k
    else
2184
39.3k
        ctxt->name = NULL;
2185
3.40M
    ret = ctxt->nameTab[ctxt->nameNr];
2186
3.40M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2187
3.40M
    return (ret);
2188
3.40M
}
2189
#endif /* LIBXML_PUSH_ENABLED */
2190
2191
/**
2192
 * namePop:
2193
 * @ctxt: an XML parser context
2194
 *
2195
 * DEPRECATED: Internal function, do not use.
2196
 *
2197
 * Pops the top element name from the name stack
2198
 *
2199
 * Returns the name just removed
2200
 */
2201
static const xmlChar *
2202
namePop(xmlParserCtxtPtr ctxt)
2203
139k
{
2204
139k
    const xmlChar *ret;
2205
2206
139k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2207
0
        return (NULL);
2208
139k
    ctxt->nameNr--;
2209
139k
    if (ctxt->nameNr > 0)
2210
139k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2211
0
    else
2212
0
        ctxt->name = NULL;
2213
139k
    ret = ctxt->nameTab[ctxt->nameNr];
2214
139k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2215
139k
    return (ret);
2216
139k
}
2217
2218
10.3M
/*
 * Pushes @val on the ctxt->spaceTab stack, growing the table when it is
 * full, and points ctxt->space at the new top entry.
 *
 * Returns the index of the pushed entry, or -1 after raising a memory
 * error when the table cannot be grown.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;
        int capacity;

        capacity = xmlGrowCapacity(ctxt->spaceMax, sizeof(grown[0]),
                                   10, XML_MAX_ITEMS);
        if (capacity < 0) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        grown = xmlRealloc(ctxt->spaceTab, capacity * sizeof(grown[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        ctxt->spaceTab = grown;
        ctxt->spaceMax = capacity;
    }

    ctxt->spaceTab[ctxt->spaceNr] = val;
    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    return(ctxt->spaceNr++);
}
2243
2244
9.30M
/*
 * Pops the top entry of the ctxt->spaceTab stack. ctxt->space is pointed
 * at the new top entry, or at slot 0 when the stack becomes empty, and
 * the vacated slot is overwritten with -1.
 *
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2256
2257
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR  the current pointer to the xmlChar to be parsed.
 *            To be used with extreme caution since operations consuming
 *            characters may move the input buffer to a different location !
 *   BASE_PTR the base pointer of the current input.
 *   CUR      the current xmlChar value. This should be used internally
 *            by the parser only to compare to ASCII values, otherwise it
 *            would break when running with UTF-8 encoding.
 *   RAW      same as CUR (both read the byte at the current input
 *            position).
 *   NXT(n)   the n'th next xmlChar. Same as CUR, it should be used only
 *            to compare on ASCII based substrings.
 *   CMP4 .. CMP10  compare the first 4 .. 10 bytes at s with the given
 *            characters.
 *   SKIP(n)  Skip n xmlChar, and must also be used only to skip ASCII
 *            defined strings without newlines within the parser.
 *   SKIPL(n) Skip n xmlChar one at a time, keeping the line and column
 *            counters up to date across newlines.
 *   NEXT1    Skip 1 xmlChar, and must also be used only to skip 1
 *            non-newline ASCII defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT     Skip to the next character by calling xmlNextChar.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *   SKIP_BLANKS, SKIP_BLANKS_PE  skip whitespace, the latter also
 *            handling parameter entities.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )

/* Advance without newline tracking; refill when the buffer end is hit. */
#define SKIP(val) do {                                                  \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserGrow(ctxt);                                            \
  } while (0)

#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<val; skipl++) {                                  \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++; ctxt->input->col = 1;                  \
        } else ctxt->input->col++;                                      \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserGrow(ctxt);                                            \
  } while (0)

/* Note: SHRINK and GROW expand to a bare "if" statement. */
#define SHRINK \
    if (!PARSER_PROGRESSIVE(ctxt)) \
        xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
        xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

/* Note: NEXT1 expands to a brace-enclosed block, not a do/while. */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserGrow(ctxt);                                        \
    }

#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += l;                                              \
  } while (0)

#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/* Note: COPY_BUF expands to an unbraced if/else without a trailing ';'. */
#define COPY_BUF(b, i, v)                                               \
    if (v < 0x80) b[i++] = v;                                           \
    else i += xmlCopyCharMultiByte(&b[i],v)
2366
2367
static int
2368
327M
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2369
327M
    int c = xmlCurrentChar(ctxt, len);
2370
2371
327M
    if (c == XML_INVALID_CHAR)
2372
18.8M
        c = 0xFFFD; /* replacement character */
2373
2374
327M
    return(c);
2375
327M
}
2376
2377
/**
2378
 * xmlSkipBlankChars:
2379
 * @ctxt:  the XML parser context
2380
 *
2381
 * DEPRECATED: Internal function, do not use.
2382
 *
2383
 * Skip whitespace in the input stream.
2384
 *
2385
 * Returns the number of space chars skipped
2386
 */
2387
int
2388
31.7M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2389
31.7M
    const xmlChar *cur;
2390
31.7M
    int res = 0;
2391
2392
31.7M
    cur = ctxt->input->cur;
2393
72.3M
    while (IS_BLANK_CH(*cur)) {
2394
72.3M
        if (*cur == '\n') {
2395
4.11M
            ctxt->input->line++; ctxt->input->col = 1;
2396
68.2M
        } else {
2397
68.2M
            ctxt->input->col++;
2398
68.2M
        }
2399
72.3M
        cur++;
2400
72.3M
        if (res < INT_MAX)
2401
72.3M
            res++;
2402
72.3M
        if (*cur == 0) {
2403
126k
            ctxt->input->cur = cur;
2404
126k
            xmlParserGrow(ctxt);
2405
126k
            cur = ctxt->input->cur;
2406
126k
        }
2407
72.3M
    }
2408
31.7M
    ctxt->input->cur = cur;
2409
2410
31.7M
    if (res > 4)
2411
177k
        GROW;
2412
2413
31.7M
    return(res);
2414
31.7M
}
2415
2416
static void
2417
26.4k
xmlPopPE(xmlParserCtxtPtr ctxt) {
2418
26.4k
    unsigned long consumed;
2419
26.4k
    xmlEntityPtr ent;
2420
2421
26.4k
    ent = ctxt->input->entity;
2422
2423
26.4k
    ent->flags &= ~XML_ENT_EXPANDING;
2424
2425
26.4k
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2426
8.00k
        int result;
2427
2428
        /*
2429
         * Read the rest of the stream in case of errors. We want
2430
         * to account for the whole entity size.
2431
         */
2432
8.00k
        do {
2433
8.00k
            ctxt->input->cur = ctxt->input->end;
2434
8.00k
            xmlParserShrink(ctxt);
2435
8.00k
            result = xmlParserGrow(ctxt);
2436
8.00k
        } while (result > 0);
2437
2438
8.00k
        consumed = ctxt->input->consumed;
2439
8.00k
        xmlSaturatedAddSizeT(&consumed,
2440
8.00k
                             ctxt->input->end - ctxt->input->base);
2441
2442
8.00k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2443
2444
        /*
2445
         * Add to sizeentities when parsing an external entity
2446
         * for the first time.
2447
         */
2448
8.00k
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2449
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2450
0
        }
2451
2452
8.00k
        ent->flags |= XML_ENT_CHECKED;
2453
8.00k
    }
2454
2455
26.4k
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
2456
2457
26.4k
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2458
2459
26.4k
    GROW;
2460
26.4k
}
2461
2462
/**
2463
 * xmlSkipBlankCharsPE:
2464
 * @ctxt:  the XML parser context
2465
 *
2466
 * Skip whitespace in the input stream, also handling parameter
2467
 * entities.
2468
 *
2469
 * Returns the number of space chars skipped
2470
 */
2471
static int
2472
1.01M
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2473
1.01M
    int res = 0;
2474
1.01M
    int inParam;
2475
1.01M
    int expandParam;
2476
2477
1.01M
    inParam = PARSER_IN_PE(ctxt);
2478
1.01M
    expandParam = PARSER_EXTERNAL(ctxt);
2479
2480
1.01M
    if (!inParam && !expandParam)
2481
699k
        return(xmlSkipBlankChars(ctxt));
2482
2483
    /*
2484
     * It's Okay to use CUR/NEXT here since all the blanks are on
2485
     * the ASCII range.
2486
     */
2487
619k
    while (PARSER_STOPPED(ctxt) == 0) {
2488
619k
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2489
283k
            NEXT;
2490
336k
        } else if (CUR == '%') {
2491
19.0k
            if ((expandParam == 0) ||
2492
0
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2493
19.0k
                break;
2494
2495
            /*
2496
             * Expand parameter entity. We continue to consume
2497
             * whitespace at the start of the entity and possible
2498
             * even consume the whole entity and pop it. We might
2499
             * even pop multiple PEs in this loop.
2500
             */
2501
0
            xmlParsePEReference(ctxt);
2502
2503
0
            inParam = PARSER_IN_PE(ctxt);
2504
0
            expandParam = PARSER_EXTERNAL(ctxt);
2505
317k
        } else if (CUR == 0) {
2506
21.2k
            if (inParam == 0)
2507
44
                break;
2508
2509
21.2k
            xmlPopPE(ctxt);
2510
2511
21.2k
            inParam = PARSER_IN_PE(ctxt);
2512
21.2k
            expandParam = PARSER_EXTERNAL(ctxt);
2513
296k
        } else {
2514
296k
            break;
2515
296k
        }
2516
2517
        /*
2518
         * Also increase the counter when entering or exiting a PERef.
2519
         * The spec says: "When a parameter-entity reference is recognized
2520
         * in the DTD and included, its replacement text MUST be enlarged
2521
         * by the attachment of one leading and one following space (#x20)
2522
         * character."
2523
         */
2524
304k
        if (res < INT_MAX)
2525
304k
            res++;
2526
304k
    }
2527
2528
315k
    return(res);
2529
1.01M
}
2530
2531
/************************************************************************
2532
 *                  *
2533
 *    Commodity functions to handle entities      *
2534
 *                  *
2535
 ************************************************************************/
2536
2537
/**
2538
 * xmlPopInput:
2539
 * @ctxt:  an XML parser context
2540
 *
2541
 * DEPRECATED: Internal function, don't use.
2542
 *
2543
 * Returns the current xmlChar in the parser context
2544
 */
2545
xmlChar
2546
0
xmlPopInput(xmlParserCtxtPtr ctxt) {
2547
0
    xmlParserInputPtr input;
2548
2549
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2550
0
    input = xmlCtxtPopInput(ctxt);
2551
0
    xmlFreeInputStream(input);
2552
0
    if (*ctxt->input->cur == 0)
2553
0
        xmlParserGrow(ctxt);
2554
0
    return(CUR);
2555
0
}
2556
2557
/**
2558
 * xmlPushInput:
2559
 * @ctxt:  an XML parser context
2560
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2561
 *
2562
 * DEPRECATED: Internal function, don't use.
2563
 *
2564
 * Push an input stream onto the stack.
2565
 *
2566
 * Returns -1 in case of error or the index in the input stack
2567
 */
2568
int
2569
0
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2570
0
    int ret;
2571
2572
0
    if ((ctxt == NULL) || (input == NULL))
2573
0
        return(-1);
2574
2575
0
    ret = xmlCtxtPushInput(ctxt, input);
2576
0
    if (ret >= 0)
2577
0
        GROW;
2578
0
    return(ret);
2579
0
}
2580
2581
/**
2582
 * xmlParseCharRef:
2583
 * @ctxt:  an XML parser context
2584
 *
2585
 * DEPRECATED: Internal function, don't use.
2586
 *
2587
 * Parse a numeric character reference. Always consumes '&'.
2588
 *
2589
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2590
 *                  '&#x' [0-9a-fA-F]+ ';'
2591
 *
2592
 * [ WFC: Legal Character ]
2593
 * Characters referred to using character references must match the
2594
 * production for Char.
2595
 *
2596
 * Returns the value parsed (as an int), 0 in case of error
2597
 */
2598
int
2599
452k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2600
452k
    int val = 0;
2601
452k
    int count = 0;
2602
2603
    /*
2604
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2605
     */
2606
452k
    if ((RAW == '&') && (NXT(1) == '#') &&
2607
452k
        (NXT(2) == 'x')) {
2608
153k
  SKIP(3);
2609
153k
  GROW;
2610
437k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2611
300k
      if (count++ > 20) {
2612
1.71k
    count = 0;
2613
1.71k
    GROW;
2614
1.71k
      }
2615
300k
      if ((RAW >= '0') && (RAW <= '9'))
2616
179k
          val = val * 16 + (CUR - '0');
2617
121k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2618
83.3k
          val = val * 16 + (CUR - 'a') + 10;
2619
37.9k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2620
21.4k
          val = val * 16 + (CUR - 'A') + 10;
2621
16.5k
      else {
2622
16.5k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2623
16.5k
    val = 0;
2624
16.5k
    break;
2625
16.5k
      }
2626
284k
      if (val > 0x110000)
2627
28.7k
          val = 0x110000;
2628
2629
284k
      NEXT;
2630
284k
      count++;
2631
284k
  }
2632
153k
  if (RAW == ';') {
2633
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2634
136k
      ctxt->input->col++;
2635
136k
      ctxt->input->cur++;
2636
136k
  }
2637
298k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2638
298k
  SKIP(2);
2639
298k
  GROW;
2640
913k
  while (RAW != ';') { /* loop blocked by count */
2641
691k
      if (count++ > 20) {
2642
3.14k
    count = 0;
2643
3.14k
    GROW;
2644
3.14k
      }
2645
691k
      if ((RAW >= '0') && (RAW <= '9'))
2646
615k
          val = val * 10 + (CUR - '0');
2647
76.0k
      else {
2648
76.0k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2649
76.0k
    val = 0;
2650
76.0k
    break;
2651
76.0k
      }
2652
615k
      if (val > 0x110000)
2653
82.5k
          val = 0x110000;
2654
2655
615k
      NEXT;
2656
615k
      count++;
2657
615k
  }
2658
298k
  if (RAW == ';') {
2659
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2660
222k
      ctxt->input->col++;
2661
222k
      ctxt->input->cur++;
2662
222k
  }
2663
298k
    } else {
2664
0
        if (RAW == '&')
2665
0
            SKIP(1);
2666
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2667
0
    }
2668
2669
    /*
2670
     * [ WFC: Legal Character ]
2671
     * Characters referred to using character references must match the
2672
     * production for Char.
2673
     */
2674
452k
    if (val >= 0x110000) {
2675
14.1k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2676
14.1k
                "xmlParseCharRef: character reference out of bounds\n",
2677
14.1k
          val);
2678
14.1k
        val = 0xFFFD;
2679
437k
    } else if (!IS_CHAR(val)) {
2680
110k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2681
110k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2682
110k
                    val);
2683
110k
    }
2684
452k
    return(val);
2685
452k
}
2686
2687
/**
2688
 * xmlParseStringCharRef:
2689
 * @ctxt:  an XML parser context
2690
 * @str:  a pointer to an index in the string
2691
 *
2692
 * parse Reference declarations, variant parsing from a string rather
2693
 * than an an input flow.
2694
 *
2695
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2696
 *                  '&#x' [0-9a-fA-F]+ ';'
2697
 *
2698
 * [ WFC: Legal Character ]
2699
 * Characters referred to using character references must match the
2700
 * production for Char.
2701
 *
2702
 * Returns the value parsed (as an int), 0 in case of error, str will be
2703
 *         updated to the current value of the index
2704
 */
2705
static int
2706
93.1k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2707
93.1k
    const xmlChar *ptr;
2708
93.1k
    xmlChar cur;
2709
93.1k
    int val = 0;
2710
2711
93.1k
    if ((str == NULL) || (*str == NULL)) return(0);
2712
93.1k
    ptr = *str;
2713
93.1k
    cur = *ptr;
2714
93.1k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2715
15.1k
  ptr += 3;
2716
15.1k
  cur = *ptr;
2717
66.0k
  while (cur != ';') { /* Non input consuming loop */
2718
57.2k
      if ((cur >= '0') && (cur <= '9'))
2719
24.9k
          val = val * 16 + (cur - '0');
2720
32.2k
      else if ((cur >= 'a') && (cur <= 'f'))
2721
11.2k
          val = val * 16 + (cur - 'a') + 10;
2722
21.0k
      else if ((cur >= 'A') && (cur <= 'F'))
2723
14.7k
          val = val * 16 + (cur - 'A') + 10;
2724
6.31k
      else {
2725
6.31k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2726
6.31k
    val = 0;
2727
6.31k
    break;
2728
6.31k
      }
2729
50.9k
      if (val > 0x110000)
2730
9.10k
          val = 0x110000;
2731
2732
50.9k
      ptr++;
2733
50.9k
      cur = *ptr;
2734
50.9k
  }
2735
15.1k
  if (cur == ';')
2736
8.81k
      ptr++;
2737
77.9k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2738
77.9k
  ptr += 2;
2739
77.9k
  cur = *ptr;
2740
257k
  while (cur != ';') { /* Non input consuming loops */
2741
180k
      if ((cur >= '0') && (cur <= '9'))
2742
179k
          val = val * 10 + (cur - '0');
2743
1.21k
      else {
2744
1.21k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2745
1.21k
    val = 0;
2746
1.21k
    break;
2747
1.21k
      }
2748
179k
      if (val > 0x110000)
2749
4.62k
          val = 0x110000;
2750
2751
179k
      ptr++;
2752
179k
      cur = *ptr;
2753
179k
  }
2754
77.9k
  if (cur == ';')
2755
76.7k
      ptr++;
2756
77.9k
    } else {
2757
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2758
0
  return(0);
2759
0
    }
2760
93.1k
    *str = ptr;
2761
2762
    /*
2763
     * [ WFC: Legal Character ]
2764
     * Characters referred to using character references must match the
2765
     * production for Char.
2766
     */
2767
93.1k
    if (val >= 0x110000) {
2768
1.41k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2769
1.41k
                "xmlParseStringCharRef: character reference out of bounds\n",
2770
1.41k
                val);
2771
91.6k
    } else if (IS_CHAR(val)) {
2772
80.9k
        return(val);
2773
80.9k
    } else {
2774
10.7k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2775
10.7k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2776
10.7k
        val);
2777
10.7k
    }
2778
12.1k
    return(0);
2779
93.1k
}
2780
2781
/**
2782
 * xmlParserHandlePEReference:
2783
 * @ctxt:  the parser context
2784
 *
2785
 * DEPRECATED: Internal function, do not use.
2786
 *
2787
 * [69] PEReference ::= '%' Name ';'
2788
 *
2789
 * [ WFC: No Recursion ]
2790
 * A parsed entity must not contain a recursive
2791
 * reference to itself, either directly or indirectly.
2792
 *
2793
 * [ WFC: Entity Declared ]
2794
 * In a document without any DTD, a document with only an internal DTD
2795
 * subset which contains no parameter entity references, or a document
2796
 * with "standalone='yes'", ...  ... The declaration of a parameter
2797
 * entity must precede any reference to it...
2798
 *
2799
 * [ VC: Entity Declared ]
2800
 * In a document with an external subset or external parameter entities
2801
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2802
 * must precede any reference to it...
2803
 *
2804
 * [ WFC: In DTD ]
2805
 * Parameter-entity references may only appear in the DTD.
2806
 * NOTE: misleading but this is handled.
2807
 *
2808
 * A PEReference may have been detected in the current input stream
2809
 * the handling is done accordingly to
2810
 *      http://www.w3.org/TR/REC-xml#entproc
2811
 * i.e.
2812
 *   - Included in literal in entity values
2813
 *   - Included as Parameter Entity reference within DTDs
2814
 */
2815
void
2816
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2817
0
    xmlParsePEReference(ctxt);
2818
0
}
2819
2820
/**
2821
 * xmlStringLenDecodeEntities:
2822
 * @ctxt:  the parser context
2823
 * @str:  the input string
2824
 * @len: the string length
2825
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2826
 * @end:  an end marker xmlChar, 0 if none
2827
 * @end2:  an end marker xmlChar, 0 if none
2828
 * @end3:  an end marker xmlChar, 0 if none
2829
 *
2830
 * DEPRECATED: Internal function, don't use.
2831
 *
2832
 * Returns A newly allocated string with the substitution done. The caller
2833
 *      must deallocate it !
2834
 */
2835
xmlChar *
2836
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2837
                           int what ATTRIBUTE_UNUSED,
2838
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2839
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2840
0
        return(NULL);
2841
2842
0
    if ((str[len] != 0) ||
2843
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2844
0
        return(NULL);
2845
2846
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2847
0
}
2848
2849
/**
2850
 * xmlStringDecodeEntities:
2851
 * @ctxt:  the parser context
2852
 * @str:  the input string
2853
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2854
 * @end:  an end marker xmlChar, 0 if none
2855
 * @end2:  an end marker xmlChar, 0 if none
2856
 * @end3:  an end marker xmlChar, 0 if none
2857
 *
2858
 * DEPRECATED: Internal function, don't use.
2859
 *
2860
 * Returns A newly allocated string with the substitution done. The caller
2861
 *      must deallocate it !
2862
 */
2863
xmlChar *
2864
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2865
                        int what ATTRIBUTE_UNUSED,
2866
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2867
0
    if ((ctxt == NULL) || (str == NULL))
2868
0
        return(NULL);
2869
2870
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2871
0
        return(NULL);
2872
2873
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2874
0
}
2875
2876
/************************************************************************
2877
 *                  *
2878
 *    Commodity functions, cleanup needed ?     *
2879
 *                  *
2880
 ************************************************************************/
2881
2882
/**
2883
 * areBlanks:
2884
 * @ctxt:  an XML parser context
2885
 * @str:  a xmlChar *
2886
 * @len:  the size of @str
2887
 * @blank_chars: we know the chars are blanks
2888
 *
2889
 * Is this a sequence of blank chars that one can ignore ?
2890
 *
2891
 * Returns 1 if ignorable 0 otherwise.
2892
 */
2893
2894
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2895
11.5M
                     int blank_chars) {
2896
11.5M
    int i;
2897
11.5M
    xmlNodePtr lastChild;
2898
2899
    /*
2900
     * Check for xml:space value.
2901
     */
2902
11.5M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2903
11.0M
        (*(ctxt->space) == -2))
2904
7.12M
  return(0);
2905
2906
    /*
2907
     * Check that the string is made of blanks
2908
     */
2909
4.42M
    if (blank_chars == 0) {
2910
4.52M
  for (i = 0;i < len;i++)
2911
4.46M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2912
2.87M
    }
2913
2914
    /*
2915
     * Look if the element is mixed content in the DTD if available
2916
     */
2917
1.60M
    if (ctxt->node == NULL) return(0);
2918
1.60M
    if (ctxt->myDoc != NULL) {
2919
1.60M
        xmlElementPtr elemDecl = NULL;
2920
1.60M
        xmlDocPtr doc = ctxt->myDoc;
2921
1.60M
        const xmlChar *prefix = NULL;
2922
2923
1.60M
        if (ctxt->node->ns)
2924
1.29M
            prefix = ctxt->node->ns->prefix;
2925
1.60M
        if (doc->intSubset != NULL)
2926
74.2k
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2927
74.2k
                                      prefix);
2928
1.60M
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2929
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2930
0
                                      prefix);
2931
1.60M
        if (elemDecl != NULL) {
2932
1.27k
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2933
306
                return(1);
2934
971
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2935
971
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2936
269
                return(0);
2937
971
        }
2938
1.60M
    }
2939
2940
    /*
2941
     * Otherwise, heuristic :-\
2942
     *
2943
     * When push parsing, we could be at the end of a chunk.
2944
     * This makes the look-ahead and consequently the NOBLANKS
2945
     * option unreliable.
2946
     */
2947
1.60M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2948
1.58M
    if ((ctxt->node->children == NULL) &&
2949
532k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2950
2951
1.52M
    lastChild = xmlGetLastChild(ctxt->node);
2952
1.52M
    if (lastChild == NULL) {
2953
471k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2954
0
            (ctxt->node->content != NULL)) return(0);
2955
1.05M
    } else if (xmlNodeIsText(lastChild))
2956
249
        return(0);
2957
1.05M
    else if ((ctxt->node->children != NULL) &&
2958
1.05M
             (xmlNodeIsText(ctxt->node->children)))
2959
434
        return(0);
2960
1.52M
    return(1);
2961
1.52M
}
2962
2963
/************************************************************************
2964
 *                  *
2965
 *    Extra stuff for namespace support     *
2966
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2967
 *                  *
2968
 ************************************************************************/
2969
2970
/**
2971
 * xmlSplitQName:
2972
 * @ctxt:  an XML parser context
2973
 * @name:  an XML parser context
2974
 * @prefixOut:  a xmlChar **
2975
 *
2976
 * DEPRECATED: Don't use.
2977
 *
2978
 * parse an UTF8 encoded XML qualified name string
2979
 *
2980
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2981
 *
2982
 * [NS 6] Prefix ::= NCName
2983
 *
2984
 * [NS 7] LocalPart ::= NCName
2985
 *
2986
 * Returns the local part, and prefix is updated
2987
 *   to get the Prefix if any.
2988
 */
2989
2990
xmlChar *
2991
0
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
2992
0
    xmlChar buf[XML_MAX_NAMELEN + 5];
2993
0
    xmlChar *buffer = NULL;
2994
0
    int len = 0;
2995
0
    int max = XML_MAX_NAMELEN;
2996
0
    xmlChar *ret = NULL;
2997
0
    xmlChar *prefix;
2998
0
    const xmlChar *cur = name;
2999
0
    int c;
3000
3001
0
    if (prefixOut == NULL) return(NULL);
3002
0
    *prefixOut = NULL;
3003
3004
0
    if (cur == NULL) return(NULL);
3005
3006
    /* nasty but well=formed */
3007
0
    if (cur[0] == ':')
3008
0
  return(xmlStrdup(name));
3009
3010
0
    c = *cur++;
3011
0
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3012
0
  buf[len++] = c;
3013
0
  c = *cur++;
3014
0
    }
3015
0
    if (len >= max) {
3016
  /*
3017
   * Okay someone managed to make a huge name, so he's ready to pay
3018
   * for the processing speed.
3019
   */
3020
0
  max = len * 2;
3021
3022
0
  buffer = xmlMalloc(max);
3023
0
  if (buffer == NULL) {
3024
0
      xmlErrMemory(ctxt);
3025
0
      return(NULL);
3026
0
  }
3027
0
  memcpy(buffer, buf, len);
3028
0
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3029
0
      if (len + 10 > max) {
3030
0
          xmlChar *tmp;
3031
0
                int newSize;
3032
3033
0
                newSize = xmlGrowCapacity(max, 1, 1, XML_MAX_ITEMS);
3034
0
                if (newSize < 0) {
3035
0
        xmlErrMemory(ctxt);
3036
0
        xmlFree(buffer);
3037
0
        return(NULL);
3038
0
                }
3039
0
    tmp = xmlRealloc(buffer, newSize);
3040
0
    if (tmp == NULL) {
3041
0
        xmlErrMemory(ctxt);
3042
0
        xmlFree(buffer);
3043
0
        return(NULL);
3044
0
    }
3045
0
    buffer = tmp;
3046
0
    max = newSize;
3047
0
      }
3048
0
      buffer[len++] = c;
3049
0
      c = *cur++;
3050
0
  }
3051
0
  buffer[len] = 0;
3052
0
    }
3053
3054
0
    if ((c == ':') && (*cur == 0)) {
3055
0
        if (buffer != NULL)
3056
0
      xmlFree(buffer);
3057
0
  return(xmlStrdup(name));
3058
0
    }
3059
3060
0
    if (buffer == NULL) {
3061
0
  ret = xmlStrndup(buf, len);
3062
0
        if (ret == NULL) {
3063
0
      xmlErrMemory(ctxt);
3064
0
      return(NULL);
3065
0
        }
3066
0
    } else {
3067
0
  ret = buffer;
3068
0
  buffer = NULL;
3069
0
  max = XML_MAX_NAMELEN;
3070
0
    }
3071
3072
3073
0
    if (c == ':') {
3074
0
  c = *cur;
3075
0
        prefix = ret;
3076
0
  if (c == 0) {
3077
0
      ret = xmlStrndup(BAD_CAST "", 0);
3078
0
            if (ret == NULL) {
3079
0
                xmlFree(prefix);
3080
0
                return(NULL);
3081
0
            }
3082
0
            *prefixOut = prefix;
3083
0
            return(ret);
3084
0
  }
3085
0
  len = 0;
3086
3087
  /*
3088
   * Check that the first character is proper to start
3089
   * a new name
3090
   */
3091
0
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3092
0
        ((c >= 0x41) && (c <= 0x5A)) ||
3093
0
        (c == '_') || (c == ':'))) {
3094
0
      int l;
3095
0
      int first = CUR_SCHAR(cur, l);
3096
3097
0
      if (!IS_LETTER(first) && (first != '_')) {
3098
0
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3099
0
          "Name %s is not XML Namespace compliant\n",
3100
0
          name);
3101
0
      }
3102
0
  }
3103
0
  cur++;
3104
3105
0
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3106
0
      buf[len++] = c;
3107
0
      c = *cur++;
3108
0
  }
3109
0
  if (len >= max) {
3110
      /*
3111
       * Okay someone managed to make a huge name, so he's ready to pay
3112
       * for the processing speed.
3113
       */
3114
0
      max = len * 2;
3115
3116
0
      buffer = xmlMalloc(max);
3117
0
      if (buffer == NULL) {
3118
0
          xmlErrMemory(ctxt);
3119
0
                xmlFree(prefix);
3120
0
    return(NULL);
3121
0
      }
3122
0
      memcpy(buffer, buf, len);
3123
0
      while (c != 0) { /* tested bigname2.xml */
3124
0
    if (len + 10 > max) {
3125
0
        xmlChar *tmp;
3126
0
                    int newSize;
3127
3128
0
                    newSize = xmlGrowCapacity(max, 1, 1, XML_MAX_ITEMS);
3129
0
                    if (newSize < 0) {
3130
0
                        xmlErrMemory(ctxt);
3131
0
                        xmlFree(buffer);
3132
0
                        return(NULL);
3133
0
                    }
3134
0
        tmp = xmlRealloc(buffer, newSize);
3135
0
        if (tmp == NULL) {
3136
0
      xmlErrMemory(ctxt);
3137
0
                        xmlFree(prefix);
3138
0
      xmlFree(buffer);
3139
0
      return(NULL);
3140
0
        }
3141
0
        buffer = tmp;
3142
0
                    max = newSize;
3143
0
    }
3144
0
    buffer[len++] = c;
3145
0
    c = *cur++;
3146
0
      }
3147
0
      buffer[len] = 0;
3148
0
  }
3149
3150
0
  if (buffer == NULL) {
3151
0
      ret = xmlStrndup(buf, len);
3152
0
            if (ret == NULL) {
3153
0
                xmlFree(prefix);
3154
0
                return(NULL);
3155
0
            }
3156
0
  } else {
3157
0
      ret = buffer;
3158
0
  }
3159
3160
0
        *prefixOut = prefix;
3161
0
    }
3162
3163
0
    return(ret);
3164
0
}
3165
3166
/************************************************************************
3167
 *                  *
3168
 *      The parser itself       *
3169
 *  Relates to http://www.w3.org/TR/REC-xml       *
3170
 *                  *
3171
 ************************************************************************/
3172
3173
/************************************************************************
3174
 *                  *
3175
 *  Routines to parse Name, NCName and NmToken      *
3176
 *                  *
3177
 ************************************************************************/
3178
3179
/*
3180
 * The two following functions are related to the change of accepted
3181
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3182
 * They correspond to the modified production [4] and the new production [4a]
3183
 * changes in that revision. Also note that the macros used for the
3184
 * productions Letter, Digit, CombiningChar and Extender are not needed
3185
 * anymore.
3186
 * We still keep compatibility to pre-revision5 parsing semantic if the
3187
 * new XML_PARSE_OLD10 option is given to the parser.
3188
 */
3189
static int
3190
2.13M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3191
2.13M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3192
        /*
3193
   * Use the new checks of production [4] [4a] amd [5] of the
3194
   * Update 5 of XML-1.0
3195
   */
3196
2.13M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3197
2.13M
      (((c >= 'a') && (c <= 'z')) ||
3198
1.14M
       ((c >= 'A') && (c <= 'Z')) ||
3199
983k
       (c == '_') || (c == ':') ||
3200
780k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3201
765k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3202
764k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3203
762k
       ((c >= 0x370) && (c <= 0x37D)) ||
3204
762k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3205
758k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3206
757k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3207
757k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3208
756k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3209
754k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3210
753k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3211
752k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3212
1.38M
      return(1);
3213
2.13M
    } else {
3214
0
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3215
0
      return(1);
3216
0
    }
3217
752k
    return(0);
3218
2.13M
}
3219
3220
static int
3221
13.4M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3222
13.4M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3223
        /*
3224
   * Use the new checks of production [4] [4a] amd [5] of the
3225
   * Update 5 of XML-1.0
3226
   */
3227
13.4M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3228
13.3M
      (((c >= 'a') && (c <= 'z')) ||
3229
4.71M
       ((c >= 'A') && (c <= 'Z')) ||
3230
4.30M
       ((c >= '0') && (c <= '9')) || /* !start */
3231
4.02M
       (c == '_') || (c == ':') ||
3232
3.66M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3233
3.54M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3234
3.49M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3235
2.29M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3236
2.25M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3237
2.24M
       ((c >= 0x370) && (c <= 0x37D)) ||
3238
2.24M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3239
2.23M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3240
2.23M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3241
2.23M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3242
2.22M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3243
2.22M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3244
1.29M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3245
1.29M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3246
1.24M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3247
12.0M
       return(1);
3248
13.4M
    } else {
3249
0
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3250
0
            (c == '.') || (c == '-') ||
3251
0
      (c == '_') || (c == ':') ||
3252
0
      (IS_COMBINING(c)) ||
3253
0
      (IS_EXTENDER(c)))
3254
0
      return(1);
3255
0
    }
3256
1.43M
    return(0);
3257
13.4M
}
3258
3259
static const xmlChar *
3260
958k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3261
958k
    const xmlChar *ret;
3262
958k
    int len = 0, l;
3263
958k
    int c;
3264
958k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3265
0
                    XML_MAX_TEXT_LENGTH :
3266
958k
                    XML_MAX_NAME_LENGTH;
3267
3268
    /*
3269
     * Handler for more complex cases
3270
     */
3271
958k
    c = xmlCurrentChar(ctxt, &l);
3272
958k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3273
        /*
3274
   * Use the new checks of production [4] [4a] amd [5] of the
3275
   * Update 5 of XML-1.0
3276
   */
3277
958k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3278
908k
      (!(((c >= 'a') && (c <= 'z')) ||
3279
854k
         ((c >= 'A') && (c <= 'Z')) ||
3280
808k
         (c == '_') || (c == ':') ||
3281
797k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3282
792k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3283
783k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3284
777k
         ((c >= 0x370) && (c <= 0x37D)) ||
3285
776k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3286
774k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3287
773k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3288
772k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3289
770k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3290
768k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3291
768k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3292
757k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3293
757k
      return(NULL);
3294
757k
  }
3295
200k
  len += l;
3296
200k
  NEXTL(l);
3297
200k
  c = xmlCurrentChar(ctxt, &l);
3298
4.28M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3299
4.23M
         (((c >= 'a') && (c <= 'z')) ||
3300
3.19M
          ((c >= 'A') && (c <= 'Z')) ||
3301
3.00M
          ((c >= '0') && (c <= '9')) || /* !start */
3302
2.92M
          (c == '_') || (c == ':') ||
3303
2.81M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3304
2.76M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3305
2.75M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3306
914k
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3307
820k
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3308
818k
          ((c >= 0x370) && (c <= 0x37D)) ||
3309
817k
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3310
811k
          ((c >= 0x200C) && (c <= 0x200D)) ||
3311
809k
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3312
809k
          ((c >= 0x2070) && (c <= 0x218F)) ||
3313
805k
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3314
799k
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3315
721k
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3316
719k
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3317
160k
          ((c >= 0x10000) && (c <= 0xEFFFF))
3318
4.23M
    )) {
3319
4.07M
            if (len <= INT_MAX - l)
3320
4.07M
          len += l;
3321
4.07M
      NEXTL(l);
3322
4.07M
      c = xmlCurrentChar(ctxt, &l);
3323
4.07M
  }
3324
200k
    } else {
3325
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3326
0
      (!IS_LETTER(c) && (c != '_') &&
3327
0
       (c != ':'))) {
3328
0
      return(NULL);
3329
0
  }
3330
0
  len += l;
3331
0
  NEXTL(l);
3332
0
  c = xmlCurrentChar(ctxt, &l);
3333
3334
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3335
0
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3336
0
    (c == '.') || (c == '-') ||
3337
0
    (c == '_') || (c == ':') ||
3338
0
    (IS_COMBINING(c)) ||
3339
0
    (IS_EXTENDER(c)))) {
3340
0
            if (len <= INT_MAX - l)
3341
0
          len += l;
3342
0
      NEXTL(l);
3343
0
      c = xmlCurrentChar(ctxt, &l);
3344
0
  }
3345
0
    }
3346
200k
    if (len > maxLength) {
3347
53
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3348
53
        return(NULL);
3349
53
    }
3350
200k
    if (ctxt->input->cur - ctxt->input->base < len) {
3351
        /*
3352
         * There were a couple of bugs where PERefs lead to to a change
3353
         * of the buffer. Check the buffer size to avoid passing an invalid
3354
         * pointer to xmlDictLookup.
3355
         */
3356
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3357
0
                    "unexpected change of input buffer");
3358
0
        return (NULL);
3359
0
    }
3360
200k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3361
207
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3362
200k
    else
3363
200k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3364
200k
    if (ret == NULL)
3365
0
        xmlErrMemory(ctxt);
3366
200k
    return(ret);
3367
200k
}
3368
3369
/**
3370
 * xmlParseName:
3371
 * @ctxt:  an XML parser context
3372
 *
3373
 * DEPRECATED: Internal function, don't use.
3374
 *
3375
 * parse an XML name.
3376
 *
3377
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3378
 *                  CombiningChar | Extender
3379
 *
3380
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3381
 *
3382
 * [6] Names ::= Name (#x20 Name)*
3383
 *
3384
 * Returns the Name parsed or NULL
3385
 */
3386
3387
const xmlChar *
3388
2.61M
xmlParseName(xmlParserCtxtPtr ctxt) {
3389
2.61M
    const xmlChar *in;
3390
2.61M
    const xmlChar *ret;
3391
2.61M
    size_t count = 0;
3392
2.61M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3393
0
                       XML_MAX_TEXT_LENGTH :
3394
2.61M
                       XML_MAX_NAME_LENGTH;
3395
3396
2.61M
    GROW;
3397
3398
    /*
3399
     * Accelerator for simple ASCII names
3400
     */
3401
2.61M
    in = ctxt->input->cur;
3402
2.61M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3403
1.39M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3404
1.76M
  (*in == '_') || (*in == ':')) {
3405
1.76M
  in++;
3406
12.9M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3407
3.52M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3408
2.58M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3409
2.23M
         (*in == '_') || (*in == '-') ||
3410
1.85M
         (*in == ':') || (*in == '.'))
3411
11.1M
      in++;
3412
1.76M
  if ((*in > 0) && (*in < 0x80)) {
3413
1.65M
      count = in - ctxt->input->cur;
3414
1.65M
            if (count > maxLength) {
3415
28
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3416
28
                return(NULL);
3417
28
            }
3418
1.65M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3419
1.65M
      ctxt->input->cur = in;
3420
1.65M
      ctxt->input->col += count;
3421
1.65M
      if (ret == NULL)
3422
0
          xmlErrMemory(ctxt);
3423
1.65M
      return(ret);
3424
1.65M
  }
3425
1.76M
    }
3426
    /* accelerator for special cases */
3427
958k
    return(xmlParseNameComplex(ctxt));
3428
2.61M
}
3429
3430
static xmlHashedString
3431
1.26M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3432
1.26M
    xmlHashedString ret;
3433
1.26M
    int len = 0, l;
3434
1.26M
    int c;
3435
1.26M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3436
0
                    XML_MAX_TEXT_LENGTH :
3437
1.26M
                    XML_MAX_NAME_LENGTH;
3438
1.26M
    size_t startPosition = 0;
3439
3440
1.26M
    ret.name = NULL;
3441
1.26M
    ret.hashValue = 0;
3442
3443
    /*
3444
     * Handler for more complex cases
3445
     */
3446
1.26M
    startPosition = CUR_PTR - BASE_PTR;
3447
1.26M
    c = xmlCurrentChar(ctxt, &l);
3448
1.26M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3449
1.21M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3450
994k
  return(ret);
3451
994k
    }
3452
3453
6.99M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3454
6.96M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3455
6.72M
        if (len <= INT_MAX - l)
3456
6.72M
      len += l;
3457
6.72M
  NEXTL(l);
3458
6.72M
  c = xmlCurrentChar(ctxt, &l);
3459
6.72M
    }
3460
266k
    if (len > maxLength) {
3461
58
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3462
58
        return(ret);
3463
58
    }
3464
266k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3465
266k
    if (ret.name == NULL)
3466
0
        xmlErrMemory(ctxt);
3467
266k
    return(ret);
3468
266k
}
3469
3470
/**
3471
 * xmlParseNCName:
3472
 * @ctxt:  an XML parser context
3473
 * @len:  length of the string parsed
3474
 *
3475
 * parse an XML name.
3476
 *
3477
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3478
 *                      CombiningChar | Extender
3479
 *
3480
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3481
 *
3482
 * Returns the Name parsed or NULL
3483
 */
3484
3485
static xmlHashedString
3486
18.0M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3487
18.0M
    const xmlChar *in, *e;
3488
18.0M
    xmlHashedString ret;
3489
18.0M
    size_t count = 0;
3490
18.0M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3491
0
                       XML_MAX_TEXT_LENGTH :
3492
18.0M
                       XML_MAX_NAME_LENGTH;
3493
3494
18.0M
    ret.name = NULL;
3495
3496
    /*
3497
     * Accelerator for simple ASCII names
3498
     */
3499
18.0M
    in = ctxt->input->cur;
3500
18.0M
    e = ctxt->input->end;
3501
18.0M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3502
9.73M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3503
17.0M
   (*in == '_')) && (in < e)) {
3504
17.0M
  in++;
3505
82.7M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3506
24.5M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3507
19.6M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3508
17.7M
          (*in == '_') || (*in == '-') ||
3509
65.7M
          (*in == '.')) && (in < e))
3510
65.7M
      in++;
3511
17.0M
  if (in >= e)
3512
5.25k
      goto complex;
3513
17.0M
  if ((*in > 0) && (*in < 0x80)) {
3514
16.7M
      count = in - ctxt->input->cur;
3515
16.7M
            if (count > maxLength) {
3516
120
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3517
120
                return(ret);
3518
120
            }
3519
16.7M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3520
16.7M
      ctxt->input->cur = in;
3521
16.7M
      ctxt->input->col += count;
3522
16.7M
      if (ret.name == NULL) {
3523
0
          xmlErrMemory(ctxt);
3524
0
      }
3525
16.7M
      return(ret);
3526
16.7M
  }
3527
17.0M
    }
3528
1.26M
complex:
3529
1.26M
    return(xmlParseNCNameComplex(ctxt));
3530
18.0M
}
3531
3532
/**
3533
 * xmlParseNameAndCompare:
3534
 * @ctxt:  an XML parser context
3535
 *
3536
 * parse an XML name and compares for match
3537
 * (specialized for endtag parsing)
3538
 *
3539
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3540
 * and the name for mismatch
3541
 */
3542
3543
static const xmlChar *
3544
3.32M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3545
3.32M
    register const xmlChar *cmp = other;
3546
3.32M
    register const xmlChar *in;
3547
3.32M
    const xmlChar *ret;
3548
3549
3.32M
    GROW;
3550
3551
3.32M
    in = ctxt->input->cur;
3552
18.9M
    while (*in != 0 && *in == *cmp) {
3553
15.6M
  ++in;
3554
15.6M
  ++cmp;
3555
15.6M
    }
3556
3.32M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3557
  /* success */
3558
2.83M
  ctxt->input->col += in - ctxt->input->cur;
3559
2.83M
  ctxt->input->cur = in;
3560
2.83M
  return (const xmlChar*) 1;
3561
2.83M
    }
3562
    /* failure (or end of input buffer), check with full function */
3563
493k
    ret = xmlParseName (ctxt);
3564
    /* strings coming from the dictionary direct compare possible */
3565
493k
    if (ret == other) {
3566
9.51k
  return (const xmlChar*) 1;
3567
9.51k
    }
3568
484k
    return ret;
3569
493k
}
3570
3571
/**
3572
 * xmlParseStringName:
3573
 * @ctxt:  an XML parser context
3574
 * @str:  a pointer to the string pointer (IN/OUT)
3575
 *
3576
 * parse an XML name.
3577
 *
3578
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3579
 *                  CombiningChar | Extender
3580
 *
3581
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3582
 *
3583
 * [6] Names ::= Name (#x20 Name)*
3584
 *
3585
 * Returns the Name parsed or NULL. The @str pointer
3586
 * is updated to the current location in the string.
3587
 */
3588
3589
static xmlChar *
3590
922k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3591
922k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3592
922k
    xmlChar *ret;
3593
922k
    const xmlChar *cur = *str;
3594
922k
    int len = 0, l;
3595
922k
    int c;
3596
922k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3597
0
                    XML_MAX_TEXT_LENGTH :
3598
922k
                    XML_MAX_NAME_LENGTH;
3599
3600
922k
    c = CUR_SCHAR(cur, l);
3601
922k
    if (!xmlIsNameStartChar(ctxt, c)) {
3602
2.30k
  return(NULL);
3603
2.30k
    }
3604
3605
920k
    COPY_BUF(buf, len, c);
3606
920k
    cur += l;
3607
920k
    c = CUR_SCHAR(cur, l);
3608
1.23M
    while (xmlIsNameChar(ctxt, c)) {
3609
316k
  COPY_BUF(buf, len, c);
3610
316k
  cur += l;
3611
316k
  c = CUR_SCHAR(cur, l);
3612
316k
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3613
      /*
3614
       * Okay someone managed to make a huge name, so he's ready to pay
3615
       * for the processing speed.
3616
       */
3617
489
      xmlChar *buffer;
3618
489
      int max = len * 2;
3619
3620
489
      buffer = xmlMalloc(max);
3621
489
      if (buffer == NULL) {
3622
0
          xmlErrMemory(ctxt);
3623
0
    return(NULL);
3624
0
      }
3625
489
      memcpy(buffer, buf, len);
3626
282k
      while (xmlIsNameChar(ctxt, c)) {
3627
282k
    if (len + 10 > max) {
3628
491
        xmlChar *tmp;
3629
491
                    int newSize;
3630
3631
491
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3632
491
                    if (newSize < 0) {
3633
3
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3634
3
                        xmlFree(buffer);
3635
3
                        return(NULL);
3636
3
                    }
3637
488
        tmp = xmlRealloc(buffer, newSize);
3638
488
        if (tmp == NULL) {
3639
0
      xmlErrMemory(ctxt);
3640
0
      xmlFree(buffer);
3641
0
      return(NULL);
3642
0
        }
3643
488
        buffer = tmp;
3644
488
                    max = newSize;
3645
488
    }
3646
282k
    COPY_BUF(buffer, len, c);
3647
282k
    cur += l;
3648
282k
    c = CUR_SCHAR(cur, l);
3649
282k
      }
3650
486
      buffer[len] = 0;
3651
486
      *str = cur;
3652
486
      return(buffer);
3653
489
  }
3654
316k
    }
3655
919k
    if (len > maxLength) {
3656
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3657
0
        return(NULL);
3658
0
    }
3659
919k
    *str = cur;
3660
919k
    ret = xmlStrndup(buf, len);
3661
919k
    if (ret == NULL)
3662
0
        xmlErrMemory(ctxt);
3663
919k
    return(ret);
3664
919k
}
3665
3666
/**
3667
 * xmlParseNmtoken:
3668
 * @ctxt:  an XML parser context
3669
 *
3670
 * DEPRECATED: Internal function, don't use.
3671
 *
3672
 * parse an XML Nmtoken.
3673
 *
3674
 * [7] Nmtoken ::= (NameChar)+
3675
 *
3676
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3677
 *
3678
 * Returns the Nmtoken parsed or NULL
3679
 */
3680
3681
xmlChar *
3682
279k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3683
279k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3684
279k
    xmlChar *ret;
3685
279k
    int len = 0, l;
3686
279k
    int c;
3687
279k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3688
0
                    XML_MAX_TEXT_LENGTH :
3689
279k
                    XML_MAX_NAME_LENGTH;
3690
3691
279k
    c = xmlCurrentChar(ctxt, &l);
3692
3693
839k
    while (xmlIsNameChar(ctxt, c)) {
3694
560k
  COPY_BUF(buf, len, c);
3695
560k
  NEXTL(l);
3696
560k
  c = xmlCurrentChar(ctxt, &l);
3697
560k
  if (len >= XML_MAX_NAMELEN) {
3698
      /*
3699
       * Okay someone managed to make a huge token, so he's ready to pay
3700
       * for the processing speed.
3701
       */
3702
824
      xmlChar *buffer;
3703
824
      int max = len * 2;
3704
3705
824
      buffer = xmlMalloc(max);
3706
824
      if (buffer == NULL) {
3707
0
          xmlErrMemory(ctxt);
3708
0
    return(NULL);
3709
0
      }
3710
824
      memcpy(buffer, buf, len);
3711
4.16M
      while (xmlIsNameChar(ctxt, c)) {
3712
4.16M
    if (len + 10 > max) {
3713
2.43k
        xmlChar *tmp;
3714
2.43k
                    int newSize;
3715
3716
2.43k
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3717
2.43k
                    if (newSize < 0) {
3718
86
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3719
86
                        xmlFree(buffer);
3720
86
                        return(NULL);
3721
86
                    }
3722
2.34k
        tmp = xmlRealloc(buffer, newSize);
3723
2.34k
        if (tmp == NULL) {
3724
0
      xmlErrMemory(ctxt);
3725
0
      xmlFree(buffer);
3726
0
      return(NULL);
3727
0
        }
3728
2.34k
        buffer = tmp;
3729
2.34k
                    max = newSize;
3730
2.34k
    }
3731
4.16M
    COPY_BUF(buffer, len, c);
3732
4.16M
    NEXTL(l);
3733
4.16M
    c = xmlCurrentChar(ctxt, &l);
3734
4.16M
      }
3735
738
      buffer[len] = 0;
3736
738
      return(buffer);
3737
824
  }
3738
560k
    }
3739
278k
    if (len == 0)
3740
40.0k
        return(NULL);
3741
238k
    if (len > maxLength) {
3742
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3743
0
        return(NULL);
3744
0
    }
3745
238k
    ret = xmlStrndup(buf, len);
3746
238k
    if (ret == NULL)
3747
0
        xmlErrMemory(ctxt);
3748
238k
    return(ret);
3749
238k
}
3750
3751
/**
3752
 * xmlExpandPEsInEntityValue:
3753
 * @ctxt:  parser context
3754
 * @buf:  string buffer
3755
 * @str:  entity value
3756
 * @length:  size of entity value
3757
 * @depth:  nesting depth
3758
 *
3759
 * Validate an entity value and expand parameter entities.
3760
 */
3761
static void
3762
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3763
61.0k
                          const xmlChar *str, int length, int depth) {
3764
61.0k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3765
61.0k
    const xmlChar *end, *chunk;
3766
61.0k
    int c, l;
3767
3768
61.0k
    if (str == NULL)
3769
0
        return;
3770
3771
61.0k
    depth += 1;
3772
61.0k
    if (depth > maxDepth) {
3773
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3774
0
                       "Maximum entity nesting depth exceeded");
3775
0
  return;
3776
0
    }
3777
3778
61.0k
    end = str + length;
3779
61.0k
    chunk = str;
3780
3781
33.2M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3782
33.2M
        c = *str;
3783
3784
33.2M
        if (c >= 0x80) {
3785
5.44M
            l = xmlUTF8MultibyteLen(ctxt, str,
3786
5.44M
                    "invalid character in entity value\n");
3787
5.44M
            if (l == 0) {
3788
5.13M
                if (chunk < str)
3789
131k
                    xmlSBufAddString(buf, chunk, str - chunk);
3790
5.13M
                xmlSBufAddReplChar(buf);
3791
5.13M
                str += 1;
3792
5.13M
                chunk = str;
3793
5.13M
            } else {
3794
307k
                str += l;
3795
307k
            }
3796
27.7M
        } else if (c == '&') {
3797
176k
            if (str[1] == '#') {
3798
53.8k
                if (chunk < str)
3799
46.3k
                    xmlSBufAddString(buf, chunk, str - chunk);
3800
3801
53.8k
                c = xmlParseStringCharRef(ctxt, &str);
3802
53.8k
                if (c == 0)
3803
11.9k
                    return;
3804
3805
41.9k
                xmlSBufAddChar(buf, c);
3806
3807
41.9k
                chunk = str;
3808
122k
            } else {
3809
122k
                xmlChar *name;
3810
3811
                /*
3812
                 * General entity references are checked for
3813
                 * syntactic validity.
3814
                 */
3815
122k
                str++;
3816
122k
                name = xmlParseStringName(ctxt, &str);
3817
3818
122k
                if ((name == NULL) || (*str++ != ';')) {
3819
4.90k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3820
4.90k
                            "EntityValue: '&' forbidden except for entities "
3821
4.90k
                            "references\n");
3822
4.90k
                    xmlFree(name);
3823
4.90k
                    return;
3824
4.90k
                }
3825
3826
117k
                xmlFree(name);
3827
117k
            }
3828
27.5M
        } else if (c == '%') {
3829
1.74k
            xmlEntityPtr ent;
3830
3831
1.74k
            if (chunk < str)
3832
1.61k
                xmlSBufAddString(buf, chunk, str - chunk);
3833
3834
1.74k
            ent = xmlParseStringPEReference(ctxt, &str);
3835
1.74k
            if (ent == NULL)
3836
1.72k
                return;
3837
3838
27
            if (!PARSER_EXTERNAL(ctxt)) {
3839
27
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3840
27
                return;
3841
27
            }
3842
3843
0
            if (ent->content == NULL) {
3844
                /*
3845
                 * Note: external parsed entities will not be loaded,
3846
                 * it is not required for a non-validating parser to
3847
                 * complete external PEReferences coming from the
3848
                 * internal subset
3849
                 */
3850
0
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3851
0
                    ((ctxt->replaceEntities) ||
3852
0
                     (ctxt->validate))) {
3853
0
                    xmlLoadEntityContent(ctxt, ent);
3854
0
                } else {
3855
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3856
0
                                  "not validating will not read content for "
3857
0
                                  "PE entity %s\n", ent->name, NULL);
3858
0
                }
3859
0
            }
3860
3861
            /*
3862
             * TODO: Skip if ent->content is still NULL.
3863
             */
3864
3865
0
            if (xmlParserEntityCheck(ctxt, ent->length))
3866
0
                return;
3867
3868
0
            if (ent->flags & XML_ENT_EXPANDING) {
3869
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3870
0
                xmlHaltParser(ctxt);
3871
0
                return;
3872
0
            }
3873
3874
0
            ent->flags |= XML_ENT_EXPANDING;
3875
0
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3876
0
                                      depth);
3877
0
            ent->flags &= ~XML_ENT_EXPANDING;
3878
3879
0
            chunk = str;
3880
27.5M
        } else {
3881
            /* Normal ASCII char */
3882
27.5M
            if (!IS_BYTE_CHAR(c)) {
3883
180k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3884
180k
                        "invalid character in entity value\n");
3885
180k
                if (chunk < str)
3886
16.8k
                    xmlSBufAddString(buf, chunk, str - chunk);
3887
180k
                xmlSBufAddReplChar(buf);
3888
180k
                str += 1;
3889
180k
                chunk = str;
3890
27.4M
            } else {
3891
27.4M
                str += 1;
3892
27.4M
            }
3893
27.5M
        }
3894
33.2M
    }
3895
3896
42.5k
    if (chunk < str)
3897
37.8k
        xmlSBufAddString(buf, chunk, str - chunk);
3898
42.5k
}
3899
3900
/**
3901
 * xmlParseEntityValue:
3902
 * @ctxt:  an XML parser context
3903
 * @orig:  if non-NULL store a copy of the original entity value
3904
 *
3905
 * DEPRECATED: Internal function, don't use.
3906
 *
3907
 * parse a value for ENTITY declarations
3908
 *
3909
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3910
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3911
 *
3912
 * Returns the EntityValue parsed with reference substituted or NULL
3913
 */
3914
xmlChar *
3915
61.2k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3916
61.2k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3917
0
                         XML_MAX_HUGE_LENGTH :
3918
61.2k
                         XML_MAX_TEXT_LENGTH;
3919
61.2k
    xmlSBuf buf;
3920
61.2k
    const xmlChar *start;
3921
61.2k
    int quote, length;
3922
3923
61.2k
    xmlSBufInit(&buf, maxLength);
3924
3925
61.2k
    GROW;
3926
3927
61.2k
    quote = CUR;
3928
61.2k
    if ((quote != '"') && (quote != '\'')) {
3929
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3930
0
  return(NULL);
3931
0
    }
3932
61.2k
    CUR_PTR++;
3933
3934
61.2k
    length = 0;
3935
3936
    /*
3937
     * Copy raw content of the entity into a buffer
3938
     */
3939
39.0M
    while (1) {
3940
39.0M
        int c;
3941
3942
39.0M
        if (PARSER_STOPPED(ctxt))
3943
0
            goto error;
3944
3945
39.0M
        if (CUR_PTR >= ctxt->input->end) {
3946
108
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3947
108
            goto error;
3948
108
        }
3949
3950
39.0M
        c = CUR;
3951
3952
39.0M
        if (c == 0) {
3953
75
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3954
75
                    "invalid character in entity value\n");
3955
75
            goto error;
3956
75
        }
3957
39.0M
        if (c == quote)
3958
61.0k
            break;
3959
38.9M
        NEXTL(1);
3960
38.9M
        length += 1;
3961
3962
        /*
3963
         * TODO: Check growth threshold
3964
         */
3965
38.9M
        if (ctxt->input->end - CUR_PTR < 10)
3966
8.37k
            GROW;
3967
38.9M
    }
3968
3969
61.0k
    start = CUR_PTR - length;
3970
3971
61.0k
    if (orig != NULL) {
3972
61.0k
        *orig = xmlStrndup(start, length);
3973
61.0k
        if (*orig == NULL)
3974
0
            xmlErrMemory(ctxt);
3975
61.0k
    }
3976
3977
61.0k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3978
3979
61.0k
    NEXTL(1);
3980
3981
61.0k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3982
3983
183
error:
3984
183
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3985
183
    return(NULL);
3986
61.2k
}
3987
3988
/**
3989
 * xmlCheckEntityInAttValue:
3990
 * @ctxt:  parser context
3991
 * @pent:  entity
3992
 * @depth:  nesting depth
3993
 *
3994
 * Check an entity reference in an attribute value for validity
3995
 * without expanding it.
3996
 */
3997
static void
3998
5.03k
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3999
5.03k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4000
5.03k
    const xmlChar *str;
4001
5.03k
    unsigned long expandedSize = pent->length;
4002
5.03k
    int c, flags;
4003
4004
5.03k
    depth += 1;
4005
5.03k
    if (depth > maxDepth) {
4006
48
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4007
48
                       "Maximum entity nesting depth exceeded");
4008
48
  return;
4009
48
    }
4010
4011
4.98k
    if (pent->flags & XML_ENT_EXPANDING) {
4012
262
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4013
262
        xmlHaltParser(ctxt);
4014
262
        return;
4015
262
    }
4016
4017
    /*
4018
     * If we're parsing a default attribute value in DTD content,
4019
     * the entity might reference other entities which weren't
4020
     * defined yet, so the check isn't reliable.
4021
     */
4022
4.72k
    if (ctxt->inSubset == 0)
4023
4.67k
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4024
47
    else
4025
47
        flags = XML_ENT_VALIDATED;
4026
4027
4.72k
    str = pent->content;
4028
4.72k
    if (str == NULL)
4029
0
        goto done;
4030
4031
    /*
4032
     * Note that entity values are already validated. We only check
4033
     * for illegal less-than signs and compute the expanded size
4034
     * of the entity. No special handling for multi-byte characters
4035
     * is needed.
4036
     */
4037
2.42M
    while (!PARSER_STOPPED(ctxt)) {
4038
2.42M
        c = *str;
4039
4040
2.42M
  if (c != '&') {
4041
2.40M
            if (c == 0)
4042
3.33k
                break;
4043
4044
2.40M
            if (c == '<')
4045
8.60k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4046
8.60k
                        "'<' in entity '%s' is not allowed in attributes "
4047
8.60k
                        "values\n", pent->name);
4048
4049
2.40M
            str += 1;
4050
2.40M
        } else if (str[1] == '#') {
4051
907
            int val;
4052
4053
907
      val = xmlParseStringCharRef(ctxt, &str);
4054
907
      if (val == 0) {
4055
211
                pent->content[0] = 0;
4056
211
                break;
4057
211
            }
4058
15.0k
  } else {
4059
15.0k
            xmlChar *name;
4060
15.0k
            xmlEntityPtr ent;
4061
4062
15.0k
      name = xmlParseStringEntityRef(ctxt, &str);
4063
15.0k
      if (name == NULL) {
4064
127
                pent->content[0] = 0;
4065
127
                break;
4066
127
            }
4067
4068
14.8k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4069
14.8k
            xmlFree(name);
4070
4071
14.8k
            if ((ent != NULL) &&
4072
11.1k
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4073
10.2k
                if ((ent->flags & flags) != flags) {
4074
3.11k
                    pent->flags |= XML_ENT_EXPANDING;
4075
3.11k
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
4076
3.11k
                    pent->flags &= ~XML_ENT_EXPANDING;
4077
3.11k
                }
4078
4079
10.2k
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4080
10.2k
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4081
10.2k
            }
4082
14.8k
        }
4083
2.42M
    }
4084
4085
4.72k
done:
4086
4.72k
    if (ctxt->inSubset == 0)
4087
4.67k
        pent->expandedSize = expandedSize;
4088
4089
4.72k
    pent->flags |= flags;
4090
4.72k
}
4091
4092
/**
4093
 * xmlExpandEntityInAttValue:
4094
 * @ctxt:  parser context
4095
 * @buf:  string buffer
4096
 * @str:  entity or attribute value
4097
 * @pent:  entity for entity value, NULL for attribute values
4098
 * @normalize:  whether to collapse whitespace
4099
 * @inSpace:  whitespace state
4100
 * @depth:  nesting depth
4101
 * @check:  whether to check for amplification
4102
 *
4103
 * Expand general entity references in an entity or attribute value.
4104
 * Perform attribute value normalization.
4105
 */
4106
static void
4107
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4108
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
4109
471k
                          int *inSpace, int depth, int check) {
4110
471k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4111
471k
    int c, chunkSize;
4112
4113
471k
    if (str == NULL)
4114
0
        return;
4115
4116
471k
    depth += 1;
4117
471k
    if (depth > maxDepth) {
4118
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4119
0
                       "Maximum entity nesting depth exceeded");
4120
0
  return;
4121
0
    }
4122
4123
471k
    if (pent != NULL) {
4124
471k
        if (pent->flags & XML_ENT_EXPANDING) {
4125
28
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4126
28
            xmlHaltParser(ctxt);
4127
28
            return;
4128
28
        }
4129
4130
471k
        if (check) {
4131
471k
            if (xmlParserEntityCheck(ctxt, pent->length))
4132
22
                return;
4133
471k
        }
4134
471k
    }
4135
4136
471k
    chunkSize = 0;
4137
4138
    /*
4139
     * Note that entity values are already validated. No special
4140
     * handling for multi-byte characters is needed.
4141
     */
4142
39.4M
    while (!PARSER_STOPPED(ctxt)) {
4143
39.4M
        c = *str;
4144
4145
39.4M
  if (c != '&') {
4146
38.5M
            if (c == 0)
4147
467k
                break;
4148
4149
            /*
4150
             * If this function is called without an entity, it is used to
4151
             * expand entities in an attribute content where less-than was
4152
             * already unscaped and is allowed.
4153
             */
4154
38.1M
            if ((pent != NULL) && (c == '<')) {
4155
4.02k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4156
4.02k
                        "'<' in entity '%s' is not allowed in attributes "
4157
4.02k
                        "values\n", pent->name);
4158
4.02k
                break;
4159
4.02k
            }
4160
4161
38.1M
            if (c <= 0x20) {
4162
3.80M
                if ((normalize) && (*inSpace)) {
4163
                    /* Skip char */
4164
3.16k
                    if (chunkSize > 0) {
4165
179
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4166
179
                        chunkSize = 0;
4167
179
                    }
4168
3.80M
                } else if (c < 0x20) {
4169
3.19M
                    if (chunkSize > 0) {
4170
422k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4171
422k
                        chunkSize = 0;
4172
422k
                    }
4173
4174
3.19M
                    xmlSBufAddCString(buf, " ", 1);
4175
3.19M
                } else {
4176
606k
                    chunkSize += 1;
4177
606k
                }
4178
4179
3.80M
                *inSpace = 1;
4180
34.3M
            } else {
4181
34.3M
                chunkSize += 1;
4182
34.3M
                *inSpace = 0;
4183
34.3M
            }
4184
4185
38.1M
            str += 1;
4186
38.1M
        } else if (str[1] == '#') {
4187
38.3k
            int val;
4188
4189
38.3k
            if (chunkSize > 0) {
4190
30.3k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4191
30.3k
                chunkSize = 0;
4192
30.3k
            }
4193
4194
38.3k
      val = xmlParseStringCharRef(ctxt, &str);
4195
38.3k
      if (val == 0) {
4196
10
                if (pent != NULL)
4197
10
                    pent->content[0] = 0;
4198
10
                break;
4199
10
            }
4200
4201
38.3k
            if (val == ' ') {
4202
0
                if ((!normalize) || (!*inSpace))
4203
0
                    xmlSBufAddCString(buf, " ", 1);
4204
0
                *inSpace = 1;
4205
38.3k
            } else {
4206
38.3k
                xmlSBufAddChar(buf, val);
4207
38.3k
                *inSpace = 0;
4208
38.3k
            }
4209
783k
  } else {
4210
783k
            xmlChar *name;
4211
783k
            xmlEntityPtr ent;
4212
4213
783k
            if (chunkSize > 0) {
4214
373k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4215
373k
                chunkSize = 0;
4216
373k
            }
4217
4218
783k
      name = xmlParseStringEntityRef(ctxt, &str);
4219
783k
            if (name == NULL) {
4220
8
                if (pent != NULL)
4221
8
                    pent->content[0] = 0;
4222
8
                break;
4223
8
            }
4224
4225
783k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4226
783k
            xmlFree(name);
4227
4228
783k
      if ((ent != NULL) &&
4229
434k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4230
5.36k
    if (ent->content == NULL) {
4231
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4232
0
          "predefined entity has no content\n");
4233
0
                    break;
4234
0
                }
4235
4236
5.36k
                xmlSBufAddString(buf, ent->content, ent->length);
4237
4238
5.36k
                *inSpace = 0;
4239
778k
      } else if ((ent != NULL) && (ent->content != NULL)) {
4240
428k
                if (pent != NULL)
4241
428k
                    pent->flags |= XML_ENT_EXPANDING;
4242
428k
    xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4243
428k
                                          normalize, inSpace, depth, check);
4244
428k
                if (pent != NULL)
4245
428k
                    pent->flags &= ~XML_ENT_EXPANDING;
4246
428k
      }
4247
783k
        }
4248
39.4M
    }
4249
4250
471k
    if (chunkSize > 0)
4251
433k
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4252
471k
}
4253
4254
/**
4255
 * xmlExpandEntitiesInAttValue:
4256
 * @ctxt:  parser context
4257
 * @str:  entity or attribute value
4258
 * @normalize:  whether to collapse whitespace
4259
 *
4260
 * Expand general entity references in an entity or attribute value.
4261
 * Perform attribute value normalization.
4262
 *
4263
 * Returns the expanded attribtue value.
4264
 */
4265
xmlChar *
4266
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4267
0
                            int normalize) {
4268
0
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4269
0
                         XML_MAX_HUGE_LENGTH :
4270
0
                         XML_MAX_TEXT_LENGTH;
4271
0
    xmlSBuf buf;
4272
0
    int inSpace = 1;
4273
4274
0
    xmlSBufInit(&buf, maxLength);
4275
4276
0
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4277
0
                              ctxt->inputNr, /* check */ 0);
4278
4279
0
    if ((normalize) && (inSpace) && (buf.size > 0))
4280
0
        buf.size--;
4281
4282
0
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4283
0
}
4284
4285
/**
4286
 * xmlParseAttValueInternal:
4287
 * @ctxt:  an XML parser context
4288
 * @len:  attribute len result
4289
 * @alloc:  whether the attribute was reallocated as a new string
4290
 * @normalize:  if 1 then further non-CDATA normalization must be done
4291
 *
4292
 * parse a value for an attribute.
4293
 * NOTE: if no normalization is needed, the routine will return pointers
4294
 *       directly from the data buffer.
4295
 *
4296
 * 3.3.3 Attribute-Value Normalization:
4297
 * Before the value of an attribute is passed to the application or
4298
 * checked for validity, the XML processor must normalize it as follows:
4299
 * - a character reference is processed by appending the referenced
4300
 *   character to the attribute value
4301
 * - an entity reference is processed by recursively processing the
4302
 *   replacement text of the entity
4303
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4304
 *   appending #x20 to the normalized value, except that only a single
4305
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4306
 *   parsed entity or the literal entity value of an internal parsed entity
4307
 * - other characters are processed by appending them to the normalized value
4308
 * If the declared value is not CDATA, then the XML processor must further
4309
 * process the normalized attribute value by discarding any leading and
4310
 * trailing space (#x20) characters, and by replacing sequences of space
4311
 * (#x20) characters by a single space (#x20) character.
4312
 * All attributes for which no declaration has been read should be treated
4313
 * by a non-validating parser as if declared CDATA.
4314
 *
4315
 * Returns the AttValue parsed or NULL. The value has to be freed by the
4316
 *     caller if it was copied, this can be detected by val[*len] == 0.
4317
 */
4318
static xmlChar *
4319
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4320
5.88M
                         int normalize, int isNamespace) {
4321
5.88M
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4322
0
                         XML_MAX_HUGE_LENGTH :
4323
5.88M
                         XML_MAX_TEXT_LENGTH;
4324
5.88M
    xmlSBuf buf;
4325
5.88M
    xmlChar *ret;
4326
5.88M
    int c, l, quote, flags, chunkSize;
4327
5.88M
    int inSpace = 1;
4328
5.88M
    int replaceEntities;
4329
4330
    /* Always expand namespace URIs */
4331
5.88M
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4332
4333
5.88M
    xmlSBufInit(&buf, maxLength);
4334
4335
5.88M
    GROW;
4336
4337
5.88M
    quote = CUR;
4338
5.88M
    if ((quote != '"') && (quote != '\'')) {
4339
89.1k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4340
89.1k
  return(NULL);
4341
89.1k
    }
4342
5.79M
    NEXTL(1);
4343
4344
5.79M
    if (ctxt->inSubset == 0)
4345
5.74M
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4346
47.0k
    else
4347
47.0k
        flags = XML_ENT_VALIDATED;
4348
4349
5.79M
    inSpace = 1;
4350
5.79M
    chunkSize = 0;
4351
4352
184M
    while (1) {
4353
184M
        if (PARSER_STOPPED(ctxt))
4354
310
            goto error;
4355
4356
184M
        if (CUR_PTR >= ctxt->input->end) {
4357
13.3k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4358
13.3k
                           "AttValue: ' expected\n");
4359
13.3k
            goto error;
4360
13.3k
        }
4361
4362
        /*
4363
         * TODO: Check growth threshold
4364
         */
4365
184M
        if (ctxt->input->end - CUR_PTR < 10)
4366
266k
            GROW;
4367
4368
184M
        c = CUR;
4369
4370
184M
        if (c >= 0x80) {
4371
21.6M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4372
21.6M
                    "invalid character in attribute value\n");
4373
21.6M
            if (l == 0) {
4374
17.7M
                if (chunkSize > 0) {
4375
616k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4376
616k
                    chunkSize = 0;
4377
616k
                }
4378
17.7M
                xmlSBufAddReplChar(&buf);
4379
17.7M
                NEXTL(1);
4380
17.7M
            } else {
4381
3.89M
                chunkSize += l;
4382
3.89M
                NEXTL(l);
4383
3.89M
            }
4384
4385
21.6M
            inSpace = 0;
4386
163M
        } else if (c != '&') {
4387
162M
            if (c > 0x20) {
4388
139M
                if (c == quote)
4389
5.76M
                    break;
4390
4391
133M
                if (c == '<')
4392
1.17M
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4393
4394
133M
                chunkSize += 1;
4395
133M
                inSpace = 0;
4396
133M
            } else if (!IS_BYTE_CHAR(c)) {
4397
4.49M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4398
4.49M
                        "invalid character in attribute value\n");
4399
4.49M
                if (chunkSize > 0) {
4400
407k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4401
407k
                    chunkSize = 0;
4402
407k
                }
4403
4.49M
                xmlSBufAddReplChar(&buf);
4404
4.49M
                inSpace = 0;
4405
18.5M
            } else {
4406
                /* Whitespace */
4407
18.5M
                if ((normalize) && (inSpace)) {
4408
                    /* Skip char */
4409
129k
                    if (chunkSize > 0) {
4410
6.18k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4411
6.18k
                        chunkSize = 0;
4412
6.18k
                    }
4413
18.4M
                } else if (c < 0x20) {
4414
                    /* Convert to space */
4415
14.7M
                    if (chunkSize > 0) {
4416
465k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4417
465k
                        chunkSize = 0;
4418
465k
                    }
4419
4420
14.7M
                    xmlSBufAddCString(&buf, " ", 1);
4421
14.7M
                } else {
4422
3.70M
                    chunkSize += 1;
4423
3.70M
                }
4424
4425
18.5M
                inSpace = 1;
4426
4427
18.5M
                if ((c == 0xD) && (NXT(1) == 0xA))
4428
133k
                    CUR_PTR++;
4429
18.5M
            }
4430
4431
156M
            NEXTL(1);
4432
156M
        } else if (NXT(1) == '#') {
4433
203k
            int val;
4434
4435
203k
            if (chunkSize > 0) {
4436
60.4k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4437
60.4k
                chunkSize = 0;
4438
60.4k
            }
4439
4440
203k
            val = xmlParseCharRef(ctxt);
4441
203k
            if (val == 0)
4442
14.1k
                goto error;
4443
4444
189k
            if ((val == '&') && (!replaceEntities)) {
4445
                /*
4446
                 * The reparsing will be done in xmlNodeParseContent()
4447
                 * called from SAX2.c
4448
                 */
4449
12.3k
                xmlSBufAddCString(&buf, "&#38;", 5);
4450
12.3k
                inSpace = 0;
4451
177k
            } else if (val == ' ') {
4452
31.0k
                if ((!normalize) || (!inSpace))
4453
15.2k
                    xmlSBufAddCString(&buf, " ", 1);
4454
31.0k
                inSpace = 1;
4455
146k
            } else {
4456
146k
                xmlSBufAddChar(&buf, val);
4457
146k
                inSpace = 0;
4458
146k
            }
4459
845k
        } else {
4460
845k
            const xmlChar *name;
4461
845k
            xmlEntityPtr ent;
4462
4463
845k
            if (chunkSize > 0) {
4464
198k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4465
198k
                chunkSize = 0;
4466
198k
            }
4467
4468
845k
            name = xmlParseEntityRefInternal(ctxt);
4469
845k
            if (name == NULL) {
4470
                /*
4471
                 * Probably a literal '&' which wasn't escaped.
4472
                 * TODO: Handle gracefully in recovery mode.
4473
                 */
4474
303k
                continue;
4475
303k
            }
4476
4477
541k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4478
541k
            if (ent == NULL)
4479
78.6k
                continue;
4480
4481
462k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4482
256k
                if ((ent->content[0] == '&') && (!replaceEntities))
4483
8.83k
                    xmlSBufAddCString(&buf, "&#38;", 5);
4484
247k
                else
4485
247k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4486
256k
                inSpace = 0;
4487
256k
            } else if (replaceEntities) {
4488
43.1k
                xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4489
43.1k
                                          normalize, &inSpace, ctxt->inputNr,
4490
43.1k
                                          /* check */ 1);
4491
162k
            } else {
4492
162k
                if ((ent->flags & flags) != flags)
4493
1.92k
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4494
4495
162k
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4496
39
                    ent->content[0] = 0;
4497
39
                    goto error;
4498
39
                }
4499
4500
                /*
4501
                 * Just output the reference
4502
                 */
4503
162k
                xmlSBufAddCString(&buf, "&", 1);
4504
162k
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4505
162k
                xmlSBufAddCString(&buf, ";", 1);
4506
4507
162k
                inSpace = 0;
4508
162k
            }
4509
462k
  }
4510
184M
    }
4511
4512
5.76M
    if ((buf.mem == NULL) && (alloc != NULL)) {
4513
5.40M
        ret = (xmlChar *) CUR_PTR - chunkSize;
4514
4515
5.40M
        if (attlen != NULL)
4516
5.40M
            *attlen = chunkSize;
4517
5.40M
        if ((normalize) && (inSpace) && (chunkSize > 0))
4518
514
            *attlen -= 1;
4519
5.40M
        *alloc = 0;
4520
4521
        /* Report potential error */
4522
5.40M
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4523
5.40M
    } else {
4524
362k
        if (chunkSize > 0)
4525
314k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4526
4527
362k
        if ((normalize) && (inSpace) && (buf.size > 0))
4528
9.90k
            buf.size--;
4529
4530
362k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4531
4532
362k
        if (ret != NULL) {
4533
362k
            if (attlen != NULL)
4534
318k
                *attlen = buf.size;
4535
362k
            if (alloc != NULL)
4536
318k
                *alloc = 1;
4537
362k
        }
4538
362k
    }
4539
4540
5.76M
    NEXTL(1);
4541
4542
5.76M
    return(ret);
4543
4544
27.8k
error:
4545
27.8k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4546
27.8k
    return(NULL);
4547
5.79M
}
4548
4549
/**
4550
 * xmlParseAttValue:
4551
 * @ctxt:  an XML parser context
4552
 *
4553
 * DEPRECATED: Internal function, don't use.
4554
 *
4555
 * parse a value for an attribute
4556
 * Note: the parser won't do substitution of entities here, this
4557
 * will be handled later in xmlStringGetNodeList
4558
 *
4559
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4560
 *                   "'" ([^<&'] | Reference)* "'"
4561
 *
4562
 * 3.3.3 Attribute-Value Normalization:
4563
 * Before the value of an attribute is passed to the application or
4564
 * checked for validity, the XML processor must normalize it as follows:
4565
 * - a character reference is processed by appending the referenced
4566
 *   character to the attribute value
4567
 * - an entity reference is processed by recursively processing the
4568
 *   replacement text of the entity
4569
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4570
 *   appending #x20 to the normalized value, except that only a single
4571
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4572
 *   parsed entity or the literal entity value of an internal parsed entity
4573
 * - other characters are processed by appending them to the normalized value
4574
 * If the declared value is not CDATA, then the XML processor must further
4575
 * process the normalized attribute value by discarding any leading and
4576
 * trailing space (#x20) characters, and by replacing sequences of space
4577
 * (#x20) characters by a single space (#x20) character.
4578
 * All attributes for which no declaration has been read should be treated
4579
 * by a non-validating parser as if declared CDATA.
4580
 *
4581
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4582
 */
4583
4584
4585
xmlChar *
4586
49.7k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4587
49.7k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4588
49.7k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4589
49.7k
}
4590
4591
/**
4592
 * xmlParseSystemLiteral:
4593
 * @ctxt:  an XML parser context
4594
 *
4595
 * DEPRECATED: Internal function, don't use.
4596
 *
4597
 * parse an XML Literal
4598
 *
4599
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4600
 *
4601
 * Returns the SystemLiteral parsed or NULL
4602
 */
4603
4604
xmlChar *
4605
14.4k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4606
14.4k
    xmlChar *buf = NULL;
4607
14.4k
    int len = 0;
4608
14.4k
    int size = XML_PARSER_BUFFER_SIZE;
4609
14.4k
    int cur, l;
4610
14.4k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4611
0
                    XML_MAX_TEXT_LENGTH :
4612
14.4k
                    XML_MAX_NAME_LENGTH;
4613
14.4k
    xmlChar stop;
4614
4615
14.4k
    if (RAW == '"') {
4616
7.98k
        NEXT;
4617
7.98k
  stop = '"';
4618
7.98k
    } else if (RAW == '\'') {
4619
4.82k
        NEXT;
4620
4.82k
  stop = '\'';
4621
4.82k
    } else {
4622
1.67k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4623
1.67k
  return(NULL);
4624
1.67k
    }
4625
4626
12.8k
    buf = xmlMalloc(size);
4627
12.8k
    if (buf == NULL) {
4628
0
        xmlErrMemory(ctxt);
4629
0
  return(NULL);
4630
0
    }
4631
12.8k
    cur = xmlCurrentCharRecover(ctxt, &l);
4632
5.00M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4633
4.99M
  if (len + 5 >= size) {
4634
9.75k
      xmlChar *tmp;
4635
9.75k
            int newSize;
4636
4637
9.75k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4638
9.75k
            if (newSize < 0) {
4639
20
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4640
20
                xmlFree(buf);
4641
20
                return(NULL);
4642
20
            }
4643
9.73k
      tmp = xmlRealloc(buf, newSize);
4644
9.73k
      if (tmp == NULL) {
4645
0
          xmlFree(buf);
4646
0
    xmlErrMemory(ctxt);
4647
0
    return(NULL);
4648
0
      }
4649
9.73k
      buf = tmp;
4650
9.73k
            size = newSize;
4651
9.73k
  }
4652
4.99M
  COPY_BUF(buf, len, cur);
4653
4.99M
  NEXTL(l);
4654
4.99M
  cur = xmlCurrentCharRecover(ctxt, &l);
4655
4.99M
    }
4656
12.7k
    buf[len] = 0;
4657
12.7k
    if (!IS_CHAR(cur)) {
4658
1.88k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4659
10.9k
    } else {
4660
10.9k
  NEXT;
4661
10.9k
    }
4662
12.7k
    return(buf);
4663
12.8k
}
4664
4665
/**
4666
 * xmlParsePubidLiteral:
4667
 * @ctxt:  an XML parser context
4668
 *
4669
 * DEPRECATED: Internal function, don't use.
4670
 *
4671
 * parse an XML public literal
4672
 *
4673
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4674
 *
4675
 * Returns the PubidLiteral parsed or NULL.
4676
 */
4677
4678
xmlChar *
4679
10.4k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4680
10.4k
    xmlChar *buf = NULL;
4681
10.4k
    int len = 0;
4682
10.4k
    int size = XML_PARSER_BUFFER_SIZE;
4683
10.4k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4684
0
                    XML_MAX_TEXT_LENGTH :
4685
10.4k
                    XML_MAX_NAME_LENGTH;
4686
10.4k
    xmlChar cur;
4687
10.4k
    xmlChar stop;
4688
4689
10.4k
    if (RAW == '"') {
4690
9.57k
        NEXT;
4691
9.57k
  stop = '"';
4692
9.57k
    } else if (RAW == '\'') {
4693
304
        NEXT;
4694
304
  stop = '\'';
4695
580
    } else {
4696
580
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4697
580
  return(NULL);
4698
580
    }
4699
9.88k
    buf = xmlMalloc(size);
4700
9.88k
    if (buf == NULL) {
4701
0
  xmlErrMemory(ctxt);
4702
0
  return(NULL);
4703
0
    }
4704
9.88k
    cur = CUR;
4705
1.04M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4706
1.03M
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4707
1.03M
  if (len + 1 >= size) {
4708
1.99k
      xmlChar *tmp;
4709
1.99k
            int newSize;
4710
4711
1.99k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4712
1.99k
            if (newSize < 0) {
4713
8
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4714
8
                xmlFree(buf);
4715
8
                return(NULL);
4716
8
            }
4717
1.98k
      tmp = xmlRealloc(buf, newSize);
4718
1.98k
      if (tmp == NULL) {
4719
0
    xmlErrMemory(ctxt);
4720
0
    xmlFree(buf);
4721
0
    return(NULL);
4722
0
      }
4723
1.98k
      buf = tmp;
4724
1.98k
            size = newSize;
4725
1.98k
  }
4726
1.03M
  buf[len++] = cur;
4727
1.03M
  NEXT;
4728
1.03M
  cur = CUR;
4729
1.03M
    }
4730
9.87k
    buf[len] = 0;
4731
9.87k
    if (cur != stop) {
4732
1.70k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4733
8.17k
    } else {
4734
8.17k
  NEXTL(1);
4735
8.17k
    }
4736
9.87k
    return(buf);
4737
9.88k
}
4738
4739
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4740
4741
/*
4742
 * used for the test in the inner loop of the char data testing
4743
 */
4744
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x1F: control characters, only TAB (0x09) is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* LF, CR excluded */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x3F: '&' (0x26) and '<' (0x3C) are excluded */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x5F: ']' (0x5D) is excluded */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x7F: all accepted */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /*
     * 0x80 - 0xFF (non-ascii): no initializers are given, so these
     * entries are implicitly zero.
     */
};
4778
4779
static void
4780
xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size,
4781
11.5M
              int isBlank) {
4782
11.5M
    int checkBlanks;
4783
4784
11.5M
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
4785
4
        return;
4786
4787
11.5M
    checkBlanks = (!ctxt->keepBlanks) ||
4788
0
                  (ctxt->sax->ignorableWhitespace != ctxt->sax->characters);
4789
4790
    /*
4791
     * Calling areBlanks with only parts of a text node
4792
     * is fundamentally broken, making the NOBLANKS option
4793
     * essentially unusable.
4794
     */
4795
11.5M
    if ((checkBlanks) &&
4796
11.5M
        (areBlanks(ctxt, buf, size, isBlank))) {
4797
1.52M
        if ((ctxt->sax->ignorableWhitespace != NULL) &&
4798
1.52M
            (ctxt->keepBlanks))
4799
0
            ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
4800
10.0M
    } else {
4801
10.0M
        if (ctxt->sax->characters != NULL)
4802
10.0M
            ctxt->sax->characters(ctxt->userData, buf, size);
4803
4804
        /*
4805
         * The old code used to update this value for "complex" data
4806
         * even if checkBlanks was false. This was probably a bug.
4807
         */
4808
10.0M
        if ((checkBlanks) && (*ctxt->space == -1))
4809
2.89M
            *ctxt->space = -2;
4810
10.0M
    }
4811
11.5M
}
4812
4813
/**
4814
 * xmlParseCharDataInternal:
4815
 * @ctxt:  an XML parser context
4816
 * @partial:  buffer may contain partial UTF-8 sequences
4817
 *
4818
 * Parse character data. Always makes progress if the first char isn't
4819
 * '<' or '&'.
4820
 *
4821
 * The right angle bracket (>) may be represented using the string "&gt;",
4822
 * and must, for compatibility, be escaped using "&gt;" or a character
4823
 * reference when it appears in the string "]]>" in content, when that
4824
 * string is not marking the end of a CDATA section.
4825
 *
4826
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4827
 */
4828
static void
4829
13.2M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4830
13.2M
    const xmlChar *in;
4831
13.2M
    int nbchar = 0;
4832
13.2M
    int line = ctxt->input->line;
4833
13.2M
    int col = ctxt->input->col;
4834
13.2M
    int ccol;
4835
4836
13.2M
    GROW;
4837
    /*
4838
     * Accelerated common case where input don't need to be
4839
     * modified before passing it to the handler.
4840
     */
4841
13.2M
    in = ctxt->input->cur;
4842
15.0M
    do {
4843
17.0M
get_more_space:
4844
35.9M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4845
17.0M
        if (*in == 0xA) {
4846
10.3M
            do {
4847
10.3M
                ctxt->input->line++; ctxt->input->col = 1;
4848
10.3M
                in++;
4849
10.3M
            } while (*in == 0xA);
4850
2.02M
            goto get_more_space;
4851
2.02M
        }
4852
15.0M
        if (*in == '<') {
4853
2.30M
            nbchar = in - ctxt->input->cur;
4854
2.30M
            if (nbchar > 0) {
4855
2.30M
                const xmlChar *tmp = ctxt->input->cur;
4856
2.30M
                ctxt->input->cur = in;
4857
4858
2.30M
                xmlCharacters(ctxt, tmp, nbchar, 1);
4859
2.30M
            }
4860
2.30M
            return;
4861
2.30M
        }
4862
4863
13.5M
get_more:
4864
13.5M
        ccol = ctxt->input->col;
4865
148M
        while (test_char_data[*in]) {
4866
135M
            in++;
4867
135M
            ccol++;
4868
135M
        }
4869
13.5M
        ctxt->input->col = ccol;
4870
13.5M
        if (*in == 0xA) {
4871
4.57M
            do {
4872
4.57M
                ctxt->input->line++; ctxt->input->col = 1;
4873
4.57M
                in++;
4874
4.57M
            } while (*in == 0xA);
4875
576k
            goto get_more;
4876
576k
        }
4877
12.9M
        if (*in == ']') {
4878
256k
            if ((in[1] == ']') && (in[2] == '>')) {
4879
3.03k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4880
3.03k
                ctxt->input->cur = in + 1;
4881
3.03k
                return;
4882
3.03k
            }
4883
253k
            if ((!partial) || (ctxt->input->end - in >= 2)) {
4884
253k
                in++;
4885
253k
                ctxt->input->col++;
4886
253k
                goto get_more;
4887
253k
            }
4888
253k
        }
4889
12.7M
        nbchar = in - ctxt->input->cur;
4890
12.7M
        if (nbchar > 0) {
4891
7.56M
            const xmlChar *tmp = ctxt->input->cur;
4892
7.56M
            ctxt->input->cur = in;
4893
4894
7.56M
            xmlCharacters(ctxt, tmp, nbchar, 0);
4895
4896
7.56M
            line = ctxt->input->line;
4897
7.56M
            col = ctxt->input->col;
4898
7.56M
        }
4899
12.7M
        ctxt->input->cur = in;
4900
12.7M
        if (*in == 0xD) {
4901
2.07M
            in++;
4902
2.07M
            if (*in == 0xA) {
4903
1.81M
                ctxt->input->cur = in;
4904
1.81M
                in++;
4905
1.81M
                ctxt->input->line++; ctxt->input->col = 1;
4906
1.81M
                continue; /* while */
4907
1.81M
            }
4908
264k
            in--;
4909
264k
        }
4910
10.8M
        if (*in == '<') {
4911
5.74M
            return;
4912
5.74M
        }
4913
5.14M
        if (*in == '&') {
4914
200k
            return;
4915
200k
        }
4916
4.94M
        if ((partial) && (*in == ']') && (ctxt->input->end - in < 2)) {
4917
70
            return;
4918
70
        }
4919
4.94M
        SHRINK;
4920
4.94M
        GROW;
4921
4.94M
        in = ctxt->input->cur;
4922
6.76M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4923
4.96M
             (*in == 0x09) || (*in == 0x0a));
4924
4.96M
    ctxt->input->line = line;
4925
4.96M
    ctxt->input->col = col;
4926
4.96M
    xmlParseCharDataComplex(ctxt, partial);
4927
4.96M
}
4928
4929
/**
4930
 * xmlParseCharDataComplex:
4931
 * @ctxt:  an XML parser context
4932
 * @partial:  buffer may contain partial UTF-8 sequences
4933
 *
4934
 * Always makes progress if the first char isn't '<' or '&'.
4935
 *
4936
 * Parse a CharData section. This is the fallback function
4937
 * of xmlParseCharDataInternal() when the parsing requires handling
4938
 * of non-ASCII characters.
4939
 */
4940
static void
4941
4.96M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4942
4.96M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4943
4.96M
    int nbchar = 0;
4944
4.96M
    int cur, l;
4945
4946
4.96M
    cur = xmlCurrentCharRecover(ctxt, &l);
4947
78.8M
    while ((cur != '<') && /* checked */
4948
77.8M
           (cur != '&') &&
4949
77.6M
           ((!partial) || (cur != ']') ||
4950
7.35k
            (ctxt->input->end - ctxt->input->cur >= 2)) &&
4951
77.6M
     (IS_CHAR(cur))) {
4952
73.9M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4953
2.80k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4954
2.80k
  }
4955
73.9M
  COPY_BUF(buf, nbchar, cur);
4956
  /* move current position before possible calling of ctxt->sax->characters */
4957
73.9M
  NEXTL(l);
4958
73.9M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4959
188k
      buf[nbchar] = 0;
4960
4961
188k
            xmlCharacters(ctxt, buf, nbchar, 0);
4962
188k
      nbchar = 0;
4963
188k
            SHRINK;
4964
188k
  }
4965
73.9M
  cur = xmlCurrentCharRecover(ctxt, &l);
4966
73.9M
    }
4967
4.96M
    if (nbchar != 0) {
4968
1.49M
        buf[nbchar] = 0;
4969
4970
1.49M
        xmlCharacters(ctxt, buf, nbchar, 0);
4971
1.49M
    }
4972
    /*
4973
     * cur == 0 can mean
4974
     *
4975
     * - End of buffer.
4976
     * - An actual 0 character.
4977
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4978
     */
4979
4.96M
    if (ctxt->input->cur < ctxt->input->end) {
4980
4.89M
        if ((cur == 0) && (CUR != 0)) {
4981
5.26k
            if (partial == 0) {
4982
2.04k
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4983
2.04k
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4984
2.04k
                NEXTL(1);
4985
2.04k
            }
4986
4.89M
        } else if ((cur != '<') && (cur != '&') && (cur != ']')) {
4987
            /* Generate the error and skip the offending character */
4988
3.69M
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4989
3.69M
                              "PCDATA invalid Char value %d\n", cur);
4990
3.69M
            NEXTL(l);
4991
3.69M
        }
4992
4.89M
    }
4993
4.96M
}
4994
4995
/**
4996
 * xmlParseCharData:
4997
 * @ctxt:  an XML parser context
4998
 * @cdata:  unused
4999
 *
5000
 * DEPRECATED: Internal function, don't use.
5001
 */
5002
void
5003
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
5004
0
    /*
     * Thin wrapper: forwards to the internal parser with partial = 0.
     * The cdata argument is not used.
     */
    xmlParseCharDataInternal(ctxt, 0);
5005
0
}
5006
5007
/**
5008
 * xmlParseExternalID:
5009
 * @ctxt:  an XML parser context
5010
 * @publicID:  a xmlChar** receiving PubidLiteral
5011
 * @strict: indicate whether we should restrict parsing to only
5012
 *          production [75], see NOTE below
5013
 *
5014
 * DEPRECATED: Internal function, don't use.
5015
 *
5016
 * Parse an External ID or a Public ID
5017
 *
5018
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
5019
 *       'PUBLIC' S PubidLiteral S SystemLiteral
5020
 *
5021
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5022
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
5023
 *
5024
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5025
 *
5026
 * Returns the SystemLiteral, and in the second
5027
 *                case publicID receives PubidLiteral, if strict is off
5028
 *                it is possible to return NULL and have publicID set.
5029
 */
5030
5031
xmlChar *
5032
73.1k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5033
73.1k
    xmlChar *URI = NULL;
5034
5035
73.1k
    *publicID = NULL;
5036
73.1k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5037
5.97k
        SKIP(6);
5038
5.97k
  if (SKIP_BLANKS == 0) {
5039
2.49k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5040
2.49k
                     "Space required after 'SYSTEM'\n");
5041
2.49k
  }
5042
5.97k
  URI = xmlParseSystemLiteral(ctxt);
5043
5.97k
  if (URI == NULL) {
5044
263
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5045
263
        }
5046
67.1k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5047
10.4k
        SKIP(6);
5048
10.4k
  if (SKIP_BLANKS == 0) {
5049
1.50k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5050
1.50k
        "Space required after 'PUBLIC'\n");
5051
1.50k
  }
5052
10.4k
  *publicID = xmlParsePubidLiteral(ctxt);
5053
10.4k
  if (*publicID == NULL) {
5054
588
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5055
588
  }
5056
10.4k
  if (strict) {
5057
      /*
5058
       * We don't handle [83] so "S SystemLiteral" is required.
5059
       */
5060
8.02k
      if (SKIP_BLANKS == 0) {
5061
1.62k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5062
1.62k
      "Space required after the Public Identifier\n");
5063
1.62k
      }
5064
8.02k
  } else {
5065
      /*
5066
       * We handle [83] so we return immediately, if
5067
       * "S SystemLiteral" is not detected. We skip blanks if no
5068
             * system literal was found, but this is harmless since we must
5069
             * be at the end of a NotationDecl.
5070
       */
5071
2.43k
      if (SKIP_BLANKS == 0) return(NULL);
5072
685
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
5073
685
  }
5074
8.50k
  URI = xmlParseSystemLiteral(ctxt);
5075
8.50k
  if (URI == NULL) {
5076
1.43k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5077
1.43k
        }
5078
8.50k
    }
5079
71.1k
    return(URI);
5080
73.1k
}
5081
5082
/**
5083
 * xmlParseCommentComplex:
5084
 * @ctxt:  an XML parser context
5085
 * @buf:  the already parsed part of the buffer
5086
 * @len:  number of bytes in the buffer
5087
 * @size:  allocated size of the buffer
5088
 *
5089
 * Parse the rest of an XML (SGML) comment <!-- .... -->
5090
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5091
 *  must not occur within comments. "
5092
 * This is the slow routine in case the accelerator for ascii didn't work
5093
 *
5094
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5095
 */
5096
static void
5097
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5098
48.3k
                       size_t len, size_t size) {
5099
48.3k
    int q, ql;
5100
48.3k
    int r, rl;
5101
48.3k
    int cur, l;
5102
48.3k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5103
0
                    XML_MAX_HUGE_LENGTH :
5104
48.3k
                    XML_MAX_TEXT_LENGTH;
5105
5106
48.3k
    if (buf == NULL) {
5107
8.77k
        len = 0;
5108
8.77k
  size = XML_PARSER_BUFFER_SIZE;
5109
8.77k
  buf = xmlMalloc(size);
5110
8.77k
  if (buf == NULL) {
5111
0
      xmlErrMemory(ctxt);
5112
0
      return;
5113
0
  }
5114
8.77k
    }
5115
48.3k
    q = xmlCurrentCharRecover(ctxt, &ql);
5116
48.3k
    if (q == 0)
5117
3.58k
        goto not_terminated;
5118
44.8k
    if (!IS_CHAR(q)) {
5119
11.7k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5120
11.7k
                          "xmlParseComment: invalid xmlChar value %d\n",
5121
11.7k
                    q);
5122
11.7k
  xmlFree (buf);
5123
11.7k
  return;
5124
11.7k
    }
5125
33.0k
    NEXTL(ql);
5126
33.0k
    r = xmlCurrentCharRecover(ctxt, &rl);
5127
33.0k
    if (r == 0)
5128
684
        goto not_terminated;
5129
32.3k
    if (!IS_CHAR(r)) {
5130
3.04k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5131
3.04k
                          "xmlParseComment: invalid xmlChar value %d\n",
5132
3.04k
                    r);
5133
3.04k
  xmlFree (buf);
5134
3.04k
  return;
5135
3.04k
    }
5136
29.3k
    NEXTL(rl);
5137
29.3k
    cur = xmlCurrentCharRecover(ctxt, &l);
5138
29.3k
    if (cur == 0)
5139
538
        goto not_terminated;
5140
5.83M
    while (IS_CHAR(cur) && /* checked */
5141
5.82M
           ((cur != '>') ||
5142
5.80M
      (r != '-') || (q != '-'))) {
5143
5.80M
  if ((r == '-') && (q == '-')) {
5144
68.6k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5145
68.6k
  }
5146
5.80M
  if (len + 5 >= size) {
5147
17.1k
      xmlChar *tmp;
5148
17.1k
            int newSize;
5149
5150
17.1k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5151
17.1k
            if (newSize < 0) {
5152
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5153
0
                             "Comment too big found", NULL);
5154
0
                xmlFree (buf);
5155
0
                return;
5156
0
            }
5157
17.1k
      tmp = xmlRealloc(buf, newSize);
5158
17.1k
      if (tmp == NULL) {
5159
0
    xmlErrMemory(ctxt);
5160
0
    xmlFree(buf);
5161
0
    return;
5162
0
      }
5163
17.1k
      buf = tmp;
5164
17.1k
            size = newSize;
5165
17.1k
  }
5166
5.80M
  COPY_BUF(buf, len, q);
5167
5168
5.80M
  q = r;
5169
5.80M
  ql = rl;
5170
5.80M
  r = cur;
5171
5.80M
  rl = l;
5172
5173
5.80M
  NEXTL(l);
5174
5.80M
  cur = xmlCurrentCharRecover(ctxt, &l);
5175
5176
5.80M
    }
5177
28.7k
    buf[len] = 0;
5178
28.7k
    if (cur == 0) {
5179
4.93k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5180
4.93k
                       "Comment not terminated \n<!--%.50s\n", buf);
5181
23.8k
    } else if (!IS_CHAR(cur)) {
5182
9.60k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5183
9.60k
                          "xmlParseComment: invalid xmlChar value %d\n",
5184
9.60k
                    cur);
5185
14.2k
    } else {
5186
14.2k
        NEXT;
5187
14.2k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5188
14.2k
      (!ctxt->disableSAX))
5189
14.2k
      ctxt->sax->comment(ctxt->userData, buf);
5190
14.2k
    }
5191
28.7k
    xmlFree(buf);
5192
28.7k
    return;
5193
4.80k
not_terminated:
5194
4.80k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5195
4.80k
       "Comment not terminated\n", NULL);
5196
4.80k
    xmlFree(buf);
5197
4.80k
}
5198
5199
/**
5200
 * xmlParseComment:
5201
 * @ctxt:  an XML parser context
5202
 *
5203
 * DEPRECATED: Internal function, don't use.
5204
 *
5205
 * Parse an XML (SGML) comment. Always consumes '<!'.
5206
 *
5207
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5208
 *  must not occur within comments. "
5209
 *
5210
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5211
 */
5212
void
5213
93.7k
xmlParseComment(xmlParserCtxtPtr ctxt) {
5214
93.7k
    xmlChar *buf = NULL;
5215
93.7k
    size_t size = XML_PARSER_BUFFER_SIZE;
5216
93.7k
    size_t len = 0;
5217
93.7k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5218
0
                       XML_MAX_HUGE_LENGTH :
5219
93.7k
                       XML_MAX_TEXT_LENGTH;
5220
93.7k
    const xmlChar *in;
5221
93.7k
    size_t nbchar = 0;
5222
93.7k
    int ccol;
5223
5224
    /*
5225
     * Check that there is a comment right here.
5226
     */
5227
93.7k
    if ((RAW != '<') || (NXT(1) != '!'))
5228
0
        return;
5229
93.7k
    SKIP(2);
5230
93.7k
    if ((RAW != '-') || (NXT(1) != '-'))
5231
71
        return;
5232
93.6k
    SKIP(2);
5233
93.6k
    GROW;
5234
5235
    /*
5236
     * Accelerated common case where input don't need to be
5237
     * modified before passing it to the handler.
5238
     */
5239
93.6k
    in = ctxt->input->cur;
5240
93.6k
    do {
5241
93.6k
  if (*in == 0xA) {
5242
10.0k
      do {
5243
10.0k
    ctxt->input->line++; ctxt->input->col = 1;
5244
10.0k
    in++;
5245
10.0k
      } while (*in == 0xA);
5246
3.67k
  }
5247
328k
get_more:
5248
328k
        ccol = ctxt->input->col;
5249
6.05M
  while (((*in > '-') && (*in <= 0x7F)) ||
5250
850k
         ((*in >= 0x20) && (*in < '-')) ||
5251
5.72M
         (*in == 0x09)) {
5252
5.72M
        in++;
5253
5.72M
        ccol++;
5254
5.72M
  }
5255
328k
  ctxt->input->col = ccol;
5256
328k
  if (*in == 0xA) {
5257
65.7k
      do {
5258
65.7k
    ctxt->input->line++; ctxt->input->col = 1;
5259
65.7k
    in++;
5260
65.7k
      } while (*in == 0xA);
5261
13.5k
      goto get_more;
5262
13.5k
  }
5263
315k
  nbchar = in - ctxt->input->cur;
5264
  /*
5265
   * save current set of data
5266
   */
5267
315k
  if (nbchar > 0) {
5268
283k
            if (nbchar > maxLength - len) {
5269
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5270
0
                                  "Comment too big found", NULL);
5271
0
                xmlFree(buf);
5272
0
                return;
5273
0
            }
5274
283k
            if (buf == NULL) {
5275
71.7k
                if ((*in == '-') && (in[1] == '-'))
5276
34.3k
                    size = nbchar + 1;
5277
37.3k
                else
5278
37.3k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5279
71.7k
                buf = xmlMalloc(size);
5280
71.7k
                if (buf == NULL) {
5281
0
                    xmlErrMemory(ctxt);
5282
0
                    return;
5283
0
                }
5284
71.7k
                len = 0;
5285
212k
            } else if (len + nbchar + 1 >= size) {
5286
5.77k
                xmlChar *new_buf;
5287
5.77k
                size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5288
5.77k
                new_buf = xmlRealloc(buf, size);
5289
5.77k
                if (new_buf == NULL) {
5290
0
                    xmlErrMemory(ctxt);
5291
0
                    xmlFree(buf);
5292
0
                    return;
5293
0
                }
5294
5.77k
                buf = new_buf;
5295
5.77k
            }
5296
283k
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5297
283k
            len += nbchar;
5298
283k
            buf[len] = 0;
5299
283k
  }
5300
315k
  ctxt->input->cur = in;
5301
315k
  if (*in == 0xA) {
5302
0
      in++;
5303
0
      ctxt->input->line++; ctxt->input->col = 1;
5304
0
  }
5305
315k
  if (*in == 0xD) {
5306
24.9k
      in++;
5307
24.9k
      if (*in == 0xA) {
5308
15.7k
    ctxt->input->cur = in;
5309
15.7k
    in++;
5310
15.7k
    ctxt->input->line++; ctxt->input->col = 1;
5311
15.7k
    goto get_more;
5312
15.7k
      }
5313
9.21k
      in--;
5314
9.21k
  }
5315
299k
  SHRINK;
5316
299k
  GROW;
5317
299k
  in = ctxt->input->cur;
5318
299k
  if (*in == '-') {
5319
251k
      if (in[1] == '-') {
5320
226k
          if (in[2] == '>') {
5321
45.2k
        SKIP(3);
5322
45.2k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5323
45.2k
            (!ctxt->disableSAX)) {
5324
45.2k
      if (buf != NULL)
5325
32.1k
          ctxt->sax->comment(ctxt->userData, buf);
5326
13.1k
      else
5327
13.1k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5328
45.2k
        }
5329
45.2k
        if (buf != NULL)
5330
32.1k
            xmlFree(buf);
5331
45.2k
        return;
5332
45.2k
    }
5333
181k
    if (buf != NULL) {
5334
177k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5335
177k
                          "Double hyphen within comment: "
5336
177k
                                      "<!--%.50s\n",
5337
177k
              buf);
5338
177k
    } else
5339
4.00k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5340
4.00k
                          "Double hyphen within comment\n", NULL);
5341
181k
    in++;
5342
181k
    ctxt->input->col++;
5343
181k
      }
5344
205k
      in++;
5345
205k
      ctxt->input->col++;
5346
205k
      goto get_more;
5347
251k
  }
5348
299k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5349
48.3k
    xmlParseCommentComplex(ctxt, buf, len, size);
5350
48.3k
}
5351
5352
5353
/**
5354
 * xmlParsePITarget:
5355
 * @ctxt:  an XML parser context
5356
 *
5357
 * DEPRECATED: Internal function, don't use.
5358
 *
5359
 * parse the name of a PI
5360
 *
5361
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5362
 *
5363
 * Returns the PITarget name or NULL
5364
 */
5365
5366
const xmlChar *
5367
295k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5368
295k
    const xmlChar *name;
5369
5370
295k
    name = xmlParseName(ctxt);
5371
295k
    if ((name != NULL) &&
5372
247k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5373
66.3k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5374
33.9k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5375
25.4k
  int i;
5376
25.4k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5377
23.9k
      (name[2] == 'l') && (name[3] == 0)) {
5378
17.7k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5379
17.7k
     "XML declaration allowed only at the start of the document\n");
5380
17.7k
      return(name);
5381
17.7k
  } else if (name[3] == 0) {
5382
1.94k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5383
1.94k
      return(name);
5384
1.94k
  }
5385
16.6k
  for (i = 0;;i++) {
5386
16.6k
      if (xmlW3CPIs[i] == NULL) break;
5387
11.2k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5388
316
          return(name);
5389
11.2k
  }
5390
5.42k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5391
5.42k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5392
5.42k
          NULL, NULL);
5393
5.42k
    }
5394
275k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5395
3.44k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5396
3.44k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5397
3.44k
    }
5398
275k
    return(name);
5399
295k
}
5400
5401
#ifdef LIBXML_CATALOG_ENABLED
5402
/**
5403
 * xmlParseCatalogPI:
5404
 * @ctxt:  an XML parser context
5405
 * @catalog:  the PI value string
5406
 *
5407
 * parse an XML Catalog Processing Instruction.
5408
 *
5409
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5410
 *
5411
 * Occurs only if allowed by the user and if happening in the Misc
5412
 * part of the document before any doctype information
5413
 * This will add the given catalog to the parsing context in order
5414
 * to be used if there is a resolution need further down in the document
5415
 */
5416
5417
static void
5418
0
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5419
0
    xmlChar *URL = NULL;
5420
0
    const xmlChar *tmp, *base;
5421
0
    xmlChar marker;
5422
5423
0
    tmp = catalog;
5424
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5425
0
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5426
0
  goto error;
5427
0
    tmp += 7;
5428
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5429
0
    if (*tmp != '=') {
5430
0
  return;
5431
0
    }
5432
0
    tmp++;
5433
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5434
0
    marker = *tmp;
5435
0
    if ((marker != '\'') && (marker != '"'))
5436
0
  goto error;
5437
0
    tmp++;
5438
0
    base = tmp;
5439
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5440
0
    if (*tmp == 0)
5441
0
  goto error;
5442
0
    URL = xmlStrndup(base, tmp - base);
5443
0
    tmp++;
5444
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5445
0
    if (*tmp != 0)
5446
0
  goto error;
5447
5448
0
    if (URL != NULL) {
5449
        /*
5450
         * Unfortunately, the catalog API doesn't report OOM errors.
5451
         * xmlGetLastError isn't very helpful since we don't know
5452
         * where the last error came from. We'd have to reset it
5453
         * before this call and restore it afterwards.
5454
         */
5455
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5456
0
  xmlFree(URL);
5457
0
    }
5458
0
    return;
5459
5460
0
error:
5461
0
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5462
0
            "Catalog PI syntax error: %s\n",
5463
0
      catalog, NULL);
5464
0
    if (URL != NULL)
5465
0
  xmlFree(URL);
5466
0
}
5467
#endif
5468
5469
/**
5470
 * xmlParsePI:
5471
 * @ctxt:  an XML parser context
5472
 *
5473
 * DEPRECATED: Internal function, don't use.
5474
 *
5475
 * parse an XML Processing Instruction.
5476
 *
5477
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5478
 *
5479
 * The processing is transferred to SAX once parsed.
5480
 */
5481
5482
void
5483
295k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5484
295k
    xmlChar *buf = NULL;
5485
295k
    size_t len = 0;
5486
295k
    size_t size = XML_PARSER_BUFFER_SIZE;
5487
295k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5488
0
                       XML_MAX_HUGE_LENGTH :
5489
295k
                       XML_MAX_TEXT_LENGTH;
5490
295k
    int cur, l;
5491
295k
    const xmlChar *target;
5492
5493
295k
    if ((RAW == '<') && (NXT(1) == '?')) {
5494
  /*
5495
   * this is a Processing Instruction.
5496
   */
5497
295k
  SKIP(2);
5498
5499
  /*
5500
   * Parse the target name and check for special support like
5501
   * namespace.
5502
   */
5503
295k
        target = xmlParsePITarget(ctxt);
5504
295k
  if (target != NULL) {
5505
247k
      if ((RAW == '?') && (NXT(1) == '>')) {
5506
58.2k
    SKIP(2);
5507
5508
    /*
5509
     * SAX: PI detected.
5510
     */
5511
58.2k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5512
58.2k
        (ctxt->sax->processingInstruction != NULL))
5513
58.2k
        ctxt->sax->processingInstruction(ctxt->userData,
5514
58.2k
                                         target, NULL);
5515
58.2k
    return;
5516
58.2k
      }
5517
188k
      buf = xmlMalloc(size);
5518
188k
      if (buf == NULL) {
5519
0
    xmlErrMemory(ctxt);
5520
0
    return;
5521
0
      }
5522
188k
      if (SKIP_BLANKS == 0) {
5523
22.4k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5524
22.4k
        "ParsePI: PI %s space expected\n", target);
5525
22.4k
      }
5526
188k
      cur = xmlCurrentCharRecover(ctxt, &l);
5527
186M
      while (IS_CHAR(cur) && /* checked */
5528
186M
       ((cur != '?') || (NXT(1) != '>'))) {
5529
186M
    if (len + 5 >= size) {
5530
40.7k
        xmlChar *tmp;
5531
40.7k
                    int newSize;
5532
5533
40.7k
                    newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5534
40.7k
                    if (newSize < 0) {
5535
13
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5536
13
                                          "PI %s too big found", target);
5537
13
                        xmlFree(buf);
5538
13
                        return;
5539
13
                    }
5540
40.7k
        tmp = xmlRealloc(buf, newSize);
5541
40.7k
        if (tmp == NULL) {
5542
0
      xmlErrMemory(ctxt);
5543
0
      xmlFree(buf);
5544
0
      return;
5545
0
        }
5546
40.7k
        buf = tmp;
5547
40.7k
                    size = newSize;
5548
40.7k
    }
5549
186M
    COPY_BUF(buf, len, cur);
5550
186M
    NEXTL(l);
5551
186M
    cur = xmlCurrentCharRecover(ctxt, &l);
5552
186M
      }
5553
188k
      buf[len] = 0;
5554
188k
      if (cur != '?') {
5555
29.3k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5556
29.3k
          "ParsePI: PI %s never end ...\n", target);
5557
159k
      } else {
5558
159k
    SKIP(2);
5559
5560
159k
#ifdef LIBXML_CATALOG_ENABLED
5561
159k
    if ((ctxt->inSubset == 0) &&
5562
154k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5563
38.0k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5564
5565
38.0k
        if ((ctxt->options & XML_PARSE_CATALOG_PI) &&
5566
0
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5567
0
       (allow == XML_CATA_ALLOW_ALL)))
5568
0
      xmlParseCatalogPI(ctxt, buf);
5569
38.0k
    }
5570
159k
#endif
5571
5572
    /*
5573
     * SAX: PI detected.
5574
     */
5575
159k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5576
159k
        (ctxt->sax->processingInstruction != NULL))
5577
159k
        ctxt->sax->processingInstruction(ctxt->userData,
5578
159k
                                         target, buf);
5579
159k
      }
5580
188k
      xmlFree(buf);
5581
188k
  } else {
5582
48.5k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5583
48.5k
  }
5584
295k
    }
5585
295k
}
5586
5587
/**
5588
 * xmlParseNotationDecl:
5589
 * @ctxt:  an XML parser context
5590
 *
5591
 * DEPRECATED: Internal function, don't use.
5592
 *
5593
 * Parse a notation declaration. Always consumes '<!'.
5594
 *
5595
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5596
 *
5597
 * Hence there is actually 3 choices:
5598
 *     'PUBLIC' S PubidLiteral
5599
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5600
 * and 'SYSTEM' S SystemLiteral
5601
 *
5602
 * See the NOTE on xmlParseExternalID().
5603
 */
5604
5605
void
5606
5.00k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5607
5.00k
    const xmlChar *name;
5608
5.00k
    xmlChar *Pubid;
5609
5.00k
    xmlChar *Systemid;
5610
5611
5.00k
    if ((CUR != '<') || (NXT(1) != '!'))
5612
0
        return;
5613
5.00k
    SKIP(2);
5614
5615
5.00k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5616
4.66k
  int inputid = ctxt->input->id;
5617
4.66k
  SKIP(8);
5618
4.66k
  if (SKIP_BLANKS_PE == 0) {
5619
148
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5620
148
         "Space required after '<!NOTATION'\n");
5621
148
      return;
5622
148
  }
5623
5624
4.51k
        name = xmlParseName(ctxt);
5625
4.51k
  if (name == NULL) {
5626
251
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5627
251
      return;
5628
251
  }
5629
4.26k
  if (xmlStrchr(name, ':') != NULL) {
5630
264
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5631
264
         "colons are forbidden from notation names '%s'\n",
5632
264
         name, NULL, NULL);
5633
264
  }
5634
4.26k
  if (SKIP_BLANKS_PE == 0) {
5635
292
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5636
292
         "Space required after the NOTATION name'\n");
5637
292
      return;
5638
292
  }
5639
5640
  /*
5641
   * Parse the IDs.
5642
   */
5643
3.96k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5644
3.96k
  SKIP_BLANKS_PE;
5645
5646
3.96k
  if (RAW == '>') {
5647
792
      if (inputid != ctxt->input->id) {
5648
43
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5649
43
                         "Notation declaration doesn't start and stop"
5650
43
                               " in the same entity\n");
5651
43
      }
5652
792
      NEXT;
5653
792
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5654
792
    (ctxt->sax->notationDecl != NULL))
5655
792
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5656
3.17k
  } else {
5657
3.17k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5658
3.17k
  }
5659
3.96k
  if (Systemid != NULL) xmlFree(Systemid);
5660
3.96k
  if (Pubid != NULL) xmlFree(Pubid);
5661
3.96k
    }
5662
5.00k
}
5663
5664
/**
5665
 * xmlParseEntityDecl:
5666
 * @ctxt:  an XML parser context
5667
 *
5668
 * DEPRECATED: Internal function, don't use.
5669
 *
5670
 * Parse an entity declaration. Always consumes '<!'.
5671
 *
5672
 * [70] EntityDecl ::= GEDecl | PEDecl
5673
 *
5674
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5675
 *
5676
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5677
 *
5678
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5679
 *
5680
 * [74] PEDef ::= EntityValue | ExternalID
5681
 *
5682
 * [76] NDataDecl ::= S 'NDATA' S Name
5683
 *
5684
 * [ VC: Notation Declared ]
5685
 * The Name must match the declared name of a notation.
5686
 */
5687
5688
void
5689
78.2k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5690
78.2k
    const xmlChar *name = NULL;
5691
78.2k
    xmlChar *value = NULL;
5692
78.2k
    xmlChar *URI = NULL, *literal = NULL;
5693
78.2k
    const xmlChar *ndata = NULL;
5694
78.2k
    int isParameter = 0;
5695
78.2k
    xmlChar *orig = NULL;
5696
5697
78.2k
    if ((CUR != '<') || (NXT(1) != '!'))
5698
0
        return;
5699
78.2k
    SKIP(2);
5700
5701
    /* GROW; done in the caller */
5702
78.2k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5703
77.8k
  int inputid = ctxt->input->id;
5704
77.8k
  SKIP(6);
5705
77.8k
  if (SKIP_BLANKS_PE == 0) {
5706
18.5k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5707
18.5k
         "Space required after '<!ENTITY'\n");
5708
18.5k
  }
5709
5710
77.8k
  if (RAW == '%') {
5711
12.0k
      NEXT;
5712
12.0k
      if (SKIP_BLANKS_PE == 0) {
5713
7.02k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5714
7.02k
             "Space required after '%%'\n");
5715
7.02k
      }
5716
12.0k
      isParameter = 1;
5717
12.0k
  }
5718
5719
77.8k
        name = xmlParseName(ctxt);
5720
77.8k
  if (name == NULL) {
5721
1.79k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5722
1.79k
                     "xmlParseEntityDecl: no name\n");
5723
1.79k
            return;
5724
1.79k
  }
5725
76.0k
  if (xmlStrchr(name, ':') != NULL) {
5726
2.12k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5727
2.12k
         "colons are forbidden from entities names '%s'\n",
5728
2.12k
         name, NULL, NULL);
5729
2.12k
  }
5730
76.0k
  if (SKIP_BLANKS_PE == 0) {
5731
13.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5732
13.9k
         "Space required after the entity name\n");
5733
13.9k
  }
5734
5735
  /*
5736
   * handle the various case of definitions...
5737
   */
5738
76.0k
  if (isParameter) {
5739
11.9k
      if ((RAW == '"') || (RAW == '\'')) {
5740
9.57k
          value = xmlParseEntityValue(ctxt, &orig);
5741
9.57k
    if (value) {
5742
9.55k
        if ((ctxt->sax != NULL) &&
5743
9.55k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5744
9.55k
      ctxt->sax->entityDecl(ctxt->userData, name,
5745
9.55k
                        XML_INTERNAL_PARAMETER_ENTITY,
5746
9.55k
            NULL, NULL, value);
5747
9.55k
    }
5748
9.57k
      } else {
5749
2.36k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5750
2.36k
    if ((URI == NULL) && (literal == NULL)) {
5751
362
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5752
362
    }
5753
2.36k
    if (URI) {
5754
1.28k
                    if (xmlStrchr(URI, '#')) {
5755
256
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5756
1.03k
                    } else {
5757
1.03k
                        if ((ctxt->sax != NULL) &&
5758
1.03k
                            (!ctxt->disableSAX) &&
5759
1.03k
                            (ctxt->sax->entityDecl != NULL))
5760
1.03k
                            ctxt->sax->entityDecl(ctxt->userData, name,
5761
1.03k
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5762
1.03k
                                        literal, URI, NULL);
5763
1.03k
                    }
5764
1.28k
    }
5765
2.36k
      }
5766
64.1k
  } else {
5767
64.1k
      if ((RAW == '"') || (RAW == '\'')) {
5768
51.6k
          value = xmlParseEntityValue(ctxt, &orig);
5769
51.6k
    if ((ctxt->sax != NULL) &&
5770
51.6k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5771
51.6k
        ctxt->sax->entityDecl(ctxt->userData, name,
5772
51.6k
        XML_INTERNAL_GENERAL_ENTITY,
5773
51.6k
        NULL, NULL, value);
5774
    /*
5775
     * For expat compatibility in SAX mode.
5776
     */
5777
51.6k
    if ((ctxt->myDoc == NULL) ||
5778
51.6k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5779
0
        if (ctxt->myDoc == NULL) {
5780
0
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5781
0
      if (ctxt->myDoc == NULL) {
5782
0
          xmlErrMemory(ctxt);
5783
0
          goto done;
5784
0
      }
5785
0
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5786
0
        }
5787
0
        if (ctxt->myDoc->intSubset == NULL) {
5788
0
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5789
0
              BAD_CAST "fake", NULL, NULL);
5790
0
                        if (ctxt->myDoc->intSubset == NULL) {
5791
0
                            xmlErrMemory(ctxt);
5792
0
                            goto done;
5793
0
                        }
5794
0
                    }
5795
5796
0
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5797
0
                    NULL, NULL, value);
5798
0
    }
5799
51.6k
      } else {
5800
12.4k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5801
12.4k
    if ((URI == NULL) && (literal == NULL)) {
5802
3.89k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5803
3.89k
    }
5804
12.4k
    if (URI) {
5805
8.10k
                    if (xmlStrchr(URI, '#')) {
5806
2.55k
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5807
2.55k
                    }
5808
8.10k
    }
5809
12.4k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5810
4.09k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5811
4.09k
           "Space required before 'NDATA'\n");
5812
4.09k
    }
5813
12.4k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5814
1.37k
        SKIP(5);
5815
1.37k
        if (SKIP_BLANKS_PE == 0) {
5816
253
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5817
253
               "Space required after 'NDATA'\n");
5818
253
        }
5819
1.37k
        ndata = xmlParseName(ctxt);
5820
1.37k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5821
1.37k
            (ctxt->sax->unparsedEntityDecl != NULL))
5822
1.37k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5823
1.37k
            literal, URI, ndata);
5824
11.0k
    } else {
5825
11.0k
        if ((ctxt->sax != NULL) &&
5826
11.0k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5827
11.0k
      ctxt->sax->entityDecl(ctxt->userData, name,
5828
11.0k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5829
11.0k
            literal, URI, NULL);
5830
        /*
5831
         * For expat compatibility in SAX mode.
5832
         * assuming the entity replacement was asked for
5833
         */
5834
11.0k
        if ((ctxt->replaceEntities != 0) &&
5835
0
      ((ctxt->myDoc == NULL) ||
5836
0
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5837
0
      if (ctxt->myDoc == NULL) {
5838
0
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5839
0
          if (ctxt->myDoc == NULL) {
5840
0
              xmlErrMemory(ctxt);
5841
0
        goto done;
5842
0
          }
5843
0
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5844
0
      }
5845
5846
0
      if (ctxt->myDoc->intSubset == NULL) {
5847
0
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5848
0
            BAD_CAST "fake", NULL, NULL);
5849
0
                            if (ctxt->myDoc->intSubset == NULL) {
5850
0
                                xmlErrMemory(ctxt);
5851
0
                                goto done;
5852
0
                            }
5853
0
                        }
5854
0
      xmlSAX2EntityDecl(ctxt, name,
5855
0
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5856
0
                  literal, URI, NULL);
5857
0
        }
5858
11.0k
    }
5859
12.4k
      }
5860
64.1k
  }
5861
76.0k
  SKIP_BLANKS_PE;
5862
76.0k
  if (RAW != '>') {
5863
5.12k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5864
5.12k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5865
5.12k
      xmlHaltParser(ctxt);
5866
70.9k
  } else {
5867
70.9k
      if (inputid != ctxt->input->id) {
5868
50
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5869
50
                         "Entity declaration doesn't start and stop in"
5870
50
                               " the same entity\n");
5871
50
      }
5872
70.9k
      NEXT;
5873
70.9k
  }
5874
76.0k
  if (orig != NULL) {
5875
      /*
5876
       * Ugly mechanism to save the raw entity value.
5877
       */
5878
61.0k
      xmlEntityPtr cur = NULL;
5879
5880
61.0k
      if (isParameter) {
5881
9.55k
          if ((ctxt->sax != NULL) &&
5882
9.55k
        (ctxt->sax->getParameterEntity != NULL))
5883
9.55k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5884
51.5k
      } else {
5885
51.5k
          if ((ctxt->sax != NULL) &&
5886
51.5k
        (ctxt->sax->getEntity != NULL))
5887
51.5k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5888
51.5k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5889
0
        cur = xmlSAX2GetEntity(ctxt, name);
5890
0
    }
5891
51.5k
      }
5892
61.0k
            if ((cur != NULL) && (cur->orig == NULL)) {
5893
37.6k
    cur->orig = orig;
5894
37.6k
                orig = NULL;
5895
37.6k
      }
5896
61.0k
  }
5897
5898
76.0k
done:
5899
76.0k
  if (value != NULL) xmlFree(value);
5900
76.0k
  if (URI != NULL) xmlFree(URI);
5901
76.0k
  if (literal != NULL) xmlFree(literal);
5902
76.0k
        if (orig != NULL) xmlFree(orig);
5903
76.0k
    }
5904
78.2k
}
5905
5906
/**
5907
 * xmlParseDefaultDecl:
5908
 * @ctxt:  an XML parser context
5909
 * @value:  Receive a possible fixed default value for the attribute
5910
 *
5911
 * DEPRECATED: Internal function, don't use.
5912
 *
5913
 * Parse an attribute default declaration
5914
 *
5915
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5916
 *
5917
 * [ VC: Required Attribute ]
5918
 * if the default declaration is the keyword #REQUIRED, then the
5919
 * attribute must be specified for all elements of the type in the
5920
 * attribute-list declaration.
5921
 *
5922
 * [ VC: Attribute Default Legal ]
5923
 * The declared default value must meet the lexical constraints of
5924
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5925
 *
5926
 * [ VC: Fixed Attribute Default ]
5927
 * if an attribute has a default value declared with the #FIXED
5928
 * keyword, instances of that attribute must match the default value.
5929
 *
5930
 * [ WFC: No < in Attribute Values ]
5931
 * handled in xmlParseAttValue()
5932
 *
5933
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5934
 *          or XML_ATTRIBUTE_FIXED.
5935
 */
5936
5937
int
5938
67.6k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5939
67.6k
    int val;
5940
67.6k
    xmlChar *ret;
5941
5942
67.6k
    *value = NULL;
5943
67.6k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5944
2.20k
  SKIP(9);
5945
2.20k
  return(XML_ATTRIBUTE_REQUIRED);
5946
2.20k
    }
5947
65.4k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5948
15.6k
  SKIP(8);
5949
15.6k
  return(XML_ATTRIBUTE_IMPLIED);
5950
15.6k
    }
5951
49.7k
    val = XML_ATTRIBUTE_NONE;
5952
49.7k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5953
621
  SKIP(6);
5954
621
  val = XML_ATTRIBUTE_FIXED;
5955
621
  if (SKIP_BLANKS_PE == 0) {
5956
56
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5957
56
         "Space required after '#FIXED'\n");
5958
56
  }
5959
621
    }
5960
49.7k
    ret = xmlParseAttValue(ctxt);
5961
49.7k
    if (ret == NULL) {
5962
6.03k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5963
6.03k
           "Attribute default value declaration error\n");
5964
6.03k
    } else
5965
43.7k
        *value = ret;
5966
49.7k
    return(val);
5967
65.4k
}
5968
5969
/**
5970
 * xmlParseNotationType:
5971
 * @ctxt:  an XML parser context
5972
 *
5973
 * DEPRECATED: Internal function, don't use.
5974
 *
5975
 * parse an Notation attribute type.
5976
 *
5977
 * Note: the leading 'NOTATION' S part has already being parsed...
5978
 *
5979
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5980
 *
5981
 * [ VC: Notation Attributes ]
5982
 * Values of this type must match one of the notation names included
5983
 * in the declaration; all notation names in the declaration must be declared.
5984
 *
5985
 * Returns: the notation attribute tree built while parsing
5986
 */
5987
5988
xmlEnumerationPtr
5989
2.47k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5990
2.47k
    const xmlChar *name;
5991
2.47k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5992
5993
2.47k
    if (RAW != '(') {
5994
323
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5995
323
  return(NULL);
5996
323
    }
5997
2.72k
    do {
5998
2.72k
        NEXT;
5999
2.72k
  SKIP_BLANKS_PE;
6000
2.72k
        name = xmlParseName(ctxt);
6001
2.72k
  if (name == NULL) {
6002
335
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6003
335
         "Name expected in NOTATION declaration\n");
6004
335
            xmlFreeEnumeration(ret);
6005
335
      return(NULL);
6006
335
  }
6007
2.39k
        tmp = NULL;
6008
2.39k
#ifdef LIBXML_VALID_ENABLED
6009
2.39k
        if (ctxt->validate) {
6010
0
            tmp = ret;
6011
0
            while (tmp != NULL) {
6012
0
                if (xmlStrEqual(name, tmp->name)) {
6013
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6014
0
              "standalone: attribute notation value token %s duplicated\n",
6015
0
                                     name, NULL);
6016
0
                    if (!xmlDictOwns(ctxt->dict, name))
6017
0
                        xmlFree((xmlChar *) name);
6018
0
                    break;
6019
0
                }
6020
0
                tmp = tmp->next;
6021
0
            }
6022
0
        }
6023
2.39k
#endif /* LIBXML_VALID_ENABLED */
6024
2.39k
  if (tmp == NULL) {
6025
2.39k
      cur = xmlCreateEnumeration(name);
6026
2.39k
      if (cur == NULL) {
6027
0
                xmlErrMemory(ctxt);
6028
0
                xmlFreeEnumeration(ret);
6029
0
                return(NULL);
6030
0
            }
6031
2.39k
      if (last == NULL) ret = last = cur;
6032
528
      else {
6033
528
    last->next = cur;
6034
528
    last = cur;
6035
528
      }
6036
2.39k
  }
6037
2.39k
  SKIP_BLANKS_PE;
6038
2.39k
    } while (RAW == '|');
6039
1.81k
    if (RAW != ')') {
6040
174
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
6041
174
        xmlFreeEnumeration(ret);
6042
174
  return(NULL);
6043
174
    }
6044
1.64k
    NEXT;
6045
1.64k
    return(ret);
6046
1.81k
}
6047
6048
/**
6049
 * xmlParseEnumerationType:
6050
 * @ctxt:  an XML parser context
6051
 *
6052
 * DEPRECATED: Internal function, don't use.
6053
 *
6054
 * parse an Enumeration attribute type.
6055
 *
6056
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
6057
 *
6058
 * [ VC: Enumeration ]
6059
 * Values of this type must match one of the Nmtoken tokens in
6060
 * the declaration
6061
 *
6062
 * Returns: the enumeration attribute tree built while parsing
6063
 */
6064
6065
xmlEnumerationPtr
6066
24.7k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
6067
24.7k
    xmlChar *name;
6068
24.7k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6069
6070
24.7k
    if (RAW != '(') {
6071
2.07k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
6072
2.07k
  return(NULL);
6073
2.07k
    }
6074
23.7k
    do {
6075
23.7k
        NEXT;
6076
23.7k
  SKIP_BLANKS_PE;
6077
23.7k
        name = xmlParseNmtoken(ctxt);
6078
23.7k
  if (name == NULL) {
6079
343
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
6080
343
      return(ret);
6081
343
  }
6082
23.4k
        tmp = NULL;
6083
23.4k
#ifdef LIBXML_VALID_ENABLED
6084
23.4k
        if (ctxt->validate) {
6085
0
            tmp = ret;
6086
0
            while (tmp != NULL) {
6087
0
                if (xmlStrEqual(name, tmp->name)) {
6088
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6089
0
              "standalone: attribute enumeration value token %s duplicated\n",
6090
0
                                     name, NULL);
6091
0
                    if (!xmlDictOwns(ctxt->dict, name))
6092
0
                        xmlFree(name);
6093
0
                    break;
6094
0
                }
6095
0
                tmp = tmp->next;
6096
0
            }
6097
0
        }
6098
23.4k
#endif /* LIBXML_VALID_ENABLED */
6099
23.4k
  if (tmp == NULL) {
6100
23.4k
      cur = xmlCreateEnumeration(name);
6101
23.4k
      if (!xmlDictOwns(ctxt->dict, name))
6102
23.4k
    xmlFree(name);
6103
23.4k
      if (cur == NULL) {
6104
0
                xmlErrMemory(ctxt);
6105
0
                xmlFreeEnumeration(ret);
6106
0
                return(NULL);
6107
0
            }
6108
23.4k
      if (last == NULL) ret = last = cur;
6109
1.01k
      else {
6110
1.01k
    last->next = cur;
6111
1.01k
    last = cur;
6112
1.01k
      }
6113
23.4k
  }
6114
23.4k
  SKIP_BLANKS_PE;
6115
23.4k
    } while (RAW == '|');
6116
22.3k
    if (RAW != ')') {
6117
630
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
6118
630
  return(ret);
6119
630
    }
6120
21.7k
    NEXT;
6121
21.7k
    return(ret);
6122
22.3k
}
6123
6124
/**
6125
 * xmlParseEnumeratedType:
6126
 * @ctxt:  an XML parser context
6127
 * @tree:  the enumeration tree built while parsing
6128
 *
6129
 * DEPRECATED: Internal function, don't use.
6130
 *
6131
 * parse an Enumerated attribute type.
6132
 *
6133
 * [57] EnumeratedType ::= NotationType | Enumeration
6134
 *
6135
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6136
 *
6137
 *
6138
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6139
 */
6140
6141
int
6142
27.3k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6143
27.3k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6144
2.53k
  SKIP(8);
6145
2.53k
  if (SKIP_BLANKS_PE == 0) {
6146
63
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6147
63
         "Space required after 'NOTATION'\n");
6148
63
      return(0);
6149
63
  }
6150
2.47k
  *tree = xmlParseNotationType(ctxt);
6151
2.47k
  if (*tree == NULL) return(0);
6152
1.64k
  return(XML_ATTRIBUTE_NOTATION);
6153
2.47k
    }
6154
24.7k
    *tree = xmlParseEnumerationType(ctxt);
6155
24.7k
    if (*tree == NULL) return(0);
6156
22.4k
    return(XML_ATTRIBUTE_ENUMERATION);
6157
24.7k
}
6158
6159
/**
6160
 * xmlParseAttributeType:
6161
 * @ctxt:  an XML parser context
6162
 * @tree:  the enumeration tree built while parsing
6163
 *
6164
 * DEPRECATED: Internal function, don't use.
6165
 *
6166
 * parse the Attribute list def for an element
6167
 *
6168
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6169
 *
6170
 * [55] StringType ::= 'CDATA'
6171
 *
6172
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6173
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6174
 *
6175
 * Validity constraints for attribute values syntax are checked in
6176
 * xmlValidateAttributeValue()
6177
 *
6178
 * [ VC: ID ]
6179
 * Values of type ID must match the Name production. A name must not
6180
 * appear more than once in an XML document as a value of this type;
6181
 * i.e., ID values must uniquely identify the elements which bear them.
6182
 *
6183
 * [ VC: One ID per Element Type ]
6184
 * No element type may have more than one ID attribute specified.
6185
 *
6186
 * [ VC: ID Attribute Default ]
6187
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6188
 *
6189
 * [ VC: IDREF ]
6190
 * Values of type IDREF must match the Name production, and values
6191
 * of type IDREFS must match Names; each IDREF Name must match the value
6192
 * of an ID attribute on some element in the XML document; i.e. IDREF
6193
 * values must match the value of some ID attribute.
6194
 *
6195
 * [ VC: Entity Name ]
6196
 * Values of type ENTITY must match the Name production, values
6197
 * of type ENTITIES must match Names; each Entity Name must match the
6198
 * name of an unparsed entity declared in the DTD.
6199
 *
6200
 * [ VC: Name Token ]
6201
 * Values of type NMTOKEN must match the Nmtoken production; values
6202
 * of type NMTOKENS must match Nmtokens.
6203
 *
6204
 * Returns the attribute type
6205
 */
6206
int
6207
72.0k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6208
72.0k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6209
2.15k
  SKIP(5);
6210
2.15k
  return(XML_ATTRIBUTE_CDATA);
6211
69.8k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6212
10.2k
  SKIP(6);
6213
10.2k
  return(XML_ATTRIBUTE_IDREFS);
6214
59.6k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6215
2.33k
  SKIP(5);
6216
2.33k
  return(XML_ATTRIBUTE_IDREF);
6217
57.3k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6218
27.1k
        SKIP(2);
6219
27.1k
  return(XML_ATTRIBUTE_ID);
6220
30.1k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6221
377
  SKIP(6);
6222
377
  return(XML_ATTRIBUTE_ENTITY);
6223
29.7k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6224
977
  SKIP(8);
6225
977
  return(XML_ATTRIBUTE_ENTITIES);
6226
28.7k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6227
53
  SKIP(8);
6228
53
  return(XML_ATTRIBUTE_NMTOKENS);
6229
28.7k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6230
1.39k
  SKIP(7);
6231
1.39k
  return(XML_ATTRIBUTE_NMTOKEN);
6232
1.39k
     }
6233
27.3k
     return(xmlParseEnumeratedType(ctxt, tree));
6234
72.0k
}
6235
6236
/**
6237
 * xmlParseAttributeListDecl:
6238
 * @ctxt:  an XML parser context
6239
 *
6240
 * DEPRECATED: Internal function, don't use.
6241
 *
6242
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6243
 *
6244
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6245
 *
6246
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6247
 *
6248
 */
6249
void
6250
40.8k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6251
40.8k
    const xmlChar *elemName;
6252
40.8k
    const xmlChar *attrName;
6253
40.8k
    xmlEnumerationPtr tree;
6254
6255
40.8k
    if ((CUR != '<') || (NXT(1) != '!'))
6256
0
        return;
6257
40.8k
    SKIP(2);
6258
6259
40.8k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6260
40.4k
  int inputid = ctxt->input->id;
6261
6262
40.4k
  SKIP(7);
6263
40.4k
  if (SKIP_BLANKS_PE == 0) {
6264
21.6k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6265
21.6k
                     "Space required after '<!ATTLIST'\n");
6266
21.6k
  }
6267
40.4k
        elemName = xmlParseName(ctxt);
6268
40.4k
  if (elemName == NULL) {
6269
2.24k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6270
2.24k
         "ATTLIST: no name for Element\n");
6271
2.24k
      return;
6272
2.24k
  }
6273
38.1k
  SKIP_BLANKS_PE;
6274
38.1k
  GROW;
6275
102k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6276
88.9k
      int type;
6277
88.9k
      int def;
6278
88.9k
      xmlChar *defaultValue = NULL;
6279
6280
88.9k
      GROW;
6281
88.9k
            tree = NULL;
6282
88.9k
      attrName = xmlParseName(ctxt);
6283
88.9k
      if (attrName == NULL) {
6284
14.2k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6285
14.2k
             "ATTLIST: no name for Attribute\n");
6286
14.2k
    break;
6287
14.2k
      }
6288
74.6k
      GROW;
6289
74.6k
      if (SKIP_BLANKS_PE == 0) {
6290
2.68k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6291
2.68k
            "Space required after the attribute name\n");
6292
2.68k
    break;
6293
2.68k
      }
6294
6295
72.0k
      type = xmlParseAttributeType(ctxt, &tree);
6296
72.0k
      if (type <= 0) {
6297
3.26k
          break;
6298
3.26k
      }
6299
6300
68.7k
      GROW;
6301
68.7k
      if (SKIP_BLANKS_PE == 0) {
6302
1.13k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6303
1.13k
             "Space required after the attribute type\n");
6304
1.13k
          if (tree != NULL)
6305
707
        xmlFreeEnumeration(tree);
6306
1.13k
    break;
6307
1.13k
      }
6308
6309
67.6k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6310
67.6k
      if (def <= 0) {
6311
0
                if (defaultValue != NULL)
6312
0
        xmlFree(defaultValue);
6313
0
          if (tree != NULL)
6314
0
        xmlFreeEnumeration(tree);
6315
0
          break;
6316
0
      }
6317
67.6k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6318
43.2k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6319
6320
67.6k
      GROW;
6321
67.6k
            if (RAW != '>') {
6322
55.4k
    if (SKIP_BLANKS_PE == 0) {
6323
3.16k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6324
3.16k
      "Space required after the attribute default value\n");
6325
3.16k
        if (defaultValue != NULL)
6326
311
      xmlFree(defaultValue);
6327
3.16k
        if (tree != NULL)
6328
575
      xmlFreeEnumeration(tree);
6329
3.16k
        break;
6330
3.16k
    }
6331
55.4k
      }
6332
64.4k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6333
64.4k
    (ctxt->sax->attributeDecl != NULL))
6334
64.4k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6335
64.4k
                          type, def, defaultValue, tree);
6336
3
      else if (tree != NULL)
6337
0
    xmlFreeEnumeration(tree);
6338
6339
64.4k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6340
43.4k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6341
43.4k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6342
43.4k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6343
43.4k
      }
6344
64.4k
      if (ctxt->sax2) {
6345
64.4k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6346
64.4k
      }
6347
64.4k
      if (defaultValue != NULL)
6348
43.4k
          xmlFree(defaultValue);
6349
64.4k
      GROW;
6350
64.4k
  }
6351
38.1k
  if (RAW == '>') {
6352
14.0k
      if (inputid != ctxt->input->id) {
6353
87
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6354
87
                               "Attribute list declaration doesn't start and"
6355
87
                               " stop in the same entity\n");
6356
87
      }
6357
14.0k
      NEXT;
6358
14.0k
  }
6359
38.1k
    }
6360
40.8k
}
6361
6362
/**
6363
 * xmlParseElementMixedContentDecl:
6364
 * @ctxt:  an XML parser context
6365
 * @inputchk:  the input used for the current entity, needed for boundary checks
6366
 *
6367
 * DEPRECATED: Internal function, don't use.
6368
 *
6369
 * parse the declaration for a Mixed Element content
6370
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6371
 *
6372
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6373
 *                '(' S? '#PCDATA' S? ')'
6374
 *
6375
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6376
 *
6377
 * [ VC: No Duplicate Types ]
6378
 * The same name must not appear more than once in a single
6379
 * mixed-content declaration.
6380
 *
6381
 * returns: the list of the xmlElementContentPtr describing the element choices
6382
 */
6383
xmlElementContentPtr
6384
7.92k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6385
7.92k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6386
7.92k
    const xmlChar *elem = NULL;
6387
6388
7.92k
    GROW;
6389
7.92k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6390
7.92k
  SKIP(7);
6391
7.92k
  SKIP_BLANKS_PE;
6392
7.92k
  if (RAW == ')') {
6393
4.60k
      if (ctxt->input->id != inputchk) {
6394
271
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6395
271
                               "Element content declaration doesn't start and"
6396
271
                               " stop in the same entity\n");
6397
271
      }
6398
4.60k
      NEXT;
6399
4.60k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6400
4.60k
      if (ret == NULL)
6401
0
                goto mem_error;
6402
4.60k
      if (RAW == '*') {
6403
196
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6404
196
    NEXT;
6405
196
      }
6406
4.60k
      return(ret);
6407
4.60k
  }
6408
3.31k
  if ((RAW == '(') || (RAW == '|')) {
6409
3.05k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6410
3.05k
      if (ret == NULL)
6411
0
                goto mem_error;
6412
3.05k
  }
6413
10.9k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6414
7.84k
      NEXT;
6415
7.84k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6416
7.84k
            if (n == NULL)
6417
0
                goto mem_error;
6418
7.84k
      if (elem == NULL) {
6419
3.03k
    n->c1 = cur;
6420
3.03k
    if (cur != NULL)
6421
3.03k
        cur->parent = n;
6422
3.03k
    ret = cur = n;
6423
4.80k
      } else {
6424
4.80k
          cur->c2 = n;
6425
4.80k
    n->parent = cur;
6426
4.80k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6427
4.80k
                if (n->c1 == NULL)
6428
0
                    goto mem_error;
6429
4.80k
    n->c1->parent = n;
6430
4.80k
    cur = n;
6431
4.80k
      }
6432
7.84k
      SKIP_BLANKS_PE;
6433
7.84k
      elem = xmlParseName(ctxt);
6434
7.84k
      if (elem == NULL) {
6435
254
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6436
254
      "xmlParseElementMixedContentDecl : Name expected\n");
6437
254
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6438
254
    return(NULL);
6439
254
      }
6440
7.58k
      SKIP_BLANKS_PE;
6441
7.58k
      GROW;
6442
7.58k
  }
6443
3.06k
  if ((RAW == ')') && (NXT(1) == '*')) {
6444
1.31k
      if (elem != NULL) {
6445
1.31k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6446
1.31k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6447
1.31k
    if (cur->c2 == NULL)
6448
0
                    goto mem_error;
6449
1.31k
    cur->c2->parent = cur;
6450
1.31k
            }
6451
1.31k
            if (ret != NULL)
6452
1.31k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6453
1.31k
      if (ctxt->input->id != inputchk) {
6454
6
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6455
6
                               "Element content declaration doesn't start and"
6456
6
                               " stop in the same entity\n");
6457
6
      }
6458
1.31k
      SKIP(2);
6459
1.75k
  } else {
6460
1.75k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6461
1.75k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6462
1.75k
      return(NULL);
6463
1.75k
  }
6464
6465
3.06k
    } else {
6466
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6467
0
    }
6468
1.31k
    return(ret);
6469
6470
0
mem_error:
6471
0
    xmlErrMemory(ctxt);
6472
0
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6473
0
    return(NULL);
6474
7.92k
}
6475
6476
/**
6477
 * xmlParseElementChildrenContentDeclPriv:
6478
 * @ctxt:  an XML parser context
6479
 * @inputchk:  the input used for the current entity, needed for boundary checks
6480
 * @depth: the level of recursion
6481
 *
6482
 * parse the declaration for a Mixed Element content
6483
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6484
 *
6485
 *
6486
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6487
 *
6488
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6489
 *
6490
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6491
 *
6492
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6493
 *
6494
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6495
 * TODO Parameter-entity replacement text must be properly nested
6496
 *  with parenthesized groups. That is to say, if either of the
6497
 *  opening or closing parentheses in a choice, seq, or Mixed
6498
 *  construct is contained in the replacement text for a parameter
6499
 *  entity, both must be contained in the same replacement text. For
6500
 *  interoperability, if a parameter-entity reference appears in a
6501
 *  choice, seq, or Mixed construct, its replacement text should not
6502
 *  be empty, and neither the first nor last non-blank character of
6503
 *  the replacement text should be a connector (| or ,).
6504
 *
6505
 * Returns the tree of xmlElementContentPtr describing the element
6506
 *          hierarchy.
6507
 */
6508
static xmlElementContentPtr
6509
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6510
30.7k
                                       int depth) {
6511
30.7k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6512
30.7k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6513
30.7k
    const xmlChar *elem;
6514
30.7k
    xmlChar type = 0;
6515
6516
30.7k
    if (depth > maxDepth) {
6517
2
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6518
2
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6519
2
                "use XML_PARSE_HUGE\n", depth);
6520
2
  return(NULL);
6521
2
    }
6522
30.7k
    SKIP_BLANKS_PE;
6523
30.7k
    GROW;
6524
30.7k
    if (RAW == '(') {
6525
20.2k
  int inputid = ctxt->input->id;
6526
6527
        /* Recurse on first child */
6528
20.2k
  NEXT;
6529
20.2k
  SKIP_BLANKS_PE;
6530
20.2k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6531
20.2k
                                                           depth + 1);
6532
20.2k
        if (cur == NULL)
6533
17.9k
            return(NULL);
6534
2.27k
  SKIP_BLANKS_PE;
6535
2.27k
  GROW;
6536
10.5k
    } else {
6537
10.5k
  elem = xmlParseName(ctxt);
6538
10.5k
  if (elem == NULL) {
6539
1.12k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6540
1.12k
      return(NULL);
6541
1.12k
  }
6542
9.45k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6543
9.45k
  if (cur == NULL) {
6544
0
      xmlErrMemory(ctxt);
6545
0
      return(NULL);
6546
0
  }
6547
9.45k
  GROW;
6548
9.45k
  if (RAW == '?') {
6549
205
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6550
205
      NEXT;
6551
9.25k
  } else if (RAW == '*') {
6552
1.06k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6553
1.06k
      NEXT;
6554
8.18k
  } else if (RAW == '+') {
6555
133
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6556
133
      NEXT;
6557
8.05k
  } else {
6558
8.05k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6559
8.05k
  }
6560
9.45k
  GROW;
6561
9.45k
    }
6562
11.7k
    SKIP_BLANKS_PE;
6563
20.5k
    while ((RAW != ')') && (PARSER_STOPPED(ctxt) == 0)) {
6564
        /*
6565
   * Each loop we parse one separator and one element.
6566
   */
6567
15.3k
        if (RAW == ',') {
6568
1.89k
      if (type == 0) type = CUR;
6569
6570
      /*
6571
       * Detect "Name | Name , Name" error
6572
       */
6573
778
      else if (type != CUR) {
6574
14
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6575
14
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6576
14
                      type);
6577
14
    if ((last != NULL) && (last != ret))
6578
14
        xmlFreeDocElementContent(ctxt->myDoc, last);
6579
14
    if (ret != NULL)
6580
14
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6581
14
    return(NULL);
6582
14
      }
6583
1.87k
      NEXT;
6584
6585
1.87k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6586
1.87k
      if (op == NULL) {
6587
0
                xmlErrMemory(ctxt);
6588
0
    if ((last != NULL) && (last != ret))
6589
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6590
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6591
0
    return(NULL);
6592
0
      }
6593
1.87k
      if (last == NULL) {
6594
1.11k
    op->c1 = ret;
6595
1.11k
    if (ret != NULL)
6596
1.11k
        ret->parent = op;
6597
1.11k
    ret = cur = op;
6598
1.11k
      } else {
6599
764
          cur->c2 = op;
6600
764
    if (op != NULL)
6601
764
        op->parent = cur;
6602
764
    op->c1 = last;
6603
764
    if (last != NULL)
6604
764
        last->parent = op;
6605
764
    cur =op;
6606
764
    last = NULL;
6607
764
      }
6608
13.4k
  } else if (RAW == '|') {
6609
9.41k
      if (type == 0) type = CUR;
6610
6611
      /*
6612
       * Detect "Name , Name | Name" error
6613
       */
6614
4.77k
      else if (type != CUR) {
6615
53
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6616
53
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6617
53
          type);
6618
53
    if ((last != NULL) && (last != ret))
6619
53
        xmlFreeDocElementContent(ctxt->myDoc, last);
6620
53
    if (ret != NULL)
6621
53
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6622
53
    return(NULL);
6623
53
      }
6624
9.35k
      NEXT;
6625
6626
9.35k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6627
9.35k
      if (op == NULL) {
6628
0
                xmlErrMemory(ctxt);
6629
0
    if ((last != NULL) && (last != ret))
6630
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6631
0
    if (ret != NULL)
6632
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6633
0
    return(NULL);
6634
0
      }
6635
9.35k
      if (last == NULL) {
6636
4.64k
    op->c1 = ret;
6637
4.64k
    if (ret != NULL)
6638
4.64k
        ret->parent = op;
6639
4.64k
    ret = cur = op;
6640
4.71k
      } else {
6641
4.71k
          cur->c2 = op;
6642
4.71k
    if (op != NULL)
6643
4.71k
        op->parent = cur;
6644
4.71k
    op->c1 = last;
6645
4.71k
    if (last != NULL)
6646
4.71k
        last->parent = op;
6647
4.71k
    cur =op;
6648
4.71k
    last = NULL;
6649
4.71k
      }
6650
9.35k
  } else {
6651
4.00k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6652
4.00k
      if ((last != NULL) && (last != ret))
6653
1.21k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6654
4.00k
      if (ret != NULL)
6655
4.00k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6656
4.00k
      return(NULL);
6657
4.00k
  }
6658
11.2k
  GROW;
6659
11.2k
  SKIP_BLANKS_PE;
6660
11.2k
  GROW;
6661
11.2k
  if (RAW == '(') {
6662
2.65k
      int inputid = ctxt->input->id;
6663
      /* Recurse on second child */
6664
2.65k
      NEXT;
6665
2.65k
      SKIP_BLANKS_PE;
6666
2.65k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6667
2.65k
                                                          depth + 1);
6668
2.65k
            if (last == NULL) {
6669
2.14k
    if (ret != NULL)
6670
2.14k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6671
2.14k
    return(NULL);
6672
2.14k
            }
6673
510
      SKIP_BLANKS_PE;
6674
8.58k
  } else {
6675
8.58k
      elem = xmlParseName(ctxt);
6676
8.58k
      if (elem == NULL) {
6677
302
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6678
302
    if (ret != NULL)
6679
302
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6680
302
    return(NULL);
6681
302
      }
6682
8.27k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6683
8.27k
      if (last == NULL) {
6684
0
                xmlErrMemory(ctxt);
6685
0
    if (ret != NULL)
6686
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6687
0
    return(NULL);
6688
0
      }
6689
8.27k
      if (RAW == '?') {
6690
1.58k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6691
1.58k
    NEXT;
6692
6.69k
      } else if (RAW == '*') {
6693
515
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6694
515
    NEXT;
6695
6.18k
      } else if (RAW == '+') {
6696
200
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6697
200
    NEXT;
6698
5.98k
      } else {
6699
5.98k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6700
5.98k
      }
6701
8.27k
  }
6702
8.78k
  SKIP_BLANKS_PE;
6703
8.78k
  GROW;
6704
8.78k
    }
6705
5.21k
    if ((cur != NULL) && (last != NULL)) {
6706
2.02k
        cur->c2 = last;
6707
2.02k
  if (last != NULL)
6708
2.02k
      last->parent = cur;
6709
2.02k
    }
6710
5.21k
    if (ctxt->input->id != inputchk) {
6711
269
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6712
269
                       "Element content declaration doesn't start and stop in"
6713
269
                       " the same entity\n");
6714
269
    }
6715
5.21k
    NEXT;
6716
5.21k
    if (RAW == '?') {
6717
687
  if (ret != NULL) {
6718
687
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6719
612
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6720
499
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6721
188
      else
6722
188
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6723
687
  }
6724
687
  NEXT;
6725
4.52k
    } else if (RAW == '*') {
6726
965
  if (ret != NULL) {
6727
965
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6728
965
      cur = ret;
6729
      /*
6730
       * Some normalization:
6731
       * (a | b* | c?)* == (a | b | c)*
6732
       */
6733
4.22k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6734
3.26k
    if ((cur->c1 != NULL) &&
6735
3.26k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6736
2.08k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6737
1.36k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6738
3.26k
    if ((cur->c2 != NULL) &&
6739
3.26k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6740
3.03k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6741
262
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6742
3.26k
    cur = cur->c2;
6743
3.26k
      }
6744
965
  }
6745
965
  NEXT;
6746
3.56k
    } else if (RAW == '+') {
6747
1.09k
  if (ret != NULL) {
6748
1.09k
      int found = 0;
6749
6750
1.09k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6751
1.02k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6752
574
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6753
524
      else
6754
524
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6755
      /*
6756
       * Some normalization:
6757
       * (a | b*)+ == (a | b)*
6758
       * (a | b?)+ == (a | b)*
6759
       */
6760
1.30k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6761
206
    if ((cur->c1 != NULL) &&
6762
206
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6763
188
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6764
51
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6765
51
        found = 1;
6766
51
    }
6767
206
    if ((cur->c2 != NULL) &&
6768
206
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6769
164
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6770
57
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6771
57
        found = 1;
6772
57
    }
6773
206
    cur = cur->c2;
6774
206
      }
6775
1.09k
      if (found)
6776
66
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6777
1.09k
  }
6778
1.09k
  NEXT;
6779
1.09k
    }
6780
5.21k
    return(ret);
6781
11.7k
}
6782
6783
/**
6784
 * xmlParseElementChildrenContentDecl:
6785
 * @ctxt:  an XML parser context
6786
 * @inputchk:  the input used for the current entity, needed for boundary checks
6787
 *
6788
 * DEPRECATED: Internal function, don't use.
6789
 *
6790
 * parse the declaration for a Mixed Element content
6791
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6792
 *
6793
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6794
 *
6795
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6796
 *
6797
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6798
 *
6799
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6800
 *
6801
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6802
 * TODO Parameter-entity replacement text must be properly nested
6803
 *  with parenthesized groups. That is to say, if either of the
6804
 *  opening or closing parentheses in a choice, seq, or Mixed
6805
 *  construct is contained in the replacement text for a parameter
6806
 *  entity, both must be contained in the same replacement text. For
6807
 *  interoperability, if a parameter-entity reference appears in a
6808
 *  choice, seq, or Mixed construct, its replacement text should not
6809
 *  be empty, and neither the first nor last non-blank character of
6810
 *  the replacement text should be a connector (| or ,).
6811
 *
6812
 * Returns the tree of xmlElementContentPtr describing the element
6813
 *          hierarchy.
6814
 */
6815
xmlElementContentPtr
6816
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6817
    /* stub left for API/ABI compat */
6818
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6819
0
}
6820
6821
/**
6822
 * xmlParseElementContentDecl:
6823
 * @ctxt:  an XML parser context
6824
 * @name:  the name of the element being defined.
6825
 * @result:  the Element Content pointer will be stored here if any
6826
 *
6827
 * DEPRECATED: Internal function, don't use.
6828
 *
6829
 * parse the declaration for an Element content either Mixed or Children,
6830
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6831
 *
6832
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6833
 *
6834
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6835
 */
6836
6837
int
6838
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6839
15.8k
                           xmlElementContentPtr *result) {
6840
6841
15.8k
    xmlElementContentPtr tree = NULL;
6842
15.8k
    int inputid = ctxt->input->id;
6843
15.8k
    int res;
6844
6845
15.8k
    *result = NULL;
6846
6847
15.8k
    if (RAW != '(') {
6848
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6849
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6850
0
  return(-1);
6851
0
    }
6852
15.8k
    NEXT;
6853
15.8k
    GROW;
6854
15.8k
    SKIP_BLANKS_PE;
6855
15.8k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6856
7.92k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6857
7.92k
  res = XML_ELEMENT_TYPE_MIXED;
6858
7.92k
    } else {
6859
7.92k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6860
7.92k
  res = XML_ELEMENT_TYPE_ELEMENT;
6861
7.92k
    }
6862
15.8k
    SKIP_BLANKS_PE;
6863
15.8k
    *result = tree;
6864
15.8k
    return(res);
6865
15.8k
}
6866
6867
/**
6868
 * xmlParseElementDecl:
6869
 * @ctxt:  an XML parser context
6870
 *
6871
 * DEPRECATED: Internal function, don't use.
6872
 *
6873
 * Parse an element declaration. Always consumes '<!'.
6874
 *
6875
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6876
 *
6877
 * [ VC: Unique Element Type Declaration ]
6878
 * No element type may be declared more than once
6879
 *
6880
 * Returns the type of the element, or -1 in case of error
6881
 */
6882
int
6883
19.0k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6884
19.0k
    const xmlChar *name;
6885
19.0k
    int ret = -1;
6886
19.0k
    xmlElementContentPtr content  = NULL;
6887
6888
19.0k
    if ((CUR != '<') || (NXT(1) != '!'))
6889
0
        return(ret);
6890
19.0k
    SKIP(2);
6891
6892
    /* GROW; done in the caller */
6893
19.0k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6894
18.7k
  int inputid = ctxt->input->id;
6895
6896
18.7k
  SKIP(7);
6897
18.7k
  if (SKIP_BLANKS_PE == 0) {
6898
169
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6899
169
               "Space required after 'ELEMENT'\n");
6900
169
      return(-1);
6901
169
  }
6902
18.5k
        name = xmlParseName(ctxt);
6903
18.5k
  if (name == NULL) {
6904
872
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6905
872
         "xmlParseElementDecl: no name for Element\n");
6906
872
      return(-1);
6907
872
  }
6908
17.7k
  if (SKIP_BLANKS_PE == 0) {
6909
4.71k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6910
4.71k
         "Space required after the element name\n");
6911
4.71k
  }
6912
17.7k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6913
316
      SKIP(5);
6914
      /*
6915
       * Element must always be empty.
6916
       */
6917
316
      ret = XML_ELEMENT_TYPE_EMPTY;
6918
17.3k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6919
206
             (NXT(2) == 'Y')) {
6920
179
      SKIP(3);
6921
      /*
6922
       * Element is a generic container.
6923
       */
6924
179
      ret = XML_ELEMENT_TYPE_ANY;
6925
17.2k
  } else if (RAW == '(') {
6926
15.8k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6927
15.8k
  } else {
6928
      /*
6929
       * [ WFC: PEs in Internal Subset ] error handling.
6930
       */
6931
1.37k
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6932
1.37k
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6933
1.37k
      return(-1);
6934
1.37k
  }
6935
6936
16.3k
  SKIP_BLANKS_PE;
6937
6938
16.3k
  if (RAW != '>') {
6939
6.01k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6940
6.01k
      if (content != NULL) {
6941
662
    xmlFreeDocElementContent(ctxt->myDoc, content);
6942
662
      }
6943
10.3k
  } else {
6944
10.3k
      if (inputid != ctxt->input->id) {
6945
142
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6946
142
                               "Element declaration doesn't start and stop in"
6947
142
                               " the same entity\n");
6948
142
      }
6949
6950
10.3k
      NEXT;
6951
10.3k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6952
10.3k
    (ctxt->sax->elementDecl != NULL)) {
6953
10.3k
    if (content != NULL)
6954
7.68k
        content->parent = NULL;
6955
10.3k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6956
10.3k
                           content);
6957
10.3k
    if ((content != NULL) && (content->parent == NULL)) {
6958
        /*
6959
         * this is a trick: if xmlAddElementDecl is called,
6960
         * instead of copying the full tree it is plugged directly
6961
         * if called from the parser. Avoid duplicating the
6962
         * interfaces or change the API/ABI
6963
         */
6964
5.43k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6965
5.43k
    }
6966
10.3k
      } else if (content != NULL) {
6967
0
    xmlFreeDocElementContent(ctxt->myDoc, content);
6968
0
      }
6969
10.3k
  }
6970
16.3k
    }
6971
16.6k
    return(ret);
6972
19.0k
}
6973
6974
/**
6975
 * xmlParseConditionalSections
6976
 * @ctxt:  an XML parser context
6977
 *
6978
 * Parse a conditional section. Always consumes '<!['.
6979
 *
6980
 * [61] conditionalSect ::= includeSect | ignoreSect
6981
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6982
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6983
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6984
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6985
 */
6986
6987
static void
6988
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6989
0
    int *inputIds = NULL;
6990
0
    size_t inputIdsSize = 0;
6991
0
    size_t depth = 0;
6992
6993
0
    while (PARSER_STOPPED(ctxt) == 0) {
6994
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6995
0
            int id = ctxt->input->id;
6996
6997
0
            SKIP(3);
6998
0
            SKIP_BLANKS_PE;
6999
7000
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
7001
0
                SKIP(7);
7002
0
                SKIP_BLANKS_PE;
7003
0
                if (RAW != '[') {
7004
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7005
0
                    xmlHaltParser(ctxt);
7006
0
                    goto error;
7007
0
                }
7008
0
                if (ctxt->input->id != id) {
7009
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7010
0
                                   "All markup of the conditional section is"
7011
0
                                   " not in the same entity\n");
7012
0
                }
7013
0
                NEXT;
7014
7015
0
                if (inputIdsSize <= depth) {
7016
0
                    int *tmp;
7017
0
                    int newSize;
7018
7019
0
                    newSize = xmlGrowCapacity(inputIdsSize, sizeof(tmp[0]),
7020
0
                                              4, 1000);
7021
0
                    if (newSize < 0) {
7022
0
                        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
7023
0
                                       "Maximum conditional section nesting"
7024
0
                                       " depth exceeded\n");
7025
0
                        goto error;
7026
0
                    }
7027
0
                    tmp = xmlRealloc(inputIds, newSize * sizeof(tmp[0]));
7028
0
                    if (tmp == NULL) {
7029
0
                        xmlErrMemory(ctxt);
7030
0
                        goto error;
7031
0
                    }
7032
0
                    inputIds = tmp;
7033
0
                    inputIdsSize = newSize;
7034
0
                }
7035
0
                inputIds[depth] = id;
7036
0
                depth++;
7037
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
7038
0
                size_t ignoreDepth = 0;
7039
7040
0
                SKIP(6);
7041
0
                SKIP_BLANKS_PE;
7042
0
                if (RAW != '[') {
7043
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7044
0
                    xmlHaltParser(ctxt);
7045
0
                    goto error;
7046
0
                }
7047
0
                if (ctxt->input->id != id) {
7048
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7049
0
                                   "All markup of the conditional section is"
7050
0
                                   " not in the same entity\n");
7051
0
                }
7052
0
                NEXT;
7053
7054
0
                while (PARSER_STOPPED(ctxt) == 0) {
7055
0
                    if (RAW == 0) {
7056
0
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
7057
0
                        goto error;
7058
0
                    }
7059
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7060
0
                        SKIP(3);
7061
0
                        ignoreDepth++;
7062
                        /* Check for integer overflow */
7063
0
                        if (ignoreDepth == 0) {
7064
0
                            xmlErrMemory(ctxt);
7065
0
                            goto error;
7066
0
                        }
7067
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
7068
0
                               (NXT(2) == '>')) {
7069
0
                        SKIP(3);
7070
0
                        if (ignoreDepth == 0)
7071
0
                            break;
7072
0
                        ignoreDepth--;
7073
0
                    } else {
7074
0
                        NEXT;
7075
0
                    }
7076
0
                }
7077
7078
0
                if (ctxt->input->id != id) {
7079
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7080
0
                                   "All markup of the conditional section is"
7081
0
                                   " not in the same entity\n");
7082
0
                }
7083
0
            } else {
7084
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
7085
0
                xmlHaltParser(ctxt);
7086
0
                goto error;
7087
0
            }
7088
0
        } else if ((depth > 0) &&
7089
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
7090
0
            depth--;
7091
0
            if (ctxt->input->id != inputIds[depth]) {
7092
0
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7093
0
                               "All markup of the conditional section is not"
7094
0
                               " in the same entity\n");
7095
0
            }
7096
0
            SKIP(3);
7097
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7098
0
            xmlParseMarkupDecl(ctxt);
7099
0
        } else {
7100
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7101
0
            xmlHaltParser(ctxt);
7102
0
            goto error;
7103
0
        }
7104
7105
0
        if (depth == 0)
7106
0
            break;
7107
7108
0
        SKIP_BLANKS_PE;
7109
0
        SHRINK;
7110
0
        GROW;
7111
0
    }
7112
7113
0
error:
7114
0
    xmlFree(inputIds);
7115
0
}
7116
7117
/**
7118
 * xmlParseMarkupDecl:
7119
 * @ctxt:  an XML parser context
7120
 *
7121
 * DEPRECATED: Internal function, don't use.
7122
 *
7123
 * Parse markup declarations. Always consumes '<!' or '<?'.
7124
 *
7125
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7126
 *                     NotationDecl | PI | Comment
7127
 *
7128
 * [ VC: Proper Declaration/PE Nesting ]
7129
 * Parameter-entity replacement text must be properly nested with
7130
 * markup declarations. That is to say, if either the first character
7131
 * or the last character of a markup declaration (markupdecl above) is
7132
 * contained in the replacement text for a parameter-entity reference,
7133
 * both must be contained in the same replacement text.
7134
 *
7135
 * [ WFC: PEs in Internal Subset ]
7136
 * In the internal DTD subset, parameter-entity references can occur
7137
 * only where markup declarations can occur, not within markup declarations.
7138
 * (This does not apply to references that occur in external parameter
7139
 * entities or to the external subset.)
7140
 */
7141
void
7142
166k
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7143
166k
    GROW;
7144
166k
    if (CUR == '<') {
7145
166k
        if (NXT(1) == '!') {
7146
156k
      switch (NXT(2)) {
7147
97.3k
          case 'E':
7148
97.3k
        if (NXT(3) == 'L')
7149
19.0k
      xmlParseElementDecl(ctxt);
7150
78.3k
        else if (NXT(3) == 'N')
7151
78.2k
      xmlParseEntityDecl(ctxt);
7152
105
                    else
7153
105
                        SKIP(2);
7154
97.3k
        break;
7155
40.8k
          case 'A':
7156
40.8k
        xmlParseAttributeListDecl(ctxt);
7157
40.8k
        break;
7158
5.00k
          case 'N':
7159
5.00k
        xmlParseNotationDecl(ctxt);
7160
5.00k
        break;
7161
11.8k
          case '-':
7162
11.8k
        xmlParseComment(ctxt);
7163
11.8k
        break;
7164
1.76k
    default:
7165
1.76k
                    xmlFatalErr(ctxt,
7166
1.76k
                                ctxt->inSubset == 2 ?
7167
0
                                    XML_ERR_EXT_SUBSET_NOT_FINISHED :
7168
1.76k
                                    XML_ERR_INT_SUBSET_NOT_FINISHED,
7169
1.76k
                                NULL);
7170
1.76k
                    SKIP(2);
7171
1.76k
        break;
7172
156k
      }
7173
156k
  } else if (NXT(1) == '?') {
7174
9.41k
      xmlParsePI(ctxt);
7175
9.41k
  }
7176
166k
    }
7177
166k
}
7178
7179
/**
7180
 * xmlParseTextDecl:
7181
 * @ctxt:  an XML parser context
7182
 *
7183
 * DEPRECATED: Internal function, don't use.
7184
 *
7185
 * parse an XML declaration header for external entities
7186
 *
7187
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7188
 */
7189
7190
void
7191
0
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7192
0
    xmlChar *version;
7193
7194
    /*
7195
     * We know that '<?xml' is here.
7196
     */
7197
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7198
0
  SKIP(5);
7199
0
    } else {
7200
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7201
0
  return;
7202
0
    }
7203
7204
0
    if (SKIP_BLANKS == 0) {
7205
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7206
0
           "Space needed after '<?xml'\n");
7207
0
    }
7208
7209
    /*
7210
     * We may have the VersionInfo here.
7211
     */
7212
0
    version = xmlParseVersionInfo(ctxt);
7213
0
    if (version == NULL) {
7214
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7215
0
        if (version == NULL) {
7216
0
            xmlErrMemory(ctxt);
7217
0
            return;
7218
0
        }
7219
0
    } else {
7220
0
  if (SKIP_BLANKS == 0) {
7221
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7222
0
               "Space needed here\n");
7223
0
  }
7224
0
    }
7225
0
    ctxt->input->version = version;
7226
7227
    /*
7228
     * We must have the encoding declaration
7229
     */
7230
0
    xmlParseEncodingDecl(ctxt);
7231
7232
0
    SKIP_BLANKS;
7233
0
    if ((RAW == '?') && (NXT(1) == '>')) {
7234
0
        SKIP(2);
7235
0
    } else if (RAW == '>') {
7236
        /* Deprecated old WD ... */
7237
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7238
0
  NEXT;
7239
0
    } else {
7240
0
        int c;
7241
7242
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7243
0
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7244
0
            NEXT;
7245
0
            if (c == '>')
7246
0
                break;
7247
0
        }
7248
0
    }
7249
0
}
7250
7251
/**
7252
 * xmlParseExternalSubset:
7253
 * @ctxt:  an XML parser context
7254
 * @ExternalID: the external identifier
7255
 * @SystemID: the system identifier (or URL)
7256
 *
7257
 * DEPRECATED: Internal function, don't use.
7258
 *
7259
 * parse Markup declarations from an external subset
7260
 *
7261
 * [30] extSubset ::= textDecl? extSubsetDecl
7262
 *
7263
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7264
 */
7265
void
7266
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7267
0
                       const xmlChar *SystemID) {
7268
0
    int oldInputNr;
7269
7270
0
    xmlCtxtInitializeLate(ctxt);
7271
7272
0
    xmlDetectEncoding(ctxt);
7273
7274
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7275
0
  xmlParseTextDecl(ctxt);
7276
0
    }
7277
0
    if (ctxt->myDoc == NULL) {
7278
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7279
0
  if (ctxt->myDoc == NULL) {
7280
0
      xmlErrMemory(ctxt);
7281
0
      return;
7282
0
  }
7283
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7284
0
    }
7285
0
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) &&
7286
0
        (xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID) == NULL)) {
7287
0
        xmlErrMemory(ctxt);
7288
0
    }
7289
7290
0
    ctxt->inSubset = 2;
7291
0
    oldInputNr = ctxt->inputNr;
7292
7293
0
    SKIP_BLANKS_PE;
7294
0
    while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) &&
7295
0
           (!PARSER_STOPPED(ctxt))) {
7296
0
  GROW;
7297
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7298
0
            xmlParseConditionalSections(ctxt);
7299
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7300
0
            xmlParseMarkupDecl(ctxt);
7301
0
        } else {
7302
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7303
0
            xmlHaltParser(ctxt);
7304
0
            return;
7305
0
        }
7306
0
        SKIP_BLANKS_PE;
7307
0
        SHRINK;
7308
0
    }
7309
7310
0
    while (ctxt->inputNr > oldInputNr)
7311
0
        xmlPopPE(ctxt);
7312
7313
0
    xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
7314
0
}
7315
7316
/**
7317
 * xmlParseReference:
7318
 * @ctxt:  an XML parser context
7319
 *
7320
 * DEPRECATED: Internal function, don't use.
7321
 *
7322
 * parse and handle entity references in content, depending on the SAX
7323
 * interface, this may end-up in a call to character() if this is a
7324
 * CharRef, a predefined entity, if there is no reference() callback.
7325
 * or if the parser was asked to switch to that mode.
7326
 *
7327
 * Always consumes '&'.
7328
 *
7329
 * [67] Reference ::= EntityRef | CharRef
7330
 */
7331
void
7332
851k
xmlParseReference(xmlParserCtxtPtr ctxt) {
7333
851k
    xmlEntityPtr ent = NULL;
7334
851k
    const xmlChar *name;
7335
851k
    xmlChar *val;
7336
7337
851k
    if (RAW != '&')
7338
0
        return;
7339
7340
    /*
7341
     * Simple case of a CharRef
7342
     */
7343
851k
    if (NXT(1) == '#') {
7344
248k
  int i = 0;
7345
248k
  xmlChar out[16];
7346
248k
  int value = xmlParseCharRef(ctxt);
7347
7348
248k
  if (value == 0)
7349
82.4k
      return;
7350
7351
        /*
7352
         * Just encode the value in UTF-8
7353
         */
7354
165k
        COPY_BUF(out, i, value);
7355
165k
        out[i] = 0;
7356
165k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7357
165k
            (!ctxt->disableSAX))
7358
165k
            ctxt->sax->characters(ctxt->userData, out, i);
7359
165k
  return;
7360
248k
    }
7361
7362
    /*
7363
     * We are seeing an entity reference
7364
     */
7365
602k
    name = xmlParseEntityRefInternal(ctxt);
7366
602k
    if (name == NULL)
7367
483k
        return;
7368
119k
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7369
119k
    if (ent == NULL) {
7370
        /*
7371
         * Create a reference for undeclared entities.
7372
         */
7373
44.6k
        if ((ctxt->replaceEntities == 0) &&
7374
44.6k
            (ctxt->sax != NULL) &&
7375
44.6k
            (ctxt->disableSAX == 0) &&
7376
44.6k
            (ctxt->sax->reference != NULL)) {
7377
44.6k
            ctxt->sax->reference(ctxt->userData, name);
7378
44.6k
        }
7379
44.6k
        return;
7380
44.6k
    }
7381
74.8k
    if (!ctxt->wellFormed)
7382
67.9k
  return;
7383
7384
    /* special case of predefined entities */
7385
6.90k
    if ((ent->name == NULL) ||
7386
6.90k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7387
2.03k
  val = ent->content;
7388
2.03k
  if (val == NULL) return;
7389
  /*
7390
   * inline the entity.
7391
   */
7392
2.03k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7393
2.03k
      (!ctxt->disableSAX))
7394
2.03k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7395
2.03k
  return;
7396
2.03k
    }
7397
7398
    /*
7399
     * Some users try to parse entities on their own and used to set
7400
     * the renamed "checked" member. Fix the flags to cover this
7401
     * case.
7402
     */
7403
4.87k
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7404
0
        ent->flags |= XML_ENT_PARSED;
7405
7406
    /*
7407
     * The first reference to the entity trigger a parsing phase
7408
     * where the ent->children is filled with the result from
7409
     * the parsing.
7410
     * Note: external parsed entities will not be loaded, it is not
7411
     * required for a non-validating parser, unless the parsing option
7412
     * of validating, or substituting entities were given. Doing so is
7413
     * far more secure as the parser will only process data coming from
7414
     * the document entity by default.
7415
     *
7416
     * FIXME: This doesn't work correctly since entities can be
7417
     * expanded with different namespace declarations in scope.
7418
     * For example:
7419
     *
7420
     * <!DOCTYPE doc [
7421
     *   <!ENTITY ent "<ns:elem/>">
7422
     * ]>
7423
     * <doc>
7424
     *   <decl1 xmlns:ns="urn:ns1">
7425
     *     &ent;
7426
     *   </decl1>
7427
     *   <decl2 xmlns:ns="urn:ns2">
7428
     *     &ent;
7429
     *   </decl2>
7430
     * </doc>
7431
     *
7432
     * Proposed fix:
7433
     *
7434
     * - Ignore current namespace declarations when parsing the
7435
     *   entity. If a prefix can't be resolved, don't report an error
7436
     *   but mark it as unresolved.
7437
     * - Try to resolve these prefixes when expanding the entity.
7438
     *   This will require a specialized version of xmlStaticCopyNode
7439
     *   which can also make use of the namespace hash table to avoid
7440
     *   quadratic behavior.
7441
     *
7442
     * Alternatively, we could simply reparse the entity on each
7443
     * expansion like we already do with custom SAX callbacks.
7444
     * External entity content should be cached in this case.
7445
     */
7446
4.87k
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7447
585
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7448
585
         ((ctxt->replaceEntities) ||
7449
4.28k
          (ctxt->validate)))) {
7450
4.28k
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7451
1.91k
            xmlCtxtParseEntity(ctxt, ent);
7452
2.37k
        } else if (ent->children == NULL) {
7453
            /*
7454
             * Probably running in SAX mode and the callbacks don't
7455
             * build the entity content. Parse the entity again.
7456
             *
7457
             * This will also be triggered in normal tree builder mode
7458
             * if an entity happens to be empty, causing unnecessary
7459
             * reloads. It's hard to come up with a reliable check in
7460
             * which mode we're running.
7461
             */
7462
178
            xmlCtxtParseEntity(ctxt, ent);
7463
178
        }
7464
4.28k
    }
7465
7466
    /*
7467
     * We also check for amplification if entities aren't substituted.
7468
     * They might be expanded later.
7469
     */
7470
4.87k
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7471
8
        return;
7472
7473
4.86k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7474
348
        return;
7475
7476
4.51k
    if (ctxt->replaceEntities == 0) {
7477
  /*
7478
   * Create a reference
7479
   */
7480
4.51k
        if (ctxt->sax->reference != NULL)
7481
4.51k
      ctxt->sax->reference(ctxt->userData, ent->name);
7482
4.51k
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7483
0
        xmlNodePtr copy, cur;
7484
7485
        /*
7486
         * Seems we are generating the DOM content, copy the tree
7487
   */
7488
0
        cur = ent->children;
7489
7490
        /*
7491
         * Handle first text node with SAX to coalesce text efficiently
7492
         */
7493
0
        if ((cur->type == XML_TEXT_NODE) ||
7494
0
            (cur->type == XML_CDATA_SECTION_NODE)) {
7495
0
            int len = xmlStrlen(cur->content);
7496
7497
0
            if ((cur->type == XML_TEXT_NODE) ||
7498
0
                (ctxt->options & XML_PARSE_NOCDATA)) {
7499
0
                if (ctxt->sax->characters != NULL)
7500
0
                    ctxt->sax->characters(ctxt, cur->content, len);
7501
0
            } else {
7502
0
                if (ctxt->sax->cdataBlock != NULL)
7503
0
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7504
0
            }
7505
7506
0
            cur = cur->next;
7507
0
        }
7508
7509
0
        while (cur != NULL) {
7510
0
            xmlNodePtr last;
7511
7512
            /*
7513
             * Handle last text node with SAX to coalesce text efficiently
7514
             */
7515
0
            if ((cur->next == NULL) &&
7516
0
                ((cur->type == XML_TEXT_NODE) ||
7517
0
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7518
0
                int len = xmlStrlen(cur->content);
7519
7520
0
                if ((cur->type == XML_TEXT_NODE) ||
7521
0
                    (ctxt->options & XML_PARSE_NOCDATA)) {
7522
0
                    if (ctxt->sax->characters != NULL)
7523
0
                        ctxt->sax->characters(ctxt, cur->content, len);
7524
0
                } else {
7525
0
                    if (ctxt->sax->cdataBlock != NULL)
7526
0
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7527
0
                }
7528
7529
0
                break;
7530
0
            }
7531
7532
            /*
7533
             * Reset coalesce buffer stats only for non-text nodes.
7534
             */
7535
0
            ctxt->nodemem = 0;
7536
0
            ctxt->nodelen = 0;
7537
7538
0
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7539
7540
0
            if (copy == NULL) {
7541
0
                xmlErrMemory(ctxt);
7542
0
                break;
7543
0
            }
7544
7545
0
            if (ctxt->parseMode == XML_PARSE_READER) {
7546
                /* Needed for reader */
7547
0
                copy->extra = cur->extra;
7548
                /* Maybe needed for reader */
7549
0
                copy->_private = cur->_private;
7550
0
            }
7551
7552
0
            copy->parent = ctxt->node;
7553
0
            last = ctxt->node->last;
7554
0
            if (last == NULL) {
7555
0
                ctxt->node->children = copy;
7556
0
            } else {
7557
0
                last->next = copy;
7558
0
                copy->prev = last;
7559
0
            }
7560
0
            ctxt->node->last = copy;
7561
7562
0
            cur = cur->next;
7563
0
        }
7564
0
    }
7565
4.51k
}
7566
7567
static void
7568
482k
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7569
    /*
7570
     * [ WFC: Entity Declared ]
7571
     * In a document without any DTD, a document with only an
7572
     * internal DTD subset which contains no parameter entity
7573
     * references, or a document with "standalone='yes'", the
7574
     * Name given in the entity reference must match that in an
7575
     * entity declaration, except that well-formed documents
7576
     * need not declare any of the following entities: amp, lt,
7577
     * gt, apos, quot.
7578
     * The declaration of a parameter entity must precede any
7579
     * reference to it.
7580
     * Similarly, the declaration of a general entity must
7581
     * precede any reference to it which appears in a default
7582
     * value in an attribute-list declaration. Note that if
7583
     * entities are declared in the external subset or in
7584
     * external parameter entities, a non-validating processor
7585
     * is not obligated to read and process their declarations;
7586
     * for such documents, the rule that an entity must be
7587
     * declared is a well-formedness constraint only if
7588
     * standalone='yes'.
7589
     */
7590
482k
    if ((ctxt->standalone == 1) ||
7591
481k
        ((ctxt->hasExternalSubset == 0) &&
7592
472k
         (ctxt->hasPErefs == 0))) {
7593
452k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7594
452k
                          "Entity '%s' not defined\n", name);
7595
452k
    } else if (ctxt->validate) {
7596
        /*
7597
         * [ VC: Entity Declared ]
7598
         * In a document with an external subset or external
7599
         * parameter entities with "standalone='no'", ...
7600
         * ... The declaration of a parameter entity must
7601
         * precede any reference to it...
7602
         */
7603
0
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7604
0
                         "Entity '%s' not defined\n", name, NULL);
7605
29.8k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7606
29.8k
               ((ctxt->replaceEntities) &&
7607
0
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7608
        /*
7609
         * Also raise a non-fatal error
7610
         *
7611
         * - if the external subset is loaded and all entity declarations
7612
         *   should be available, or
7613
         * - entity substition was requested without restricting
7614
         *   external entity access.
7615
         */
7616
0
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7617
0
                     "Entity '%s' not defined\n", name);
7618
29.8k
    } else {
7619
29.8k
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7620
29.8k
                      "Entity '%s' not defined\n", name, NULL);
7621
29.8k
    }
7622
7623
482k
    ctxt->valid = 0;
7624
482k
}
7625
7626
static xmlEntityPtr
7627
1.45M
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7628
1.45M
    xmlEntityPtr ent = NULL;
7629
7630
    /*
7631
     * Predefined entities override any extra definition
7632
     */
7633
1.45M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7634
1.45M
        ent = xmlGetPredefinedEntity(name);
7635
1.45M
        if (ent != NULL)
7636
294k
            return(ent);
7637
1.45M
    }
7638
7639
    /*
7640
     * Ask first SAX for entity resolution, otherwise try the
7641
     * entities which may have stored in the parser context.
7642
     */
7643
1.16M
    if (ctxt->sax != NULL) {
7644
1.16M
  if (ctxt->sax->getEntity != NULL)
7645
1.16M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7646
1.16M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7647
2.72k
      (ctxt->options & XML_PARSE_OLDSAX))
7648
0
      ent = xmlGetPredefinedEntity(name);
7649
1.16M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7650
2.72k
      (ctxt->userData==ctxt)) {
7651
2.72k
      ent = xmlSAX2GetEntity(ctxt, name);
7652
2.72k
  }
7653
1.16M
    }
7654
7655
1.16M
    if (ent == NULL) {
7656
475k
        xmlHandleUndeclaredEntity(ctxt, name);
7657
475k
    }
7658
7659
    /*
7660
     * [ WFC: Parsed Entity ]
7661
     * An entity reference must not contain the name of an
7662
     * unparsed entity
7663
     */
7664
689k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7665
109
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7666
109
     "Entity reference to unparsed entity %s\n", name);
7667
109
        ent = NULL;
7668
109
    }
7669
7670
    /*
7671
     * [ WFC: No External Entity References ]
7672
     * Attribute values cannot contain direct or indirect
7673
     * entity references to external entities.
7674
     */
7675
689k
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7676
1.20k
        if (inAttr) {
7677
377
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7678
377
                 "Attribute references external entity '%s'\n", name);
7679
377
            ent = NULL;
7680
377
        }
7681
1.20k
    }
7682
7683
1.16M
    return(ent);
7684
1.45M
}
7685
7686
/**
7687
 * xmlParseEntityRefInternal:
7688
 * @ctxt:  an XML parser context
7689
 * @inAttr:  whether we are in an attribute value
7690
 *
7691
 * Parse an entity reference. Always consumes '&'.
7692
 *
7693
 * [68] EntityRef ::= '&' Name ';'
7694
 *
7695
 * Returns the name, or NULL in case of error.
7696
 */
7697
static const xmlChar *
7698
1.44M
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7699
1.44M
    const xmlChar *name;
7700
7701
1.44M
    GROW;
7702
7703
1.44M
    if (RAW != '&')
7704
0
        return(NULL);
7705
1.44M
    NEXT;
7706
1.44M
    name = xmlParseName(ctxt);
7707
1.44M
    if (name == NULL) {
7708
586k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7709
586k
           "xmlParseEntityRef: no name\n");
7710
586k
        return(NULL);
7711
586k
    }
7712
861k
    if (RAW != ';') {
7713
200k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7714
200k
  return(NULL);
7715
200k
    }
7716
661k
    NEXT;
7717
7718
661k
    return(name);
7719
861k
}
7720
7721
/**
7722
 * xmlParseEntityRef:
7723
 * @ctxt:  an XML parser context
7724
 *
7725
 * DEPRECATED: Internal function, don't use.
7726
 *
7727
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7728
 */
7729
xmlEntityPtr
7730
0
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7731
0
    const xmlChar *name;
7732
7733
0
    if (ctxt == NULL)
7734
0
        return(NULL);
7735
7736
0
    name = xmlParseEntityRefInternal(ctxt);
7737
0
    if (name == NULL)
7738
0
        return(NULL);
7739
7740
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7741
0
}
7742
7743
/**
7744
 * xmlParseStringEntityRef:
7745
 * @ctxt:  an XML parser context
7746
 * @str:  a pointer to an index in the string
7747
 *
7748
 * parse ENTITY references declarations, but this version parses it from
7749
 * a string value.
7750
 *
7751
 * [68] EntityRef ::= '&' Name ';'
7752
 *
7753
 * [ WFC: Entity Declared ]
7754
 * In a document without any DTD, a document with only an internal DTD
7755
 * subset which contains no parameter entity references, or a document
7756
 * with "standalone='yes'", the Name given in the entity reference
7757
 * must match that in an entity declaration, except that well-formed
7758
 * documents need not declare any of the following entities: amp, lt,
7759
 * gt, apos, quot.  The declaration of a parameter entity must precede
7760
 * any reference to it.  Similarly, the declaration of a general entity
7761
 * must precede any reference to it which appears in a default value in an
7762
 * attribute-list declaration. Note that if entities are declared in the
7763
 * external subset or in external parameter entities, a non-validating
7764
 * processor is not obligated to read and process their declarations;
7765
 * for such documents, the rule that an entity must be declared is a
7766
 * well-formedness constraint only if standalone='yes'.
7767
 *
7768
 * [ WFC: Parsed Entity ]
7769
 * An entity reference must not contain the name of an unparsed entity
7770
 *
7771
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7772
 * is updated to the current location in the string.
7773
 */
7774
static xmlChar *
7775
798k
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7776
798k
    xmlChar *name;
7777
798k
    const xmlChar *ptr;
7778
798k
    xmlChar cur;
7779
7780
798k
    if ((str == NULL) || (*str == NULL))
7781
0
        return(NULL);
7782
798k
    ptr = *str;
7783
798k
    cur = *ptr;
7784
798k
    if (cur != '&')
7785
0
  return(NULL);
7786
7787
798k
    ptr++;
7788
798k
    name = xmlParseStringName(ctxt, &ptr);
7789
798k
    if (name == NULL) {
7790
87
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7791
87
           "xmlParseStringEntityRef: no name\n");
7792
87
  *str = ptr;
7793
87
  return(NULL);
7794
87
    }
7795
798k
    if (*ptr != ';') {
7796
48
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7797
48
        xmlFree(name);
7798
48
  *str = ptr;
7799
48
  return(NULL);
7800
48
    }
7801
798k
    ptr++;
7802
7803
798k
    *str = ptr;
7804
798k
    return(name);
7805
798k
}
7806
7807
/**
7808
 * xmlParsePEReference:
7809
 * @ctxt:  an XML parser context
7810
 *
7811
 * DEPRECATED: Internal function, don't use.
7812
 *
7813
 * Parse a parameter entity reference. Always consumes '%'.
7814
 *
7815
 * The entity content is handled directly by pushing it's content as
7816
 * a new input stream.
7817
 *
7818
 * [69] PEReference ::= '%' Name ';'
7819
 *
7820
 * [ WFC: No Recursion ]
7821
 * A parsed entity must not contain a recursive
7822
 * reference to itself, either directly or indirectly.
7823
 *
7824
 * [ WFC: Entity Declared ]
7825
 * In a document without any DTD, a document with only an internal DTD
7826
 * subset which contains no parameter entity references, or a document
7827
 * with "standalone='yes'", ...  ... The declaration of a parameter
7828
 * entity must precede any reference to it...
7829
 *
7830
 * [ VC: Entity Declared ]
7831
 * In a document with an external subset or external parameter entities
7832
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7833
 * must precede any reference to it...
7834
 *
7835
 * [ WFC: In DTD ]
7836
 * Parameter-entity references may only appear in the DTD.
7837
 * NOTE: misleading but this is handled.
7838
 */
7839
void
7840
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7841
57.0k
{
7842
57.0k
    const xmlChar *name;
7843
57.0k
    xmlEntityPtr entity = NULL;
7844
57.0k
    xmlParserInputPtr input;
7845
7846
57.0k
    if (RAW != '%')
7847
0
        return;
7848
57.0k
    NEXT;
7849
57.0k
    name = xmlParseName(ctxt);
7850
57.0k
    if (name == NULL) {
7851
15.6k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7852
15.6k
  return;
7853
15.6k
    }
7854
41.4k
    if (RAW != ';') {
7855
8.59k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7856
8.59k
        return;
7857
8.59k
    }
7858
7859
32.8k
    NEXT;
7860
7861
    /* Must be set before xmlHandleUndeclaredEntity */
7862
32.8k
    ctxt->hasPErefs = 1;
7863
7864
    /*
7865
     * Request the entity from SAX
7866
     */
7867
32.8k
    if ((ctxt->sax != NULL) &&
7868
32.8k
  (ctxt->sax->getParameterEntity != NULL))
7869
32.8k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7870
7871
32.8k
    if (entity == NULL) {
7872
6.25k
        xmlHandleUndeclaredEntity(ctxt, name);
7873
26.5k
    } else {
7874
  /*
7875
   * Internal checking in case the entity quest barfed
7876
   */
7877
26.5k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7878
127
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7879
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7880
0
      "Internal: %%%s; is not a parameter entity\n",
7881
0
        name, NULL);
7882
26.5k
  } else {
7883
26.5k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7884
127
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7885
127
     ((ctxt->loadsubset == 0) &&
7886
127
      (ctxt->replaceEntities == 0) &&
7887
127
      (ctxt->validate == 0))))
7888
127
    return;
7889
7890
26.4k
            if (entity->flags & XML_ENT_EXPANDING) {
7891
4
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7892
4
                xmlHaltParser(ctxt);
7893
4
                return;
7894
4
            }
7895
7896
26.4k
      input = xmlNewEntityInputStream(ctxt, entity);
7897
26.4k
      if (xmlCtxtPushInput(ctxt, input) < 0) {
7898
0
                xmlFreeInputStream(input);
7899
0
    return;
7900
0
            }
7901
7902
26.4k
            entity->flags |= XML_ENT_EXPANDING;
7903
7904
26.4k
            GROW;
7905
7906
26.4k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7907
0
                xmlDetectEncoding(ctxt);
7908
7909
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7910
0
                    (IS_BLANK_CH(NXT(5)))) {
7911
0
                    xmlParseTextDecl(ctxt);
7912
0
                }
7913
0
            }
7914
26.4k
  }
7915
26.5k
    }
7916
32.8k
}
7917
7918
/**
7919
 * xmlLoadEntityContent:
7920
 * @ctxt:  an XML parser context
7921
 * @entity: an unloaded system entity
7922
 *
7923
 * Load the content of an entity.
7924
 *
7925
 * Returns 0 in case of success and -1 in case of failure
7926
 */
7927
static int
7928
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7929
0
    xmlParserInputPtr oldinput, input = NULL;
7930
0
    xmlParserInputPtr *oldinputTab;
7931
0
    const xmlChar *oldencoding;
7932
0
    xmlChar *content = NULL;
7933
0
    xmlResourceType rtype;
7934
0
    size_t length, i;
7935
0
    int oldinputNr, oldinputMax;
7936
0
    int ret = -1;
7937
0
    int res;
7938
7939
0
    if ((ctxt == NULL) || (entity == NULL) ||
7940
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7941
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7942
0
  (entity->content != NULL)) {
7943
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7944
0
              "xmlLoadEntityContent parameter error");
7945
0
        return(-1);
7946
0
    }
7947
7948
0
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7949
0
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7950
0
    else
7951
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7952
7953
0
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7954
0
                            (char *) entity->ExternalID, rtype);
7955
0
    if (input == NULL)
7956
0
        return(-1);
7957
7958
0
    oldinput = ctxt->input;
7959
0
    oldinputNr = ctxt->inputNr;
7960
0
    oldinputMax = ctxt->inputMax;
7961
0
    oldinputTab = ctxt->inputTab;
7962
0
    oldencoding = ctxt->encoding;
7963
7964
0
    ctxt->input = NULL;
7965
0
    ctxt->inputNr = 0;
7966
0
    ctxt->inputMax = 1;
7967
0
    ctxt->encoding = NULL;
7968
0
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7969
0
    if (ctxt->inputTab == NULL) {
7970
0
        xmlErrMemory(ctxt);
7971
0
        xmlFreeInputStream(input);
7972
0
        goto error;
7973
0
    }
7974
7975
0
    xmlBufResetInput(input->buf->buffer, input);
7976
7977
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
7978
0
        xmlFreeInputStream(input);
7979
0
        goto error;
7980
0
    }
7981
7982
0
    xmlDetectEncoding(ctxt);
7983
7984
    /*
7985
     * Parse a possible text declaration first
7986
     */
7987
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7988
0
  xmlParseTextDecl(ctxt);
7989
        /*
7990
         * An XML-1.0 document can't reference an entity not XML-1.0
7991
         */
7992
0
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7993
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7994
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7995
0
                           "Version mismatch between document and entity\n");
7996
0
        }
7997
0
    }
7998
7999
0
    length = input->cur - input->base;
8000
0
    xmlBufShrink(input->buf->buffer, length);
8001
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
8002
8003
0
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
8004
0
        ;
8005
8006
0
    xmlBufResetInput(input->buf->buffer, input);
8007
8008
0
    if (res < 0) {
8009
0
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
8010
0
        goto error;
8011
0
    }
8012
8013
0
    length = xmlBufUse(input->buf->buffer);
8014
0
    if (length > INT_MAX) {
8015
0
        xmlErrMemory(ctxt);
8016
0
        goto error;
8017
0
    }
8018
8019
0
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
8020
0
    if (content == NULL) {
8021
0
        xmlErrMemory(ctxt);
8022
0
        goto error;
8023
0
    }
8024
8025
0
    for (i = 0; i < length; ) {
8026
0
        int clen = length - i;
8027
0
        int c = xmlGetUTF8Char(content + i, &clen);
8028
8029
0
        if ((c < 0) || (!IS_CHAR(c))) {
8030
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8031
0
                              "xmlLoadEntityContent: invalid char value %d\n",
8032
0
                              content[i]);
8033
0
            goto error;
8034
0
        }
8035
0
        i += clen;
8036
0
    }
8037
8038
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
8039
0
    entity->content = content;
8040
0
    entity->length = length;
8041
0
    content = NULL;
8042
0
    ret = 0;
8043
8044
0
error:
8045
0
    while (ctxt->inputNr > 0)
8046
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
8047
0
    xmlFree(ctxt->inputTab);
8048
0
    xmlFree((xmlChar *) ctxt->encoding);
8049
8050
0
    ctxt->input = oldinput;
8051
0
    ctxt->inputNr = oldinputNr;
8052
0
    ctxt->inputMax = oldinputMax;
8053
0
    ctxt->inputTab = oldinputTab;
8054
0
    ctxt->encoding = oldencoding;
8055
8056
0
    xmlFree(content);
8057
8058
0
    return(ret);
8059
0
}
8060
8061
/**
8062
 * xmlParseStringPEReference:
8063
 * @ctxt:  an XML parser context
8064
 * @str:  a pointer to an index in the string
8065
 *
8066
 * parse PEReference declarations
8067
 *
8068
 * [69] PEReference ::= '%' Name ';'
8069
 *
8070
 * [ WFC: No Recursion ]
8071
 * A parsed entity must not contain a recursive
8072
 * reference to itself, either directly or indirectly.
8073
 *
8074
 * [ WFC: Entity Declared ]
8075
 * In a document without any DTD, a document with only an internal DTD
8076
 * subset which contains no parameter entity references, or a document
8077
 * with "standalone='yes'", ...  ... The declaration of a parameter
8078
 * entity must precede any reference to it...
8079
 *
8080
 * [ VC: Entity Declared ]
8081
 * In a document with an external subset or external parameter entities
8082
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8083
 * must precede any reference to it...
8084
 *
8085
 * [ WFC: In DTD ]
8086
 * Parameter-entity references may only appear in the DTD.
8087
 * NOTE: misleading but this is handled.
8088
 *
8089
 * Returns the string of the entity content.
8090
 *         str is updated to the current value of the index
8091
 */
8092
static xmlEntityPtr
8093
1.74k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8094
1.74k
    const xmlChar *ptr;
8095
1.74k
    xmlChar cur;
8096
1.74k
    xmlChar *name;
8097
1.74k
    xmlEntityPtr entity = NULL;
8098
8099
1.74k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8100
1.74k
    ptr = *str;
8101
1.74k
    cur = *ptr;
8102
1.74k
    if (cur != '%')
8103
0
        return(NULL);
8104
1.74k
    ptr++;
8105
1.74k
    name = xmlParseStringName(ctxt, &ptr);
8106
1.74k
    if (name == NULL) {
8107
335
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8108
335
           "xmlParseStringPEReference: no name\n");
8109
335
  *str = ptr;
8110
335
  return(NULL);
8111
335
    }
8112
1.41k
    cur = *ptr;
8113
1.41k
    if (cur != ';') {
8114
961
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8115
961
  xmlFree(name);
8116
961
  *str = ptr;
8117
961
  return(NULL);
8118
961
    }
8119
451
    ptr++;
8120
8121
    /* Must be set before xmlHandleUndeclaredEntity */
8122
451
    ctxt->hasPErefs = 1;
8123
8124
    /*
8125
     * Request the entity from SAX
8126
     */
8127
451
    if ((ctxt->sax != NULL) &&
8128
451
  (ctxt->sax->getParameterEntity != NULL))
8129
451
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8130
8131
451
    if (entity == NULL) {
8132
424
        xmlHandleUndeclaredEntity(ctxt, name);
8133
424
    } else {
8134
  /*
8135
   * Internal checking in case the entity quest barfed
8136
   */
8137
27
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8138
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8139
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8140
0
        "%%%s; is not a parameter entity\n",
8141
0
        name, NULL);
8142
0
  }
8143
27
    }
8144
8145
451
    xmlFree(name);
8146
451
    *str = ptr;
8147
451
    return(entity);
8148
1.41k
}
8149
8150
/**
8151
 * xmlParseDocTypeDecl:
8152
 * @ctxt:  an XML parser context
8153
 *
8154
 * DEPRECATED: Internal function, don't use.
8155
 *
8156
 * parse a DOCTYPE declaration
8157
 *
8158
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8159
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8160
 *
8161
 * [ VC: Root Element Type ]
8162
 * The Name in the document type declaration must match the element
8163
 * type of the root element.
8164
 */
8165
8166
void
8167
54.3k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8168
54.3k
    const xmlChar *name = NULL;
8169
54.3k
    xmlChar *ExternalID = NULL;
8170
54.3k
    xmlChar *URI = NULL;
8171
8172
    /*
8173
     * We know that '<!DOCTYPE' has been detected.
8174
     */
8175
54.3k
    SKIP(9);
8176
8177
54.3k
    if (SKIP_BLANKS == 0) {
8178
40.6k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8179
40.6k
                       "Space required after 'DOCTYPE'\n");
8180
40.6k
    }
8181
8182
    /*
8183
     * Parse the DOCTYPE name.
8184
     */
8185
54.3k
    name = xmlParseName(ctxt);
8186
54.3k
    if (name == NULL) {
8187
7.87k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8188
7.87k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8189
7.87k
    }
8190
54.3k
    ctxt->intSubName = name;
8191
8192
54.3k
    SKIP_BLANKS;
8193
8194
    /*
8195
     * Check for SystemID and ExternalID
8196
     */
8197
54.3k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8198
8199
54.3k
    if ((URI != NULL) || (ExternalID != NULL)) {
8200
2.81k
        ctxt->hasExternalSubset = 1;
8201
2.81k
    }
8202
54.3k
    ctxt->extSubURI = URI;
8203
54.3k
    ctxt->extSubSystem = ExternalID;
8204
8205
54.3k
    SKIP_BLANKS;
8206
8207
    /*
8208
     * Create and update the internal subset.
8209
     */
8210
54.3k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8211
54.3k
  (!ctxt->disableSAX))
8212
54.3k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8213
8214
54.3k
    if ((RAW != '[') && (RAW != '>')) {
8215
5.13k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8216
5.13k
    }
8217
54.3k
}
8218
8219
/**
8220
 * xmlParseInternalSubset:
8221
 * @ctxt:  an XML parser context
8222
 *
8223
 * parse the internal subset declaration
8224
 *
8225
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8226
 */
8227
8228
static void
8229
41.9k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8230
    /*
8231
     * Is there any DTD definition ?
8232
     */
8233
41.9k
    if (RAW == '[') {
8234
41.9k
        int oldInputNr = ctxt->inputNr;
8235
8236
41.9k
        NEXT;
8237
  /*
8238
   * Parse the succession of Markup declarations and
8239
   * PEReferences.
8240
   * Subsequence (markupdecl | PEReference | S)*
8241
   */
8242
41.9k
  SKIP_BLANKS;
8243
265k
  while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) &&
8244
241k
               (PARSER_STOPPED(ctxt) == 0)) {
8245
8246
            /*
8247
             * Conditional sections are allowed from external entities included
8248
             * by PE References in the internal subset.
8249
             */
8250
238k
            if ((PARSER_EXTERNAL(ctxt)) &&
8251
0
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8252
0
                xmlParseConditionalSections(ctxt);
8253
238k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8254
166k
          xmlParseMarkupDecl(ctxt);
8255
166k
            } else if (RAW == '%') {
8256
57.0k
          xmlParsePEReference(ctxt);
8257
57.0k
            } else {
8258
15.3k
    xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8259
15.3k
                break;
8260
15.3k
            }
8261
223k
      SKIP_BLANKS_PE;
8262
223k
            SHRINK;
8263
223k
            GROW;
8264
223k
  }
8265
8266
47.1k
        while (ctxt->inputNr > oldInputNr)
8267
5.24k
            xmlPopPE(ctxt);
8268
8269
41.9k
  if (RAW == ']') {
8270
23.2k
      NEXT;
8271
23.2k
      SKIP_BLANKS;
8272
23.2k
  }
8273
41.9k
    }
8274
8275
    /*
8276
     * We should be at the end of the DOCTYPE declaration.
8277
     */
8278
41.9k
    if ((ctxt->wellFormed) && (RAW != '>')) {
8279
39
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8280
39
  return;
8281
39
    }
8282
41.9k
    NEXT;
8283
41.9k
}
8284
8285
#ifdef LIBXML_SAX1_ENABLED
8286
/**
8287
 * xmlParseAttribute:
8288
 * @ctxt:  an XML parser context
8289
 * @value:  a xmlChar ** used to store the value of the attribute
8290
 *
8291
 * DEPRECATED: Internal function, don't use.
8292
 *
8293
 * parse an attribute
8294
 *
8295
 * [41] Attribute ::= Name Eq AttValue
8296
 *
8297
 * [ WFC: No External Entity References ]
8298
 * Attribute values cannot contain direct or indirect entity references
8299
 * to external entities.
8300
 *
8301
 * [ WFC: No < in Attribute Values ]
8302
 * The replacement text of any entity referred to directly or indirectly in
8303
 * an attribute value (other than "&lt;") must not contain a <.
8304
 *
8305
 * [ VC: Attribute Value Type ]
8306
 * The attribute must have been declared; the value must be of the type
8307
 * declared for it.
8308
 *
8309
 * [25] Eq ::= S? '=' S?
8310
 *
8311
 * With namespace:
8312
 *
8313
 * [NS 11] Attribute ::= QName Eq AttValue
8314
 *
8315
 * Also the case QName == xmlns:??? is handled independently as a namespace
8316
 * definition.
8317
 *
8318
 * Returns the attribute name, and the value in *value.
8319
 */
8320
8321
const xmlChar *
8322
0
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8323
0
    const xmlChar *name;
8324
0
    xmlChar *val;
8325
8326
0
    *value = NULL;
8327
0
    GROW;
8328
0
    name = xmlParseName(ctxt);
8329
0
    if (name == NULL) {
8330
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8331
0
                 "error parsing attribute name\n");
8332
0
        return(NULL);
8333
0
    }
8334
8335
    /*
8336
     * read the value
8337
     */
8338
0
    SKIP_BLANKS;
8339
0
    if (RAW == '=') {
8340
0
        NEXT;
8341
0
  SKIP_BLANKS;
8342
0
  val = xmlParseAttValue(ctxt);
8343
0
    } else {
8344
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8345
0
         "Specification mandates value for attribute %s\n", name);
8346
0
  return(name);
8347
0
    }
8348
8349
    /*
8350
     * Check that xml:lang conforms to the specification
8351
     * No more registered as an error, just generate a warning now
8352
     * since this was deprecated in XML second edition
8353
     */
8354
0
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8355
0
  if (!xmlCheckLanguageID(val)) {
8356
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8357
0
              "Malformed value for xml:lang : %s\n",
8358
0
        val, NULL);
8359
0
  }
8360
0
    }
8361
8362
    /*
8363
     * Check that xml:space conforms to the specification
8364
     */
8365
0
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8366
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8367
0
      *(ctxt->space) = 0;
8368
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8369
0
      *(ctxt->space) = 1;
8370
0
  else {
8371
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8372
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8373
0
                                 val, NULL);
8374
0
  }
8375
0
    }
8376
8377
0
    *value = val;
8378
0
    return(name);
8379
0
}
8380
8381
/**
8382
 * xmlParseStartTag:
8383
 * @ctxt:  an XML parser context
8384
 *
8385
 * DEPRECATED: Internal function, don't use.
8386
 *
8387
 * Parse a start tag. Always consumes '<'.
8388
 *
8389
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8390
 *
8391
 * [ WFC: Unique Att Spec ]
8392
 * No attribute name may appear more than once in the same start-tag or
8393
 * empty-element tag.
8394
 *
8395
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8396
 *
8397
 * [ WFC: Unique Att Spec ]
8398
 * No attribute name may appear more than once in the same start-tag or
8399
 * empty-element tag.
8400
 *
8401
 * With namespace:
8402
 *
8403
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8404
 *
8405
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8406
 *
8407
 * Returns the element name parsed
8408
 */
8409
8410
const xmlChar *
8411
0
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8412
0
    const xmlChar *name;
8413
0
    const xmlChar *attname;
8414
0
    xmlChar *attvalue;
8415
0
    const xmlChar **atts = ctxt->atts;
8416
0
    int nbatts = 0;
8417
0
    int maxatts = ctxt->maxatts;
8418
0
    int i;
8419
8420
0
    if (RAW != '<') return(NULL);
8421
0
    NEXT1;
8422
8423
0
    name = xmlParseName(ctxt);
8424
0
    if (name == NULL) {
8425
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8426
0
       "xmlParseStartTag: invalid element name\n");
8427
0
        return(NULL);
8428
0
    }
8429
8430
    /*
8431
     * Now parse the attributes, it ends up with the ending
8432
     *
8433
     * (S Attribute)* S?
8434
     */
8435
0
    SKIP_BLANKS;
8436
0
    GROW;
8437
8438
0
    while (((RAW != '>') &&
8439
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8440
0
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8441
0
  attname = xmlParseAttribute(ctxt, &attvalue);
8442
0
        if (attname == NULL)
8443
0
      break;
8444
0
        if (attvalue != NULL) {
8445
      /*
8446
       * [ WFC: Unique Att Spec ]
8447
       * No attribute name may appear more than once in the same
8448
       * start-tag or empty-element tag.
8449
       */
8450
0
      for (i = 0; i < nbatts;i += 2) {
8451
0
          if (xmlStrEqual(atts[i], attname)) {
8452
0
        xmlErrAttributeDup(ctxt, NULL, attname);
8453
0
        goto failed;
8454
0
    }
8455
0
      }
8456
      /*
8457
       * Add the pair to atts
8458
       */
8459
0
      if (nbatts + 4 > maxatts) {
8460
0
          const xmlChar **n;
8461
0
                int newSize;
8462
8463
0
                newSize = xmlGrowCapacity(maxatts, sizeof(n[0]) * 2,
8464
0
                                          11, XML_MAX_ATTRS);
8465
0
                if (newSize < 0) {
8466
0
        xmlErrMemory(ctxt);
8467
0
        goto failed;
8468
0
    }
8469
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
8470
0
                if (newSize < 2)
8471
0
                    newSize = 2;
8472
0
#endif
8473
0
          n = xmlRealloc(atts, newSize * sizeof(n[0]) * 2);
8474
0
    if (n == NULL) {
8475
0
        xmlErrMemory(ctxt);
8476
0
        goto failed;
8477
0
    }
8478
0
    atts = n;
8479
0
                maxatts = newSize * 2;
8480
0
    ctxt->atts = atts;
8481
0
    ctxt->maxatts = maxatts;
8482
0
      }
8483
8484
0
      atts[nbatts++] = attname;
8485
0
      atts[nbatts++] = attvalue;
8486
0
      atts[nbatts] = NULL;
8487
0
      atts[nbatts + 1] = NULL;
8488
8489
0
            attvalue = NULL;
8490
0
  }
8491
8492
0
failed:
8493
8494
0
        if (attvalue != NULL)
8495
0
            xmlFree(attvalue);
8496
8497
0
  GROW
8498
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8499
0
      break;
8500
0
  if (SKIP_BLANKS == 0) {
8501
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8502
0
         "attributes construct error\n");
8503
0
  }
8504
0
  SHRINK;
8505
0
        GROW;
8506
0
    }
8507
8508
    /*
8509
     * SAX: Start of Element !
8510
     */
8511
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8512
0
  (!ctxt->disableSAX)) {
8513
0
  if (nbatts > 0)
8514
0
      ctxt->sax->startElement(ctxt->userData, name, atts);
8515
0
  else
8516
0
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8517
0
    }
8518
8519
0
    if (atts != NULL) {
8520
        /* Free only the content strings */
8521
0
        for (i = 1;i < nbatts;i+=2)
8522
0
      if (atts[i] != NULL)
8523
0
         xmlFree((xmlChar *) atts[i]);
8524
0
    }
8525
0
    return(name);
8526
0
}
8527
8528
/**
8529
 * xmlParseEndTag1:
8530
 * @ctxt:  an XML parser context
8531
 * @line:  line of the start tag
8532
 * @nsNr:  number of namespaces on the start tag
8533
 *
8534
 * Parse an end tag. Always consumes '</'.
8535
 *
8536
 * [42] ETag ::= '</' Name S? '>'
8537
 *
8538
 * With namespace
8539
 *
8540
 * [NS 9] ETag ::= '</' QName S? '>'
8541
 */
8542
8543
static void
8544
0
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8545
0
    const xmlChar *name;
8546
8547
0
    GROW;
8548
0
    if ((RAW != '<') || (NXT(1) != '/')) {
8549
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8550
0
           "xmlParseEndTag: '</' not found\n");
8551
0
  return;
8552
0
    }
8553
0
    SKIP(2);
8554
8555
0
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8556
8557
    /*
8558
     * We should definitely be at the ending "S? '>'" part
8559
     */
8560
0
    GROW;
8561
0
    SKIP_BLANKS;
8562
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8563
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8564
0
    } else
8565
0
  NEXT1;
8566
8567
    /*
8568
     * [ WFC: Element Type Match ]
8569
     * The Name in an element's end-tag must match the element type in the
8570
     * start-tag.
8571
     *
8572
     */
8573
0
    if (name != (xmlChar*)1) {
8574
0
        if (name == NULL) name = BAD_CAST "unparsable";
8575
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8576
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
8577
0
                    ctxt->name, line, name);
8578
0
    }
8579
8580
    /*
8581
     * SAX: End of Tag
8582
     */
8583
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8584
0
  (!ctxt->disableSAX))
8585
0
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8586
8587
0
    namePop(ctxt);
8588
0
    spacePop(ctxt);
8589
0
}
8590
8591
/**
8592
 * xmlParseEndTag:
8593
 * @ctxt:  an XML parser context
8594
 *
8595
 * DEPRECATED: Internal function, don't use.
8596
 *
8597
 * parse an end of tag
8598
 *
8599
 * [42] ETag ::= '</' Name S? '>'
8600
 *
8601
 * With namespace
8602
 *
8603
 * [NS 9] ETag ::= '</' QName S? '>'
8604
 */
8605
8606
void
8607
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8608
0
    xmlParseEndTag1(ctxt, 0);
8609
0
}
8610
#endif /* LIBXML_SAX1_ENABLED */
8611
8612
/************************************************************************
8613
 *                  *
8614
 *          SAX 2 specific operations       *
8615
 *                  *
8616
 ************************************************************************/
8617
8618
/**
8619
 * xmlParseQNameHashed:
8620
 * @ctxt:  an XML parser context
8621
 * @prefix:  pointer to store the prefix part
8622
 *
8623
 * parse an XML Namespace QName
8624
 *
8625
 * [6]  QName  ::= (Prefix ':')? LocalPart
8626
 * [7]  Prefix  ::= NCName
8627
 * [8]  LocalPart  ::= NCName
8628
 *
8629
 * Returns the Name parsed or NULL
8630
 */
8631
8632
static xmlHashedString
8633
17.1M
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8634
17.1M
    xmlHashedString l, p;
8635
17.1M
    int start, isNCName = 0;
8636
8637
17.1M
    l.name = NULL;
8638
17.1M
    p.name = NULL;
8639
8640
17.1M
    GROW;
8641
17.1M
    start = CUR_PTR - BASE_PTR;
8642
8643
17.1M
    l = xmlParseNCName(ctxt);
8644
17.1M
    if (l.name != NULL) {
8645
16.2M
        isNCName = 1;
8646
16.2M
        if (CUR == ':') {
8647
871k
            NEXT;
8648
871k
            p = l;
8649
871k
            l = xmlParseNCName(ctxt);
8650
871k
        }
8651
16.2M
    }
8652
17.1M
    if ((l.name == NULL) || (CUR == ':')) {
8653
1.00M
        xmlChar *tmp;
8654
8655
1.00M
        l.name = NULL;
8656
1.00M
        p.name = NULL;
8657
1.00M
        if ((isNCName == 0) && (CUR != ':'))
8658
754k
            return(l);
8659
255k
        tmp = xmlParseNmtoken(ctxt);
8660
255k
        if (tmp != NULL)
8661
216k
            xmlFree(tmp);
8662
255k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8663
255k
                                CUR_PTR - (BASE_PTR + start));
8664
255k
        if (l.name == NULL) {
8665
0
            xmlErrMemory(ctxt);
8666
0
            return(l);
8667
0
        }
8668
255k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8669
255k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8670
255k
    }
8671
8672
16.4M
    *prefix = p;
8673
16.4M
    return(l);
8674
17.1M
}
8675
8676
/**
8677
 * xmlParseQName:
8678
 * @ctxt:  an XML parser context
8679
 * @prefix:  pointer to store the prefix part
8680
 *
8681
 * parse an XML Namespace QName
8682
 *
8683
 * [6]  QName  ::= (Prefix ':')? LocalPart
8684
 * [7]  Prefix  ::= NCName
8685
 * [8]  LocalPart  ::= NCName
8686
 *
8687
 * Returns the Name parsed or NULL
8688
 */
8689
8690
static const xmlChar *
8691
17.0k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8692
17.0k
    xmlHashedString n, p;
8693
8694
17.0k
    n = xmlParseQNameHashed(ctxt, &p);
8695
17.0k
    if (n.name == NULL)
8696
1.51k
        return(NULL);
8697
15.5k
    *prefix = p.name;
8698
15.5k
    return(n.name);
8699
17.0k
}
8700
8701
/**
8702
 * xmlParseQNameAndCompare:
8703
 * @ctxt:  an XML parser context
8704
 * @name:  the localname
8705
 * @prefix:  the prefix, if any.
8706
 *
8707
 * parse an XML name and compares for match
8708
 * (specialized for endtag parsing)
8709
 *
8710
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8711
 * and the name for mismatch
8712
 */
8713
8714
static const xmlChar *
8715
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8716
107k
                        xmlChar const *prefix) {
8717
107k
    const xmlChar *cmp;
8718
107k
    const xmlChar *in;
8719
107k
    const xmlChar *ret;
8720
107k
    const xmlChar *prefix2;
8721
8722
107k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8723
8724
107k
    GROW;
8725
107k
    in = ctxt->input->cur;
8726
8727
107k
    cmp = prefix;
8728
294k
    while (*in != 0 && *in == *cmp) {
8729
187k
  ++in;
8730
187k
  ++cmp;
8731
187k
    }
8732
107k
    if ((*cmp == 0) && (*in == ':')) {
8733
96.3k
        in++;
8734
96.3k
  cmp = name;
8735
871k
  while (*in != 0 && *in == *cmp) {
8736
774k
      ++in;
8737
774k
      ++cmp;
8738
774k
  }
8739
96.3k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8740
      /* success */
8741
90.1k
            ctxt->input->col += in - ctxt->input->cur;
8742
90.1k
      ctxt->input->cur = in;
8743
90.1k
      return((const xmlChar*) 1);
8744
90.1k
  }
8745
96.3k
    }
8746
    /*
8747
     * all strings coms from the dictionary, equality can be done directly
8748
     */
8749
17.0k
    ret = xmlParseQName (ctxt, &prefix2);
8750
17.0k
    if (ret == NULL)
8751
1.51k
        return(NULL);
8752
15.5k
    if ((ret == name) && (prefix == prefix2))
8753
570
  return((const xmlChar*) 1);
8754
14.9k
    return ret;
8755
15.5k
}
8756
8757
/**
8758
 * xmlParseAttribute2:
8759
 * @ctxt:  an XML parser context
8760
 * @pref:  the element prefix
8761
 * @elem:  the element name
8762
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
8763
 * @value:  a xmlChar ** used to store the value of the attribute
8764
 * @len:  an int * to save the length of the attribute
8765
 * @alloc:  an int * to indicate if the attribute was allocated
8766
 *
8767
 * parse an attribute in the new SAX2 framework.
8768
 *
8769
 * Returns the attribute name, and the value in *value, .
8770
 */
8771
8772
static xmlHashedString
8773
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8774
                   const xmlChar * pref, const xmlChar * elem,
8775
                   xmlHashedString * hprefix, xmlChar ** value,
8776
                   int *len, int *alloc)
8777
6.82M
{
8778
6.82M
    xmlHashedString hname;
8779
6.82M
    const xmlChar *prefix, *name;
8780
6.82M
    xmlChar *val = NULL, *internal_val = NULL;
8781
6.82M
    int normalize = 0;
8782
6.82M
    int isNamespace;
8783
8784
6.82M
    *value = NULL;
8785
6.82M
    GROW;
8786
6.82M
    hname = xmlParseQNameHashed(ctxt, hprefix);
8787
6.82M
    if (hname.name == NULL) {
8788
718k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8789
718k
                       "error parsing attribute name\n");
8790
718k
        return(hname);
8791
718k
    }
8792
6.10M
    name = hname.name;
8793
6.10M
    prefix = hprefix->name;
8794
8795
    /*
8796
     * get the type if needed
8797
     */
8798
6.10M
    if (ctxt->attsSpecial != NULL) {
8799
296k
        int type;
8800
8801
296k
        type = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
8802
296k
                                              prefix, name));
8803
296k
        if (type != 0)
8804
39.2k
            normalize = 1;
8805
296k
    }
8806
8807
    /*
8808
     * read the value
8809
     */
8810
6.10M
    SKIP_BLANKS;
8811
6.10M
    if (RAW == '=') {
8812
5.83M
        NEXT;
8813
5.83M
        SKIP_BLANKS;
8814
5.83M
        isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8815
5.61M
                       (prefix == ctxt->str_xmlns));
8816
5.83M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize,
8817
5.83M
                                       isNamespace);
8818
5.83M
        if (val == NULL)
8819
110k
            goto error;
8820
5.83M
    } else {
8821
272k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8822
272k
                          "Specification mandates value for attribute %s\n",
8823
272k
                          name);
8824
272k
        goto error;
8825
272k
    }
8826
8827
5.72M
    if (prefix == ctxt->str_xml) {
8828
        /*
8829
         * Check that xml:lang conforms to the specification
8830
         * No more registered as an error, just generate a warning now
8831
         * since this was deprecated in XML second edition
8832
         */
8833
155k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8834
0
            internal_val = xmlStrndup(val, *len);
8835
0
            if (internal_val == NULL)
8836
0
                goto mem_error;
8837
0
            if (!xmlCheckLanguageID(internal_val)) {
8838
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8839
0
                              "Malformed value for xml:lang : %s\n",
8840
0
                              internal_val, NULL);
8841
0
            }
8842
0
        }
8843
8844
        /*
8845
         * Check that xml:space conforms to the specification
8846
         */
8847
155k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8848
10.3k
            internal_val = xmlStrndup(val, *len);
8849
10.3k
            if (internal_val == NULL)
8850
0
                goto mem_error;
8851
10.3k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8852
604
                *(ctxt->space) = 0;
8853
9.75k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8854
8.79k
                *(ctxt->space) = 1;
8855
959
            else {
8856
959
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8857
959
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8858
959
                              internal_val, NULL);
8859
959
            }
8860
10.3k
        }
8861
155k
        if (internal_val) {
8862
10.3k
            xmlFree(internal_val);
8863
10.3k
        }
8864
155k
    }
8865
8866
5.72M
    *value = val;
8867
5.72M
    return (hname);
8868
8869
0
mem_error:
8870
0
    xmlErrMemory(ctxt);
8871
383k
error:
8872
383k
    if ((val != NULL) && (*alloc != 0))
8873
0
        xmlFree(val);
8874
383k
    return(hname);
8875
0
}
8876
8877
/**
8878
 * xmlAttrHashInsert:
8879
 * @ctxt: parser context
8880
 * @size: size of the hash table
8881
 * @name: attribute name
8882
 * @uri: namespace uri
8883
 * @hashValue: combined hash value of name and uri
8884
 * @aindex: attribute index (this is a multiple of 5)
8885
 *
8886
 * Inserts a new attribute into the hash table.
8887
 *
8888
 * Returns INT_MAX if no existing attribute was found, the attribute
8889
 * index if an attribute was found, -1 if a memory allocation failed.
8890
 */
8891
static int
8892
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8893
3.88M
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8894
3.88M
    xmlAttrHashBucket *table = ctxt->attrHash;
8895
3.88M
    xmlAttrHashBucket *bucket;
8896
3.88M
    unsigned hindex;
8897
8898
3.88M
    hindex = hashValue & (size - 1);
8899
3.88M
    bucket = &table[hindex];
8900
8901
4.31M
    while (bucket->index >= 0) {
8902
536k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8903
8904
536k
        if (name == atts[0]) {
8905
110k
            int nsIndex = XML_PTR_TO_INT(atts[2]);
8906
8907
110k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8908
110k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8909
8.13k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8910
110k
                return(bucket->index);
8911
110k
        }
8912
8913
426k
        hindex++;
8914
426k
        bucket++;
8915
426k
        if (hindex >= size) {
8916
115k
            hindex = 0;
8917
115k
            bucket = table;
8918
115k
        }
8919
426k
    }
8920
8921
3.77M
    bucket->index = aindex;
8922
8923
3.77M
    return(INT_MAX);
8924
3.88M
}
8925
8926
static int
8927
xmlAttrHashInsertQName(xmlParserCtxtPtr ctxt, unsigned size,
8928
                       const xmlChar *name, const xmlChar *prefix,
8929
9.01k
                       unsigned hashValue, int aindex) {
8930
9.01k
    xmlAttrHashBucket *table = ctxt->attrHash;
8931
9.01k
    xmlAttrHashBucket *bucket;
8932
9.01k
    unsigned hindex;
8933
8934
9.01k
    hindex = hashValue & (size - 1);
8935
9.01k
    bucket = &table[hindex];
8936
8937
14.3k
    while (bucket->index >= 0) {
8938
9.32k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8939
8940
9.32k
        if ((name == atts[0]) && (prefix == atts[1]))
8941
3.95k
            return(bucket->index);
8942
8943
5.36k
        hindex++;
8944
5.36k
        bucket++;
8945
5.36k
        if (hindex >= size) {
8946
14
            hindex = 0;
8947
14
            bucket = table;
8948
14
        }
8949
5.36k
    }
8950
8951
5.05k
    bucket->index = aindex;
8952
8953
5.05k
    return(INT_MAX);
8954
9.01k
}
8955
/**
8956
 * xmlParseStartTag2:
8957
 * @ctxt:  an XML parser context
8958
 *
8959
 * Parse a start tag. Always consumes '<'.
8960
 *
8961
 * This routine is called when running SAX2 parsing
8962
 *
8963
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8964
 *
8965
 * [ WFC: Unique Att Spec ]
8966
 * No attribute name may appear more than once in the same start-tag or
8967
 * empty-element tag.
8968
 *
8969
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8970
 *
8971
 * [ WFC: Unique Att Spec ]
8972
 * No attribute name may appear more than once in the same start-tag or
8973
 * empty-element tag.
8974
 *
8975
 * With namespace:
8976
 *
8977
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8978
 *
8979
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8980
 *
8981
 * Returns the element name parsed
8982
 */
8983
8984
static const xmlChar *
8985
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8986
10.3M
                  const xmlChar **URI, int *nbNsPtr) {
8987
10.3M
    xmlHashedString hlocalname;
8988
10.3M
    xmlHashedString hprefix;
8989
10.3M
    xmlHashedString hattname;
8990
10.3M
    xmlHashedString haprefix;
8991
10.3M
    const xmlChar *localname;
8992
10.3M
    const xmlChar *prefix;
8993
10.3M
    const xmlChar *attname;
8994
10.3M
    const xmlChar *aprefix;
8995
10.3M
    const xmlChar *uri;
8996
10.3M
    xmlChar *attvalue = NULL;
8997
10.3M
    const xmlChar **atts = ctxt->atts;
8998
10.3M
    unsigned attrHashSize = 0;
8999
10.3M
    int maxatts = ctxt->maxatts;
9000
10.3M
    int nratts, nbatts, nbdef;
9001
10.3M
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
9002
10.3M
    int alloc = 0;
9003
10.3M
    int numNsErr = 0;
9004
10.3M
    int numDupErr = 0;
9005
9006
10.3M
    if (RAW != '<') return(NULL);
9007
10.3M
    NEXT1;
9008
9009
10.3M
    nbatts = 0;
9010
10.3M
    nratts = 0;
9011
10.3M
    nbdef = 0;
9012
10.3M
    nbNs = 0;
9013
10.3M
    nbTotalDef = 0;
9014
10.3M
    attval = 0;
9015
9016
10.3M
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
9017
0
        xmlErrMemory(ctxt);
9018
0
        return(NULL);
9019
0
    }
9020
9021
10.3M
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
9022
10.3M
    if (hlocalname.name == NULL) {
9023
33.9k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9024
33.9k
           "StartTag: invalid element name\n");
9025
33.9k
        return(NULL);
9026
33.9k
    }
9027
10.3M
    localname = hlocalname.name;
9028
10.3M
    prefix = hprefix.name;
9029
9030
    /*
9031
     * Now parse the attributes, it ends up with the ending
9032
     *
9033
     * (S Attribute)* S?
9034
     */
9035
10.3M
    SKIP_BLANKS;
9036
10.3M
    GROW;
9037
9038
    /*
9039
     * The ctxt->atts array will be ultimately passed to the SAX callback
9040
     * containing five xmlChar pointers for each attribute:
9041
     *
9042
     * [0] attribute name
9043
     * [1] attribute prefix
9044
     * [2] namespace URI
9045
     * [3] attribute value
9046
     * [4] end of attribute value
9047
     *
9048
     * To save memory, we reuse this array temporarily and store integers
9049
     * in these pointer variables.
9050
     *
9051
     * [0] attribute name
9052
     * [1] attribute prefix
9053
     * [2] hash value of attribute prefix, and later namespace index
9054
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
9055
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
9056
     *
9057
     * The ctxt->attallocs array contains an additional unsigned int for
9058
     * each attribute, containing the hash value of the attribute name
9059
     * and the alloc flag in bit 31.
9060
     */
9061
9062
13.2M
    while (((RAW != '>') &&
9063
9.99M
     ((RAW != '/') || (NXT(1) != '>')) &&
9064
6.89M
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
9065
6.82M
  int len = -1;
9066
9067
6.82M
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
9068
6.82M
                                          &haprefix, &attvalue, &len,
9069
6.82M
                                          &alloc);
9070
6.82M
        if (hattname.name == NULL)
9071
718k
      break;
9072
6.10M
        if (attvalue == NULL)
9073
383k
            goto next_attr;
9074
5.72M
        attname = hattname.name;
9075
5.72M
        aprefix = haprefix.name;
9076
5.72M
  if (len < 0) len = xmlStrlen(attvalue);
9077
9078
5.72M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9079
209k
            xmlHashedString huri;
9080
209k
            xmlURIPtr parsedUri;
9081
9082
209k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9083
209k
            uri = huri.name;
9084
209k
            if (uri == NULL) {
9085
0
                xmlErrMemory(ctxt);
9086
0
                goto next_attr;
9087
0
            }
9088
209k
            if (*uri != 0) {
9089
207k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9090
0
                    xmlErrMemory(ctxt);
9091
0
                    goto next_attr;
9092
0
                }
9093
207k
                if (parsedUri == NULL) {
9094
138k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9095
138k
                             "xmlns: '%s' is not a valid URI\n",
9096
138k
                                       uri, NULL, NULL);
9097
138k
                } else {
9098
68.1k
                    if (parsedUri->scheme == NULL) {
9099
12.8k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9100
12.8k
                                  "xmlns: URI %s is not absolute\n",
9101
12.8k
                                  uri, NULL, NULL);
9102
12.8k
                    }
9103
68.1k
                    xmlFreeURI(parsedUri);
9104
68.1k
                }
9105
207k
                if (uri == ctxt->str_xml_ns) {
9106
80
                    if (attname != ctxt->str_xml) {
9107
80
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9108
80
                     "xml namespace URI cannot be the default namespace\n",
9109
80
                                 NULL, NULL, NULL);
9110
80
                    }
9111
80
                    goto next_attr;
9112
80
                }
9113
206k
                if ((len == 29) &&
9114
2.36k
                    (xmlStrEqual(uri,
9115
2.36k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9116
942
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9117
942
                         "reuse of the xmlns namespace name is forbidden\n",
9118
942
                             NULL, NULL, NULL);
9119
942
                    goto next_attr;
9120
942
                }
9121
206k
            }
9122
9123
208k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9124
200k
                nbNs++;
9125
5.51M
        } else if (aprefix == ctxt->str_xmlns) {
9126
102k
            xmlHashedString huri;
9127
102k
            xmlURIPtr parsedUri;
9128
9129
102k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9130
102k
            uri = huri.name;
9131
102k
            if (uri == NULL) {
9132
0
                xmlErrMemory(ctxt);
9133
0
                goto next_attr;
9134
0
            }
9135
9136
102k
            if (attname == ctxt->str_xml) {
9137
723
                if (uri != ctxt->str_xml_ns) {
9138
442
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9139
442
                             "xml namespace prefix mapped to wrong URI\n",
9140
442
                             NULL, NULL, NULL);
9141
442
                }
9142
                /*
9143
                 * Do not keep a namespace definition node
9144
                 */
9145
723
                goto next_attr;
9146
723
            }
9147
101k
            if (uri == ctxt->str_xml_ns) {
9148
56
                if (attname != ctxt->str_xml) {
9149
56
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9150
56
                             "xml namespace URI mapped to wrong prefix\n",
9151
56
                             NULL, NULL, NULL);
9152
56
                }
9153
56
                goto next_attr;
9154
56
            }
9155
101k
            if (attname == ctxt->str_xmlns) {
9156
426
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9157
426
                         "redefinition of the xmlns prefix is forbidden\n",
9158
426
                         NULL, NULL, NULL);
9159
426
                goto next_attr;
9160
426
            }
9161
100k
            if ((len == 29) &&
9162
1.56k
                (xmlStrEqual(uri,
9163
1.56k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9164
367
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9165
367
                         "reuse of the xmlns namespace name is forbidden\n",
9166
367
                         NULL, NULL, NULL);
9167
367
                goto next_attr;
9168
367
            }
9169
100k
            if ((uri == NULL) || (uri[0] == 0)) {
9170
704
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9171
704
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9172
704
                              attname, NULL, NULL);
9173
704
                goto next_attr;
9174
99.9k
            } else {
9175
99.9k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9176
0
                    xmlErrMemory(ctxt);
9177
0
                    goto next_attr;
9178
0
                }
9179
99.9k
                if (parsedUri == NULL) {
9180
41.8k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9181
41.8k
                         "xmlns:%s: '%s' is not a valid URI\n",
9182
41.8k
                                       attname, uri, NULL);
9183
58.0k
                } else {
9184
58.0k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9185
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9186
0
                                  "xmlns:%s: URI %s is not absolute\n",
9187
0
                                  attname, uri, NULL);
9188
0
                    }
9189
58.0k
                    xmlFreeURI(parsedUri);
9190
58.0k
                }
9191
99.9k
            }
9192
9193
99.9k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9194
97.9k
                nbNs++;
9195
5.41M
        } else {
9196
            /*
9197
             * Populate attributes array, see above for repurposing
9198
             * of xmlChar pointers.
9199
             */
9200
5.41M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9201
173k
                int res = xmlCtxtGrowAttrs(ctxt);
9202
9203
173k
                maxatts = ctxt->maxatts;
9204
173k
                atts = ctxt->atts;
9205
9206
173k
                if (res < 0)
9207
0
                    goto next_attr;
9208
173k
            }
9209
5.41M
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9210
5.41M
                                        ((unsigned) alloc << 31);
9211
5.41M
            atts[nbatts++] = attname;
9212
5.41M
            atts[nbatts++] = aprefix;
9213
5.41M
            atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9214
5.41M
            if (alloc) {
9215
242k
                atts[nbatts++] = attvalue;
9216
242k
                attvalue += len;
9217
242k
                atts[nbatts++] = attvalue;
9218
5.16M
            } else {
9219
                /*
9220
                 * attvalue points into the input buffer which can be
9221
                 * reallocated. Store differences to input->base instead.
9222
                 * The pointers will be reconstructed later.
9223
                 */
9224
5.16M
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9225
5.16M
                attvalue += len;
9226
5.16M
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9227
5.16M
            }
9228
            /*
9229
             * tag if some deallocation is needed
9230
             */
9231
5.41M
            if (alloc != 0) attval = 1;
9232
5.41M
            attvalue = NULL; /* moved into atts */
9233
5.41M
        }
9234
9235
6.10M
next_attr:
9236
6.10M
        if ((attvalue != NULL) && (alloc != 0)) {
9237
75.3k
            xmlFree(attvalue);
9238
75.3k
            attvalue = NULL;
9239
75.3k
        }
9240
9241
6.10M
  GROW
9242
6.10M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9243
2.52M
      break;
9244
3.57M
  if (SKIP_BLANKS == 0) {
9245
662k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9246
662k
         "attributes construct error\n");
9247
662k
      break;
9248
662k
  }
9249
2.91M
        GROW;
9250
2.91M
    }
9251
9252
    /*
9253
     * Namespaces from default attributes
9254
     */
9255
10.3M
    if (ctxt->attsDefault != NULL) {
9256
308k
        xmlDefAttrsPtr defaults;
9257
9258
308k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9259
308k
  if (defaults != NULL) {
9260
236k
      for (i = 0; i < defaults->nbAttrs; i++) {
9261
142k
                xmlDefAttr *attr = &defaults->attrs[i];
9262
9263
142k
          attname = attr->name.name;
9264
142k
    aprefix = attr->prefix.name;
9265
9266
142k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9267
5.86k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9268
9269
5.86k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9270
5.72k
                        nbNs++;
9271
136k
    } else if (aprefix == ctxt->str_xmlns) {
9272
18.0k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9273
9274
18.0k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9275
18.0k
                                      NULL, 1) > 0)
9276
17.6k
                        nbNs++;
9277
118k
    } else {
9278
118k
                    if (nratts + nbTotalDef >= XML_MAX_ATTRS) {
9279
0
                        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
9280
0
                                    "Maximum number of attributes exceeded");
9281
0
                        break;
9282
0
                    }
9283
118k
                    nbTotalDef += 1;
9284
118k
                }
9285
142k
      }
9286
93.8k
  }
9287
308k
    }
9288
9289
    /*
9290
     * Resolve attribute namespaces
9291
     */
9292
15.7M
    for (i = 0; i < nbatts; i += 5) {
9293
5.41M
        attname = atts[i];
9294
5.41M
        aprefix = atts[i+1];
9295
9296
        /*
9297
  * The default namespace does not apply to attribute names.
9298
  */
9299
5.41M
  if (aprefix == NULL) {
9300
5.03M
            nsIndex = NS_INDEX_EMPTY;
9301
5.03M
        } else if (aprefix == ctxt->str_xml) {
9302
155k
            nsIndex = NS_INDEX_XML;
9303
220k
        } else {
9304
220k
            haprefix.name = aprefix;
9305
220k
            haprefix.hashValue = (size_t) atts[i+2];
9306
220k
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9307
9308
220k
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9309
71.1k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9310
71.1k
        "Namespace prefix %s for %s on %s is not defined\n",
9311
71.1k
        aprefix, attname, localname);
9312
71.1k
                nsIndex = NS_INDEX_EMPTY;
9313
71.1k
            }
9314
220k
        }
9315
9316
5.41M
        atts[i+2] = XML_INT_TO_PTR(nsIndex);
9317
5.41M
    }
9318
9319
    /*
9320
     * Maximum number of attributes including default attributes.
9321
     */
9322
10.3M
    maxAtts = nratts + nbTotalDef;
9323
9324
    /*
9325
     * Verify that attribute names are unique.
9326
     */
9327
10.3M
    if (maxAtts > 1) {
9328
1.49M
        attrHashSize = 4;
9329
2.11M
        while (attrHashSize / 2 < (unsigned) maxAtts)
9330
620k
            attrHashSize *= 2;
9331
9332
1.49M
        if (attrHashSize > ctxt->attrHashMax) {
9333
44.3k
            xmlAttrHashBucket *tmp;
9334
9335
44.3k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9336
44.3k
            if (tmp == NULL) {
9337
0
                xmlErrMemory(ctxt);
9338
0
                goto done;
9339
0
            }
9340
9341
44.3k
            ctxt->attrHash = tmp;
9342
44.3k
            ctxt->attrHashMax = attrHashSize;
9343
44.3k
        }
9344
9345
1.49M
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9346
9347
5.35M
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9348
3.85M
            const xmlChar *nsuri;
9349
3.85M
            unsigned hashValue, nameHashValue, uriHashValue;
9350
3.85M
            int res;
9351
9352
3.85M
            attname = atts[i];
9353
3.85M
            aprefix = atts[i+1];
9354
3.85M
            nsIndex = XML_PTR_TO_INT(atts[i+2]);
9355
            /* Hash values always have bit 31 set, see dict.c */
9356
3.85M
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9357
9358
3.85M
            if (nsIndex == NS_INDEX_EMPTY) {
9359
                /*
9360
                 * Prefix with empty namespace means an undeclared
9361
                 * prefix which was already reported above.
9362
                 */
9363
3.69M
                if (aprefix != NULL)
9364
50.6k
                    continue;
9365
3.64M
                nsuri = NULL;
9366
3.64M
                uriHashValue = URI_HASH_EMPTY;
9367
3.64M
            } else if (nsIndex == NS_INDEX_XML) {
9368
42.6k
                nsuri = ctxt->str_xml_ns;
9369
42.6k
                uriHashValue = URI_HASH_XML;
9370
117k
            } else {
9371
117k
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9372
117k
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9373
117k
            }
9374
9375
3.80M
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9376
3.80M
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9377
3.80M
                                    hashValue, i);
9378
3.80M
            if (res < 0)
9379
0
                continue;
9380
9381
            /*
9382
             * [ WFC: Unique Att Spec ]
9383
             * No attribute name may appear more than once in the same
9384
             * start-tag or empty-element tag.
9385
             * As extended by the Namespace in XML REC.
9386
             */
9387
3.80M
            if (res < INT_MAX) {
9388
96.6k
                if (aprefix == atts[res+1]) {
9389
90.9k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9390
90.9k
                    numDupErr += 1;
9391
90.9k
                } else {
9392
5.68k
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9393
5.68k
                             "Namespaced Attribute %s in '%s' redefined\n",
9394
5.68k
                             attname, nsuri, NULL);
9395
5.68k
                    numNsErr += 1;
9396
5.68k
                }
9397
96.6k
            }
9398
3.80M
        }
9399
1.49M
    }
9400
9401
    /*
9402
     * Default attributes
9403
     */
9404
10.3M
    if (ctxt->attsDefault != NULL) {
9405
308k
        xmlDefAttrsPtr defaults;
9406
9407
308k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9408
308k
  if (defaults != NULL) {
9409
236k
      for (i = 0; i < defaults->nbAttrs; i++) {
9410
142k
                xmlDefAttr *attr = &defaults->attrs[i];
9411
142k
                const xmlChar *nsuri = NULL;
9412
142k
                unsigned hashValue, uriHashValue = 0;
9413
142k
                int res;
9414
9415
142k
          attname = attr->name.name;
9416
142k
    aprefix = attr->prefix.name;
9417
9418
142k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9419
5.86k
                    continue;
9420
136k
    if (aprefix == ctxt->str_xmlns)
9421
18.0k
                    continue;
9422
9423
118k
                if (aprefix == NULL) {
9424
100k
                    nsIndex = NS_INDEX_EMPTY;
9425
100k
                    nsuri = NULL;
9426
100k
                    uriHashValue = URI_HASH_EMPTY;
9427
100k
                } else if (aprefix == ctxt->str_xml) {
9428
2.28k
                    nsIndex = NS_INDEX_XML;
9429
2.28k
                    nsuri = ctxt->str_xml_ns;
9430
2.28k
                    uriHashValue = URI_HASH_XML;
9431
15.2k
                } else {
9432
15.2k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9433
15.2k
                    if ((nsIndex == INT_MAX) ||
9434
14.5k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9435
14.5k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9436
14.5k
                                 "Namespace prefix %s for %s on %s is not "
9437
14.5k
                                 "defined\n",
9438
14.5k
                                 aprefix, attname, localname);
9439
14.5k
                        nsIndex = NS_INDEX_EMPTY;
9440
14.5k
                        nsuri = NULL;
9441
14.5k
                        uriHashValue = URI_HASH_EMPTY;
9442
14.5k
                    } else {
9443
718
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9444
718
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9445
718
                    }
9446
15.2k
                }
9447
9448
                /*
9449
                 * Check whether the attribute exists
9450
                 */
9451
118k
                if (maxAtts > 1) {
9452
81.2k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9453
81.2k
                                                   uriHashValue);
9454
81.2k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9455
81.2k
                                            hashValue, nbatts);
9456
81.2k
                    if (res < 0)
9457
0
                        continue;
9458
81.2k
                    if (res < INT_MAX) {
9459
13.4k
                        if (aprefix == atts[res+1])
9460
8.89k
                            continue;
9461
4.55k
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9462
4.55k
                                 "Namespaced Attribute %s in '%s' redefined\n",
9463
4.55k
                                 attname, nsuri, NULL);
9464
4.55k
                    }
9465
81.2k
                }
9466
9467
109k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9468
9469
109k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9470
3.87k
                    res = xmlCtxtGrowAttrs(ctxt);
9471
9472
3.87k
                    maxatts = ctxt->maxatts;
9473
3.87k
                    atts = ctxt->atts;
9474
9475
3.87k
                    if (res < 0) {
9476
0
                        localname = NULL;
9477
0
                        goto done;
9478
0
                    }
9479
3.87k
                }
9480
9481
109k
                atts[nbatts++] = attname;
9482
109k
                atts[nbatts++] = aprefix;
9483
109k
                atts[nbatts++] = XML_INT_TO_PTR(nsIndex);
9484
109k
                atts[nbatts++] = attr->value.name;
9485
109k
                atts[nbatts++] = attr->valueEnd;
9486
109k
                if ((ctxt->standalone == 1) && (attr->external != 0)) {
9487
0
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9488
0
                            "standalone: attribute %s on %s defaulted "
9489
0
                            "from external subset\n",
9490
0
                            attname, localname);
9491
0
                }
9492
109k
                nbdef++;
9493
109k
      }
9494
93.8k
  }
9495
308k
    }
9496
9497
    /*
9498
     * Using a single hash table for nsUri/localName pairs cannot
9499
     * detect duplicate QNames reliably. The following example will
9500
     * only result in two namespace errors.
9501
     *
9502
     * <doc xmlns:a="a" xmlns:b="a">
9503
     *   <elem a:a="" b:a="" b:a=""/>
9504
     * </doc>
9505
     *
9506
     * If we saw more than one namespace error but no duplicate QNames
9507
     * were found, we have to scan for duplicate QNames.
9508
     */
9509
10.3M
    if ((numDupErr == 0) && (numNsErr > 1)) {
9510
1.06k
        memset(ctxt->attrHash, -1,
9511
1.06k
               attrHashSize * sizeof(ctxt->attrHash[0]));
9512
9513
11.2k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9514
10.1k
            unsigned hashValue, nameHashValue, prefixHashValue;
9515
10.1k
            int res;
9516
9517
10.1k
            aprefix = atts[i+1];
9518
10.1k
            if (aprefix == NULL)
9519
1.13k
                continue;
9520
9521
9.01k
            attname = atts[i];
9522
            /* Hash values always have bit 31 set, see dict.c */
9523
9.01k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9524
9.01k
            prefixHashValue = xmlDictComputeHash(ctxt->dict, aprefix);
9525
9526
9.01k
            hashValue = xmlDictCombineHash(nameHashValue, prefixHashValue);
9527
9.01k
            res = xmlAttrHashInsertQName(ctxt, attrHashSize, attname,
9528
9.01k
                                         aprefix, hashValue, i);
9529
9.01k
            if (res < INT_MAX)
9530
3.95k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9531
9.01k
        }
9532
1.06k
    }
9533
9534
    /*
9535
     * Reconstruct attribute pointers
9536
     */
9537
15.8M
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9538
        /* namespace URI */
9539
5.52M
        nsIndex = XML_PTR_TO_INT(atts[i+2]);
9540
5.52M
        if (nsIndex == INT_MAX)
9541
5.21M
            atts[i+2] = NULL;
9542
307k
        else if (nsIndex == INT_MAX - 1)
9543
157k
            atts[i+2] = ctxt->str_xml_ns;
9544
150k
        else
9545
150k
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9546
9547
5.52M
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9548
5.16M
            atts[i+3] = BASE_PTR + XML_PTR_TO_INT(atts[i+3]);  /* value */
9549
5.16M
            atts[i+4] = BASE_PTR + XML_PTR_TO_INT(atts[i+4]);  /* valuend */
9550
5.16M
        }
9551
5.52M
    }
9552
9553
10.3M
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9554
10.3M
    if ((prefix != NULL) && (uri == NULL)) {
9555
104k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9556
104k
           "Namespace prefix %s on %s is not defined\n",
9557
104k
     prefix, localname, NULL);
9558
104k
    }
9559
10.3M
    *pref = prefix;
9560
10.3M
    *URI = uri;
9561
9562
    /*
9563
     * SAX callback
9564
     */
9565
10.3M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9566
10.3M
  (!ctxt->disableSAX)) {
9567
10.3M
  if (nbNs > 0)
9568
269k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9569
269k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9570
269k
        nbatts / 5, nbdef, atts);
9571
10.0M
  else
9572
10.0M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9573
10.0M
                          0, NULL, nbatts / 5, nbdef, atts);
9574
10.3M
    }
9575
9576
10.3M
done:
9577
    /*
9578
     * Free allocated attribute values
9579
     */
9580
10.3M
    if (attval != 0) {
9581
603k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9582
365k
      if (ctxt->attallocs[j] & 0x80000000)
9583
242k
          xmlFree((xmlChar *) atts[i+3]);
9584
237k
    }
9585
9586
10.3M
    *nbNsPtr = nbNs;
9587
10.3M
    return(localname);
9588
10.3M
}
9589
9590
/**
9591
 * xmlParseEndTag2:
9592
 * @ctxt:  an XML parser context
9593
 * @line:  line of the start tag
9594
 * @nsNr:  number of namespaces on the start tag
9595
 *
9596
 * Parse an end tag. Always consumes '</'.
9597
 *
9598
 * [42] ETag ::= '</' Name S? '>'
9599
 *
9600
 * With namespace
9601
 *
9602
 * [NS 9] ETag ::= '</' QName S? '>'
9603
 */
9604
9605
static void
9606
3.43M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9607
3.43M
    const xmlChar *name;
9608
9609
3.43M
    GROW;
9610
3.43M
    if ((RAW != '<') || (NXT(1) != '/')) {
9611
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9612
0
  return;
9613
0
    }
9614
3.43M
    SKIP(2);
9615
9616
3.43M
    if (tag->prefix == NULL)
9617
3.32M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9618
107k
    else
9619
107k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9620
9621
    /*
9622
     * We should definitely be at the ending "S? '>'" part
9623
     */
9624
3.43M
    GROW;
9625
3.43M
    SKIP_BLANKS;
9626
3.43M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9627
259k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9628
259k
    } else
9629
3.17M
  NEXT1;
9630
9631
    /*
9632
     * [ WFC: Element Type Match ]
9633
     * The Name in an element's end-tag must match the element type in the
9634
     * start-tag.
9635
     *
9636
     */
9637
3.43M
    if (name != (xmlChar*)1) {
9638
500k
        if (name == NULL) name = BAD_CAST "unparsable";
9639
500k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9640
500k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9641
500k
                    ctxt->name, tag->line, name);
9642
500k
    }
9643
9644
    /*
9645
     * SAX: End of Tag
9646
     */
9647
3.43M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9648
3.43M
  (!ctxt->disableSAX))
9649
3.43M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9650
3.43M
                                tag->URI);
9651
9652
3.43M
    spacePop(ctxt);
9653
3.43M
    if (tag->nsNr != 0)
9654
41.0k
  xmlParserNsPop(ctxt, tag->nsNr);
9655
3.43M
}
9656
9657
/**
9658
 * xmlParseCDSect:
9659
 * @ctxt:  an XML parser context
9660
 *
9661
 * DEPRECATED: Internal function, don't use.
9662
 *
9663
 * Parse escaped pure raw content. Always consumes '<!['.
9664
 *
9665
 * [18] CDSect ::= CDStart CData CDEnd
9666
 *
9667
 * [19] CDStart ::= '<![CDATA['
9668
 *
9669
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9670
 *
9671
 * [21] CDEnd ::= ']]>'
9672
 */
9673
void
9674
864k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9675
864k
    xmlChar *buf = NULL;
9676
864k
    int len = 0;
9677
864k
    int size = XML_PARSER_BUFFER_SIZE;
9678
864k
    int r, rl;
9679
864k
    int s, sl;
9680
864k
    int cur, l;
9681
864k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9682
0
                    XML_MAX_HUGE_LENGTH :
9683
864k
                    XML_MAX_TEXT_LENGTH;
9684
9685
864k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9686
0
        return;
9687
864k
    SKIP(3);
9688
9689
864k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9690
0
        return;
9691
864k
    SKIP(6);
9692
9693
864k
    r = xmlCurrentCharRecover(ctxt, &rl);
9694
864k
    if (!IS_CHAR(r)) {
9695
580
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9696
580
        goto out;
9697
580
    }
9698
863k
    NEXTL(rl);
9699
863k
    s = xmlCurrentCharRecover(ctxt, &sl);
9700
863k
    if (!IS_CHAR(s)) {
9701
249
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9702
249
        goto out;
9703
249
    }
9704
863k
    NEXTL(sl);
9705
863k
    cur = xmlCurrentCharRecover(ctxt, &l);
9706
863k
    buf = xmlMalloc(size);
9707
863k
    if (buf == NULL) {
9708
0
  xmlErrMemory(ctxt);
9709
0
        goto out;
9710
0
    }
9711
49.7M
    while (IS_CHAR(cur) &&
9712
49.7M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9713
48.9M
  if (len + 5 >= size) {
9714
324k
      xmlChar *tmp;
9715
324k
            int newSize;
9716
9717
324k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9718
324k
            if (newSize < 0) {
9719
0
                xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9720
0
                               "CData section too big found\n");
9721
0
                goto out;
9722
0
            }
9723
324k
      tmp = xmlRealloc(buf, newSize);
9724
324k
      if (tmp == NULL) {
9725
0
    xmlErrMemory(ctxt);
9726
0
                goto out;
9727
0
      }
9728
324k
      buf = tmp;
9729
324k
      size = newSize;
9730
324k
  }
9731
48.9M
  COPY_BUF(buf, len, r);
9732
48.9M
  r = s;
9733
48.9M
  rl = sl;
9734
48.9M
  s = cur;
9735
48.9M
  sl = l;
9736
48.9M
  NEXTL(l);
9737
48.9M
  cur = xmlCurrentCharRecover(ctxt, &l);
9738
48.9M
    }
9739
863k
    buf[len] = 0;
9740
863k
    if (cur != '>') {
9741
4.92k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9742
4.92k
                       "CData section not finished\n%.50s\n", buf);
9743
4.92k
        goto out;
9744
4.92k
    }
9745
858k
    NEXTL(l);
9746
9747
    /*
9748
     * OK the buffer is to be consumed as cdata.
9749
     */
9750
858k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9751
858k
        if ((ctxt->sax->cdataBlock != NULL) &&
9752
858k
            ((ctxt->options & XML_PARSE_NOCDATA) == 0)) {
9753
858k
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9754
858k
        } else if (ctxt->sax->characters != NULL) {
9755
0
            ctxt->sax->characters(ctxt->userData, buf, len);
9756
0
        }
9757
858k
    }
9758
9759
864k
out:
9760
864k
    xmlFree(buf);
9761
864k
}
9762
9763
/**
9764
 * xmlParseContentInternal:
9765
 * @ctxt:  an XML parser context
9766
 *
9767
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9768
 * unexpected EOF to the caller.
9769
 */
9770
9771
static void
9772
1.97k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9773
1.97k
    int oldNameNr = ctxt->nameNr;
9774
1.97k
    int oldSpaceNr = ctxt->spaceNr;
9775
1.97k
    int oldNodeNr = ctxt->nodeNr;
9776
9777
1.97k
    GROW;
9778
557k
    while ((ctxt->input->cur < ctxt->input->end) &&
9779
555k
     (PARSER_STOPPED(ctxt) == 0)) {
9780
555k
  const xmlChar *cur = ctxt->input->cur;
9781
9782
  /*
9783
   * First case : a Processing Instruction.
9784
   */
9785
555k
  if ((*cur == '<') && (cur[1] == '?')) {
9786
14.1k
      xmlParsePI(ctxt);
9787
14.1k
  }
9788
9789
  /*
9790
   * Second case : a CDSection
9791
   */
9792
  /* 2.6.0 test was *cur not RAW */
9793
541k
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9794
101k
      xmlParseCDSect(ctxt);
9795
101k
  }
9796
9797
  /*
9798
   * Third case :  a comment
9799
   */
9800
440k
  else if ((*cur == '<') && (NXT(1) == '!') &&
9801
5.74k
     (NXT(2) == '-') && (NXT(3) == '-')) {
9802
635
      xmlParseComment(ctxt);
9803
635
  }
9804
9805
  /*
9806
   * Fourth case :  a sub-element.
9807
   */
9808
439k
  else if (*cur == '<') {
9809
179k
            if (NXT(1) == '/') {
9810
26.8k
                if (ctxt->nameNr <= oldNameNr)
9811
111
                    break;
9812
26.7k
          xmlParseElementEnd(ctxt);
9813
152k
            } else {
9814
152k
          xmlParseElementStart(ctxt);
9815
152k
            }
9816
179k
  }
9817
9818
  /*
9819
   * Fifth case : a reference. If if has not been resolved,
9820
   *    parsing returns it's Name, create the node
9821
   */
9822
9823
259k
  else if (*cur == '&') {
9824
6.94k
      xmlParseReference(ctxt);
9825
6.94k
  }
9826
9827
  /*
9828
   * Last case, text. Note that References are handled directly.
9829
   */
9830
252k
  else {
9831
252k
      xmlParseCharDataInternal(ctxt, 0);
9832
252k
  }
9833
9834
555k
  SHRINK;
9835
555k
  GROW;
9836
555k
    }
9837
9838
1.97k
    if ((ctxt->nameNr > oldNameNr) &&
9839
751
        (ctxt->input->cur >= ctxt->input->end) &&
9840
706
        (ctxt->wellFormed)) {
9841
27
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9842
27
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9843
27
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9844
27
                "Premature end of data in tag %s line %d\n",
9845
27
                name, line, NULL);
9846
27
    }
9847
9848
    /*
9849
     * Clean up in error case
9850
     */
9851
9852
27.8k
    while (ctxt->nodeNr > oldNodeNr)
9853
25.8k
        nodePop(ctxt);
9854
9855
27.8k
    while (ctxt->nameNr > oldNameNr) {
9856
25.8k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9857
9858
25.8k
        if (tag->nsNr != 0)
9859
10.9k
            xmlParserNsPop(ctxt, tag->nsNr);
9860
9861
25.8k
        namePop(ctxt);
9862
25.8k
    }
9863
9864
27.8k
    while (ctxt->spaceNr > oldSpaceNr)
9865
25.8k
        spacePop(ctxt);
9866
1.97k
}
9867
9868
/**
9869
 * xmlParseContent:
9870
 * @ctxt:  an XML parser context
9871
 *
9872
 * Parse XML element content. This is useful if you're only interested
9873
 * in custom SAX callbacks. If you want a node list, use
9874
 * xmlCtxtParseContent.
9875
 */
9876
void
9877
0
xmlParseContent(xmlParserCtxtPtr ctxt) {
9878
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9879
0
        return;
9880
9881
0
    xmlCtxtInitializeLate(ctxt);
9882
9883
0
    xmlParseContentInternal(ctxt);
9884
9885
0
    xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
9886
0
}
9887
9888
/**
9889
 * xmlParseElement:
9890
 * @ctxt:  an XML parser context
9891
 *
9892
 * DEPRECATED: Internal function, don't use.
9893
 *
9894
 * parse an XML element
9895
 *
9896
 * [39] element ::= EmptyElemTag | STag content ETag
9897
 *
9898
 * [ WFC: Element Type Match ]
9899
 * The Name in an element's end-tag must match the element type in the
9900
 * start-tag.
9901
 *
9902
 */
9903
9904
void
9905
0
xmlParseElement(xmlParserCtxtPtr ctxt) {
9906
0
    if (xmlParseElementStart(ctxt) != 0)
9907
0
        return;
9908
9909
0
    xmlParseContentInternal(ctxt);
9910
9911
0
    if (ctxt->input->cur >= ctxt->input->end) {
9912
0
        if (ctxt->wellFormed) {
9913
0
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9914
0
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9915
0
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9916
0
                    "Premature end of data in tag %s line %d\n",
9917
0
                    name, line, NULL);
9918
0
        }
9919
0
        return;
9920
0
    }
9921
9922
0
    xmlParseElementEnd(ctxt);
9923
0
}
9924
9925
/**
9926
 * xmlParseElementStart:
9927
 * @ctxt:  an XML parser context
9928
 *
9929
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9930
 * opening tag was parsed, 1 if an empty element was parsed.
9931
 *
9932
 * Always consumes '<'.
9933
 */
9934
static int
9935
152k
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9936
152k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9937
152k
    const xmlChar *name;
9938
152k
    const xmlChar *prefix = NULL;
9939
152k
    const xmlChar *URI = NULL;
9940
152k
    xmlParserNodeInfo node_info;
9941
152k
    int line;
9942
152k
    xmlNodePtr cur;
9943
152k
    int nbNs = 0;
9944
9945
152k
    if (ctxt->nameNr > maxDepth) {
9946
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9947
0
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9948
0
                ctxt->nameNr);
9949
0
  xmlHaltParser(ctxt);
9950
0
  return(-1);
9951
0
    }
9952
9953
    /* Capture start position */
9954
152k
    if (ctxt->record_info) {
9955
0
        node_info.begin_pos = ctxt->input->consumed +
9956
0
                          (CUR_PTR - ctxt->input->base);
9957
0
  node_info.begin_line = ctxt->input->line;
9958
0
    }
9959
9960
152k
    if (ctxt->spaceNr == 0)
9961
0
  spacePush(ctxt, -1);
9962
152k
    else if (*ctxt->space == -2)
9963
115k
  spacePush(ctxt, -1);
9964
37.6k
    else
9965
37.6k
  spacePush(ctxt, *ctxt->space);
9966
9967
152k
    line = ctxt->input->line;
9968
152k
#ifdef LIBXML_SAX1_ENABLED
9969
152k
    if (ctxt->sax2)
9970
152k
#endif /* LIBXML_SAX1_ENABLED */
9971
152k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9972
0
#ifdef LIBXML_SAX1_ENABLED
9973
0
    else
9974
0
  name = xmlParseStartTag(ctxt);
9975
152k
#endif /* LIBXML_SAX1_ENABLED */
9976
152k
    if (name == NULL) {
9977
14.7k
  spacePop(ctxt);
9978
14.7k
        return(-1);
9979
14.7k
    }
9980
137k
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9981
137k
    cur = ctxt->node;
9982
9983
137k
#ifdef LIBXML_VALID_ENABLED
9984
    /*
9985
     * [ VC: Root Element Type ]
9986
     * The Name in the document type declaration must match the element
9987
     * type of the root element.
9988
     */
9989
137k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9990
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9991
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9992
137k
#endif /* LIBXML_VALID_ENABLED */
9993
9994
    /*
9995
     * Check for an Empty Element.
9996
     */
9997
137k
    if ((RAW == '/') && (NXT(1) == '>')) {
9998
62.4k
        SKIP(2);
9999
62.4k
  if (ctxt->sax2) {
10000
62.4k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10001
62.4k
    (!ctxt->disableSAX))
10002
62.4k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10003
62.4k
#ifdef LIBXML_SAX1_ENABLED
10004
62.4k
  } else {
10005
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10006
0
    (!ctxt->disableSAX))
10007
0
    ctxt->sax->endElement(ctxt->userData, name);
10008
0
#endif /* LIBXML_SAX1_ENABLED */
10009
0
  }
10010
62.4k
  namePop(ctxt);
10011
62.4k
  spacePop(ctxt);
10012
62.4k
  if (nbNs > 0)
10013
2.22k
      xmlParserNsPop(ctxt, nbNs);
10014
62.4k
  if (cur != NULL && ctxt->record_info) {
10015
0
            node_info.node = cur;
10016
0
            node_info.end_pos = ctxt->input->consumed +
10017
0
                                (CUR_PTR - ctxt->input->base);
10018
0
            node_info.end_line = ctxt->input->line;
10019
0
            xmlParserAddNodeInfo(ctxt, &node_info);
10020
0
  }
10021
62.4k
  return(1);
10022
62.4k
    }
10023
75.5k
    if (RAW == '>') {
10024
52.6k
        NEXT1;
10025
52.6k
        if (cur != NULL && ctxt->record_info) {
10026
0
            node_info.node = cur;
10027
0
            node_info.end_pos = 0;
10028
0
            node_info.end_line = 0;
10029
0
            xmlParserAddNodeInfo(ctxt, &node_info);
10030
0
        }
10031
52.6k
    } else {
10032
22.8k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10033
22.8k
         "Couldn't find end of Start Tag %s line %d\n",
10034
22.8k
                    name, line, NULL);
10035
10036
  /*
10037
   * end of parsing of this node.
10038
   */
10039
22.8k
  nodePop(ctxt);
10040
22.8k
  namePop(ctxt);
10041
22.8k
  spacePop(ctxt);
10042
22.8k
  if (nbNs > 0)
10043
7.81k
      xmlParserNsPop(ctxt, nbNs);
10044
22.8k
  return(-1);
10045
22.8k
    }
10046
10047
52.6k
    return(0);
10048
75.5k
}
10049
10050
/**
10051
 * xmlParseElementEnd:
10052
 * @ctxt:  an XML parser context
10053
 *
10054
 * Parse the end of an XML element. Always consumes '</'.
10055
 */
10056
static void
10057
26.7k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10058
26.7k
    xmlNodePtr cur = ctxt->node;
10059
10060
26.7k
    if (ctxt->nameNr <= 0) {
10061
0
        if ((RAW == '<') && (NXT(1) == '/'))
10062
0
            SKIP(2);
10063
0
        return;
10064
0
    }
10065
10066
    /*
10067
     * parse the end of tag: '</' should be here.
10068
     */
10069
26.7k
    if (ctxt->sax2) {
10070
26.7k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10071
26.7k
  namePop(ctxt);
10072
26.7k
    }
10073
0
#ifdef LIBXML_SAX1_ENABLED
10074
0
    else
10075
0
  xmlParseEndTag1(ctxt, 0);
10076
26.7k
#endif /* LIBXML_SAX1_ENABLED */
10077
10078
    /*
10079
     * Capture end position
10080
     */
10081
26.7k
    if (cur != NULL && ctxt->record_info) {
10082
0
        xmlParserNodeInfoPtr node_info;
10083
10084
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10085
0
        if (node_info != NULL) {
10086
0
            node_info->end_pos = ctxt->input->consumed +
10087
0
                                 (CUR_PTR - ctxt->input->base);
10088
0
            node_info->end_line = ctxt->input->line;
10089
0
        }
10090
0
    }
10091
26.7k
}
10092
10093
/**
10094
 * xmlParseVersionNum:
10095
 * @ctxt:  an XML parser context
10096
 *
10097
 * DEPRECATED: Internal function, don't use.
10098
 *
10099
 * parse the XML version value.
10100
 *
10101
 * [26] VersionNum ::= '1.' [0-9]+
10102
 *
10103
 * In practice allow [0-9].[0-9]+ at that level
10104
 *
10105
 * Returns the string giving the XML version number, or NULL
10106
 */
10107
xmlChar *
10108
54.5k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10109
54.5k
    xmlChar *buf = NULL;
10110
54.5k
    int len = 0;
10111
54.5k
    int size = 10;
10112
54.5k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10113
0
                    XML_MAX_TEXT_LENGTH :
10114
54.5k
                    XML_MAX_NAME_LENGTH;
10115
54.5k
    xmlChar cur;
10116
10117
54.5k
    buf = xmlMalloc(size);
10118
54.5k
    if (buf == NULL) {
10119
0
  xmlErrMemory(ctxt);
10120
0
  return(NULL);
10121
0
    }
10122
54.5k
    cur = CUR;
10123
54.5k
    if (!((cur >= '0') && (cur <= '9'))) {
10124
318
  xmlFree(buf);
10125
318
  return(NULL);
10126
318
    }
10127
54.2k
    buf[len++] = cur;
10128
54.2k
    NEXT;
10129
54.2k
    cur=CUR;
10130
54.2k
    if (cur != '.') {
10131
165
  xmlFree(buf);
10132
165
  return(NULL);
10133
165
    }
10134
54.0k
    buf[len++] = cur;
10135
54.0k
    NEXT;
10136
54.0k
    cur=CUR;
10137
999k
    while ((cur >= '0') && (cur <= '9')) {
10138
945k
  if (len + 1 >= size) {
10139
2.38k
      xmlChar *tmp;
10140
2.38k
            int newSize;
10141
10142
2.38k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10143
2.38k
            if (newSize < 0) {
10144
10
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "VersionNum");
10145
10
                xmlFree(buf);
10146
10
                return(NULL);
10147
10
            }
10148
2.37k
      tmp = xmlRealloc(buf, newSize);
10149
2.37k
      if (tmp == NULL) {
10150
0
    xmlErrMemory(ctxt);
10151
0
          xmlFree(buf);
10152
0
    return(NULL);
10153
0
      }
10154
2.37k
      buf = tmp;
10155
2.37k
            size = newSize;
10156
2.37k
  }
10157
945k
  buf[len++] = cur;
10158
945k
  NEXT;
10159
945k
  cur=CUR;
10160
945k
    }
10161
54.0k
    buf[len] = 0;
10162
54.0k
    return(buf);
10163
54.0k
}
10164
10165
/**
10166
 * xmlParseVersionInfo:
10167
 * @ctxt:  an XML parser context
10168
 *
10169
 * DEPRECATED: Internal function, don't use.
10170
 *
10171
 * parse the XML version.
10172
 *
10173
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10174
 *
10175
 * [25] Eq ::= S? '=' S?
10176
 *
10177
 * Returns the version string, e.g. "1.0"
10178
 */
10179
10180
xmlChar *
10181
74.6k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10182
74.6k
    xmlChar *version = NULL;
10183
10184
74.6k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10185
55.5k
  SKIP(7);
10186
55.5k
  SKIP_BLANKS;
10187
55.5k
  if (RAW != '=') {
10188
645
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10189
645
      return(NULL);
10190
645
        }
10191
54.8k
  NEXT;
10192
54.8k
  SKIP_BLANKS;
10193
54.8k
  if (RAW == '"') {
10194
39.8k
      NEXT;
10195
39.8k
      version = xmlParseVersionNum(ctxt);
10196
39.8k
      if (RAW != '"') {
10197
637
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10198
637
      } else
10199
39.2k
          NEXT;
10200
39.8k
  } else if (RAW == '\''){
10201
14.7k
      NEXT;
10202
14.7k
      version = xmlParseVersionNum(ctxt);
10203
14.7k
      if (RAW != '\'') {
10204
127
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10205
127
      } else
10206
14.5k
          NEXT;
10207
14.7k
  } else {
10208
291
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10209
291
  }
10210
54.8k
    }
10211
74.0k
    return(version);
10212
74.6k
}
10213
10214
/**
10215
 * xmlParseEncName:
10216
 * @ctxt:  an XML parser context
10217
 *
10218
 * DEPRECATED: Internal function, don't use.
10219
 *
10220
 * parse the XML encoding name
10221
 *
10222
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10223
 *
10224
 * Returns the encoding name value or NULL
10225
 */
10226
xmlChar *
10227
54.8k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10228
54.8k
    xmlChar *buf = NULL;
10229
54.8k
    int len = 0;
10230
54.8k
    int size = 10;
10231
54.8k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10232
0
                    XML_MAX_TEXT_LENGTH :
10233
54.8k
                    XML_MAX_NAME_LENGTH;
10234
54.8k
    xmlChar cur;
10235
10236
54.8k
    cur = CUR;
10237
54.8k
    if (((cur >= 'a') && (cur <= 'z')) ||
10238
54.6k
        ((cur >= 'A') && (cur <= 'Z'))) {
10239
54.6k
  buf = xmlMalloc(size);
10240
54.6k
  if (buf == NULL) {
10241
0
      xmlErrMemory(ctxt);
10242
0
      return(NULL);
10243
0
  }
10244
10245
54.6k
  buf[len++] = cur;
10246
54.6k
  NEXT;
10247
54.6k
  cur = CUR;
10248
1.31M
  while (((cur >= 'a') && (cur <= 'z')) ||
10249
1.07M
         ((cur >= 'A') && (cur <= 'Z')) ||
10250
540k
         ((cur >= '0') && (cur <= '9')) ||
10251
101k
         (cur == '.') || (cur == '_') ||
10252
1.26M
         (cur == '-')) {
10253
1.26M
      if (len + 1 >= size) {
10254
4.55k
          xmlChar *tmp;
10255
4.55k
                int newSize;
10256
10257
4.55k
                newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10258
4.55k
                if (newSize < 0) {
10259
15
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10260
15
                    xmlFree(buf);
10261
15
                    return(NULL);
10262
15
                }
10263
4.53k
    tmp = xmlRealloc(buf, newSize);
10264
4.53k
    if (tmp == NULL) {
10265
0
        xmlErrMemory(ctxt);
10266
0
        xmlFree(buf);
10267
0
        return(NULL);
10268
0
    }
10269
4.53k
    buf = tmp;
10270
4.53k
                size = newSize;
10271
4.53k
      }
10272
1.26M
      buf[len++] = cur;
10273
1.26M
      NEXT;
10274
1.26M
      cur = CUR;
10275
1.26M
        }
10276
54.6k
  buf[len] = 0;
10277
54.6k
    } else {
10278
196
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10279
196
    }
10280
54.8k
    return(buf);
10281
54.8k
}
10282
10283
/**
10284
 * xmlParseEncodingDecl:
10285
 * @ctxt:  an XML parser context
10286
 *
10287
 * DEPRECATED: Internal function, don't use.
10288
 *
10289
 * parse the XML encoding declaration
10290
 *
10291
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10292
 *
10293
 * this setups the conversion filters.
10294
 *
10295
 * Returns the encoding value or NULL
10296
 */
10297
10298
const xmlChar *
10299
71.9k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10300
71.9k
    xmlChar *encoding = NULL;
10301
10302
71.9k
    SKIP_BLANKS;
10303
71.9k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10304
16.7k
        return(NULL);
10305
10306
55.1k
    SKIP(8);
10307
55.1k
    SKIP_BLANKS;
10308
55.1k
    if (RAW != '=') {
10309
86
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10310
86
        return(NULL);
10311
86
    }
10312
55.0k
    NEXT;
10313
55.0k
    SKIP_BLANKS;
10314
55.0k
    if (RAW == '"') {
10315
40.0k
        NEXT;
10316
40.0k
        encoding = xmlParseEncName(ctxt);
10317
40.0k
        if (RAW != '"') {
10318
550
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10319
550
            xmlFree((xmlChar *) encoding);
10320
550
            return(NULL);
10321
550
        } else
10322
39.5k
            NEXT;
10323
40.0k
    } else if (RAW == '\''){
10324
14.7k
        NEXT;
10325
14.7k
        encoding = xmlParseEncName(ctxt);
10326
14.7k
        if (RAW != '\'') {
10327
301
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10328
301
            xmlFree((xmlChar *) encoding);
10329
301
            return(NULL);
10330
301
        } else
10331
14.4k
            NEXT;
10332
14.7k
    } else {
10333
235
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10334
235
    }
10335
10336
54.2k
    if (encoding == NULL)
10337
246
        return(NULL);
10338
10339
53.9k
    xmlSetDeclaredEncoding(ctxt, encoding);
10340
10341
53.9k
    return(ctxt->encoding);
10342
54.2k
}
10343
10344
/**
10345
 * xmlParseSDDecl:
10346
 * @ctxt:  an XML parser context
10347
 *
10348
 * DEPRECATED: Internal function, don't use.
10349
 *
10350
 * parse the XML standalone declaration
10351
 *
10352
 * [32] SDDecl ::= S 'standalone' Eq
10353
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10354
 *
10355
 * [ VC: Standalone Document Declaration ]
10356
 * TODO The standalone document declaration must have the value "no"
10357
 * if any external markup declarations contain declarations of:
10358
 *  - attributes with default values, if elements to which these
10359
 *    attributes apply appear in the document without specifications
10360
 *    of values for these attributes, or
10361
 *  - entities (other than amp, lt, gt, apos, quot), if references
10362
 *    to those entities appear in the document, or
10363
 *  - attributes with values subject to normalization, where the
10364
 *    attribute appears in the document with a value which will change
10365
 *    as a result of normalization, or
10366
 *  - element types with element content, if white space occurs directly
10367
 *    within any instance of those types.
10368
 *
10369
 * Returns:
10370
 *   1 if standalone="yes"
10371
 *   0 if standalone="no"
10372
 *  -2 if standalone attribute is missing or invalid
10373
 *    (A standalone value of -2 means that the XML declaration was found,
10374
 *     but no value was specified for the standalone attribute).
10375
 */
10376
10377
int
10378
53.8k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10379
53.8k
    int standalone = -2;
10380
10381
53.8k
    SKIP_BLANKS;
10382
53.8k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10383
11.7k
  SKIP(10);
10384
11.7k
        SKIP_BLANKS;
10385
11.7k
  if (RAW != '=') {
10386
44
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10387
44
      return(standalone);
10388
44
        }
10389
11.7k
  NEXT;
10390
11.7k
  SKIP_BLANKS;
10391
11.7k
        if (RAW == '\''){
10392
301
      NEXT;
10393
301
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10394
177
          standalone = 0;
10395
177
                SKIP(2);
10396
177
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10397
85
                 (NXT(2) == 's')) {
10398
77
          standalone = 1;
10399
77
    SKIP(3);
10400
77
            } else {
10401
47
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10402
47
      }
10403
301
      if (RAW != '\'') {
10404
128
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10405
128
      } else
10406
173
          NEXT;
10407
11.3k
  } else if (RAW == '"'){
10408
11.3k
      NEXT;
10409
11.3k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10410
25
          standalone = 0;
10411
25
    SKIP(2);
10412
11.3k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10413
11.2k
                 (NXT(2) == 's')) {
10414
11.2k
          standalone = 1;
10415
11.2k
                SKIP(3);
10416
11.2k
            } else {
10417
110
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10418
110
      }
10419
11.3k
      if (RAW != '"') {
10420
168
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10421
168
      } else
10422
11.2k
          NEXT;
10423
11.3k
  } else {
10424
17
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10425
17
        }
10426
11.7k
    }
10427
53.8k
    return(standalone);
10428
53.8k
}
10429
10430
/**
10431
 * xmlParseXMLDecl:
10432
 * @ctxt:  an XML parser context
10433
 *
10434
 * DEPRECATED: Internal function, don't use.
10435
 *
10436
 * parse an XML declaration header
10437
 *
10438
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10439
 */
10440
10441
void
10442
74.6k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10443
74.6k
    xmlChar *version;
10444
10445
    /*
10446
     * This value for standalone indicates that the document has an
10447
     * XML declaration but it does not have a standalone attribute.
10448
     * It will be overwritten later if a standalone attribute is found.
10449
     */
10450
10451
74.6k
    ctxt->standalone = -2;
10452
10453
    /*
10454
     * We know that '<?xml' is here.
10455
     */
10456
74.6k
    SKIP(5);
10457
10458
74.6k
    if (!IS_BLANK_CH(RAW)) {
10459
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10460
0
                 "Blank needed after '<?xml'\n");
10461
0
    }
10462
74.6k
    SKIP_BLANKS;
10463
10464
    /*
10465
     * We must have the VersionInfo here.
10466
     */
10467
74.6k
    version = xmlParseVersionInfo(ctxt);
10468
74.6k
    if (version == NULL) {
10469
20.5k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10470
54.0k
    } else {
10471
54.0k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10472
      /*
10473
       * Changed here for XML-1.0 5th edition
10474
       */
10475
9.05k
      if (ctxt->options & XML_PARSE_OLD10) {
10476
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10477
0
                "Unsupported version '%s'\n",
10478
0
                version);
10479
9.05k
      } else {
10480
9.05k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10481
8.41k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10482
8.41k
                      "Unsupported version '%s'\n",
10483
8.41k
          version, NULL);
10484
8.41k
    } else {
10485
639
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10486
639
              "Unsupported version '%s'\n",
10487
639
              version);
10488
639
    }
10489
9.05k
      }
10490
9.05k
  }
10491
54.0k
  if (ctxt->version != NULL)
10492
0
      xmlFree((void *) ctxt->version);
10493
54.0k
  ctxt->version = version;
10494
54.0k
    }
10495
10496
    /*
10497
     * We may have the encoding declaration
10498
     */
10499
74.6k
    if (!IS_BLANK_CH(RAW)) {
10500
23.3k
        if ((RAW == '?') && (NXT(1) == '>')) {
10501
2.74k
      SKIP(2);
10502
2.74k
      return;
10503
2.74k
  }
10504
20.6k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10505
20.6k
    }
10506
71.9k
    xmlParseEncodingDecl(ctxt);
10507
10508
    /*
10509
     * We may have the standalone status.
10510
     */
10511
71.9k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10512
19.1k
        if ((RAW == '?') && (NXT(1) == '>')) {
10513
18.0k
      SKIP(2);
10514
18.0k
      return;
10515
18.0k
  }
10516
1.05k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10517
1.05k
    }
10518
10519
    /*
10520
     * We can grow the input buffer freely at that point
10521
     */
10522
53.8k
    GROW;
10523
10524
53.8k
    SKIP_BLANKS;
10525
53.8k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10526
10527
53.8k
    SKIP_BLANKS;
10528
53.8k
    if ((RAW == '?') && (NXT(1) == '>')) {
10529
31.0k
        SKIP(2);
10530
31.0k
    } else if (RAW == '>') {
10531
        /* Deprecated old WD ... */
10532
6.33k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10533
6.33k
  NEXT;
10534
16.4k
    } else {
10535
16.4k
        int c;
10536
10537
16.4k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10538
5.56M
        while ((PARSER_STOPPED(ctxt) == 0) &&
10539
5.56M
               ((c = CUR) != 0)) {
10540
5.56M
            NEXT;
10541
5.56M
            if (c == '>')
10542
12.7k
                break;
10543
5.56M
        }
10544
16.4k
    }
10545
53.8k
}
10546
10547
/**
10548
 * xmlCtxtGetVersion:
10549
 * @ctxt:  parser context
10550
 *
10551
 * Available since 2.14.0.
10552
 *
10553
 * Returns the version from the XML declaration.
10554
 */
10555
const xmlChar *
10556
0
xmlCtxtGetVersion(xmlParserCtxtPtr ctxt) {
10557
0
    if (ctxt == NULL)
10558
0
        return(NULL);
10559
10560
0
    return(ctxt->version);
10561
0
}
10562
10563
/**
10564
 * xmlCtxtGetStandalone:
10565
 * @ctxt:  parser context
10566
 *
10567
 * Available since 2.14.0.
10568
 *
10569
 * Returns the value from the standalone document declaration.
10570
 */
10571
int
10572
0
xmlCtxtGetStandalone(xmlParserCtxtPtr ctxt) {
10573
0
    if (ctxt == NULL)
10574
0
        return(0);
10575
10576
0
    return(ctxt->standalone);
10577
0
}
10578
10579
/**
10580
 * xmlParseMisc:
10581
 * @ctxt:  an XML parser context
10582
 *
10583
 * DEPRECATED: Internal function, don't use.
10584
 *
10585
 * parse an XML Misc* optional field.
10586
 *
10587
 * [27] Misc ::= Comment | PI |  S
10588
 */
10589
10590
void
10591
0
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10592
0
    while (PARSER_STOPPED(ctxt) == 0) {
10593
0
        SKIP_BLANKS;
10594
0
        GROW;
10595
0
        if ((RAW == '<') && (NXT(1) == '?')) {
10596
0
      xmlParsePI(ctxt);
10597
0
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10598
0
      xmlParseComment(ctxt);
10599
0
        } else {
10600
0
            break;
10601
0
        }
10602
0
    }
10603
0
}
10604
10605
static void
10606
154k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10607
154k
    xmlDocPtr doc;
10608
10609
    /*
10610
     * SAX: end of the document processing.
10611
     */
10612
154k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10613
154k
        ctxt->sax->endDocument(ctxt->userData);
10614
10615
154k
    doc = ctxt->myDoc;
10616
154k
    if (doc != NULL) {
10617
154k
        if (ctxt->wellFormed) {
10618
40.7k
            doc->properties |= XML_DOC_WELLFORMED;
10619
40.7k
            if (ctxt->valid)
10620
40.4k
                doc->properties |= XML_DOC_DTDVALID;
10621
40.7k
            if (ctxt->nsWellFormed)
10622
35.3k
                doc->properties |= XML_DOC_NSVALID;
10623
40.7k
        }
10624
10625
154k
        if (ctxt->options & XML_PARSE_OLD10)
10626
0
            doc->properties |= XML_DOC_OLD10;
10627
10628
        /*
10629
         * Remove locally kept entity definitions if the tree was not built
10630
         */
10631
154k
  if (xmlStrEqual(doc->version, SAX_COMPAT_MODE)) {
10632
0
            xmlFreeDoc(doc);
10633
0
            ctxt->myDoc = NULL;
10634
0
        }
10635
154k
    }
10636
154k
}
10637
10638
/**
10639
 * xmlParseDocument:
10640
 * @ctxt:  an XML parser context
10641
 *
10642
 * Parse an XML document and invoke the SAX handlers. This is useful
10643
 * if you're only interested in custom SAX callbacks. If you want a
10644
 * document tree, use xmlCtxtParseDocument.
10645
 *
10646
 * Returns 0, -1 in case of error.
10647
 */
10648
10649
int
10650
0
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10651
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10652
0
        return(-1);
10653
10654
0
    GROW;
10655
10656
    /*
10657
     * SAX: detecting the level.
10658
     */
10659
0
    xmlCtxtInitializeLate(ctxt);
10660
10661
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10662
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10663
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10664
0
    }
10665
10666
0
    xmlDetectEncoding(ctxt);
10667
10668
0
    if (CUR == 0) {
10669
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10670
0
  return(-1);
10671
0
    }
10672
10673
0
    GROW;
10674
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10675
10676
  /*
10677
   * Note that we will switch encoding on the fly.
10678
   */
10679
0
  xmlParseXMLDecl(ctxt);
10680
0
  SKIP_BLANKS;
10681
0
    } else {
10682
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10683
0
        if (ctxt->version == NULL) {
10684
0
            xmlErrMemory(ctxt);
10685
0
            return(-1);
10686
0
        }
10687
0
    }
10688
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10689
0
        ctxt->sax->startDocument(ctxt->userData);
10690
0
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10691
0
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10692
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10693
0
    }
10694
10695
    /*
10696
     * The Misc part of the Prolog
10697
     */
10698
0
    xmlParseMisc(ctxt);
10699
10700
    /*
10701
     * Then possibly doc type declaration(s) and more Misc
10702
     * (doctypedecl Misc*)?
10703
     */
10704
0
    GROW;
10705
0
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10706
10707
0
  ctxt->inSubset = 1;
10708
0
  xmlParseDocTypeDecl(ctxt);
10709
0
  if (RAW == '[') {
10710
0
      xmlParseInternalSubset(ctxt);
10711
0
  } else if (RAW == '>') {
10712
0
            NEXT;
10713
0
        }
10714
10715
  /*
10716
   * Create and update the external subset.
10717
   */
10718
0
  ctxt->inSubset = 2;
10719
0
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10720
0
      (!ctxt->disableSAX))
10721
0
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10722
0
                                ctxt->extSubSystem, ctxt->extSubURI);
10723
0
  ctxt->inSubset = 0;
10724
10725
0
        xmlCleanSpecialAttr(ctxt);
10726
10727
0
  xmlParseMisc(ctxt);
10728
0
    }
10729
10730
    /*
10731
     * Time to start parsing the tree itself
10732
     */
10733
0
    GROW;
10734
0
    if (RAW != '<') {
10735
0
        if (ctxt->wellFormed)
10736
0
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10737
0
                           "Start tag expected, '<' not found\n");
10738
0
    } else {
10739
0
  xmlParseElement(ctxt);
10740
10741
  /*
10742
   * The Misc part at the end
10743
   */
10744
0
  xmlParseMisc(ctxt);
10745
10746
0
        xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
10747
0
    }
10748
10749
0
    ctxt->instate = XML_PARSER_EOF;
10750
0
    xmlFinishDocument(ctxt);
10751
10752
0
    if (! ctxt->wellFormed) {
10753
0
  ctxt->valid = 0;
10754
0
  return(-1);
10755
0
    }
10756
10757
0
    return(0);
10758
0
}
10759
10760
/**
10761
 * xmlParseExtParsedEnt:
10762
 * @ctxt:  an XML parser context
10763
 *
10764
 * DEPRECATED: Internal function, don't use.
10765
 *
10766
 * parse a general parsed entity
10767
 * An external general parsed entity is well-formed if it matches the
10768
 * production labeled extParsedEnt.
10769
 *
10770
 * [78] extParsedEnt ::= TextDecl? content
10771
 *
10772
 * Returns 0, -1 in case of error. the parser context is augmented
10773
 *                as a result of the parsing.
10774
 */
10775
10776
int
10777
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10778
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10779
0
        return(-1);
10780
10781
0
    xmlCtxtInitializeLate(ctxt);
10782
10783
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10784
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10785
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10786
0
    }
10787
10788
0
    xmlDetectEncoding(ctxt);
10789
10790
0
    if (CUR == 0) {
10791
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10792
0
    }
10793
10794
    /*
10795
     * Check for the XMLDecl in the Prolog.
10796
     */
10797
0
    GROW;
10798
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10799
10800
  /*
10801
   * Note that we will switch encoding on the fly.
10802
   */
10803
0
  xmlParseXMLDecl(ctxt);
10804
0
  SKIP_BLANKS;
10805
0
    } else {
10806
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10807
0
    }
10808
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10809
0
        ctxt->sax->startDocument(ctxt->userData);
10810
10811
    /*
10812
     * Doing validity checking on chunk doesn't make sense
10813
     */
10814
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10815
0
    ctxt->validate = 0;
10816
0
    ctxt->depth = 0;
10817
10818
0
    xmlParseContentInternal(ctxt);
10819
10820
0
    if (ctxt->input->cur < ctxt->input->end)
10821
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10822
10823
    /*
10824
     * SAX: end of the document processing.
10825
     */
10826
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10827
0
        ctxt->sax->endDocument(ctxt->userData);
10828
10829
0
    if (! ctxt->wellFormed) return(-1);
10830
0
    return(0);
10831
0
}
10832
10833
#ifdef LIBXML_PUSH_ENABLED
10834
/************************************************************************
10835
 *                  *
10836
 *    Progressive parsing interfaces        *
10837
 *                  *
10838
 ************************************************************************/
10839
10840
/**
10841
 * xmlParseLookupChar:
10842
 * @ctxt:  an XML parser context
10843
 * @c:  character
10844
 *
10845
 * Check whether the input buffer contains a character.
10846
 */
10847
static int
10848
2.43M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10849
2.43M
    const xmlChar *cur;
10850
10851
2.43M
    if (ctxt->checkIndex == 0) {
10852
2.10M
        cur = ctxt->input->cur + 1;
10853
2.10M
    } else {
10854
334k
        cur = ctxt->input->cur + ctxt->checkIndex;
10855
334k
    }
10856
10857
2.43M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10858
353k
        size_t index = ctxt->input->end - ctxt->input->cur;
10859
10860
353k
        if (index > LONG_MAX) {
10861
0
            ctxt->checkIndex = 0;
10862
0
            return(1);
10863
0
        }
10864
353k
        ctxt->checkIndex = index;
10865
353k
        return(0);
10866
2.08M
    } else {
10867
2.08M
        ctxt->checkIndex = 0;
10868
2.08M
        return(1);
10869
2.08M
    }
10870
2.43M
}
10871
10872
/**
10873
 * xmlParseLookupString:
10874
 * @ctxt:  an XML parser context
10875
 * @startDelta: delta to apply at the start
10876
 * @str:  string
10877
 * @strLen:  length of string
10878
 *
10879
 * Check whether the input buffer contains a string.
10880
 */
10881
static const xmlChar *
10882
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10883
7.84M
                     const char *str, size_t strLen) {
10884
7.84M
    const xmlChar *cur, *term;
10885
10886
7.84M
    if (ctxt->checkIndex == 0) {
10887
1.08M
        cur = ctxt->input->cur + startDelta;
10888
6.76M
    } else {
10889
6.76M
        cur = ctxt->input->cur + ctxt->checkIndex;
10890
6.76M
    }
10891
10892
7.84M
    term = BAD_CAST strstr((const char *) cur, str);
10893
7.84M
    if (term == NULL) {
10894
6.82M
        const xmlChar *end = ctxt->input->end;
10895
6.82M
        size_t index;
10896
10897
        /* Rescan (strLen - 1) characters. */
10898
6.82M
        if ((size_t) (end - cur) < strLen)
10899
5.99k
            end = cur;
10900
6.81M
        else
10901
6.81M
            end -= strLen - 1;
10902
6.82M
        index = end - ctxt->input->cur;
10903
6.82M
        if (index > LONG_MAX) {
10904
0
            ctxt->checkIndex = 0;
10905
0
            return(ctxt->input->end - strLen);
10906
0
        }
10907
6.82M
        ctxt->checkIndex = index;
10908
6.82M
    } else {
10909
1.02M
        ctxt->checkIndex = 0;
10910
1.02M
    }
10911
10912
7.84M
    return(term);
10913
7.84M
}
10914
10915
/**
10916
 * xmlParseLookupCharData:
10917
 * @ctxt:  an XML parser context
10918
 *
10919
 * Check whether the input buffer contains terminated char data.
10920
 */
10921
static int
10922
3.12M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10923
3.12M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10924
3.12M
    const xmlChar *end = ctxt->input->end;
10925
3.12M
    size_t index;
10926
10927
53.9M
    while (cur < end) {
10928
53.7M
        if ((*cur == '<') || (*cur == '&')) {
10929
2.92M
            ctxt->checkIndex = 0;
10930
2.92M
            return(1);
10931
2.92M
        }
10932
50.8M
        cur++;
10933
50.8M
    }
10934
10935
200k
    index = cur - ctxt->input->cur;
10936
200k
    if (index > LONG_MAX) {
10937
0
        ctxt->checkIndex = 0;
10938
0
        return(1);
10939
0
    }
10940
200k
    ctxt->checkIndex = index;
10941
200k
    return(0);
10942
200k
}
10943
10944
/**
10945
 * xmlParseLookupGt:
10946
 * @ctxt:  an XML parser context
10947
 *
10948
 * Check whether there's enough data in the input buffer to finish parsing
10949
 * a start tag. This has to take quotes into account.
10950
 */
10951
static int
10952
7.86M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10953
7.86M
    const xmlChar *cur;
10954
7.86M
    const xmlChar *end = ctxt->input->end;
10955
7.86M
    int state = ctxt->endCheckState;
10956
7.86M
    size_t index;
10957
10958
7.86M
    if (ctxt->checkIndex == 0)
10959
6.75M
        cur = ctxt->input->cur + 1;
10960
1.10M
    else
10961
1.10M
        cur = ctxt->input->cur + ctxt->checkIndex;
10962
10963
703M
    while (cur < end) {
10964
702M
        if (state) {
10965
245M
            if (*cur == state)
10966
5.21M
                state = 0;
10967
457M
        } else if (*cur == '\'' || *cur == '"') {
10968
5.23M
            state = *cur;
10969
451M
        } else if (*cur == '>') {
10970
6.71M
            ctxt->checkIndex = 0;
10971
6.71M
            ctxt->endCheckState = 0;
10972
6.71M
            return(1);
10973
6.71M
        }
10974
695M
        cur++;
10975
695M
    }
10976
10977
1.14M
    index = cur - ctxt->input->cur;
10978
1.14M
    if (index > LONG_MAX) {
10979
0
        ctxt->checkIndex = 0;
10980
0
        ctxt->endCheckState = 0;
10981
0
        return(1);
10982
0
    }
10983
1.14M
    ctxt->checkIndex = index;
10984
1.14M
    ctxt->endCheckState = state;
10985
1.14M
    return(0);
10986
1.14M
}
10987
10988
/**
10989
 * xmlParseLookupInternalSubset:
10990
 * @ctxt:  an XML parser context
10991
 *
10992
 * Check whether there's enough data in the input buffer to finish parsing
10993
 * the internal subset.
10994
 */
10995
static int
10996
83.3k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10997
    /*
10998
     * Sorry, but progressive parsing of the internal subset is not
10999
     * supported. We first check that the full content of the internal
11000
     * subset is available and parsing is launched only at that point.
11001
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11002
     * not in a ']]>' sequence which are conditional sections.
11003
     */
11004
83.3k
    const xmlChar *cur, *start;
11005
83.3k
    const xmlChar *end = ctxt->input->end;
11006
83.3k
    int state = ctxt->endCheckState;
11007
83.3k
    size_t index;
11008
11009
83.3k
    if (ctxt->checkIndex == 0) {
11010
38.0k
        cur = ctxt->input->cur + 1;
11011
45.2k
    } else {
11012
45.2k
        cur = ctxt->input->cur + ctxt->checkIndex;
11013
45.2k
    }
11014
83.3k
    start = cur;
11015
11016
68.2M
    while (cur < end) {
11017
68.2M
        if (state == '-') {
11018
1.62M
            if ((*cur == '-') &&
11019
17.8k
                (cur[1] == '-') &&
11020
8.78k
                (cur[2] == '>')) {
11021
4.05k
                state = 0;
11022
4.05k
                cur += 3;
11023
4.05k
                start = cur;
11024
4.05k
                continue;
11025
4.05k
            }
11026
1.62M
        }
11027
66.5M
        else if (state == ']') {
11028
61.7k
            if (*cur == '>') {
11029
33.3k
                ctxt->checkIndex = 0;
11030
33.3k
                ctxt->endCheckState = 0;
11031
33.3k
                return(1);
11032
33.3k
            }
11033
28.3k
            if (IS_BLANK_CH(*cur)) {
11034
4.61k
                state = ' ';
11035
23.6k
            } else if (*cur != ']') {
11036
8.54k
                state = 0;
11037
8.54k
                start = cur;
11038
8.54k
                continue;
11039
8.54k
            }
11040
28.3k
        }
11041
66.5M
        else if (state == ' ') {
11042
12.8k
            if (*cur == '>') {
11043
728
                ctxt->checkIndex = 0;
11044
728
                ctxt->endCheckState = 0;
11045
728
                return(1);
11046
728
            }
11047
12.1k
            if (!IS_BLANK_CH(*cur)) {
11048
3.86k
                state = 0;
11049
3.86k
                start = cur;
11050
3.86k
                continue;
11051
3.86k
            }
11052
12.1k
        }
11053
66.5M
        else if (state != 0) {
11054
55.6M
            if (*cur == state) {
11055
267k
                state = 0;
11056
267k
                start = cur + 1;
11057
267k
            }
11058
55.6M
        }
11059
10.8M
        else if (*cur == '<') {
11060
230k
            if ((cur[1] == '!') &&
11061
119k
                (cur[2] == '-') &&
11062
4.50k
                (cur[3] == '-')) {
11063
4.28k
                state = '-';
11064
4.28k
                cur += 4;
11065
                /* Don't treat <!--> as comment */
11066
4.28k
                start = cur;
11067
4.28k
                continue;
11068
4.28k
            }
11069
230k
        }
11070
10.6M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11071
315k
            state = *cur;
11072
315k
        }
11073
11074
68.1M
        cur++;
11075
68.1M
    }
11076
11077
    /*
11078
     * Rescan the three last characters to detect "<!--" and "-->"
11079
     * split across chunks.
11080
     */
11081
49.2k
    if ((state == 0) || (state == '-')) {
11082
14.8k
        if (cur - start < 3)
11083
1.11k
            cur = start;
11084
13.7k
        else
11085
13.7k
            cur -= 3;
11086
14.8k
    }
11087
49.2k
    index = cur - ctxt->input->cur;
11088
49.2k
    if (index > LONG_MAX) {
11089
0
        ctxt->checkIndex = 0;
11090
0
        ctxt->endCheckState = 0;
11091
0
        return(1);
11092
0
    }
11093
49.2k
    ctxt->checkIndex = index;
11094
49.2k
    ctxt->endCheckState = state;
11095
49.2k
    return(0);
11096
49.2k
}
11097
11098
/**
11099
 * xmlParseTryOrFinish:
11100
 * @ctxt:  an XML parser context
11101
 * @terminate:  last chunk indicator
11102
 *
11103
 * Try to progress on parsing
11104
 *
11105
 * Returns zero if no parsing was possible
11106
 */
11107
static int
11108
9.00M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11109
9.00M
    int ret = 0;
11110
9.00M
    size_t avail;
11111
9.00M
    xmlChar cur, next;
11112
11113
9.00M
    if (ctxt->input == NULL)
11114
0
        return(0);
11115
11116
9.00M
    if ((ctxt->input != NULL) &&
11117
9.00M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11118
81.2k
        xmlParserShrink(ctxt);
11119
81.2k
    }
11120
11121
51.6M
    while (ctxt->disableSAX == 0) {
11122
51.6M
        avail = ctxt->input->end - ctxt->input->cur;
11123
51.6M
        if (avail < 1)
11124
199k
      goto done;
11125
51.4M
        switch (ctxt->instate) {
11126
18.9k
            case XML_PARSER_EOF:
11127
          /*
11128
     * Document parsing is done !
11129
     */
11130
18.9k
          goto done;
11131
218k
            case XML_PARSER_START:
11132
                /*
11133
                 * Very first chars read from the document flow.
11134
                 */
11135
218k
                if ((!terminate) && (avail < 4))
11136
487
                    goto done;
11137
11138
                /*
11139
                 * We need more bytes to detect EBCDIC code pages.
11140
                 * See xmlDetectEBCDIC.
11141
                 */
11142
217k
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
11143
420
                    (!terminate) && (avail < 200))
11144
183
                    goto done;
11145
11146
217k
                xmlDetectEncoding(ctxt);
11147
217k
                ctxt->instate = XML_PARSER_XML_DECL;
11148
217k
    break;
11149
11150
6.35M
            case XML_PARSER_XML_DECL:
11151
6.35M
    if ((!terminate) && (avail < 2))
11152
41
        goto done;
11153
6.35M
    cur = ctxt->input->cur[0];
11154
6.35M
    next = ctxt->input->cur[1];
11155
6.35M
          if ((cur == '<') && (next == '?')) {
11156
        /* PI or XML decl */
11157
6.22M
        if ((!terminate) &&
11158
6.19M
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11159
6.13M
      goto done;
11160
84.2k
        if ((ctxt->input->cur[2] == 'x') &&
11161
77.6k
      (ctxt->input->cur[3] == 'm') &&
11162
76.0k
      (ctxt->input->cur[4] == 'l') &&
11163
75.3k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11164
74.6k
      ret += 5;
11165
74.6k
      xmlParseXMLDecl(ctxt);
11166
74.6k
        } else {
11167
9.57k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11168
9.57k
                        if (ctxt->version == NULL) {
11169
0
                            xmlErrMemory(ctxt);
11170
0
                            break;
11171
0
                        }
11172
9.57k
        }
11173
132k
    } else {
11174
132k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11175
132k
        if (ctxt->version == NULL) {
11176
0
            xmlErrMemory(ctxt);
11177
0
      break;
11178
0
        }
11179
132k
    }
11180
217k
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
11181
217k
                    ctxt->sax->setDocumentLocator(ctxt->userData,
11182
217k
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
11183
217k
                }
11184
217k
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11185
217k
                    (!ctxt->disableSAX))
11186
216k
                    ctxt->sax->startDocument(ctxt->userData);
11187
217k
                ctxt->instate = XML_PARSER_MISC;
11188
217k
    break;
11189
11.2M
            case XML_PARSER_START_TAG: {
11190
11.2M
          const xmlChar *name;
11191
11.2M
    const xmlChar *prefix = NULL;
11192
11.2M
    const xmlChar *URI = NULL;
11193
11.2M
                int line = ctxt->input->line;
11194
11.2M
    int nbNs = 0;
11195
11196
11.2M
    if ((!terminate) && (avail < 2))
11197
531
        goto done;
11198
11.2M
    cur = ctxt->input->cur[0];
11199
11.2M
          if (cur != '<') {
11200
31.1k
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11201
31.1k
                                   "Start tag expected, '<' not found");
11202
31.1k
                    ctxt->instate = XML_PARSER_EOF;
11203
31.1k
                    xmlFinishDocument(ctxt);
11204
31.1k
        goto done;
11205
31.1k
    }
11206
11.2M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11207
1.02M
                    goto done;
11208
10.1M
    if (ctxt->spaceNr == 0)
11209
0
        spacePush(ctxt, -1);
11210
10.1M
    else if (*ctxt->space == -2)
11211
5.70M
        spacePush(ctxt, -1);
11212
4.48M
    else
11213
4.48M
        spacePush(ctxt, *ctxt->space);
11214
10.1M
#ifdef LIBXML_SAX1_ENABLED
11215
10.1M
    if (ctxt->sax2)
11216
10.1M
#endif /* LIBXML_SAX1_ENABLED */
11217
10.1M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
11218
0
#ifdef LIBXML_SAX1_ENABLED
11219
0
    else
11220
0
        name = xmlParseStartTag(ctxt);
11221
10.1M
#endif /* LIBXML_SAX1_ENABLED */
11222
10.1M
    if (name == NULL) {
11223
19.2k
        spacePop(ctxt);
11224
19.2k
                    ctxt->instate = XML_PARSER_EOF;
11225
19.2k
                    xmlFinishDocument(ctxt);
11226
19.2k
        goto done;
11227
19.2k
    }
11228
10.1M
#ifdef LIBXML_VALID_ENABLED
11229
    /*
11230
     * [ VC: Root Element Type ]
11231
     * The Name in the document type declaration must match
11232
     * the element type of the root element.
11233
     */
11234
10.1M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11235
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11236
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11237
10.1M
#endif /* LIBXML_VALID_ENABLED */
11238
11239
    /*
11240
     * Check for an Empty Element.
11241
     */
11242
10.1M
    if ((RAW == '/') && (NXT(1) == '>')) {
11243
4.30M
        SKIP(2);
11244
11245
4.30M
        if (ctxt->sax2) {
11246
4.30M
      if ((ctxt->sax != NULL) &&
11247
4.30M
          (ctxt->sax->endElementNs != NULL) &&
11248
4.30M
          (!ctxt->disableSAX))
11249
4.30M
          ctxt->sax->endElementNs(ctxt->userData, name,
11250
4.30M
                                  prefix, URI);
11251
4.30M
      if (nbNs > 0)
11252
6.98k
          xmlParserNsPop(ctxt, nbNs);
11253
4.30M
#ifdef LIBXML_SAX1_ENABLED
11254
4.30M
        } else {
11255
0
      if ((ctxt->sax != NULL) &&
11256
0
          (ctxt->sax->endElement != NULL) &&
11257
0
          (!ctxt->disableSAX))
11258
0
          ctxt->sax->endElement(ctxt->userData, name);
11259
0
#endif /* LIBXML_SAX1_ENABLED */
11260
0
        }
11261
4.30M
        spacePop(ctxt);
11262
5.86M
    } else if (RAW == '>') {
11263
4.44M
        NEXT;
11264
4.44M
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11265
4.44M
    } else {
11266
1.42M
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11267
1.42M
           "Couldn't find end of Start Tag %s\n",
11268
1.42M
           name);
11269
1.42M
        nodePop(ctxt);
11270
1.42M
        spacePop(ctxt);
11271
1.42M
                    if (nbNs > 0)
11272
145k
                        xmlParserNsPop(ctxt, nbNs);
11273
1.42M
    }
11274
11275
10.1M
                if (ctxt->nameNr == 0)
11276
29.5k
                    ctxt->instate = XML_PARSER_EPILOG;
11277
10.1M
                else
11278
10.1M
                    ctxt->instate = XML_PARSER_CONTENT;
11279
10.1M
                break;
11280
10.1M
      }
11281
29.3M
            case XML_PARSER_CONTENT: {
11282
29.3M
    cur = ctxt->input->cur[0];
11283
11284
29.3M
    if (cur == '<') {
11285
14.9M
                    if ((!terminate) && (avail < 2))
11286
17.7k
                        goto done;
11287
14.9M
        next = ctxt->input->cur[1];
11288
11289
14.9M
                    if (next == '/') {
11290
3.40M
                        ctxt->instate = XML_PARSER_END_TAG;
11291
3.40M
                        break;
11292
11.5M
                    } else if (next == '?') {
11293
527k
                        if ((!terminate) &&
11294
444k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11295
432k
                            goto done;
11296
95.1k
                        xmlParsePI(ctxt);
11297
95.1k
                        ctxt->instate = XML_PARSER_CONTENT;
11298
95.1k
                        break;
11299
11.0M
                    } else if (next == '!') {
11300
996k
                        if ((!terminate) && (avail < 3))
11301
1.53k
                            goto done;
11302
994k
                        next = ctxt->input->cur[2];
11303
11304
994k
                        if (next == '-') {
11305
103k
                            if ((!terminate) && (avail < 4))
11306
27
                                goto done;
11307
103k
                            if (ctxt->input->cur[3] == '-') {
11308
103k
                                if ((!terminate) &&
11309
64.5k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11310
52.7k
                                    goto done;
11311
50.7k
                                xmlParseComment(ctxt);
11312
50.7k
                                ctxt->instate = XML_PARSER_CONTENT;
11313
50.7k
                                break;
11314
103k
                            }
11315
891k
                        } else if (next == '[') {
11316
890k
                            if ((!terminate) && (avail < 9))
11317
9.69k
                                goto done;
11318
880k
                            if ((ctxt->input->cur[2] == '[') &&
11319
880k
                                (ctxt->input->cur[3] == 'C') &&
11320
880k
                                (ctxt->input->cur[4] == 'D') &&
11321
880k
                                (ctxt->input->cur[5] == 'A') &&
11322
880k
                                (ctxt->input->cur[6] == 'T') &&
11323
880k
                                (ctxt->input->cur[7] == 'A') &&
11324
880k
                                (ctxt->input->cur[8] == '[')) {
11325
880k
                                if ((!terminate) &&
11326
864k
                                    (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11327
116k
                                    goto done;
11328
763k
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11329
763k
                                xmlParseCDSect(ctxt);
11330
763k
                                ctxt->instate = XML_PARSER_CONTENT;
11331
763k
                                break;
11332
880k
                            }
11333
880k
                        }
11334
994k
                    }
11335
14.9M
    } else if (cur == '&') {
11336
1.17M
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11337
330k
      goto done;
11338
844k
        xmlParseReference(ctxt);
11339
844k
                    break;
11340
13.1M
    } else {
11341
        /* TODO Avoid the extra copy, handle directly !!! */
11342
        /*
11343
         * Goal of the following test is:
11344
         *  - minimize calls to the SAX 'character' callback
11345
         *    when they are mergeable
11346
         *  - handle an problem for isBlank when we only parse
11347
         *    a sequence of blank chars and the next one is
11348
         *    not available to check against '<' presence.
11349
         *  - tries to homogenize the differences in SAX
11350
         *    callbacks between the push and pull versions
11351
         *    of the parser.
11352
         */
11353
13.1M
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11354
3.39M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11355
200k
          goto done;
11356
3.39M
                    }
11357
12.9M
                    ctxt->checkIndex = 0;
11358
12.9M
        xmlParseCharDataInternal(ctxt, !terminate);
11359
12.9M
                    break;
11360
13.1M
    }
11361
11362
10.0M
                ctxt->instate = XML_PARSER_START_TAG;
11363
10.0M
    break;
11364
29.3M
      }
11365
3.43M
            case XML_PARSER_END_TAG:
11366
3.43M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11367
22.4k
        goto done;
11368
3.40M
    if (ctxt->sax2) {
11369
3.40M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11370
3.40M
        nameNsPop(ctxt);
11371
3.40M
    }
11372
0
#ifdef LIBXML_SAX1_ENABLED
11373
0
      else
11374
0
        xmlParseEndTag1(ctxt, 0);
11375
3.40M
#endif /* LIBXML_SAX1_ENABLED */
11376
3.40M
    if (ctxt->nameNr == 0) {
11377
39.3k
        ctxt->instate = XML_PARSER_EPILOG;
11378
3.36M
    } else {
11379
3.36M
        ctxt->instate = XML_PARSER_CONTENT;
11380
3.36M
    }
11381
3.40M
    break;
11382
696k
            case XML_PARSER_MISC:
11383
753k
            case XML_PARSER_PROLOG:
11384
803k
            case XML_PARSER_EPILOG:
11385
803k
    SKIP_BLANKS;
11386
803k
                avail = ctxt->input->end - ctxt->input->cur;
11387
803k
    if (avail < 1)
11388
123k
        goto done;
11389
679k
    if (ctxt->input->cur[0] == '<') {
11390
633k
                    if ((!terminate) && (avail < 2))
11391
530
                        goto done;
11392
633k
                    next = ctxt->input->cur[1];
11393
633k
                    if (next == '?') {
11394
242k
                        if ((!terminate) &&
11395
234k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11396
65.2k
                            goto done;
11397
177k
                        xmlParsePI(ctxt);
11398
177k
                        break;
11399
390k
                    } else if (next == '!') {
11400
224k
                        if ((!terminate) && (avail < 3))
11401
179
                            goto done;
11402
11403
223k
                        if (ctxt->input->cur[2] == '-') {
11404
45.7k
                            if ((!terminate) && (avail < 4))
11405
146
                                goto done;
11406
45.6k
                            if (ctxt->input->cur[3] == '-') {
11407
45.5k
                                if ((!terminate) &&
11408
40.1k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11409
15.0k
                                    goto done;
11410
30.4k
                                xmlParseComment(ctxt);
11411
30.4k
                                break;
11412
45.5k
                            }
11413
178k
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11414
177k
                            if ((!terminate) && (avail < 9))
11415
344
                                goto done;
11416
177k
                            if ((ctxt->input->cur[2] == 'D') &&
11417
177k
                                (ctxt->input->cur[3] == 'O') &&
11418
177k
                                (ctxt->input->cur[4] == 'C') &&
11419
176k
                                (ctxt->input->cur[5] == 'T') &&
11420
176k
                                (ctxt->input->cur[6] == 'Y') &&
11421
176k
                                (ctxt->input->cur[7] == 'P') &&
11422
176k
                                (ctxt->input->cur[8] == 'E')) {
11423
176k
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11424
122k
                                    goto done;
11425
54.3k
                                ctxt->inSubset = 1;
11426
54.3k
                                xmlParseDocTypeDecl(ctxt);
11427
54.3k
                                if (RAW == '[') {
11428
45.1k
                                    ctxt->instate = XML_PARSER_DTD;
11429
45.1k
                                } else {
11430
9.20k
                                    if (RAW == '>')
11431
4.06k
                                        NEXT;
11432
                                    /*
11433
                                     * Create and update the external subset.
11434
                                     */
11435
9.20k
                                    ctxt->inSubset = 2;
11436
9.20k
                                    if ((ctxt->sax != NULL) &&
11437
9.20k
                                        (!ctxt->disableSAX) &&
11438
9.20k
                                        (ctxt->sax->externalSubset != NULL))
11439
9.20k
                                        ctxt->sax->externalSubset(
11440
9.20k
                                                ctxt->userData,
11441
9.20k
                                                ctxt->intSubName,
11442
9.20k
                                                ctxt->extSubSystem,
11443
9.20k
                                                ctxt->extSubURI);
11444
9.20k
                                    ctxt->inSubset = 0;
11445
9.20k
                                    xmlCleanSpecialAttr(ctxt);
11446
9.20k
                                    ctxt->instate = XML_PARSER_PROLOG;
11447
9.20k
                                }
11448
54.3k
                                break;
11449
176k
                            }
11450
177k
                        }
11451
223k
                    }
11452
633k
                }
11453
11454
213k
                if (ctxt->instate == XML_PARSER_EPILOG) {
11455
18.6k
                    if (ctxt->errNo == XML_ERR_OK)
11456
100
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11457
18.6k
        ctxt->instate = XML_PARSER_EOF;
11458
18.6k
                    xmlFinishDocument(ctxt);
11459
194k
                } else {
11460
194k
        ctxt->instate = XML_PARSER_START_TAG;
11461
194k
    }
11462
213k
    break;
11463
91.2k
            case XML_PARSER_DTD: {
11464
91.2k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11465
49.2k
                    goto done;
11466
41.9k
    xmlParseInternalSubset(ctxt);
11467
41.9k
    ctxt->inSubset = 2;
11468
41.9k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11469
36.7k
        (ctxt->sax->externalSubset != NULL))
11470
36.7k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11471
36.7k
          ctxt->extSubSystem, ctxt->extSubURI);
11472
41.9k
    ctxt->inSubset = 0;
11473
41.9k
    xmlCleanSpecialAttr(ctxt);
11474
41.9k
    ctxt->instate = XML_PARSER_PROLOG;
11475
41.9k
                break;
11476
91.2k
      }
11477
0
            default:
11478
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11479
0
      "PP: internal error\n");
11480
0
    ctxt->instate = XML_PARSER_EOF;
11481
0
    break;
11482
51.4M
  }
11483
51.4M
    }
11484
9.00M
done:
11485
9.00M
    return(ret);
11486
9.00M
}
11487
11488
/**
11489
 * xmlParseChunk:
11490
 * @ctxt:  an XML parser context
11491
 * @chunk:  chunk of memory
11492
 * @size:  size of chunk in bytes
11493
 * @terminate:  last chunk indicator
11494
 *
11495
 * Parse a chunk of memory in push parser mode.
11496
 *
11497
 * Assumes that the parser context was initialized with
11498
 * xmlCreatePushParserCtxt.
11499
 *
11500
 * The last chunk, which will often be empty, must be marked with
11501
 * the @terminate flag. With the default SAX callbacks, the resulting
11502
 * document will be available in ctxt->myDoc. This pointer will not
11503
 * be freed when calling xmlFreeParserCtxt and must be freed by the
11504
 * caller. If the document isn't well-formed, it will still be returned
11505
 * in ctxt->myDoc.
11506
 *
11507
 * As an exception, xmlCtxtResetPush will free the document in
11508
 * ctxt->myDoc. So ctxt->myDoc should be set to NULL after extracting
11509
 * the document.
11510
 *
11511
 * Returns an xmlParserErrors code (0 on success).
11512
 */
11513
int
11514
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11515
9.00M
              int terminate) {
11516
9.00M
    size_t curBase;
11517
9.00M
    size_t maxLength;
11518
9.00M
    size_t pos;
11519
9.00M
    int end_in_lf = 0;
11520
9.00M
    int res;
11521
11522
9.00M
    if ((ctxt == NULL) || (size < 0))
11523
0
        return(XML_ERR_ARGUMENT);
11524
9.00M
    if ((chunk == NULL) && (size > 0))
11525
0
        return(XML_ERR_ARGUMENT);
11526
9.00M
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11527
0
        return(XML_ERR_ARGUMENT);
11528
9.00M
    if (ctxt->disableSAX != 0)
11529
0
        return(ctxt->errNo);
11530
11531
9.00M
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11532
9.00M
    if (ctxt->instate == XML_PARSER_START)
11533
218k
        xmlCtxtInitializeLate(ctxt);
11534
9.00M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11535
8.87M
        (chunk[size - 1] == '\r')) {
11536
18.9k
  end_in_lf = 1;
11537
18.9k
  size--;
11538
18.9k
    }
11539
11540
    /*
11541
     * Also push an empty chunk to make sure that the raw buffer
11542
     * will be flushed if there is an encoder.
11543
     */
11544
9.00M
    pos = ctxt->input->cur - ctxt->input->base;
11545
9.00M
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11546
9.00M
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11547
9.00M
    if (res < 0) {
11548
143
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11549
143
        xmlHaltParser(ctxt);
11550
143
        return(ctxt->errNo);
11551
143
    }
11552
11553
9.00M
    xmlParseTryOrFinish(ctxt, terminate);
11554
11555
9.00M
    curBase = ctxt->input->cur - ctxt->input->base;
11556
9.00M
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11557
0
                XML_MAX_HUGE_LENGTH :
11558
9.00M
                XML_MAX_LOOKUP_LIMIT;
11559
9.00M
    if (curBase > maxLength) {
11560
9
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11561
9
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11562
9
        xmlHaltParser(ctxt);
11563
9
    }
11564
11565
9.00M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11566
11.0k
        return(ctxt->errNo);
11567
11568
8.99M
    if (end_in_lf == 1) {
11569
18.8k
  pos = ctxt->input->cur - ctxt->input->base;
11570
18.8k
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11571
18.8k
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11572
18.8k
        if (res < 0) {
11573
43
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11574
43
            xmlHaltParser(ctxt);
11575
43
            return(ctxt->errNo);
11576
43
        }
11577
18.8k
    }
11578
8.99M
    if (terminate) {
11579
  /*
11580
   * Check for termination
11581
   */
11582
128k
        if ((ctxt->instate != XML_PARSER_EOF) &&
11583
85.3k
            (ctxt->instate != XML_PARSER_EPILOG)) {
11584
36.2k
            if (ctxt->nameNr > 0) {
11585
27.8k
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11586
27.8k
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11587
27.8k
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11588
27.8k
                        "Premature end of data in tag %s line %d\n",
11589
27.8k
                        name, line, NULL);
11590
27.8k
            } else if (ctxt->instate == XML_PARSER_START) {
11591
1
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11592
8.35k
            } else {
11593
8.35k
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11594
8.35k
                               "Start tag expected, '<' not found\n");
11595
8.35k
            }
11596
92.1k
        } else {
11597
92.1k
            xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
11598
92.1k
        }
11599
128k
  if (ctxt->instate != XML_PARSER_EOF) {
11600
85.3k
            ctxt->instate = XML_PARSER_EOF;
11601
85.3k
            xmlFinishDocument(ctxt);
11602
85.3k
  }
11603
128k
    }
11604
8.99M
    if (ctxt->wellFormed == 0)
11605
144k
  return((xmlParserErrors) ctxt->errNo);
11606
8.84M
    else
11607
8.84M
        return(0);
11608
8.99M
}
11609
11610
/************************************************************************
11611
 *                  *
11612
 *    I/O front end functions to the parser     *
11613
 *                  *
11614
 ************************************************************************/
11615
11616
/**
11617
 * xmlCreatePushParserCtxt:
11618
 * @sax:  a SAX handler (optional)
11619
 * @user_data:  user data for SAX callbacks (optional)
11620
 * @chunk:  initial chunk (optional, deprecated)
11621
 * @size:  size of initial chunk in bytes
11622
 * @filename:  file name or URI (optional)
11623
 *
11624
 * Create a parser context for using the XML parser in push mode.
11625
 * See xmlParseChunk.
11626
 *
11627
 * Passing an initial chunk is useless and deprecated.
11628
 *
11629
 * The push parser doesn't support recovery mode or the
11630
 * XML_PARSE_NOBLANKS option.
11631
 *
11632
 * @filename is used as base URI to fetch external entities and for
11633
 * error reports.
11634
 *
11635
 * Returns the new parser context or NULL if a memory allocation
11636
 * failed.
11637
 */
11638
11639
xmlParserCtxtPtr
11640
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11641
217k
                        const char *chunk, int size, const char *filename) {
11642
217k
    xmlParserCtxtPtr ctxt;
11643
217k
    xmlParserInputPtr input;
11644
11645
217k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11646
217k
    if (ctxt == NULL)
11647
0
  return(NULL);
11648
11649
217k
    ctxt->options &= ~XML_PARSE_NODICT;
11650
217k
    ctxt->dictNames = 1;
11651
11652
217k
    input = xmlNewPushInput(filename, chunk, size);
11653
217k
    if (input == NULL) {
11654
0
  xmlFreeParserCtxt(ctxt);
11655
0
  return(NULL);
11656
0
    }
11657
217k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11658
0
        xmlFreeInputStream(input);
11659
0
        xmlFreeParserCtxt(ctxt);
11660
0
        return(NULL);
11661
0
    }
11662
11663
217k
    return(ctxt);
11664
217k
}
11665
#endif /* LIBXML_PUSH_ENABLED */
11666
11667
/**
11668
 * xmlStopParser:
11669
 * @ctxt:  an XML parser context
11670
 *
11671
 * Blocks further parser processing
11672
 */
11673
void
11674
217k
xmlStopParser(xmlParserCtxtPtr ctxt) {
11675
217k
    if (ctxt == NULL)
11676
0
        return;
11677
217k
    xmlHaltParser(ctxt);
11678
217k
    if (ctxt->errNo != XML_ERR_NO_MEMORY)
11679
217k
        ctxt->errNo = XML_ERR_USER_STOP;
11680
217k
}
11681
11682
/**
11683
 * xmlCreateIOParserCtxt:
11684
 * @sax:  a SAX handler (optional)
11685
 * @user_data:  user data for SAX callbacks (optional)
11686
 * @ioread:  an I/O read function
11687
 * @ioclose:  an I/O close function (optional)
11688
 * @ioctx:  an I/O handler
11689
 * @enc:  the charset encoding if known (deprecated)
11690
 *
11691
 * Create a parser context for using the XML parser with an existing
11692
 * I/O stream
11693
 *
11694
 * Returns the new parser context or NULL
11695
 */
11696
xmlParserCtxtPtr
11697
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11698
                      xmlInputReadCallback ioread,
11699
                      xmlInputCloseCallback ioclose,
11700
0
                      void *ioctx, xmlCharEncoding enc) {
11701
0
    xmlParserCtxtPtr ctxt;
11702
0
    xmlParserInputPtr input;
11703
0
    const char *encoding;
11704
11705
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11706
0
    if (ctxt == NULL)
11707
0
  return(NULL);
11708
11709
0
    encoding = xmlGetCharEncodingName(enc);
11710
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11711
0
                                  encoding, 0);
11712
0
    if (input == NULL) {
11713
0
  xmlFreeParserCtxt(ctxt);
11714
0
        return (NULL);
11715
0
    }
11716
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11717
0
        xmlFreeInputStream(input);
11718
0
        xmlFreeParserCtxt(ctxt);
11719
0
        return(NULL);
11720
0
    }
11721
11722
0
    return(ctxt);
11723
0
}
11724
11725
#ifdef LIBXML_VALID_ENABLED
11726
/************************************************************************
11727
 *                  *
11728
 *    Front ends when parsing a DTD       *
11729
 *                  *
11730
 ************************************************************************/
11731
11732
/**
11733
 * xmlCtxtParseDtd:
11734
 * @ctxt:  a parser context
11735
 * @input:  a parser input
11736
 * @publicId:  public ID of the DTD (optional)
11737
 * @systemId:  system ID of the DTD (optional)
11738
 *
11739
 * Parse a DTD.
11740
 *
11741
 * Option XML_PARSE_DTDLOAD should be enabled in the parser context
11742
 * to make external entities work.
11743
 *
11744
 * Available since 2.14.0.
11745
 *
11746
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11747
 * @input will be freed by the function in any case.
11748
 */
11749
xmlDtdPtr
11750
xmlCtxtParseDtd(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11751
0
                const xmlChar *publicId, const xmlChar *systemId) {
11752
0
    xmlDtdPtr ret = NULL;
11753
11754
0
    if ((ctxt == NULL) || (input == NULL)) {
11755
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
11756
0
        xmlFreeInputStream(input);
11757
0
        return(NULL);
11758
0
    }
11759
11760
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11761
0
        xmlFreeInputStream(input);
11762
0
        return(NULL);
11763
0
    }
11764
11765
0
    if (publicId == NULL)
11766
0
        publicId = BAD_CAST "none";
11767
0
    if (systemId == NULL)
11768
0
        systemId = BAD_CAST "none";
11769
11770
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11771
0
    if (ctxt->myDoc == NULL) {
11772
0
        xmlErrMemory(ctxt);
11773
0
        goto error;
11774
0
    }
11775
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11776
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11777
0
                                       publicId, systemId);
11778
0
    if (ctxt->myDoc->extSubset == NULL) {
11779
0
        xmlErrMemory(ctxt);
11780
0
        xmlFreeDoc(ctxt->myDoc);
11781
0
        goto error;
11782
0
    }
11783
11784
0
    xmlParseExternalSubset(ctxt, publicId, systemId);
11785
11786
0
    if (ctxt->wellFormed) {
11787
0
        ret = ctxt->myDoc->extSubset;
11788
0
        ctxt->myDoc->extSubset = NULL;
11789
0
        if (ret != NULL) {
11790
0
            xmlNodePtr tmp;
11791
11792
0
            ret->doc = NULL;
11793
0
            tmp = ret->children;
11794
0
            while (tmp != NULL) {
11795
0
                tmp->doc = NULL;
11796
0
                tmp = tmp->next;
11797
0
            }
11798
0
        }
11799
0
    } else {
11800
0
        ret = NULL;
11801
0
    }
11802
0
    xmlFreeDoc(ctxt->myDoc);
11803
0
    ctxt->myDoc = NULL;
11804
11805
0
error:
11806
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
11807
11808
0
    return(ret);
11809
0
}
11810
11811
/**
11812
 * xmlIOParseDTD:
11813
 * @sax:  the SAX handler block or NULL
11814
 * @input:  an Input Buffer
11815
 * @enc:  the charset encoding if known
11816
 *
11817
 * DEPRECATED: Use xmlCtxtParseDtd.
11818
 *
11819
 * Load and parse a DTD
11820
 *
11821
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11822
 * @input will be freed by the function in any case.
11823
 */
11824
11825
xmlDtdPtr
11826
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11827
0
        xmlCharEncoding enc) {
11828
0
    xmlDtdPtr ret = NULL;
11829
0
    xmlParserCtxtPtr ctxt;
11830
0
    xmlParserInputPtr pinput = NULL;
11831
11832
0
    if (input == NULL)
11833
0
  return(NULL);
11834
11835
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11836
0
    if (ctxt == NULL) {
11837
0
        xmlFreeParserInputBuffer(input);
11838
0
  return(NULL);
11839
0
    }
11840
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11841
11842
    /*
11843
     * generate a parser input from the I/O handler
11844
     */
11845
11846
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11847
0
    if (pinput == NULL) {
11848
0
        xmlFreeParserInputBuffer(input);
11849
0
  xmlFreeParserCtxt(ctxt);
11850
0
  return(NULL);
11851
0
    }
11852
11853
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11854
0
        xmlSwitchEncoding(ctxt, enc);
11855
0
    }
11856
11857
0
    ret = xmlCtxtParseDtd(ctxt, pinput, NULL, NULL);
11858
11859
0
    xmlFreeParserCtxt(ctxt);
11860
0
    return(ret);
11861
0
}
11862
11863
/**
11864
 * xmlSAXParseDTD:
11865
 * @sax:  the SAX handler block
11866
 * @ExternalID:  a NAME* containing the External ID of the DTD
11867
 * @SystemID:  a NAME* containing the URL to the DTD
11868
 *
11869
 * DEPRECATED: Use xmlCtxtParseDtd.
11870
 *
11871
 * Load and parse an external subset.
11872
 *
11873
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11874
 */
11875
11876
xmlDtdPtr
11877
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11878
0
                          const xmlChar *SystemID) {
11879
0
    xmlDtdPtr ret = NULL;
11880
0
    xmlParserCtxtPtr ctxt;
11881
0
    xmlParserInputPtr input = NULL;
11882
0
    xmlChar* systemIdCanonic;
11883
11884
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11885
11886
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11887
0
    if (ctxt == NULL) {
11888
0
  return(NULL);
11889
0
    }
11890
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11891
11892
    /*
11893
     * Canonicalise the system ID
11894
     */
11895
0
    systemIdCanonic = xmlCanonicPath(SystemID);
11896
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11897
0
  xmlFreeParserCtxt(ctxt);
11898
0
  return(NULL);
11899
0
    }
11900
11901
    /*
11902
     * Ask the entity resolver to load the external subset
11903
     */
11904
11905
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11906
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11907
0
                                   systemIdCanonic);
11908
0
    if (input == NULL) {
11909
0
  xmlFreeParserCtxt(ctxt);
11910
0
  if (systemIdCanonic != NULL)
11911
0
      xmlFree(systemIdCanonic);
11912
0
  return(NULL);
11913
0
    }
11914
11915
0
    if (input->filename == NULL)
11916
0
  input->filename = (char *) systemIdCanonic;
11917
0
    else
11918
0
  xmlFree(systemIdCanonic);
11919
11920
0
    ret = xmlCtxtParseDtd(ctxt, input, ExternalID, SystemID);
11921
11922
0
    xmlFreeParserCtxt(ctxt);
11923
0
    return(ret);
11924
0
}
11925
11926
11927
/**
11928
 * xmlParseDTD:
11929
 * @ExternalID:  a NAME* containing the External ID of the DTD
11930
 * @SystemID:  a NAME* containing the URL to the DTD
11931
 *
11932
 * Load and parse an external subset.
11933
 *
11934
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11935
 */
11936
11937
xmlDtdPtr
11938
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11939
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11940
0
}
11941
#endif /* LIBXML_VALID_ENABLED */
11942
11943
/************************************************************************
11944
 *                  *
11945
 *    Front ends when parsing an Entity     *
11946
 *                  *
11947
 ************************************************************************/
11948
11949
static xmlNodePtr
11950
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11951
1.97k
                            int hasTextDecl, int buildTree) {
11952
1.97k
    xmlNodePtr root = NULL;
11953
1.97k
    xmlNodePtr list = NULL;
11954
1.97k
    xmlChar *rootName = BAD_CAST "#root";
11955
1.97k
    int result;
11956
11957
1.97k
    if (buildTree) {
11958
1.97k
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11959
1.97k
        if (root == NULL) {
11960
0
            xmlErrMemory(ctxt);
11961
0
            goto error;
11962
0
        }
11963
1.97k
    }
11964
11965
1.97k
    if (xmlCtxtPushInput(ctxt, input) < 0)
11966
0
        goto error;
11967
11968
1.97k
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11969
1.97k
    spacePush(ctxt, -1);
11970
11971
1.97k
    if (buildTree)
11972
1.97k
        nodePush(ctxt, root);
11973
11974
1.97k
    if (hasTextDecl) {
11975
0
        xmlDetectEncoding(ctxt);
11976
11977
        /*
11978
         * Parse a possible text declaration first
11979
         */
11980
0
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11981
0
            (IS_BLANK_CH(NXT(5)))) {
11982
0
            xmlParseTextDecl(ctxt);
11983
            /*
11984
             * An XML-1.0 document can't reference an entity not XML-1.0
11985
             */
11986
0
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11987
0
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11988
0
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11989
0
                               "Version mismatch between document and "
11990
0
                               "entity\n");
11991
0
            }
11992
0
        }
11993
0
    }
11994
11995
1.97k
    xmlParseContentInternal(ctxt);
11996
11997
1.97k
    if (ctxt->input->cur < ctxt->input->end)
11998
335
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11999
12000
1.97k
    if ((ctxt->wellFormed) ||
12001
1.97k
        ((ctxt->recovery) && (!xmlCtxtIsCatastrophicError(ctxt)))) {
12002
1.97k
        if (root != NULL) {
12003
1.97k
            xmlNodePtr cur;
12004
12005
            /*
12006
             * Unlink newly created node list.
12007
             */
12008
1.97k
            list = root->children;
12009
1.97k
            root->children = NULL;
12010
1.97k
            root->last = NULL;
12011
14.5k
            for (cur = list; cur != NULL; cur = cur->next)
12012
12.5k
                cur->parent = NULL;
12013
1.97k
        }
12014
1.97k
    }
12015
12016
    /*
12017
     * Read the rest of the stream in case of errors. We want
12018
     * to account for the whole entity size.
12019
     */
12020
1.97k
    do {
12021
1.97k
        ctxt->input->cur = ctxt->input->end;
12022
1.97k
        xmlParserShrink(ctxt);
12023
1.97k
        result = xmlParserGrow(ctxt);
12024
1.97k
    } while (result > 0);
12025
12026
1.97k
    if (buildTree)
12027
1.97k
        nodePop(ctxt);
12028
12029
1.97k
    namePop(ctxt);
12030
1.97k
    spacePop(ctxt);
12031
12032
1.97k
    xmlCtxtPopInput(ctxt);
12033
12034
1.97k
error:
12035
1.97k
    xmlFreeNode(root);
12036
12037
1.97k
    return(list);
12038
1.97k
}
12039
12040
static void
12041
2.09k
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
12042
2.09k
    xmlParserInputPtr input;
12043
2.09k
    xmlNodePtr list;
12044
2.09k
    unsigned long consumed;
12045
2.09k
    int isExternal;
12046
2.09k
    int buildTree;
12047
2.09k
    int oldMinNsIndex;
12048
2.09k
    int oldNodelen, oldNodemem;
12049
12050
2.09k
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
12051
2.09k
    buildTree = (ctxt->node != NULL);
12052
12053
    /*
12054
     * Recursion check
12055
     */
12056
2.09k
    if (ent->flags & XML_ENT_EXPANDING) {
12057
112
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
12058
112
        xmlHaltParser(ctxt);
12059
112
        goto error;
12060
112
    }
12061
12062
    /*
12063
     * Load entity
12064
     */
12065
1.97k
    input = xmlNewEntityInputStream(ctxt, ent);
12066
1.97k
    if (input == NULL)
12067
0
        goto error;
12068
12069
    /*
12070
     * When building a tree, we need to limit the scope of namespace
12071
     * declarations, so that entities don't reference xmlNs structs
12072
     * from the parent of a reference.
12073
     */
12074
1.97k
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
12075
1.97k
    if (buildTree)
12076
1.97k
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
12077
12078
1.97k
    oldNodelen = ctxt->nodelen;
12079
1.97k
    oldNodemem = ctxt->nodemem;
12080
1.97k
    ctxt->nodelen = 0;
12081
1.97k
    ctxt->nodemem = 0;
12082
12083
    /*
12084
     * Parse content
12085
     *
12086
     * This initiates a recursive call chain:
12087
     *
12088
     * - xmlCtxtParseContentInternal
12089
     * - xmlParseContentInternal
12090
     * - xmlParseReference
12091
     * - xmlCtxtParseEntity
12092
     *
12093
     * The nesting depth is limited by the maximum number of inputs,
12094
     * see xmlCtxtPushInput.
12095
     *
12096
     * It's possible to make this non-recursive (minNsIndex must be
12097
     * stored in the input struct) at the expense of code readability.
12098
     */
12099
12100
1.97k
    ent->flags |= XML_ENT_EXPANDING;
12101
12102
1.97k
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
12103
12104
1.97k
    ent->flags &= ~XML_ENT_EXPANDING;
12105
12106
1.97k
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
12107
1.97k
    ctxt->nodelen = oldNodelen;
12108
1.97k
    ctxt->nodemem = oldNodemem;
12109
12110
    /*
12111
     * Entity size accounting
12112
     */
12113
1.97k
    consumed = input->consumed;
12114
1.97k
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
12115
12116
1.97k
    if ((ent->flags & XML_ENT_CHECKED) == 0)
12117
1.68k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
12118
12119
1.97k
    if ((ent->flags & XML_ENT_PARSED) == 0) {
12120
1.68k
        if (isExternal)
12121
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
12122
12123
1.68k
        ent->children = list;
12124
12125
14.0k
        while (list != NULL) {
12126
12.3k
            list->parent = (xmlNodePtr) ent;
12127
12128
            /*
12129
             * Downstream code like the nginx xslt module can set
12130
             * ctxt->myDoc->extSubset to a separate DTD, so the entity
12131
             * might have a different or a NULL document.
12132
             */
12133
12.3k
            if (list->doc != ent->doc)
12134
0
                xmlSetTreeDoc(list, ent->doc);
12135
12136
12.3k
            if (list->next == NULL)
12137
1.57k
                ent->last = list;
12138
12.3k
            list = list->next;
12139
12.3k
        }
12140
1.68k
    } else {
12141
290
        xmlFreeNodeList(list);
12142
290
    }
12143
12144
1.97k
    xmlFreeInputStream(input);
12145
12146
2.09k
error:
12147
2.09k
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
12148
2.09k
}
12149
12150
/**
12151
 * xmlParseCtxtExternalEntity:
12152
 * @ctxt:  the existing parsing context
12153
 * @URL:  the URL for the entity to load
12154
 * @ID:  the System ID for the entity to load
12155
 * @listOut:  the return value for the set of parsed nodes
12156
 *
12157
 * Parse an external general entity within an existing parsing context
12158
 * An external general parsed entity is well-formed if it matches the
12159
 * production labeled extParsedEnt.
12160
 *
12161
 * [78] extParsedEnt ::= TextDecl? content
12162
 *
12163
 * Returns 0 if the entity is well formed, XML_ERR_ARGUMENT if @ctxt is NULL and
12164
 *    the parser error code otherwise
12165
 */
12166
12167
int
12168
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt, const xmlChar *URL,
12169
0
                           const xmlChar *ID, xmlNodePtr *listOut) {
12170
0
    xmlParserInputPtr input;
12171
0
    xmlNodePtr list;
12172
12173
0
    if (listOut != NULL)
12174
0
        *listOut = NULL;
12175
12176
0
    if (ctxt == NULL)
12177
0
        return(XML_ERR_ARGUMENT);
12178
12179
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12180
0
                            XML_RESOURCE_GENERAL_ENTITY);
12181
0
    if (input == NULL)
12182
0
        return(ctxt->errNo);
12183
12184
0
    xmlCtxtInitializeLate(ctxt);
12185
12186
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
12187
0
    if (listOut != NULL)
12188
0
        *listOut = list;
12189
0
    else
12190
0
        xmlFreeNodeList(list);
12191
12192
0
    xmlFreeInputStream(input);
12193
0
    return(ctxt->errNo);
12194
0
}
12195
12196
#ifdef LIBXML_SAX1_ENABLED
12197
/**
12198
 * xmlParseExternalEntity:
12199
 * @doc:  the document the chunk pertains to
12200
 * @sax:  the SAX handler block (possibly NULL)
12201
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12202
 * @depth:  Used for loop detection, use 0
12203
 * @URL:  the URL for the entity to load
12204
 * @ID:  the System ID for the entity to load
12205
 * @list:  the return value for the set of parsed nodes
12206
 *
12207
 * DEPRECATED: Use xmlParseCtxtExternalEntity.
12208
 *
12209
 * Parse an external general entity
12210
 * An external general parsed entity is well-formed if it matches the
12211
 * production labeled extParsedEnt.
12212
 *
12213
 * [78] extParsedEnt ::= TextDecl? content
12214
 *
12215
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12216
 *    the parser error code otherwise
12217
 */
12218
12219
int
12220
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12221
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
12222
0
    xmlParserCtxtPtr ctxt;
12223
0
    int ret;
12224
12225
0
    if (list != NULL)
12226
0
        *list = NULL;
12227
12228
0
    if (doc == NULL)
12229
0
        return(XML_ERR_ARGUMENT);
12230
12231
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12232
0
    if (ctxt == NULL)
12233
0
        return(XML_ERR_NO_MEMORY);
12234
12235
0
    ctxt->depth = depth;
12236
0
    ctxt->myDoc = doc;
12237
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
12238
12239
0
    xmlFreeParserCtxt(ctxt);
12240
0
    return(ret);
12241
0
}
12242
12243
/**
12244
 * xmlParseBalancedChunkMemory:
12245
 * @doc:  the document the chunk pertains to (must not be NULL)
12246
 * @sax:  the SAX handler block (possibly NULL)
12247
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12248
 * @depth:  Used for loop detection, use 0
12249
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12250
 * @lst:  the return value for the set of parsed nodes
12251
 *
12252
 * Parse a well-balanced chunk of an XML document
12253
 * called by the parser
12254
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12255
 * the content production in the XML grammar:
12256
 *
12257
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12258
 *
12259
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12260
 *    the parser error code otherwise
12261
 */
12262
12263
int
12264
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12265
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12266
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12267
0
                                                depth, string, lst, 0 );
12268
0
}
12269
#endif /* LIBXML_SAX1_ENABLED */
12270
12271
/**
12272
 * xmlCtxtParseContent:
12273
 * @ctxt:  parser context
12274
 * @input:  parser input
12275
 * @node:  target node or document
12276
 * @hasTextDecl:  whether to parse text declaration
12277
 *
12278
 * Parse a well-balanced chunk of XML matching the 'content' production.
12279
 *
12280
 * Namespaces in scope of @node and entities of @node's document are
12281
 * recognized. When validating, the DTD of @node's document is used.
12282
 *
12283
 * Always consumes @input even in error case.
12284
 *
12285
 * Available since 2.14.0.
12286
 *
12287
 * Returns a node list or NULL in case of error.
12288
 */
12289
xmlNodePtr
12290
xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
12291
0
                    xmlNodePtr node, int hasTextDecl) {
12292
0
    xmlDocPtr doc;
12293
0
    xmlNodePtr cur, list = NULL;
12294
0
    int nsnr = 0;
12295
0
    xmlDictPtr oldDict;
12296
0
    int oldOptions, oldDictNames, oldLoadSubset;
12297
12298
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12299
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12300
0
        goto exit;
12301
0
    }
12302
12303
0
    doc = node->doc;
12304
0
    if (doc == NULL) {
12305
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12306
0
        goto exit;
12307
0
    }
12308
12309
0
    switch (node->type) {
12310
0
        case XML_ELEMENT_NODE:
12311
0
        case XML_DOCUMENT_NODE:
12312
0
        case XML_HTML_DOCUMENT_NODE:
12313
0
            break;
12314
12315
0
        case XML_ATTRIBUTE_NODE:
12316
0
        case XML_TEXT_NODE:
12317
0
        case XML_CDATA_SECTION_NODE:
12318
0
        case XML_ENTITY_REF_NODE:
12319
0
        case XML_PI_NODE:
12320
0
        case XML_COMMENT_NODE:
12321
0
            for (cur = node->parent; cur != NULL; cur = node->parent) {
12322
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12323
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12324
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12325
0
                    node = cur;
12326
0
                    break;
12327
0
                }
12328
0
            }
12329
0
            break;
12330
12331
0
        default:
12332
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12333
0
            goto exit;
12334
0
    }
12335
12336
0
#ifdef LIBXML_HTML_ENABLED
12337
0
    if (ctxt->html)
12338
0
        htmlCtxtReset(ctxt);
12339
0
    else
12340
0
#endif
12341
0
        xmlCtxtReset(ctxt);
12342
12343
0
    oldDict = ctxt->dict;
12344
0
    oldOptions = ctxt->options;
12345
0
    oldDictNames = ctxt->dictNames;
12346
0
    oldLoadSubset = ctxt->loadsubset;
12347
12348
    /*
12349
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12350
     */
12351
0
    if (doc->dict != NULL) {
12352
0
        ctxt->dict = doc->dict;
12353
0
    } else {
12354
0
        ctxt->options |= XML_PARSE_NODICT;
12355
0
        ctxt->dictNames = 0;
12356
0
    }
12357
12358
    /*
12359
     * Disable IDs
12360
     */
12361
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12362
12363
0
    ctxt->myDoc = doc;
12364
12365
0
#ifdef LIBXML_HTML_ENABLED
12366
0
    if (ctxt->html) {
12367
        /*
12368
         * When parsing in context, it makes no sense to add implied
12369
         * elements like html/body/etc...
12370
         */
12371
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12372
12373
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12374
0
    } else
12375
0
#endif
12376
0
    {
12377
0
        xmlCtxtInitializeLate(ctxt);
12378
12379
        /*
12380
         * initialize the SAX2 namespaces stack
12381
         */
12382
0
        cur = node;
12383
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12384
0
            xmlNsPtr ns = cur->nsDef;
12385
0
            xmlHashedString hprefix, huri;
12386
12387
0
            while (ns != NULL) {
12388
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12389
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12390
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12391
0
                    nsnr++;
12392
0
                ns = ns->next;
12393
0
            }
12394
0
            cur = cur->parent;
12395
0
        }
12396
12397
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12398
12399
0
        if (nsnr > 0)
12400
0
            xmlParserNsPop(ctxt, nsnr);
12401
0
    }
12402
12403
0
    ctxt->dict = oldDict;
12404
0
    ctxt->options = oldOptions;
12405
0
    ctxt->dictNames = oldDictNames;
12406
0
    ctxt->loadsubset = oldLoadSubset;
12407
0
    ctxt->myDoc = NULL;
12408
0
    ctxt->node = NULL;
12409
12410
0
exit:
12411
0
    xmlFreeInputStream(input);
12412
0
    return(list);
12413
0
}
12414
12415
/**
12416
 * xmlParseInNodeContext:
12417
 * @node:  the context node
12418
 * @data:  the input string
12419
 * @datalen:  the input string length in bytes
12420
 * @options:  a combination of xmlParserOption
12421
 * @listOut:  the return value for the set of parsed nodes
12422
 *
12423
 * Parse a well-balanced chunk of an XML document
12424
 * within the context (DTD, namespaces, etc ...) of the given node.
12425
 *
12426
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12427
 * the content production in the XML grammar:
12428
 *
12429
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12430
 *
12431
 * This function assumes the encoding of @node's document which is
12432
 * typically not what you want. A better alternative is
12433
 * xmlCtxtParseContent.
12434
 *
12435
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12436
 * error code otherwise
12437
 */
12438
xmlParserErrors
12439
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12440
0
                      int options, xmlNodePtr *listOut) {
12441
0
    xmlParserCtxtPtr ctxt;
12442
0
    xmlParserInputPtr input;
12443
0
    xmlDocPtr doc;
12444
0
    xmlNodePtr list;
12445
0
    xmlParserErrors ret;
12446
12447
0
    if (listOut == NULL)
12448
0
        return(XML_ERR_INTERNAL_ERROR);
12449
0
    *listOut = NULL;
12450
12451
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12452
0
        return(XML_ERR_INTERNAL_ERROR);
12453
12454
0
    doc = node->doc;
12455
0
    if (doc == NULL)
12456
0
        return(XML_ERR_INTERNAL_ERROR);
12457
12458
0
#ifdef LIBXML_HTML_ENABLED
12459
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12460
0
        ctxt = htmlNewParserCtxt();
12461
0
    }
12462
0
    else
12463
0
#endif
12464
0
        ctxt = xmlNewParserCtxt();
12465
12466
0
    if (ctxt == NULL)
12467
0
        return(XML_ERR_NO_MEMORY);
12468
12469
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12470
0
                                      (const char *) doc->encoding,
12471
0
                                      XML_INPUT_BUF_STATIC);
12472
0
    if (input == NULL) {
12473
0
        xmlFreeParserCtxt(ctxt);
12474
0
        return(XML_ERR_NO_MEMORY);
12475
0
    }
12476
12477
0
    xmlCtxtUseOptions(ctxt, options);
12478
12479
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12480
12481
0
    if (list == NULL) {
12482
0
        ret = ctxt->errNo;
12483
0
        if (ret == XML_ERR_ARGUMENT)
12484
0
            ret = XML_ERR_INTERNAL_ERROR;
12485
0
    } else {
12486
0
        ret = XML_ERR_OK;
12487
0
        *listOut = list;
12488
0
    }
12489
12490
0
    xmlFreeParserCtxt(ctxt);
12491
12492
0
    return(ret);
12493
0
}
12494
12495
#ifdef LIBXML_SAX1_ENABLED
12496
/**
12497
 * xmlParseBalancedChunkMemoryRecover:
12498
 * @doc:  the document the chunk pertains to (must not be NULL)
12499
 * @sax:  the SAX handler block (possibly NULL)
12500
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12501
 * @depth:  Used for loop detection, use 0
12502
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12503
 * @listOut:  the return value for the set of parsed nodes
12504
 * @recover: return nodes even if the data is broken (use 0)
12505
 *
12506
 * Parse a well-balanced chunk of an XML document
12507
 *
12508
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12509
 * the content production in the XML grammar:
12510
 *
12511
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12512
 *
12513
 * Returns 0 if the chunk is well balanced, or the parser error code
12514
 * otherwise.
12515
 *
12516
 * In case recover is set to 1, the nodelist will not be empty even if
12517
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12518
 * some extent.
12519
 */
12520
int
12521
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12522
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *listOut,
12523
0
     int recover) {
12524
0
    xmlParserCtxtPtr ctxt;
12525
0
    xmlParserInputPtr input;
12526
0
    xmlNodePtr list;
12527
0
    int ret;
12528
12529
0
    if (listOut != NULL)
12530
0
        *listOut = NULL;
12531
12532
0
    if (string == NULL)
12533
0
        return(XML_ERR_ARGUMENT);
12534
12535
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12536
0
    if (ctxt == NULL)
12537
0
        return(XML_ERR_NO_MEMORY);
12538
12539
0
    xmlCtxtInitializeLate(ctxt);
12540
12541
0
    ctxt->depth = depth;
12542
0
    ctxt->myDoc = doc;
12543
0
    if (recover) {
12544
0
        ctxt->options |= XML_PARSE_RECOVER;
12545
0
        ctxt->recovery = 1;
12546
0
    }
12547
12548
0
    input = xmlNewStringInputStream(ctxt, string);
12549
0
    if (input == NULL) {
12550
0
        ret = ctxt->errNo;
12551
0
        goto error;
12552
0
    }
12553
12554
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12555
0
    if (listOut != NULL)
12556
0
        *listOut = list;
12557
0
    else
12558
0
        xmlFreeNodeList(list);
12559
12560
0
    if (!ctxt->wellFormed)
12561
0
        ret = ctxt->errNo;
12562
0
    else
12563
0
        ret = XML_ERR_OK;
12564
12565
0
error:
12566
0
    xmlFreeInputStream(input);
12567
0
    xmlFreeParserCtxt(ctxt);
12568
0
    return(ret);
12569
0
}
12570
12571
/**
12572
 * xmlSAXParseEntity:
12573
 * @sax:  the SAX handler block
12574
 * @filename:  the filename
12575
 *
12576
 * DEPRECATED: Don't use.
12577
 *
12578
 * parse an XML external entity out of context and build a tree.
12579
 * It uses the given SAX function block to handle the parsing callback.
12580
 * If sax is NULL, fallback to the default DOM tree building routines.
12581
 *
12582
 * [78] extParsedEnt ::= TextDecl? content
12583
 *
12584
 * This corresponds to a "Well Balanced" chunk
12585
 *
12586
 * Returns the resulting document tree
12587
 */
12588
12589
xmlDocPtr
12590
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12591
0
    xmlDocPtr ret;
12592
0
    xmlParserCtxtPtr ctxt;
12593
12594
0
    ctxt = xmlCreateFileParserCtxt(filename);
12595
0
    if (ctxt == NULL) {
12596
0
  return(NULL);
12597
0
    }
12598
0
    if (sax != NULL) {
12599
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12600
0
            *ctxt->sax = *sax;
12601
0
        } else {
12602
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12603
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12604
0
        }
12605
0
        ctxt->userData = NULL;
12606
0
    }
12607
12608
0
    xmlParseExtParsedEnt(ctxt);
12609
12610
0
    if (ctxt->wellFormed) {
12611
0
  ret = ctxt->myDoc;
12612
0
    } else {
12613
0
        ret = NULL;
12614
0
        xmlFreeDoc(ctxt->myDoc);
12615
0
    }
12616
12617
0
    xmlFreeParserCtxt(ctxt);
12618
12619
0
    return(ret);
12620
0
}
12621
12622
/**
12623
 * xmlParseEntity:
12624
 * @filename:  the filename
12625
 *
12626
 * parse an XML external entity out of context and build a tree.
12627
 *
12628
 * [78] extParsedEnt ::= TextDecl? content
12629
 *
12630
 * This corresponds to a "Well Balanced" chunk
12631
 *
12632
 * Returns the resulting document tree
12633
 */
12634
12635
xmlDocPtr
12636
0
xmlParseEntity(const char *filename) {
12637
0
    return(xmlSAXParseEntity(NULL, filename));
12638
0
}
12639
#endif /* LIBXML_SAX1_ENABLED */
12640
12641
/**
12642
 * xmlCreateEntityParserCtxt:
12643
 * @URL:  the entity URL
12644
 * @ID:  the entity PUBLIC ID
12645
 * @base:  a possible base for the target URI
12646
 *
12647
 * DEPRECATED: Don't use.
12648
 *
12649
 * Create a parser context for an external entity
12650
 * Automatic support for ZLIB/Compress compressed document is provided
12651
 * by default if found at compile-time.
12652
 *
12653
 * Returns the new parser context or NULL
12654
 */
12655
xmlParserCtxtPtr
12656
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12657
0
                    const xmlChar *base) {
12658
0
    xmlParserCtxtPtr ctxt;
12659
0
    xmlParserInputPtr input;
12660
0
    xmlChar *uri = NULL;
12661
12662
0
    ctxt = xmlNewParserCtxt();
12663
0
    if (ctxt == NULL)
12664
0
  return(NULL);
12665
12666
0
    if (base != NULL) {
12667
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12668
0
            goto error;
12669
0
        if (uri != NULL)
12670
0
            URL = uri;
12671
0
    }
12672
12673
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12674
0
                            XML_RESOURCE_UNKNOWN);
12675
0
    if (input == NULL)
12676
0
        goto error;
12677
12678
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12679
0
        xmlFreeInputStream(input);
12680
0
        goto error;
12681
0
    }
12682
12683
0
    xmlFree(uri);
12684
0
    return(ctxt);
12685
12686
0
error:
12687
0
    xmlFree(uri);
12688
0
    xmlFreeParserCtxt(ctxt);
12689
0
    return(NULL);
12690
0
}
12691
12692
/************************************************************************
12693
 *                  *
12694
 *    Front ends when parsing from a file     *
12695
 *                  *
12696
 ************************************************************************/
12697
12698
/**
12699
 * xmlCreateURLParserCtxt:
12700
 * @filename:  the filename or URL
12701
 * @options:  a combination of xmlParserOption
12702
 *
12703
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12704
 *
12705
 * Create a parser context for a file or URL content.
12706
 * Automatic support for ZLIB/Compress compressed document is provided
12707
 * by default if found at compile-time and for file accesses
12708
 *
12709
 * Returns the new parser context or NULL
12710
 */
12711
xmlParserCtxtPtr
12712
xmlCreateURLParserCtxt(const char *filename, int options)
12713
0
{
12714
0
    xmlParserCtxtPtr ctxt;
12715
0
    xmlParserInputPtr input;
12716
12717
0
    ctxt = xmlNewParserCtxt();
12718
0
    if (ctxt == NULL)
12719
0
  return(NULL);
12720
12721
0
    options |= XML_PARSE_UNZIP;
12722
12723
0
    xmlCtxtUseOptions(ctxt, options);
12724
0
    ctxt->linenumbers = 1;
12725
12726
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12727
0
    if (input == NULL) {
12728
0
  xmlFreeParserCtxt(ctxt);
12729
0
  return(NULL);
12730
0
    }
12731
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12732
0
        xmlFreeInputStream(input);
12733
0
        xmlFreeParserCtxt(ctxt);
12734
0
        return(NULL);
12735
0
    }
12736
12737
0
    return(ctxt);
12738
0
}
12739
12740
/**
12741
 * xmlCreateFileParserCtxt:
12742
 * @filename:  the filename
12743
 *
12744
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12745
 *
12746
 * Create a parser context for a file content.
12747
 * Automatic support for ZLIB/Compress compressed document is provided
12748
 * by default if found at compile-time.
12749
 *
12750
 * Returns the new parser context or NULL
12751
 */
12752
xmlParserCtxtPtr
12753
xmlCreateFileParserCtxt(const char *filename)
12754
0
{
12755
0
    return(xmlCreateURLParserCtxt(filename, 0));
12756
0
}
12757
12758
#ifdef LIBXML_SAX1_ENABLED
12759
/**
12760
 * xmlSAXParseFileWithData:
12761
 * @sax:  the SAX handler block
12762
 * @filename:  the filename
12763
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
12764
 *             documents
12765
 * @data:  the userdata
12766
 *
12767
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12768
 *
12769
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12770
 * compressed document is provided by default if found at compile-time.
12771
 * It use the given SAX function block to handle the parsing callback.
12772
 * If sax is NULL, fallback to the default DOM tree building routines.
12773
 *
12774
 * User data (void *) is stored within the parser context in the
12775
 * context's _private member, so it is available nearly everywhere in libxml
12776
 *
12777
 * Returns the resulting document tree
12778
 */
12779
12780
xmlDocPtr
12781
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12782
0
                        int recovery, void *data) {
12783
0
    xmlDocPtr ret = NULL;
12784
0
    xmlParserCtxtPtr ctxt;
12785
0
    xmlParserInputPtr input;
12786
12787
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12788
0
    if (ctxt == NULL)
12789
0
  return(NULL);
12790
12791
0
    if (data != NULL)
12792
0
  ctxt->_private = data;
12793
12794
0
    if (recovery) {
12795
0
        ctxt->options |= XML_PARSE_RECOVER;
12796
0
        ctxt->recovery = 1;
12797
0
    }
12798
12799
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12800
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12801
0
    else
12802
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12803
12804
0
    if (input != NULL)
12805
0
        ret = xmlCtxtParseDocument(ctxt, input);
12806
12807
0
    xmlFreeParserCtxt(ctxt);
12808
0
    return(ret);
12809
0
}
12810
12811
/**
12812
 * xmlSAXParseFile:
12813
 * @sax:  the SAX handler block
12814
 * @filename:  the filename
12815
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
12816
 *             documents
12817
 *
12818
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12819
 *
12820
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12821
 * compressed document is provided by default if found at compile-time.
12822
 * It use the given SAX function block to handle the parsing callback.
12823
 * If sax is NULL, fallback to the default DOM tree building routines.
12824
 *
12825
 * Returns the resulting document tree
12826
 */
12827
12828
xmlDocPtr
12829
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12830
0
                          int recovery) {
12831
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12832
0
}
12833
12834
/**
12835
 * xmlRecoverDoc:
12836
 * @cur:  a pointer to an array of xmlChar
12837
 *
12838
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
12839
 *
12840
 * parse an XML in-memory document and build a tree.
12841
 * In the case the document is not Well Formed, an attempt to build a
12842
 * tree is tried anyway
12843
 *
12844
 * Returns the resulting document tree or NULL in case of failure
12845
 */
12846
12847
xmlDocPtr
12848
0
xmlRecoverDoc(const xmlChar *cur) {
12849
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12850
0
}
12851
12852
/**
12853
 * xmlParseFile:
12854
 * @filename:  the filename
12855
 *
12856
 * DEPRECATED: Use xmlReadFile.
12857
 *
12858
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12859
 * compressed document is provided by default if found at compile-time.
12860
 *
12861
 * Returns the resulting document tree if the file was wellformed,
12862
 * NULL otherwise.
12863
 */
12864
12865
xmlDocPtr
12866
0
xmlParseFile(const char *filename) {
12867
0
    return(xmlSAXParseFile(NULL, filename, 0));
12868
0
}
12869
12870
/**
12871
 * xmlRecoverFile:
12872
 * @filename:  the filename
12873
 *
12874
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
12875
 *
12876
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12877
 * compressed document is provided by default if found at compile-time.
12878
 * In the case the document is not Well Formed, it attempts to build
12879
 * a tree anyway
12880
 *
12881
 * Returns the resulting document tree or NULL in case of failure
12882
 */
12883
12884
xmlDocPtr
12885
0
xmlRecoverFile(const char *filename) {
12886
0
    return(xmlSAXParseFile(NULL, filename, 1));
12887
0
}
12888
12889
12890
/**
12891
 * xmlSetupParserForBuffer:
12892
 * @ctxt:  an XML parser context
12893
 * @buffer:  a xmlChar * buffer
12894
 * @filename:  a file name
12895
 *
12896
 * DEPRECATED: Don't use.
12897
 *
12898
 * Setup the parser context to parse a new buffer; Clears any prior
12899
 * contents from the parser context. The buffer parameter must not be
12900
 * NULL, but the filename parameter can be
12901
 */
12902
void
12903
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12904
                             const char* filename)
12905
0
{
12906
0
    xmlParserInputPtr input;
12907
12908
0
    if ((ctxt == NULL) || (buffer == NULL))
12909
0
        return;
12910
12911
0
    xmlClearParserCtxt(ctxt);
12912
12913
0
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12914
0
                                      NULL, 0);
12915
0
    if (input == NULL)
12916
0
        return;
12917
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
12918
0
        xmlFreeInputStream(input);
12919
0
}
12920
12921
/**
12922
 * xmlSAXUserParseFile:
12923
 * @sax:  a SAX handler
12924
 * @user_data:  The user data returned on SAX callbacks
12925
 * @filename:  a file name
12926
 *
12927
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12928
 *
12929
 * parse an XML file and call the given SAX handler routines.
12930
 * Automatic support for ZLIB/Compress compressed document is provided
12931
 *
12932
 * Returns 0 in case of success or an error number otherwise
12933
 */
12934
int
12935
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12936
0
                    const char *filename) {
12937
0
    int ret = 0;
12938
0
    xmlParserCtxtPtr ctxt;
12939
12940
0
    ctxt = xmlCreateFileParserCtxt(filename);
12941
0
    if (ctxt == NULL) return -1;
12942
0
    if (sax != NULL) {
12943
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12944
0
            *ctxt->sax = *sax;
12945
0
        } else {
12946
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12947
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12948
0
        }
12949
0
  ctxt->userData = user_data;
12950
0
    }
12951
12952
0
    xmlParseDocument(ctxt);
12953
12954
0
    if (ctxt->wellFormed)
12955
0
  ret = 0;
12956
0
    else {
12957
0
        if (ctxt->errNo != 0)
12958
0
      ret = ctxt->errNo;
12959
0
  else
12960
0
      ret = -1;
12961
0
    }
12962
0
    if (ctxt->myDoc != NULL) {
12963
0
        xmlFreeDoc(ctxt->myDoc);
12964
0
  ctxt->myDoc = NULL;
12965
0
    }
12966
0
    xmlFreeParserCtxt(ctxt);
12967
12968
0
    return ret;
12969
0
}
12970
#endif /* LIBXML_SAX1_ENABLED */
12971
12972
/************************************************************************
12973
 *                  *
12974
 *    Front ends when parsing from memory     *
12975
 *                  *
12976
 ************************************************************************/
12977
12978
/**
12979
 * xmlCreateMemoryParserCtxt:
12980
 * @buffer:  a pointer to a char array
12981
 * @size:  the size of the array
12982
 *
12983
 * Create a parser context for an XML in-memory document. The input buffer
12984
 * must not contain a terminating null byte.
12985
 *
12986
 * Returns the new parser context or NULL
12987
 */
12988
xmlParserCtxtPtr
12989
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12990
0
    xmlParserCtxtPtr ctxt;
12991
0
    xmlParserInputPtr input;
12992
12993
0
    if (size < 0)
12994
0
  return(NULL);
12995
12996
0
    ctxt = xmlNewParserCtxt();
12997
0
    if (ctxt == NULL)
12998
0
  return(NULL);
12999
13000
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
13001
0
    if (input == NULL) {
13002
0
  xmlFreeParserCtxt(ctxt);
13003
0
  return(NULL);
13004
0
    }
13005
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13006
0
        xmlFreeInputStream(input);
13007
0
        xmlFreeParserCtxt(ctxt);
13008
0
        return(NULL);
13009
0
    }
13010
13011
0
    return(ctxt);
13012
0
}
13013
13014
#ifdef LIBXML_SAX1_ENABLED
13015
/**
13016
 * xmlSAXParseMemoryWithData:
13017
 * @sax:  the SAX handler block
13018
 * @buffer:  a pointer to a char array
13019
 * @size:  the size of the array
13020
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13021
 *             documents
13022
 * @data:  the userdata
13023
 *
13024
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13025
 *
13026
 * parse an XML in-memory block and use the given SAX function block
13027
 * to handle the parsing callback. If sax is NULL, fallback to the default
13028
 * DOM tree building routines.
13029
 *
13030
 * User data (void *) is stored within the parser context in the
13031
 * context's _private member, so it is available nearly everywhere in libxml
13032
 *
13033
 * Returns the resulting document tree
13034
 */
13035
13036
xmlDocPtr
13037
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13038
0
                          int size, int recovery, void *data) {
13039
0
    xmlDocPtr ret = NULL;
13040
0
    xmlParserCtxtPtr ctxt;
13041
0
    xmlParserInputPtr input;
13042
13043
0
    if (size < 0)
13044
0
        return(NULL);
13045
13046
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
13047
0
    if (ctxt == NULL)
13048
0
        return(NULL);
13049
13050
0
    if (data != NULL)
13051
0
  ctxt->_private=data;
13052
13053
0
    if (recovery) {
13054
0
        ctxt->options |= XML_PARSE_RECOVER;
13055
0
        ctxt->recovery = 1;
13056
0
    }
13057
13058
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
13059
0
                                      XML_INPUT_BUF_STATIC);
13060
13061
0
    if (input != NULL)
13062
0
        ret = xmlCtxtParseDocument(ctxt, input);
13063
13064
0
    xmlFreeParserCtxt(ctxt);
13065
0
    return(ret);
13066
0
}
13067
13068
/**
13069
 * xmlSAXParseMemory:
13070
 * @sax:  the SAX handler block
13071
 * @buffer:  a pointer to a char array
13072
 * @size:  the size of the array
13073
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13074
 *             documents
13075
 *
13076
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13077
 *
13078
 * parse an XML in-memory block and use the given SAX function block
13079
 * to handle the parsing callback. If sax is NULL, fallback to the default
13080
 * DOM tree building routines.
13081
 *
13082
 * Returns the resulting document tree
13083
 */
13084
xmlDocPtr
13085
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13086
0
            int size, int recovery) {
13087
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13088
0
}
13089
13090
/**
13091
 * xmlParseMemory:
13092
 * @buffer:  a pointer to a char array
13093
 * @size:  the size of the array
13094
 *
13095
 * DEPRECATED: Use xmlReadMemory.
13096
 *
13097
 * parse an XML in-memory block and build a tree.
13098
 *
13099
 * Returns the resulting document tree
13100
 */
13101
13102
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13103
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
13104
0
}
13105
13106
/**
13107
 * xmlRecoverMemory:
13108
 * @buffer:  a pointer to a char array
13109
 * @size:  the size of the array
13110
 *
13111
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13112
 *
13113
 * parse an XML in-memory block and build a tree.
13114
 * In the case the document is not Well Formed, an attempt to
13115
 * build a tree is tried anyway
13116
 *
13117
 * Returns the resulting document tree or NULL in case of error
13118
 */
13119
13120
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13121
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
13122
0
}
13123
13124
/**
13125
 * xmlSAXUserParseMemory:
13126
 * @sax:  a SAX handler
13127
 * @user_data:  The user data returned on SAX callbacks
13128
 * @buffer:  an in-memory XML document input
13129
 * @size:  the length of the XML document in bytes
13130
 *
13131
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13132
 *
13133
 * parse an XML in-memory buffer and call the given SAX handler routines.
13134
 *
13135
 * Returns 0 in case of success or an error number otherwise
13136
 */
13137
int
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
                      const char *buffer, int size)
{
    xmlParserCtxtPtr ctxt = xmlCreateMemoryParserCtxt(buffer, size);
    int status;

    if (ctxt == NULL)
        return -1;

    if (sax != NULL) {
        if (sax->initialized != XML_SAX2_MAGIC) {
            /*
             * Handler block is not tagged with the SAX2 magic: clear the
             * context's handler and copy only the xmlSAXHandlerV1-sized
             * prefix of the caller's block.
             */
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
        } else {
            /* SAX2 block: copy the whole structure by value. */
            *ctxt->sax = *sax;
        }
        ctxt->userData = user_data;
    }

    xmlParseDocument(ctxt);

    /*
     * 0 for a well-formed document, otherwise the recorded error number,
     * or -1 when no error number was recorded.
     */
    if (ctxt->wellFormed)
        status = 0;
    else if (ctxt->errNo != 0)
        status = ctxt->errNo;
    else
        status = -1;

    /* Any document built during parsing is discarded. */
    if (ctxt->myDoc != NULL) {
        xmlFreeDoc(ctxt->myDoc);
        ctxt->myDoc = NULL;
    }
    xmlFreeParserCtxt(ctxt);

    return status;
}
13172
#endif /* LIBXML_SAX1_ENABLED */
13173
13174
/**
13175
 * xmlCreateDocParserCtxt:
13176
 * @str:  a pointer to an array of xmlChar
13177
 *
13178
 * Creates a parser context for an XML in-memory document.
13179
 *
13180
 * Returns the new parser context or NULL
13181
 */
13182
xmlParserCtxtPtr
13183
0
xmlCreateDocParserCtxt(const xmlChar *str) {
13184
0
    xmlParserCtxtPtr ctxt;
13185
0
    xmlParserInputPtr input;
13186
13187
0
    ctxt = xmlNewParserCtxt();
13188
0
    if (ctxt == NULL)
13189
0
  return(NULL);
13190
13191
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
13192
0
    if (input == NULL) {
13193
0
  xmlFreeParserCtxt(ctxt);
13194
0
  return(NULL);
13195
0
    }
13196
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13197
0
        xmlFreeInputStream(input);
13198
0
        xmlFreeParserCtxt(ctxt);
13199
0
        return(NULL);
13200
0
    }
13201
13202
0
    return(ctxt);
13203
0
}
13204
13205
#ifdef LIBXML_SAX1_ENABLED
13206
/**
13207
 * xmlSAXParseDoc:
13208
 * @sax:  the SAX handler block
13209
 * @cur:  a pointer to an array of xmlChar
13210
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13211
 *             documents
13212
 *
13213
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13214
 *
13215
 * parse an XML in-memory document and build a tree.
13216
 * It uses the given SAX function block to handle the parsing callback.
13217
 * If sax is NULL, fallback to the default DOM tree building routines.
13218
 *
13219
 * Returns the resulting document tree
13220
 */
13221
13222
xmlDocPtr
13223
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13224
0
    xmlDocPtr ret;
13225
0
    xmlParserCtxtPtr ctxt;
13226
0
    xmlSAXHandlerPtr oldsax = NULL;
13227
13228
0
    if (cur == NULL) return(NULL);
13229
13230
13231
0
    ctxt = xmlCreateDocParserCtxt(cur);
13232
0
    if (ctxt == NULL) return(NULL);
13233
0
    if (sax != NULL) {
13234
0
        oldsax = ctxt->sax;
13235
0
        ctxt->sax = sax;
13236
0
        ctxt->userData = NULL;
13237
0
    }
13238
13239
0
    xmlParseDocument(ctxt);
13240
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13241
0
    else {
13242
0
       ret = NULL;
13243
0
       xmlFreeDoc(ctxt->myDoc);
13244
0
       ctxt->myDoc = NULL;
13245
0
    }
13246
0
    if (sax != NULL)
13247
0
  ctxt->sax = oldsax;
13248
0
    xmlFreeParserCtxt(ctxt);
13249
13250
0
    return(ret);
13251
0
}
13252
13253
/**
13254
 * xmlParseDoc:
13255
 * @cur:  a pointer to an array of xmlChar
13256
 *
13257
 * DEPRECATED: Use xmlReadDoc.
13258
 *
13259
 * parse an XML in-memory document and build a tree.
13260
 *
13261
 * Returns the resulting document tree
13262
 */
13263
13264
xmlDocPtr
13265
0
xmlParseDoc(const xmlChar *cur) {
13266
0
    return(xmlSAXParseDoc(NULL, cur, 0));
13267
0
}
13268
#endif /* LIBXML_SAX1_ENABLED */
13269
13270
/************************************************************************
13271
 *                  *
13272
 *  New set (2.6.0) of simpler and more flexible APIs   *
13273
 *                  *
13274
 ************************************************************************/
13275
13276
/**
13277
 * DICT_FREE:
13278
 * @str:  a string
13279
 *
13280
 * Free a string if it is not owned by the "dict" dictionary in the
13281
 * current scope
13282
 */
13283
/*
 * Expects a variable named "dict" in the scope of the expansion.
 * Wrapped in do { } while (0) so that the macro behaves as exactly one
 * statement: "DICT_FREE(x);" is safe in an unbraced if/else and cannot
 * capture a following "else".
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
13287
13288
/**
13289
 * xmlCtxtReset:
13290
 * @ctxt: an XML parser context
13291
 *
13292
 * Reset a parser context
13293
 */
13294
void
13295
xmlCtxtReset(xmlParserCtxtPtr ctxt)
13296
0
{
13297
0
    xmlParserInputPtr input;
13298
0
    xmlDictPtr dict;
13299
13300
0
    if (ctxt == NULL)
13301
0
        return;
13302
13303
0
    dict = ctxt->dict;
13304
13305
0
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
13306
0
        xmlFreeInputStream(input);
13307
0
    }
13308
0
    ctxt->inputNr = 0;
13309
0
    ctxt->input = NULL;
13310
13311
0
    ctxt->spaceNr = 0;
13312
0
    if (ctxt->spaceTab != NULL) {
13313
0
  ctxt->spaceTab[0] = -1;
13314
0
  ctxt->space = &ctxt->spaceTab[0];
13315
0
    } else {
13316
0
        ctxt->space = NULL;
13317
0
    }
13318
13319
13320
0
    ctxt->nodeNr = 0;
13321
0
    ctxt->node = NULL;
13322
13323
0
    ctxt->nameNr = 0;
13324
0
    ctxt->name = NULL;
13325
13326
0
    ctxt->nsNr = 0;
13327
0
    xmlParserNsReset(ctxt->nsdb);
13328
13329
0
    DICT_FREE(ctxt->version);
13330
0
    ctxt->version = NULL;
13331
0
    DICT_FREE(ctxt->encoding);
13332
0
    ctxt->encoding = NULL;
13333
0
    DICT_FREE(ctxt->extSubURI);
13334
0
    ctxt->extSubURI = NULL;
13335
0
    DICT_FREE(ctxt->extSubSystem);
13336
0
    ctxt->extSubSystem = NULL;
13337
13338
0
    if (ctxt->directory != NULL) {
13339
0
        xmlFree(ctxt->directory);
13340
0
        ctxt->directory = NULL;
13341
0
    }
13342
13343
0
    if (ctxt->myDoc != NULL)
13344
0
        xmlFreeDoc(ctxt->myDoc);
13345
0
    ctxt->myDoc = NULL;
13346
13347
0
    ctxt->standalone = -1;
13348
0
    ctxt->hasExternalSubset = 0;
13349
0
    ctxt->hasPErefs = 0;
13350
0
    ctxt->html = 0;
13351
0
    ctxt->instate = XML_PARSER_START;
13352
13353
0
    ctxt->wellFormed = 1;
13354
0
    ctxt->nsWellFormed = 1;
13355
0
    ctxt->disableSAX = 0;
13356
0
    ctxt->valid = 1;
13357
0
    ctxt->record_info = 0;
13358
0
    ctxt->checkIndex = 0;
13359
0
    ctxt->endCheckState = 0;
13360
0
    ctxt->inSubset = 0;
13361
0
    ctxt->errNo = XML_ERR_OK;
13362
0
    ctxt->depth = 0;
13363
0
    ctxt->catalogs = NULL;
13364
0
    ctxt->sizeentities = 0;
13365
0
    ctxt->sizeentcopy = 0;
13366
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13367
13368
0
    if (ctxt->attsDefault != NULL) {
13369
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13370
0
        ctxt->attsDefault = NULL;
13371
0
    }
13372
0
    if (ctxt->attsSpecial != NULL) {
13373
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13374
0
        ctxt->attsSpecial = NULL;
13375
0
    }
13376
13377
0
#ifdef LIBXML_CATALOG_ENABLED
13378
0
    if (ctxt->catalogs != NULL)
13379
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13380
0
#endif
13381
0
    ctxt->nbErrors = 0;
13382
0
    ctxt->nbWarnings = 0;
13383
0
    if (ctxt->lastError.code != XML_ERR_OK)
13384
0
        xmlResetError(&ctxt->lastError);
13385
0
}
13386
13387
/**
13388
 * xmlCtxtResetPush:
13389
 * @ctxt: an XML parser context
13390
 * @chunk:  a pointer to an array of chars
13391
 * @size:  number of chars in the array
13392
 * @filename:  an optional file name or URI
13393
 * @encoding:  the document encoding, or NULL
13394
 *
13395
 * Reset a push parser context
13396
 *
13397
 * Returns 0 in case of success and 1 in case of error
13398
 */
13399
int
13400
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13401
                 int size, const char *filename, const char *encoding)
13402
0
{
13403
0
    xmlParserInputPtr input;
13404
13405
0
    if (ctxt == NULL)
13406
0
        return(1);
13407
13408
0
    xmlCtxtReset(ctxt);
13409
13410
0
    input = xmlNewPushInput(filename, chunk, size);
13411
0
    if (input == NULL)
13412
0
        return(1);
13413
13414
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13415
0
        xmlFreeInputStream(input);
13416
0
        return(1);
13417
0
    }
13418
13419
0
    if (encoding != NULL)
13420
0
        xmlSwitchEncodingName(ctxt, encoding);
13421
13422
0
    return(0);
13423
0
}
13424
13425
static int
13426
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13427
217k
{
13428
217k
    int allMask;
13429
13430
217k
    if (ctxt == NULL)
13431
0
        return(-1);
13432
13433
    /*
13434
     * XInclude options aren't handled by the parser.
13435
     *
13436
     * XML_PARSE_XINCLUDE
13437
     * XML_PARSE_NOXINCNODE
13438
     * XML_PARSE_NOBASEFIX
13439
     */
13440
217k
    allMask = XML_PARSE_RECOVER |
13441
217k
              XML_PARSE_NOENT |
13442
217k
              XML_PARSE_DTDLOAD |
13443
217k
              XML_PARSE_DTDATTR |
13444
217k
              XML_PARSE_DTDVALID |
13445
217k
              XML_PARSE_NOERROR |
13446
217k
              XML_PARSE_NOWARNING |
13447
217k
              XML_PARSE_PEDANTIC |
13448
217k
              XML_PARSE_NOBLANKS |
13449
217k
#ifdef LIBXML_SAX1_ENABLED
13450
217k
              XML_PARSE_SAX1 |
13451
217k
#endif
13452
217k
              XML_PARSE_NONET |
13453
217k
              XML_PARSE_NODICT |
13454
217k
              XML_PARSE_NSCLEAN |
13455
217k
              XML_PARSE_NOCDATA |
13456
217k
              XML_PARSE_COMPACT |
13457
217k
              XML_PARSE_OLD10 |
13458
217k
              XML_PARSE_HUGE |
13459
217k
              XML_PARSE_OLDSAX |
13460
217k
              XML_PARSE_IGNORE_ENC |
13461
217k
              XML_PARSE_BIG_LINES |
13462
217k
              XML_PARSE_NO_XXE |
13463
217k
              XML_PARSE_UNZIP |
13464
217k
              XML_PARSE_NO_SYS_CATALOG |
13465
217k
              XML_PARSE_CATALOG_PI;
13466
13467
217k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13468
13469
    /*
13470
     * For some options, struct members are historically the source
13471
     * of truth. The values are initalized from global variables and
13472
     * old code could also modify them directly. Several older API
13473
     * functions that don't take an options argument rely on these
13474
     * deprecated mechanisms.
13475
     *
13476
     * Once public access to struct members and the globals are
13477
     * disabled, we can use the options bitmask as source of
13478
     * truth, making all these struct members obsolete.
13479
     *
13480
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13481
     * loading of the external subset.
13482
     */
13483
217k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13484
217k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13485
217k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13486
217k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13487
217k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13488
217k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13489
217k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13490
217k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13491
13492
217k
    if (options & XML_PARSE_HUGE) {
13493
0
        if (ctxt->dict != NULL)
13494
0
            xmlDictSetLimit(ctxt->dict, 0);
13495
0
    }
13496
13497
217k
    ctxt->linenumbers = 1;
13498
13499
217k
    return(options & ~allMask);
13500
217k
}
13501
13502
/**
13503
 * xmlCtxtSetOptions:
13504
 * @ctxt: an XML parser context
13505
 * @options:  a bitmask of xmlParserOption values
13506
 *
13507
 * Applies the options to the parser context. Unset options are
13508
 * cleared.
13509
 *
13510
 * Available since 2.13.0. With older versions, you can use
13511
 * xmlCtxtUseOptions.
13512
 *
13513
 * XML_PARSE_RECOVER
13514
 *
13515
 * Enable "recovery" mode which allows non-wellformed documents.
13516
 * How this mode behaves exactly is unspecified and may change
13517
 * without further notice. Use of this feature is DISCOURAGED.
13518
 *
13519
 * Not supported by the push parser.
13520
 *
13521
 * XML_PARSE_NOENT
13522
 *
13523
 * Despite the confusing name, this option enables substitution
13524
 * of entities. The resulting tree won't contain any entity
13525
 * reference nodes.
13526
 *
13527
 * This option also enables loading of external entities (both
13528
 * general and parameter entities) which is dangerous. If you
13529
 * process untrusted data, it's recommended to set the
13530
 * XML_PARSE_NO_XXE option to disable loading of external
13531
 * entities.
13532
 *
13533
 * XML_PARSE_DTDLOAD
13534
 *
13535
 * Enables loading of an external DTD and the loading and
13536
 * substitution of external parameter entities. Has no effect
13537
 * if XML_PARSE_NO_XXE is set.
13538
 *
13539
 * XML_PARSE_DTDATTR
13540
 *
13541
 * Adds default attributes from the DTD to the result document.
13542
 *
13543
 * Implies XML_PARSE_DTDLOAD, but loading of external content
13544
 * can be disabled with XML_PARSE_NO_XXE.
13545
 *
13546
 * XML_PARSE_DTDVALID
13547
 *
13548
 * This option enables DTD validation which requires to load
13549
 * external DTDs and external entities (both general and
13550
 * parameter entities) unless XML_PARSE_NO_XXE was set.
13551
 *
13552
 * XML_PARSE_NO_XXE
13553
 *
13554
 * Disables loading of external DTDs or entities.
13555
 *
13556
 * Available since 2.13.0.
13557
 *
13558
 * XML_PARSE_NOERROR
13559
 *
13560
 * Disable error and warning reports to the error handlers.
13561
 * Errors are still accessible with xmlCtxtGetLastError.
13562
 *
13563
 * XML_PARSE_NOWARNING
13564
 *
13565
 * Disable warning reports.
13566
 *
13567
 * XML_PARSE_PEDANTIC
13568
 *
13569
 * Enable some pedantic warnings.
13570
 *
13571
 * XML_PARSE_NOBLANKS
13572
 *
13573
 * Remove some whitespace from the result document. Where to
13574
 * remove whitespace depends on DTD element declarations or a
13575
 * broken heuristic with unfixable bugs. Use of this option is
13576
 * DISCOURAGED.
13577
 *
13578
 * Not supported by the push parser.
13579
 *
13580
 * XML_PARSE_SAX1
13581
 *
13582
 * Always invoke the deprecated SAX1 startElement and endElement
13583
 * handlers. This option is DEPRECATED.
13584
 *
13585
 * XML_PARSE_NONET
13586
 *
13587
 * Disable network access with the builtin HTTP client.
13588
 *
13589
 * XML_PARSE_NODICT
13590
 *
13591
 * Create a document without interned strings, making all
13592
 * strings separate memory allocations.
13593
 *
13594
 * XML_PARSE_NSCLEAN
13595
 *
13596
 * Remove redundant namespace declarations from the result
13597
 * document.
13598
 *
13599
 * XML_PARSE_NOCDATA
13600
 *
13601
 * Output normal text nodes instead of CDATA nodes.
13602
 *
13603
 * XML_PARSE_COMPACT
13604
 *
13605
 * Store small strings directly in the node struct to save
13606
 * memory.
13607
 *
13608
 * XML_PARSE_OLD10
13609
 *
13610
 * Use old Name productions from before XML 1.0 Fifth Edition.
13611
 * This option is DEPRECATED.
13612
 *
13613
 * XML_PARSE_HUGE
13614
 *
13615
 * Relax some internal limits.
13616
 *
13617
 * Maximum size of text nodes, tags, comments, processing instructions,
13618
 * CDATA sections, entity values
13619
 *
13620
 * normal: 10M
13621
 * huge:    1B
13622
 *
13623
 * Maximum size of names, system literals, pubid literals
13624
 *
13625
 * normal: 50K
13626
 * huge:   10M
13627
 *
13628
 * Maximum nesting depth of elements
13629
 *
13630
 * normal:  256
13631
 * huge:   2048
13632
 *
13633
 * Maximum nesting depth of entities
13634
 *
13635
 * normal: 20
13636
 * huge:   40
13637
 *
13638
 * XML_PARSE_OLDSAX
13639
 *
13640
 * Enable an unspecified legacy mode for SAX parsers. This
13641
 * option is DEPRECATED.
13642
 *
13643
 * XML_PARSE_IGNORE_ENC
13644
 *
13645
 * Ignore the encoding in the XML declaration. This option is
13646
 * mostly unneeded these days. The only effect is to enforce
13647
 * UTF-8 decoding of ASCII-like data.
13648
 *
13649
 * XML_PARSE_BIG_LINES
13650
 *
13651
 * Enable reporting of line numbers larger than 65535.
13652
 *
13653
 * XML_PARSE_UNZIP
13654
 *
13655
 * Enable input decompression. Setting this option is discouraged
13656
 * to avoid zip bombs.
13657
 *
13658
 * Available since 2.14.0.
13659
 *
13660
 * XML_PARSE_NO_SYS_CATALOG
13661
 *
13662
 * Disables the global system XML catalog.
13663
 *
13664
 * Available since 2.14.0.
13665
 *
13666
 * XML_PARSE_CATALOG_PI
13667
 *
13668
 * Enable XML catalog processing instructions.
13669
 *
13670
 * Available since 2.14.0.
13671
 *
13672
 * Returns 0 in case of success, the set of unknown or unimplemented options
13673
 *         in case of error.
13674
 */
13675
int
13676
xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
13677
0
{
13678
0
#ifdef LIBXML_HTML_ENABLED
13679
0
    if ((ctxt != NULL) && (ctxt->html))
13680
0
        return(htmlCtxtSetOptions(ctxt, options));
13681
0
#endif
13682
13683
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13684
0
}
13685
13686
/**
13687
 * xmlCtxtGetOptions:
13688
 * @ctxt: an XML parser context
13689
 *
13690
 * Get the current options of the parser context.
13691
 *
13692
 * Available since 2.14.0.
13693
 *
13694
 * Returns the current options set in the parser context, or -1 if ctxt is NULL.
13695
 */
13696
int
13697
xmlCtxtGetOptions(xmlParserCtxtPtr ctxt)
13698
0
{
13699
0
    if (ctxt == NULL)
13700
0
        return(-1);
13701
13702
0
    return(ctxt->options);
13703
0
}
13704
13705
/**
13706
 * xmlCtxtUseOptions:
13707
 * @ctxt: an XML parser context
13708
 * @options:  a combination of xmlParserOption
13709
 *
13710
 * DEPRECATED: Use xmlCtxtSetOptions.
13711
 *
13712
 * Applies the options to the parser context. The following options
13713
 * are never cleared and can only be enabled:
13714
 *
13715
 * XML_PARSE_NOERROR
13716
 * XML_PARSE_NOWARNING
13717
 * XML_PARSE_NONET
13718
 * XML_PARSE_NSCLEAN
13719
 * XML_PARSE_NOCDATA
13720
 * XML_PARSE_COMPACT
13721
 * XML_PARSE_OLD10
13722
 * XML_PARSE_HUGE
13723
 * XML_PARSE_OLDSAX
13724
 * XML_PARSE_IGNORE_ENC
13725
 * XML_PARSE_BIG_LINES
13726
 *
13727
 * Returns 0 in case of success, the set of unknown or unimplemented options
13728
 *         in case of error.
13729
 */
13730
int
13731
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13732
217k
{
13733
217k
    int keepMask;
13734
13735
217k
#ifdef LIBXML_HTML_ENABLED
13736
217k
    if ((ctxt != NULL) && (ctxt->html))
13737
0
        return(htmlCtxtUseOptions(ctxt, options));
13738
217k
#endif
13739
13740
    /*
13741
     * For historic reasons, some options can only be enabled.
13742
     */
13743
217k
    keepMask = XML_PARSE_NOERROR |
13744
217k
               XML_PARSE_NOWARNING |
13745
217k
               XML_PARSE_NONET |
13746
217k
               XML_PARSE_NSCLEAN |
13747
217k
               XML_PARSE_NOCDATA |
13748
217k
               XML_PARSE_COMPACT |
13749
217k
               XML_PARSE_OLD10 |
13750
217k
               XML_PARSE_HUGE |
13751
217k
               XML_PARSE_OLDSAX |
13752
217k
               XML_PARSE_IGNORE_ENC |
13753
217k
               XML_PARSE_BIG_LINES;
13754
13755
217k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13756
217k
}
13757
13758
/**
13759
 * xmlCtxtSetMaxAmplification:
13760
 * @ctxt: an XML parser context
13761
 * @maxAmpl:  maximum amplification factor
13762
 *
13763
 * To protect against exponential entity expansion ("billion laughs"), the
13764
 * size of serialized output is (roughly) limited to the input size
13765
 * multiplied by this factor. The default value is 5.
13766
 *
13767
 * When working with documents making heavy use of entity expansion, it can
13768
 * be necessary to increase the value. For security reasons, this should only
13769
 * be considered when processing trusted input.
13770
 */
13771
void
13772
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
13773
0
{
13774
0
    ctxt->maxAmpl = maxAmpl;
13775
0
}
13776
13777
/**
13778
 * xmlCtxtParseDocument:
13779
 * @ctxt:  an XML parser context
13780
 * @input:  parser input
13781
 *
13782
 * Parse an XML document and return the resulting document tree.
13783
 * Takes ownership of the input object.
13784
 *
13785
 * Available since 2.13.0.
13786
 *
13787
 * Returns the resulting document tree or NULL
13788
 */
13789
xmlDocPtr
13790
xmlCtxtParseDocument(xmlParserCtxtPtr ctxt, xmlParserInputPtr input)
13791
0
{
13792
0
    xmlDocPtr ret = NULL;
13793
13794
0
    if ((ctxt == NULL) || (input == NULL)) {
13795
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
13796
0
        xmlFreeInputStream(input);
13797
0
        return(NULL);
13798
0
    }
13799
13800
    /* assert(ctxt->inputNr == 0); */
13801
0
    while (ctxt->inputNr > 0)
13802
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13803
13804
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13805
0
        xmlFreeInputStream(input);
13806
0
        return(NULL);
13807
0
    }
13808
13809
0
    xmlParseDocument(ctxt);
13810
13811
0
    ret = xmlCtxtGetDocument(ctxt);
13812
13813
    /* assert(ctxt->inputNr == 1); */
13814
0
    while (ctxt->inputNr > 0)
13815
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13816
13817
0
    return(ret);
13818
0
}
13819
13820
/**
13821
 * xmlReadDoc:
13822
 * @cur:  a pointer to a zero terminated string
13823
 * @URL:  base URL (optional)
13824
 * @encoding:  the document encoding (optional)
13825
 * @options:  a combination of xmlParserOption
13826
 *
13827
 * Convenience function to parse an XML document from a
13828
 * zero-terminated string.
13829
 *
13830
 * See xmlCtxtReadDoc for details.
13831
 *
13832
 * Returns the resulting document tree
13833
 */
13834
xmlDocPtr
13835
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13836
           int options)
13837
0
{
13838
0
    xmlParserCtxtPtr ctxt;
13839
0
    xmlParserInputPtr input;
13840
0
    xmlDocPtr doc = NULL;
13841
13842
0
    ctxt = xmlNewParserCtxt();
13843
0
    if (ctxt == NULL)
13844
0
        return(NULL);
13845
13846
0
    xmlCtxtUseOptions(ctxt, options);
13847
13848
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13849
0
                                      XML_INPUT_BUF_STATIC);
13850
13851
0
    if (input != NULL)
13852
0
        doc = xmlCtxtParseDocument(ctxt, input);
13853
13854
0
    xmlFreeParserCtxt(ctxt);
13855
0
    return(doc);
13856
0
}
13857
13858
/**
13859
 * xmlReadFile:
13860
 * @filename:  a file or URL
13861
 * @encoding:  the document encoding (optional)
13862
 * @options:  a combination of xmlParserOption
13863
 *
13864
 * Convenience function to parse an XML file from the filesystem,
13865
 * the network or a global user-defined resource loader.
13866
 *
13867
 * This function always enables the XML_PARSE_UNZIP option for
13868
 * backward compatibility. If a "-" filename is passed, it will
13869
 * read from stdin. Both of these features are potentially
13870
 * insecure and might be removed from later versions.
13871
 *
13872
 * See xmlCtxtReadFile for details.
13873
 *
13874
 * Returns the resulting document tree
13875
 */
13876
xmlDocPtr
13877
xmlReadFile(const char *filename, const char *encoding, int options)
13878
0
{
13879
0
    xmlParserCtxtPtr ctxt;
13880
0
    xmlParserInputPtr input;
13881
0
    xmlDocPtr doc = NULL;
13882
13883
0
    ctxt = xmlNewParserCtxt();
13884
0
    if (ctxt == NULL)
13885
0
        return(NULL);
13886
13887
0
    options |= XML_PARSE_UNZIP;
13888
13889
0
    xmlCtxtUseOptions(ctxt, options);
13890
13891
    /*
13892
     * Backward compatibility for users of command line utilities like
13893
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13894
     * should be removed at some point.
13895
     */
13896
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13897
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13898
0
                                      encoding, 0);
13899
0
    else
13900
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13901
13902
0
    if (input != NULL)
13903
0
        doc = xmlCtxtParseDocument(ctxt, input);
13904
13905
0
    xmlFreeParserCtxt(ctxt);
13906
0
    return(doc);
13907
0
}
13908
13909
/**
13910
 * xmlReadMemory:
13911
 * @buffer:  a pointer to a char array
13912
 * @size:  the size of the array
13913
 * @url:  base URL (optional)
13914
 * @encoding:  the document encoding (optional)
13915
 * @options:  a combination of xmlParserOption
13916
 *
13917
 * Parse an XML in-memory document and build a tree. The input buffer must
13918
 * not contain a terminating null byte.
13919
 *
13920
 * See xmlCtxtReadMemory for details.
13921
 *
13922
 * Returns the resulting document tree
13923
 */
13924
xmlDocPtr
13925
xmlReadMemory(const char *buffer, int size, const char *url,
13926
              const char *encoding, int options)
13927
0
{
13928
0
    xmlParserCtxtPtr ctxt;
13929
0
    xmlParserInputPtr input;
13930
0
    xmlDocPtr doc = NULL;
13931
13932
0
    if (size < 0)
13933
0
  return(NULL);
13934
13935
0
    ctxt = xmlNewParserCtxt();
13936
0
    if (ctxt == NULL)
13937
0
        return(NULL);
13938
13939
0
    xmlCtxtUseOptions(ctxt, options);
13940
13941
0
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13942
0
                                      XML_INPUT_BUF_STATIC);
13943
13944
0
    if (input != NULL)
13945
0
        doc = xmlCtxtParseDocument(ctxt, input);
13946
13947
0
    xmlFreeParserCtxt(ctxt);
13948
0
    return(doc);
13949
0
}
13950
13951
/**
13952
 * xmlReadFd:
13953
 * @fd:  an open file descriptor
13954
 * @URL:  base URL (optional)
13955
 * @encoding:  the document encoding (optional)
13956
 * @options:  a combination of xmlParserOption
13957
 *
13958
 * Parse an XML from a file descriptor and build a tree.
13959
 *
13960
 * See xmlCtxtReadFd for details.
13961
 *
13962
 * NOTE that the file descriptor will not be closed when the
13963
 * context is freed or reset.
13964
 *
13965
 * Returns the resulting document tree
13966
 */
13967
xmlDocPtr
13968
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13969
0
{
13970
0
    xmlParserCtxtPtr ctxt;
13971
0
    xmlParserInputPtr input;
13972
0
    xmlDocPtr doc = NULL;
13973
13974
0
    ctxt = xmlNewParserCtxt();
13975
0
    if (ctxt == NULL)
13976
0
        return(NULL);
13977
13978
0
    xmlCtxtUseOptions(ctxt, options);
13979
13980
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13981
13982
0
    if (input != NULL)
13983
0
        doc = xmlCtxtParseDocument(ctxt, input);
13984
13985
0
    xmlFreeParserCtxt(ctxt);
13986
0
    return(doc);
13987
0
}
13988
13989
/**
13990
 * xmlReadIO:
13991
 * @ioread:  an I/O read function
13992
 * @ioclose:  an I/O close function (optional)
13993
 * @ioctx:  an I/O handler
13994
 * @URL:  base URL (optional)
13995
 * @encoding:  the document encoding (optional)
13996
 * @options:  a combination of xmlParserOption
13997
 *
13998
 * Parse an XML document from I/O functions and context and build a tree.
13999
 *
14000
 * See xmlCtxtReadIO for details.
14001
 *
14002
 * Returns the resulting document tree
14003
 */
14004
xmlDocPtr
14005
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14006
          void *ioctx, const char *URL, const char *encoding, int options)
14007
0
{
14008
0
    xmlParserCtxtPtr ctxt;
14009
0
    xmlParserInputPtr input;
14010
0
    xmlDocPtr doc = NULL;
14011
14012
0
    ctxt = xmlNewParserCtxt();
14013
0
    if (ctxt == NULL)
14014
0
        return(NULL);
14015
14016
0
    xmlCtxtUseOptions(ctxt, options);
14017
14018
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
14019
0
                                  encoding, 0);
14020
14021
0
    if (input != NULL)
14022
0
        doc = xmlCtxtParseDocument(ctxt, input);
14023
14024
0
    xmlFreeParserCtxt(ctxt);
14025
0
    return(doc);
14026
0
}
14027
14028
/**
14029
 * xmlCtxtReadDoc:
14030
 * @ctxt:  an XML parser context
14031
 * @str:  a pointer to a zero terminated string
14032
 * @URL:  base URL (optional)
14033
 * @encoding:  the document encoding (optional)
14034
 * @options:  a combination of xmlParserOption
14035
 *
14036
 * Parse an XML in-memory document and build a tree.
14037
 *
14038
 * @URL is used as base to resolve external entities and for error
14039
 * reporting.
14040
 *
14041
 * See xmlCtxtUseOptions for details.
14042
 *
14043
 * Returns the resulting document tree
14044
 */
14045
xmlDocPtr
14046
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
14047
               const char *URL, const char *encoding, int options)
14048
0
{
14049
0
    xmlParserInputPtr input;
14050
14051
0
    if (ctxt == NULL)
14052
0
        return(NULL);
14053
14054
0
    xmlCtxtReset(ctxt);
14055
0
    xmlCtxtUseOptions(ctxt, options);
14056
14057
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
14058
0
                                      XML_INPUT_BUF_STATIC);
14059
0
    if (input == NULL)
14060
0
        return(NULL);
14061
14062
0
    return(xmlCtxtParseDocument(ctxt, input));
14063
0
}
14064
14065
/**
14066
 * xmlCtxtReadFile:
14067
 * @ctxt:  an XML parser context
14068
 * @filename:  a file or URL
14069
 * @encoding:  the document encoding (optional)
14070
 * @options:  a combination of xmlParserOption
14071
 *
14072
 * Parse an XML file from the filesystem, the network or a user-defined
14073
 * resource loader.
14074
 *
14075
 * This function always enables the XML_PARSE_UNZIP option for
14076
 * backward compatibility. This feature is potentially insecure
14077
 * and might be removed from later versions.
14078
 *
14079
 * Returns the resulting document tree
14080
 */
14081
xmlDocPtr
14082
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14083
                const char *encoding, int options)
14084
0
{
14085
0
    xmlParserInputPtr input;
14086
14087
0
    if (ctxt == NULL)
14088
0
        return(NULL);
14089
14090
0
    options |= XML_PARSE_UNZIP;
14091
14092
0
    xmlCtxtReset(ctxt);
14093
0
    xmlCtxtUseOptions(ctxt, options);
14094
14095
0
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
14096
0
    if (input == NULL)
14097
0
        return(NULL);
14098
14099
0
    return(xmlCtxtParseDocument(ctxt, input));
14100
0
}
14101
14102
/**
14103
 * xmlCtxtReadMemory:
14104
 * @ctxt:  an XML parser context
14105
 * @buffer:  a pointer to a char array
14106
 * @size:  the size of the array
14107
 * @URL:  base URL (optional)
14108
 * @encoding:  the document encoding (optional)
14109
 * @options:  a combination of xmlParserOption
14110
 *
14111
 * Parse an XML in-memory document and build a tree. The input buffer must
14112
 * not contain a terminating null byte.
14113
 *
14114
 * @URL is used as base to resolve external entities and for error
14115
 * reporting.
14116
 *
14117
 * See xmlCtxtUseOptions for details.
14118
 *
14119
 * Returns the resulting document tree
14120
 */
14121
xmlDocPtr
14122
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14123
                  const char *URL, const char *encoding, int options)
14124
0
{
14125
0
    xmlParserInputPtr input;
14126
14127
0
    if ((ctxt == NULL) || (size < 0))
14128
0
        return(NULL);
14129
14130
0
    xmlCtxtReset(ctxt);
14131
0
    xmlCtxtUseOptions(ctxt, options);
14132
14133
0
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
14134
0
                                      XML_INPUT_BUF_STATIC);
14135
0
    if (input == NULL)
14136
0
        return(NULL);
14137
14138
0
    return(xmlCtxtParseDocument(ctxt, input));
14139
0
}
14140
14141
/**
14142
 * xmlCtxtReadFd:
14143
 * @ctxt:  an XML parser context
14144
 * @fd:  an open file descriptor
14145
 * @URL:  base URL (optional)
14146
 * @encoding:  the document encoding (optional)
14147
 * @options:  a combination of xmlParserOption
14148
 *
14149
 * Parse an XML document from a file descriptor and build a tree.
14150
 *
14151
 * NOTE that the file descriptor will not be closed when the
14152
 * context is freed or reset.
14153
 *
14154
 * @URL is used as base to resolve external entities and for error
14155
 * reporting.
14156
 *
14157
 * See xmlCtxtUseOptions for details.
14158
 *
14159
 * Returns the resulting document tree
14160
 */
14161
xmlDocPtr
14162
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14163
              const char *URL, const char *encoding, int options)
14164
0
{
14165
0
    xmlParserInputPtr input;
14166
14167
0
    if (ctxt == NULL)
14168
0
        return(NULL);
14169
14170
0
    xmlCtxtReset(ctxt);
14171
0
    xmlCtxtUseOptions(ctxt, options);
14172
14173
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
14174
0
    if (input == NULL)
14175
0
        return(NULL);
14176
14177
0
    return(xmlCtxtParseDocument(ctxt, input));
14178
0
}
14179
14180
/**
14181
 * xmlCtxtReadIO:
14182
 * @ctxt:  an XML parser context
14183
 * @ioread:  an I/O read function
14184
 * @ioclose:  an I/O close function
14185
 * @ioctx:  an I/O handler
14186
 * @URL:  the base URL to use for the document
14187
 * @encoding:  the document encoding, or NULL
14188
 * @options:  a combination of xmlParserOption
14189
 *
14190
 * parse an XML document from I/O functions and source and build a tree.
14191
 * This reuses the existing @ctxt parser context
14192
 *
14193
 * @URL is used as base to resolve external entities and for error
14194
 * reporting.
14195
 *
14196
 * See xmlCtxtUseOptions for details.
14197
 *
14198
 * Returns the resulting document tree
14199
 */
14200
xmlDocPtr
14201
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14202
              xmlInputCloseCallback ioclose, void *ioctx,
14203
        const char *URL,
14204
              const char *encoding, int options)
14205
0
{
14206
0
    xmlParserInputPtr input;
14207
14208
0
    if (ctxt == NULL)
14209
0
        return(NULL);
14210
14211
0
    xmlCtxtReset(ctxt);
14212
0
    xmlCtxtUseOptions(ctxt, options);
14213
14214
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
14215
0
                                  encoding, 0);
14216
0
    if (input == NULL)
14217
0
        return(NULL);
14218
14219
0
    return(xmlCtxtParseDocument(ctxt, input));
14220
0
}
14221