Coverage Report

Created: 2024-09-06 07:53

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#ifdef LIBXML_CATALOG_ENABLED
66
#include <libxml/catalog.h>
67
#endif
68
69
#include "private/buf.h"
70
#include "private/dict.h"
71
#include "private/entities.h"
72
#include "private/error.h"
73
#include "private/html.h"
74
#include "private/io.h"
75
#include "private/parser.h"
76
77
1.36M
#define NS_INDEX_EMPTY  INT_MAX
78
71.0k
#define NS_INDEX_XML    (INT_MAX - 1)
79
497k
#define URI_HASH_EMPTY  0xD943A04E
80
9.89k
#define URI_HASH_XML    0xF0451F02
81
82
#ifndef STDIN_FILENO
83
0
  #define STDIN_FILENO 0
84
#endif
85
86
struct _xmlStartTag {
87
    const xmlChar *prefix;
88
    const xmlChar *URI;
89
    int line;
90
    int nsNr;
91
};
92
93
typedef struct {
94
    void *saxData;
95
    unsigned prefixHashValue;
96
    unsigned uriHashValue;
97
    unsigned elementId;
98
    int oldIndex;
99
} xmlParserNsExtra;
100
101
typedef struct {
102
    unsigned hashValue;
103
    int index;
104
} xmlParserNsBucket;
105
106
struct _xmlParserNsData {
107
    xmlParserNsExtra *extra;
108
109
    unsigned hashSize;
110
    unsigned hashElems;
111
    xmlParserNsBucket *hash;
112
113
    unsigned elementId;
114
    int defaultNsIndex;
115
    int minNsIndex;
116
};
117
118
struct _xmlAttrHashBucket {
119
    int index;
120
};
121
122
static int
123
xmlParseElementStart(xmlParserCtxtPtr ctxt);
124
125
static void
126
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
127
128
static xmlEntityPtr
129
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
130
131
static const xmlChar *
132
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
133
134
/************************************************************************
135
 *                  *
136
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
137
 *                  *
138
 ************************************************************************/
139
140
#define XML_PARSER_BIG_ENTITY 1000
141
#define XML_PARSER_LOT_ENTITY 5000
142
143
/*
144
 * Constants for protection against abusive entity expansion
145
 * ("billion laughs").
146
 */
147
148
/*
149
 * A certain amount of entity expansion which is always allowed.
150
 */
151
607k
#define XML_PARSER_ALLOWED_EXPANSION 1000000
152
153
/*
154
 * Fixed cost for each entity reference. This crudely models processing time
155
 * as well to protect, for example, against exponential expansion of empty
156
 * or very short entities.
157
 */
158
609k
#define XML_ENT_FIXED_COST 20
159
160
/**
161
 * xmlParserMaxDepth:
162
 *
163
 * arbitrary depth limit for the XML documents that we allow to
164
 * process. This is not a limitation of the parser but a safety
165
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
166
 * parser option.
167
 */
168
const unsigned int xmlParserMaxDepth = 256;
169
170
171
172
64.7M
#define XML_PARSER_BIG_BUFFER_SIZE 300
173
140k
#define XML_PARSER_BUFFER_SIZE 100
174
56.0k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
175
176
/**
177
 * XML_PARSER_CHUNK_SIZE
178
 *
179
 * When calling GROW that's the minimal amount of data
180
 * the parser expects to have received. It is not a hard
181
 * limit but an optimization when reading strings like Names
182
 * It is not strictly needed as long as the input's available characters
183
 * are followed by 0, which should be provided by the I/O level
184
 */
185
#define XML_PARSER_CHUNK_SIZE 100
186
187
/**
188
 * xmlParserVersion:
189
 *
190
 * Constant string describing the internal version of the library
191
 */
192
const char *const
193
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
194
195
/*
196
 * List of XML prefixed PI allowed by W3C specs
197
 */
198
199
static const char* const xmlW3CPIs[] = {
200
    "xml-stylesheet",
201
    "xml-model",
202
    NULL
203
};
204
205
206
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
207
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
208
                                              const xmlChar **str);
209
210
static void
211
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
212
213
static int
214
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
215
216
/************************************************************************
217
 *                  *
218
 *    Some factorized error routines        *
219
 *                  *
220
 ************************************************************************/
221
222
static void
223
0
xmlErrMemory(xmlParserCtxtPtr ctxt) {
224
0
    xmlCtxtErrMemory(ctxt);
225
0
}
226
227
/**
228
 * xmlErrAttributeDup:
229
 * @ctxt:  an XML parser context
230
 * @prefix:  the attribute prefix
231
 * @localname:  the attribute localname
232
 *
233
 * Handle a redefinition of attribute error
234
 */
235
static void
236
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
237
                   const xmlChar * localname)
238
371k
{
239
371k
    if (prefix == NULL)
240
366k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
366k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
242
366k
                   "Attribute %s redefined\n", localname);
243
5.16k
    else
244
5.16k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
245
5.16k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
246
5.16k
                   "Attribute %s:%s redefined\n", prefix, localname);
247
371k
}
248
249
/**
250
 * xmlFatalErrMsg:
251
 * @ctxt:  an XML parser context
252
 * @error:  the error number
253
 * @msg:  the error message
254
 *
255
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
256
 */
257
static void LIBXML_ATTR_FORMAT(3,0)
258
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
259
               const char *msg)
260
3.51M
{
261
3.51M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
262
3.51M
               NULL, NULL, NULL, 0, "%s", msg);
263
3.51M
}
264
265
/**
266
 * xmlWarningMsg:
267
 * @ctxt:  an XML parser context
268
 * @error:  the error number
269
 * @msg:  the error message
270
 * @str1:  extra data
271
 * @str2:  extra data
272
 *
273
 * Handle a warning.
274
 */
275
void LIBXML_ATTR_FORMAT(3,0)
276
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
277
              const char *msg, const xmlChar *str1, const xmlChar *str2)
278
27.0k
{
279
27.0k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
280
27.0k
               str1, str2, NULL, 0, msg, str1, str2);
281
27.0k
}
282
283
/**
284
 * xmlValidityError:
285
 * @ctxt:  an XML parser context
286
 * @error:  the error number
287
 * @msg:  the error message
288
 * @str1:  extra data
289
 *
290
 * Handle a validity error.
291
 */
292
static void LIBXML_ATTR_FORMAT(3,0)
293
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
294
              const char *msg, const xmlChar *str1, const xmlChar *str2)
295
792
{
296
792
    ctxt->valid = 0;
297
298
792
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
299
792
               str1, str2, NULL, 0, msg, str1, str2);
300
792
}
301
302
/**
303
 * xmlFatalErrMsgInt:
304
 * @ctxt:  an XML parser context
305
 * @error:  the error number
306
 * @msg:  the error message
307
 * @val:  an integer value
308
 *
309
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
310
 */
311
static void LIBXML_ATTR_FORMAT(3,0)
312
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
313
                  const char *msg, int val)
314
8.84M
{
315
8.84M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
316
8.84M
               NULL, NULL, NULL, val, msg, val);
317
8.84M
}
318
319
/**
320
 * xmlFatalErrMsgStrIntStr:
321
 * @ctxt:  an XML parser context
322
 * @error:  the error number
323
 * @msg:  the error message
324
 * @str1:  a string info
325
 * @val:  an integer value
326
 * @str2:  a string info
327
 *
328
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
329
 */
330
static void LIBXML_ATTR_FORMAT(3,0)
331
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
332
                  const char *msg, const xmlChar *str1, int val,
333
      const xmlChar *str2)
334
249k
{
335
249k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
336
249k
               str1, str2, NULL, val, msg, str1, val, str2);
337
249k
}
338
339
/**
340
 * xmlFatalErrMsgStr:
341
 * @ctxt:  an XML parser context
342
 * @error:  the error number
343
 * @msg:  the error message
344
 * @val:  a string value
345
 *
346
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
347
 */
348
static void LIBXML_ATTR_FORMAT(3,0)
349
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
350
                  const char *msg, const xmlChar * val)
351
899k
{
352
899k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
353
899k
               val, NULL, NULL, 0, msg, val);
354
899k
}
355
356
/**
357
 * xmlErrMsgStr:
358
 * @ctxt:  an XML parser context
359
 * @error:  the error number
360
 * @msg:  the error message
361
 * @val:  a string value
362
 *
363
 * Handle a non fatal parser error
364
 */
365
static void LIBXML_ATTR_FORMAT(3,0)
366
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
367
                  const char *msg, const xmlChar * val)
368
0
{
369
0
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
370
0
               val, NULL, NULL, 0, msg, val);
371
0
}
372
373
/**
374
 * xmlNsErr:
375
 * @ctxt:  an XML parser context
376
 * @error:  the error number
377
 * @msg:  the message
378
 * @info1:  extra information string
379
 * @info2:  extra information string
380
 *
381
 * Handle a namespace error (XML_ERR_ERROR, not fatal); clears ctxt->nsWellFormed
382
 */
383
static void LIBXML_ATTR_FORMAT(3,0)
384
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
385
         const char *msg,
386
         const xmlChar * info1, const xmlChar * info2,
387
         const xmlChar * info3)
388
245k
{
389
245k
    ctxt->nsWellFormed = 0;
390
391
245k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
392
245k
               info1, info2, info3, 0, msg, info1, info2, info3);
393
245k
}
394
395
/**
396
 * xmlNsWarn:
397
 * @ctxt:  an XML parser context
398
 * @error:  the error number
399
 * @msg:  the message
400
 * @info1:  extra information string
401
 * @info2:  extra information string
402
 *
403
 * Handle a namespace warning
404
 */
405
static void LIBXML_ATTR_FORMAT(3,0)
406
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
407
         const char *msg,
408
         const xmlChar * info1, const xmlChar * info2,
409
         const xmlChar * info3)
410
7.58k
{
411
7.58k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
412
7.58k
               info1, info2, info3, 0, msg, info1, info2, info3);
413
7.58k
}
414
415
static void
416
1.82M
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
417
1.82M
    if (val > ULONG_MAX - *dst)
418
0
        *dst = ULONG_MAX;
419
1.82M
    else
420
1.82M
        *dst += val;
421
1.82M
}
422
423
static void
424
609k
xmlSaturatedAddSizeT(unsigned long *dst, unsigned long val) {
425
609k
    if (val > ULONG_MAX - *dst)
426
0
        *dst = ULONG_MAX;
427
609k
    else
428
609k
        *dst += val;
429
609k
}
430
431
/**
432
 * xmlParserEntityCheck:
433
 * @ctxt:  parser context
434
 * @extra:  sum of unexpanded entity sizes
435
 *
436
 * Check for non-linear entity expansion behaviour.
437
 *
438
 * In some cases like xmlExpandEntityInAttValue, this function is called
439
 * for each, possibly nested entity and its unexpanded content length.
440
 *
441
 * In other cases like xmlParseReference, it's only called for each
442
 * top-level entity with its unexpanded content length plus the sum of
443
 * the unexpanded content lengths (plus fixed cost) of all nested
444
 * entities.
445
 *
446
 * Summing the unexpanded lengths also adds the length of the reference.
447
 * This is by design. Taking the length of the entity name into account
448
 * discourages attacks that try to waste CPU time with abusively long
449
 * entity names. See test/recurse/lol6.xml for example. Each call also
450
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
451
 * short entities.
452
 *
453
 * Returns 1 on error, 0 on success.
454
 */
455
static int
456
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
457
609k
{
458
609k
    unsigned long consumed;
459
609k
    unsigned long *expandedSize;
460
609k
    xmlParserInputPtr input = ctxt->input;
461
609k
    xmlEntityPtr entity = input->entity;
462
463
609k
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
464
2.26k
        return(0);
465
466
    /*
467
     * Compute total consumed bytes so far, including input streams of
468
     * external entities.
469
     */
470
607k
    consumed = input->consumed;
471
607k
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
472
607k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
473
474
607k
    if (entity)
475
1.22k
        expandedSize = &entity->expandedSize;
476
605k
    else
477
605k
        expandedSize = &ctxt->sizeentcopy;
478
479
    /*
480
     * Add extra cost and some fixed cost.
481
     */
482
607k
    xmlSaturatedAdd(expandedSize, extra);
483
607k
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
484
485
    /*
486
     * It's important to always use saturation arithmetic when tracking
487
     * entity sizes to make the size checks reliable. If "sizeentcopy"
488
     * overflows, we have to abort.
489
     */
490
607k
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
491
607k
        ((*expandedSize >= ULONG_MAX) ||
492
235k
         (*expandedSize / ctxt->maxAmpl > consumed))) {
493
114
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
494
114
                       "Maximum entity amplification factor exceeded, see "
495
114
                       "xmlCtxtSetMaxAmplification.\n");
496
114
        xmlHaltParser(ctxt);
497
114
        return(1);
498
114
    }
499
500
607k
    return(0);
501
607k
}
502
503
/************************************************************************
504
 *                  *
505
 *    Library wide options          *
506
 *                  *
507
 ************************************************************************/
508
509
/**
510
  * xmlHasFeature:
511
  * @feature: the feature to be examined
512
  *
513
  * Examines if the library has been compiled with a given feature.
514
  *
515
  * Returns a non-zero value if the feature exists, otherwise zero.
516
  * Returns zero (0) if the feature does not exist or an unknown
517
  * feature is requested, non-zero otherwise.
518
  */
519
int
520
xmlHasFeature(xmlFeature feature)
521
0
{
522
0
    switch (feature) {
523
0
  case XML_WITH_THREAD:
524
0
#ifdef LIBXML_THREAD_ENABLED
525
0
      return(1);
526
#else
527
      return(0);
528
#endif
529
0
        case XML_WITH_TREE:
530
0
            return(1);
531
0
        case XML_WITH_OUTPUT:
532
0
#ifdef LIBXML_OUTPUT_ENABLED
533
0
            return(1);
534
#else
535
            return(0);
536
#endif
537
0
        case XML_WITH_PUSH:
538
0
#ifdef LIBXML_PUSH_ENABLED
539
0
            return(1);
540
#else
541
            return(0);
542
#endif
543
0
        case XML_WITH_READER:
544
0
#ifdef LIBXML_READER_ENABLED
545
0
            return(1);
546
#else
547
            return(0);
548
#endif
549
0
        case XML_WITH_PATTERN:
550
0
#ifdef LIBXML_PATTERN_ENABLED
551
0
            return(1);
552
#else
553
            return(0);
554
#endif
555
0
        case XML_WITH_WRITER:
556
0
#ifdef LIBXML_WRITER_ENABLED
557
0
            return(1);
558
#else
559
            return(0);
560
#endif
561
0
        case XML_WITH_SAX1:
562
0
#ifdef LIBXML_SAX1_ENABLED
563
0
            return(1);
564
#else
565
            return(0);
566
#endif
567
0
        case XML_WITH_HTTP:
568
#ifdef LIBXML_HTTP_ENABLED
569
            return(1);
570
#else
571
0
            return(0);
572
0
#endif
573
0
        case XML_WITH_VALID:
574
0
#ifdef LIBXML_VALID_ENABLED
575
0
            return(1);
576
#else
577
            return(0);
578
#endif
579
0
        case XML_WITH_HTML:
580
0
#ifdef LIBXML_HTML_ENABLED
581
0
            return(1);
582
#else
583
            return(0);
584
#endif
585
0
        case XML_WITH_LEGACY:
586
#ifdef LIBXML_LEGACY_ENABLED
587
            return(1);
588
#else
589
0
            return(0);
590
0
#endif
591
0
        case XML_WITH_C14N:
592
0
#ifdef LIBXML_C14N_ENABLED
593
0
            return(1);
594
#else
595
            return(0);
596
#endif
597
0
        case XML_WITH_CATALOG:
598
0
#ifdef LIBXML_CATALOG_ENABLED
599
0
            return(1);
600
#else
601
            return(0);
602
#endif
603
0
        case XML_WITH_XPATH:
604
0
#ifdef LIBXML_XPATH_ENABLED
605
0
            return(1);
606
#else
607
            return(0);
608
#endif
609
0
        case XML_WITH_XPTR:
610
0
#ifdef LIBXML_XPTR_ENABLED
611
0
            return(1);
612
#else
613
            return(0);
614
#endif
615
0
        case XML_WITH_XINCLUDE:
616
0
#ifdef LIBXML_XINCLUDE_ENABLED
617
0
            return(1);
618
#else
619
            return(0);
620
#endif
621
0
        case XML_WITH_ICONV:
622
0
#ifdef LIBXML_ICONV_ENABLED
623
0
            return(1);
624
#else
625
            return(0);
626
#endif
627
0
        case XML_WITH_ISO8859X:
628
0
#ifdef LIBXML_ISO8859X_ENABLED
629
0
            return(1);
630
#else
631
            return(0);
632
#endif
633
0
        case XML_WITH_UNICODE:
634
0
#ifdef LIBXML_UNICODE_ENABLED
635
0
            return(1);
636
#else
637
            return(0);
638
#endif
639
0
        case XML_WITH_REGEXP:
640
0
#ifdef LIBXML_REGEXP_ENABLED
641
0
            return(1);
642
#else
643
            return(0);
644
#endif
645
0
        case XML_WITH_AUTOMATA:
646
0
#ifdef LIBXML_REGEXP_ENABLED
647
0
            return(1);
648
#else
649
            return(0);
650
#endif
651
0
        case XML_WITH_EXPR:
652
#ifdef LIBXML_EXPR_ENABLED
653
            return(1);
654
#else
655
0
            return(0);
656
0
#endif
657
0
        case XML_WITH_SCHEMAS:
658
0
#ifdef LIBXML_SCHEMAS_ENABLED
659
0
            return(1);
660
#else
661
            return(0);
662
#endif
663
0
        case XML_WITH_SCHEMATRON:
664
0
#ifdef LIBXML_SCHEMATRON_ENABLED
665
0
            return(1);
666
#else
667
            return(0);
668
#endif
669
0
        case XML_WITH_MODULES:
670
0
#ifdef LIBXML_MODULES_ENABLED
671
0
            return(1);
672
#else
673
            return(0);
674
#endif
675
0
        case XML_WITH_DEBUG:
676
#ifdef LIBXML_DEBUG_ENABLED
677
            return(1);
678
#else
679
0
            return(0);
680
0
#endif
681
0
        case XML_WITH_DEBUG_MEM:
682
0
            return(0);
683
0
        case XML_WITH_ZLIB:
684
#ifdef LIBXML_ZLIB_ENABLED
685
            return(1);
686
#else
687
0
            return(0);
688
0
#endif
689
0
        case XML_WITH_LZMA:
690
#ifdef LIBXML_LZMA_ENABLED
691
            return(1);
692
#else
693
0
            return(0);
694
0
#endif
695
0
        case XML_WITH_ICU:
696
#ifdef LIBXML_ICU_ENABLED
697
            return(1);
698
#else
699
0
            return(0);
700
0
#endif
701
0
        default:
702
0
      break;
703
0
     }
704
0
     return(0);
705
0
}
706
707
/************************************************************************
708
 *                  *
709
 *      Simple string buffer        *
710
 *                  *
711
 ************************************************************************/
712
713
typedef struct {
714
    xmlChar *mem;
715
    unsigned size;
716
    unsigned cap; /* size < cap */
717
    unsigned max; /* size <= max */
718
    xmlParserErrors code;
719
} xmlSBuf;
720
721
static void
722
732k
xmlSBufInit(xmlSBuf *buf, unsigned max) {
723
732k
    buf->mem = NULL;
724
732k
    buf->size = 0;
725
732k
    buf->cap = 0;
726
732k
    buf->max = max;
727
732k
    buf->code = XML_ERR_OK;
728
732k
}
729
730
static int
731
174k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
732
174k
    xmlChar *mem;
733
174k
    unsigned cap;
734
735
174k
    if (len >= UINT_MAX / 2 - buf->size) {
736
0
        if (buf->code == XML_ERR_OK)
737
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
738
0
        return(-1);
739
0
    }
740
741
174k
    cap = (buf->size + len) * 2;
742
174k
    if (cap < 240)
743
131k
        cap = 240;
744
745
174k
    mem = xmlRealloc(buf->mem, cap);
746
174k
    if (mem == NULL) {
747
0
        buf->code = XML_ERR_NO_MEMORY;
748
0
        return(-1);
749
0
    }
750
751
174k
    buf->mem = mem;
752
174k
    buf->cap = cap;
753
754
174k
    return(0);
755
174k
}
756
757
static void
758
23.7M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
759
23.7M
    if (buf->max - buf->size < len) {
760
735k
        if (buf->code == XML_ERR_OK)
761
108
            buf->code = XML_ERR_RESOURCE_LIMIT;
762
735k
        return;
763
735k
    }
764
765
23.0M
    if (buf->cap - buf->size <= len) {
766
165k
        if (xmlSBufGrow(buf, len) < 0)
767
0
            return;
768
165k
    }
769
770
23.0M
    if (len > 0)
771
23.0M
        memcpy(buf->mem + buf->size, str, len);
772
23.0M
    buf->size += len;
773
23.0M
}
774
775
static void
776
18.7M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
777
18.7M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
778
18.7M
}
779
780
static void
781
141k
xmlSBufAddChar(xmlSBuf *buf, int c) {
782
141k
    xmlChar *end;
783
784
141k
    if (buf->max - buf->size < 4) {
785
593
        if (buf->code == XML_ERR_OK)
786
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
787
593
        return;
788
593
    }
789
790
140k
    if (buf->cap - buf->size <= 4) {
791
9.32k
        if (xmlSBufGrow(buf, 4) < 0)
792
0
            return;
793
9.32k
    }
794
795
140k
    end = buf->mem + buf->size;
796
797
140k
    if (c < 0x80) {
798
45.2k
        *end = (xmlChar) c;
799
45.2k
        buf->size += 1;
800
95.7k
    } else {
801
95.7k
        buf->size += xmlCopyCharMultiByte(end, c);
802
95.7k
    }
803
140k
}
804
805
static void
806
12.4M
xmlSBufAddReplChar(xmlSBuf *buf) {
807
12.4M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
808
12.4M
}
809
810
static void
811
108
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
812
108
    if (buf->code == XML_ERR_NO_MEMORY)
813
0
        xmlCtxtErrMemory(ctxt);
814
108
    else
815
108
        xmlFatalErr(ctxt, buf->code, errMsg);
816
108
}
817
818
static xmlChar *
819
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
820
159k
              const char *errMsg) {
821
159k
    if (buf->mem == NULL) {
822
26.3k
        buf->mem = xmlMalloc(1);
823
26.3k
        if (buf->mem == NULL) {
824
0
            buf->code = XML_ERR_NO_MEMORY;
825
26.3k
        } else {
826
26.3k
            buf->mem[0] = 0;
827
26.3k
        }
828
133k
    } else {
829
133k
        buf->mem[buf->size] = 0;
830
133k
    }
831
832
159k
    if (buf->code == XML_ERR_OK) {
833
159k
        if (sizeOut != NULL)
834
68.5k
            *sizeOut = buf->size;
835
159k
        return(buf->mem);
836
159k
    }
837
838
35
    xmlSBufReportError(buf, ctxt, errMsg);
839
840
35
    xmlFree(buf->mem);
841
842
35
    if (sizeOut != NULL)
843
34
        *sizeOut = 0;
844
35
    return(NULL);
845
159k
}
846
847
static void
848
561k
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
849
561k
    if (buf->code != XML_ERR_OK)
850
73
        xmlSBufReportError(buf, ctxt, errMsg);
851
852
561k
    xmlFree(buf->mem);
853
561k
}
854
855
static int
856
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
857
409M
                    const char *errMsg) {
858
409M
    int c = str[0];
859
409M
    int c1 = str[1];
860
861
409M
    if ((c1 & 0xC0) != 0x80)
862
1.15M
        goto encoding_error;
863
864
408M
    if (c < 0xE0) {
865
        /* 2-byte sequence */
866
10.0M
        if (c < 0xC2)
867
8.41M
            goto encoding_error;
868
869
1.64M
        return(2);
870
398M
    } else {
871
398M
        int c2 = str[2];
872
873
398M
        if ((c2 & 0xC0) != 0x80)
874
8.36k
            goto encoding_error;
875
876
398M
        if (c < 0xF0) {
877
            /* 3-byte sequence */
878
397M
            if (c == 0xE0) {
879
                /* overlong */
880
397M
                if (c1 < 0xA0)
881
5.87k
                    goto encoding_error;
882
397M
            } else if (c == 0xED) {
883
                /* surrogate */
884
2.09k
                if (c1 >= 0xA0)
885
567
                    goto encoding_error;
886
244k
            } else if (c == 0xEF) {
887
                /* U+FFFE and U+FFFF are invalid Chars */
888
8.07k
                if ((c1 == 0xBF) && (c2 >= 0xBE))
889
2.51k
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
890
8.07k
            }
891
892
397M
            return(3);
893
397M
        } else {
894
            /* 4-byte sequence */
895
14.4k
            if ((str[3] & 0xC0) != 0x80)
896
1.43k
                goto encoding_error;
897
13.0k
            if (c == 0xF0) {
898
                /* overlong */
899
2.10k
                if (c1 < 0x90)
900
1.17k
                    goto encoding_error;
901
10.9k
            } else if (c >= 0xF4) {
902
                /* greater than 0x10FFFF */
903
3.31k
                if ((c > 0xF4) || (c1 >= 0x90))
904
2.05k
                    goto encoding_error;
905
3.31k
            }
906
907
9.80k
            return(4);
908
13.0k
        }
909
398M
    }
910
911
9.59M
encoding_error:
912
    /* Only report the first error */
913
9.59M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
914
1.33k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
915
1.33k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
916
1.33k
    }
917
918
9.59M
    return(0);
919
408M
}
920
921
/************************************************************************
922
 *                  *
923
 *    SAX2 defaulted attributes handling      *
924
 *                  *
925
 ************************************************************************/
926
927
/**
928
 * xmlCtxtInitializeLate:
929
 * @ctxt:  an XML parser context
930
 *
931
 * Final initialization of the parser context before starting to parse.
932
 *
933
 * This accounts for users modifying struct members of parser context
934
 * directly.
935
 */
936
static void
937
20.2k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
938
20.2k
    xmlSAXHandlerPtr sax;
939
940
    /* Avoid unused variable warning if features are disabled. */
941
20.2k
    (void) sax;
942
943
    /*
944
     * Changing the SAX struct directly is still widespread practice
945
     * in internal and external code.
946
     */
947
20.2k
    if (ctxt == NULL) return;
948
20.2k
    sax = ctxt->sax;
949
20.2k
#ifdef LIBXML_SAX1_ENABLED
950
    /*
951
     * Only enable SAX2 if there are SAX2 element handlers, except when there
952
     * are no element handlers at all.
953
     */
954
20.2k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
955
20.2k
        (sax) &&
956
20.2k
        (sax->initialized == XML_SAX2_MAGIC) &&
957
20.2k
        ((sax->startElementNs != NULL) ||
958
20.2k
         (sax->endElementNs != NULL) ||
959
20.2k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
960
20.2k
        ctxt->sax2 = 1;
961
#else
962
    ctxt->sax2 = 1;
963
#endif /* LIBXML_SAX1_ENABLED */
964
965
    /*
966
     * Some users replace the dictionary directly in the context struct.
967
     * We really need an API function to do that cleanly.
968
     */
969
20.2k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
970
20.2k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
971
20.2k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
972
20.2k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
973
20.2k
    (ctxt->str_xml_ns == NULL)) {
974
0
        xmlErrMemory(ctxt);
975
0
    }
976
977
20.2k
    xmlDictSetLimit(ctxt->dict,
978
20.2k
                    (ctxt->options & XML_PARSE_HUGE) ?
979
0
                        0 :
980
20.2k
                        XML_MAX_DICTIONARY_LIMIT);
981
20.2k
}
982
983
typedef struct {
984
    xmlHashedString prefix;
985
    xmlHashedString name;
986
    xmlHashedString value;
987
    const xmlChar *valueEnd;
988
    int external;
989
    int expandedSize;
990
} xmlDefAttr;
991
992
typedef struct _xmlDefAttrs xmlDefAttrs;
993
typedef xmlDefAttrs *xmlDefAttrsPtr;
994
struct _xmlDefAttrs {
995
    int nbAttrs;  /* number of defaulted attributes on that element */
996
    int maxAttrs;       /* the size of the array */
997
#if __STDC_VERSION__ >= 199901L
998
    /* Using a C99 flexible array member avoids UBSan errors. */
999
    xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
1000
#else
1001
    xmlDefAttr attrs[1];
1002
#endif
1003
};
1004
1005
/**
1006
 * xmlAttrNormalizeSpace:
1007
 * @src: the source string
1008
 * @dst: the target string
1009
 *
1010
 * Normalize the space in non CDATA attribute values:
1011
 * If the attribute type is not CDATA, then the XML processor MUST further
1012
 * process the normalized attribute value by discarding any leading and
1013
 * trailing space (#x20) characters, and by replacing sequences of space
1014
 * (#x20) characters by a single space (#x20) character.
1015
 * Note that the size of dst needs to be at least that of src, and if one doesn't need
1016
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1017
 * passing src as dst is just fine.
1018
 *
1019
 * Returns a pointer to the end (terminating 0) of the normalized value in dst, or NULL if no conversion
1020
 *         is needed.
1021
 */
1022
static xmlChar *
1023
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1024
39.8k
{
1025
39.8k
    if ((src == NULL) || (dst == NULL))
1026
0
        return(NULL);
1027
1028
42.8k
    while (*src == 0x20) src++;
1029
634k
    while (*src != 0) {
1030
594k
  if (*src == 0x20) {
1031
88.8k
      while (*src == 0x20) src++;
1032
27.2k
      if (*src != 0)
1033
21.8k
    *dst++ = 0x20;
1034
567k
  } else {
1035
567k
      *dst++ = *src++;
1036
567k
  }
1037
594k
    }
1038
39.8k
    *dst = 0;
1039
39.8k
    if (dst == src)
1040
30.9k
       return(NULL);
1041
8.91k
    return(dst);
1042
39.8k
}
1043
1044
/**
1045
 * xmlAddDefAttrs:
1046
 * @ctxt:  an XML parser context
1047
 * @fullname:  the element fullname
1048
 * @fullattr:  the attribute fullname
1049
 * @value:  the attribute value
1050
 *
1051
 * Add a defaulted attribute for an element
1052
 */
1053
static void
1054
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1055
               const xmlChar *fullname,
1056
               const xmlChar *fullattr,
1057
45.5k
               const xmlChar *value) {
1058
45.5k
    xmlDefAttrsPtr defaults;
1059
45.5k
    xmlDefAttr *attr;
1060
45.5k
    int len, expandedSize;
1061
45.5k
    xmlHashedString name;
1062
45.5k
    xmlHashedString prefix;
1063
45.5k
    xmlHashedString hvalue;
1064
45.5k
    const xmlChar *localname;
1065
1066
    /*
1067
     * Allows to detect attribute redefinitions
1068
     */
1069
45.5k
    if (ctxt->attsSpecial != NULL) {
1070
43.4k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1071
24.2k
      return;
1072
43.4k
    }
1073
1074
21.2k
    if (ctxt->attsDefault == NULL) {
1075
2.06k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1076
2.06k
  if (ctxt->attsDefault == NULL)
1077
0
      goto mem_error;
1078
2.06k
    }
1079
1080
    /*
1081
     * split the element name into prefix:localname , the string found
1082
     * are within the DTD and then not associated to namespace names.
1083
     */
1084
21.2k
    localname = xmlSplitQName3(fullname, &len);
1085
21.2k
    if (localname == NULL) {
1086
16.5k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1087
16.5k
  prefix.name = NULL;
1088
16.5k
    } else {
1089
4.74k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1090
4.74k
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1091
4.74k
        if (prefix.name == NULL)
1092
0
            goto mem_error;
1093
4.74k
    }
1094
21.2k
    if (name.name == NULL)
1095
0
        goto mem_error;
1096
1097
    /*
1098
     * make sure there is some storage
1099
     */
1100
21.2k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1101
21.2k
    if ((defaults == NULL) ||
1102
21.2k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1103
6.22k
        xmlDefAttrsPtr temp;
1104
6.22k
        int newSize;
1105
1106
6.22k
        newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1107
6.22k
        temp = xmlRealloc(defaults,
1108
6.22k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1109
6.22k
  if (temp == NULL)
1110
0
      goto mem_error;
1111
6.22k
        if (defaults == NULL)
1112
3.32k
            temp->nbAttrs = 0;
1113
6.22k
  temp->maxAttrs = newSize;
1114
6.22k
        defaults = temp;
1115
6.22k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1116
6.22k
                          defaults, NULL) < 0) {
1117
0
      xmlFree(defaults);
1118
0
      goto mem_error;
1119
0
  }
1120
6.22k
    }
1121
1122
    /*
1123
     * Split the attribute name into prefix:localname , the string found
1124
     * are within the DTD and hen not associated to namespace names.
1125
     */
1126
21.2k
    localname = xmlSplitQName3(fullattr, &len);
1127
21.2k
    if (localname == NULL) {
1128
12.2k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1129
12.2k
  prefix.name = NULL;
1130
12.2k
    } else {
1131
9.04k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1132
9.04k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1133
9.04k
        if (prefix.name == NULL)
1134
0
            goto mem_error;
1135
9.04k
    }
1136
21.2k
    if (name.name == NULL)
1137
0
        goto mem_error;
1138
1139
    /* intern the string and precompute the end */
1140
21.2k
    len = strlen((const char *) value);
1141
21.2k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1142
21.2k
    if (hvalue.name == NULL)
1143
0
        goto mem_error;
1144
1145
21.2k
    expandedSize = strlen((const char *) name.name);
1146
21.2k
    if (prefix.name != NULL)
1147
9.04k
        expandedSize += strlen((const char *) prefix.name);
1148
21.2k
    expandedSize += len;
1149
1150
21.2k
    attr = &defaults->attrs[defaults->nbAttrs++];
1151
21.2k
    attr->name = name;
1152
21.2k
    attr->prefix = prefix;
1153
21.2k
    attr->value = hvalue;
1154
21.2k
    attr->valueEnd = hvalue.name + len;
1155
21.2k
    attr->external = PARSER_EXTERNAL(ctxt);
1156
21.2k
    attr->expandedSize = expandedSize;
1157
1158
21.2k
    return;
1159
1160
0
mem_error:
1161
0
    xmlErrMemory(ctxt);
1162
0
}
1163
1164
/**
1165
 * xmlAddSpecialAttr:
1166
 * @ctxt:  an XML parser context
1167
 * @fullname:  the element fullname
1168
 * @fullattr:  the attribute fullname
1169
 * @type:  the attribute type
1170
 *
1171
 * Register this attribute type
1172
 */
1173
static void
1174
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1175
      const xmlChar *fullname,
1176
      const xmlChar *fullattr,
1177
      int type)
1178
47.6k
{
1179
47.6k
    if (ctxt->attsSpecial == NULL) {
1180
2.19k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1181
2.19k
  if (ctxt->attsSpecial == NULL)
1182
0
      goto mem_error;
1183
2.19k
    }
1184
1185
47.6k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1186
47.6k
                    (void *) (ptrdiff_t) type) < 0)
1187
0
        goto mem_error;
1188
47.6k
    return;
1189
1190
47.6k
mem_error:
1191
0
    xmlErrMemory(ctxt);
1192
0
}
1193
1194
/**
1195
 * xmlCleanSpecialAttrCallback:
1196
 *
1197
 * Removes CDATA attributes from the special attribute table
1198
 */
1199
static void
1200
xmlCleanSpecialAttrCallback(void *payload, void *data,
1201
                            const xmlChar *fullname, const xmlChar *fullattr,
1202
21.9k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1203
21.9k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1204
1205
21.9k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1206
4.41k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1207
4.41k
    }
1208
21.9k
}
1209
1210
/**
1211
 * xmlCleanSpecialAttr:
1212
 * @ctxt:  an XML parser context
1213
 *
1214
 * Trim the list of attributes defined to remove all those of type
1215
 * CDATA as they are not special. This call should be done when finishing
1216
 * to parse the DTD and before starting to parse the document root.
1217
 */
1218
static void
1219
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1220
6.97k
{
1221
6.97k
    if (ctxt->attsSpecial == NULL)
1222
4.77k
        return;
1223
1224
2.19k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1225
1226
2.19k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1227
29
        xmlHashFree(ctxt->attsSpecial, NULL);
1228
29
        ctxt->attsSpecial = NULL;
1229
29
    }
1230
2.19k
}
1231
1232
/**
1233
 * xmlCheckLanguageID:
1234
 * @lang:  pointer to the string value
1235
 *
1236
 * DEPRECATED: Internal function, do not use.
1237
 *
1238
 * Checks that the value conforms to the LanguageID production:
1239
 *
1240
 * NOTE: this is somewhat deprecated, those productions were removed from
1241
 *       the XML Second edition.
1242
 *
1243
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1244
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1245
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1246
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1247
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1248
 * [38] Subcode ::= ([a-z] | [A-Z])+
1249
 *
1250
 * The current REC reference the successors of RFC 1766, currently 5646
1251
 *
1252
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1253
 * langtag       = language
1254
 *                 ["-" script]
1255
 *                 ["-" region]
1256
 *                 *("-" variant)
1257
 *                 *("-" extension)
1258
 *                 ["-" privateuse]
1259
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1260
 *                 ["-" extlang]       ; sometimes followed by
1261
 *                                     ; extended language subtags
1262
 *               / 4ALPHA              ; or reserved for future use
1263
 *               / 5*8ALPHA            ; or registered language subtag
1264
 *
1265
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1266
 *                 *2("-" 3ALPHA)      ; permanently reserved
1267
 *
1268
 * script        = 4ALPHA              ; ISO 15924 code
1269
 *
1270
 * region        = 2ALPHA              ; ISO 3166-1 code
1271
 *               / 3DIGIT              ; UN M.49 code
1272
 *
1273
 * variant       = 5*8alphanum         ; registered variants
1274
 *               / (DIGIT 3alphanum)
1275
 *
1276
 * extension     = singleton 1*("-" (2*8alphanum))
1277
 *
1278
 *                                     ; Single alphanumerics
1279
 *                                     ; "x" reserved for private use
1280
 * singleton     = DIGIT               ; 0 - 9
1281
 *               / %x41-57             ; A - W
1282
 *               / %x59-5A             ; Y - Z
1283
 *               / %x61-77             ; a - w
1284
 *               / %x79-7A             ; y - z
1285
 *
1286
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1287
 * The parser below doesn't try to cope with extension or privateuse
1288
 * that could be added but that's not interoperable anyway
1289
 *
1290
 * Returns 1 if correct 0 otherwise
1291
 **/
1292
int
1293
xmlCheckLanguageID(const xmlChar * lang)
1294
0
{
1295
0
    const xmlChar *cur = lang, *nxt;
1296
1297
0
    if (cur == NULL)
1298
0
        return (0);
1299
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1300
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1301
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1302
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1303
        /*
1304
         * Still allow IANA code and user code which were coming
1305
         * from the previous version of the XML-1.0 specification
1306
         * it's deprecated but we should not fail
1307
         */
1308
0
        cur += 2;
1309
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1310
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1311
0
            cur++;
1312
0
        return(cur[0] == 0);
1313
0
    }
1314
0
    nxt = cur;
1315
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1316
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1317
0
           nxt++;
1318
0
    if (nxt - cur >= 4) {
1319
        /*
1320
         * Reserved
1321
         */
1322
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1323
0
            return(0);
1324
0
        return(1);
1325
0
    }
1326
0
    if (nxt - cur < 2)
1327
0
        return(0);
1328
    /* we got an ISO 639 code */
1329
0
    if (nxt[0] == 0)
1330
0
        return(1);
1331
0
    if (nxt[0] != '-')
1332
0
        return(0);
1333
1334
0
    nxt++;
1335
0
    cur = nxt;
1336
    /* now we can have extlang or script or region or variant */
1337
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1338
0
        goto region_m49;
1339
1340
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1341
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1342
0
           nxt++;
1343
0
    if (nxt - cur == 4)
1344
0
        goto script;
1345
0
    if (nxt - cur == 2)
1346
0
        goto region;
1347
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1348
0
        goto variant;
1349
0
    if (nxt - cur != 3)
1350
0
        return(0);
1351
    /* we parsed an extlang */
1352
0
    if (nxt[0] == 0)
1353
0
        return(1);
1354
0
    if (nxt[0] != '-')
1355
0
        return(0);
1356
1357
0
    nxt++;
1358
0
    cur = nxt;
1359
    /* now we can have script or region or variant */
1360
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1361
0
        goto region_m49;
1362
1363
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1364
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1365
0
           nxt++;
1366
0
    if (nxt - cur == 2)
1367
0
        goto region;
1368
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1369
0
        goto variant;
1370
0
    if (nxt - cur != 4)
1371
0
        return(0);
1372
    /* we parsed a script */
1373
0
script:
1374
0
    if (nxt[0] == 0)
1375
0
        return(1);
1376
0
    if (nxt[0] != '-')
1377
0
        return(0);
1378
1379
0
    nxt++;
1380
0
    cur = nxt;
1381
    /* now we can have region or variant */
1382
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1383
0
        goto region_m49;
1384
1385
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1386
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1387
0
           nxt++;
1388
1389
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1390
0
        goto variant;
1391
0
    if (nxt - cur != 2)
1392
0
        return(0);
1393
    /* we parsed a region */
1394
0
region:
1395
0
    if (nxt[0] == 0)
1396
0
        return(1);
1397
0
    if (nxt[0] != '-')
1398
0
        return(0);
1399
1400
0
    nxt++;
1401
0
    cur = nxt;
1402
    /* now we can just have a variant */
1403
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1404
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1405
0
           nxt++;
1406
1407
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1408
0
        return(0);
1409
1410
    /* we parsed a variant */
1411
0
variant:
1412
0
    if (nxt[0] == 0)
1413
0
        return(1);
1414
0
    if (nxt[0] != '-')
1415
0
        return(0);
1416
    /* extensions and private use subtags not checked */
1417
0
    return (1);
1418
1419
0
region_m49:
1420
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1421
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1422
0
        nxt += 3;
1423
0
        goto region;
1424
0
    }
1425
0
    return(0);
1426
0
}
1427
1428
/************************************************************************
1429
 *                  *
1430
 *    Parser stacks related functions and macros    *
1431
 *                  *
1432
 ************************************************************************/
1433
1434
static xmlChar *
1435
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1436
1437
/**
1438
 * xmlParserNsCreate:
1439
 *
1440
 * Create a new namespace database.
1441
 *
1442
 * Returns the new obejct.
1443
 */
1444
xmlParserNsData *
1445
20.2k
xmlParserNsCreate(void) {
1446
20.2k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1447
1448
20.2k
    if (nsdb == NULL)
1449
0
        return(NULL);
1450
20.2k
    memset(nsdb, 0, sizeof(*nsdb));
1451
20.2k
    nsdb->defaultNsIndex = INT_MAX;
1452
1453
20.2k
    return(nsdb);
1454
20.2k
}
1455
1456
/**
1457
 * xmlParserNsFree:
1458
 * @nsdb: namespace database
1459
 *
1460
 * Free a namespace database.
1461
 */
1462
void
1463
20.2k
xmlParserNsFree(xmlParserNsData *nsdb) {
1464
20.2k
    if (nsdb == NULL)
1465
0
        return;
1466
1467
20.2k
    xmlFree(nsdb->extra);
1468
20.2k
    xmlFree(nsdb->hash);
1469
20.2k
    xmlFree(nsdb);
1470
20.2k
}
1471
1472
/**
1473
 * xmlParserNsReset:
1474
 * @nsdb: namespace database
1475
 *
1476
 * Reset a namespace database.
1477
 */
1478
static void
1479
0
xmlParserNsReset(xmlParserNsData *nsdb) {
1480
0
    if (nsdb == NULL)
1481
0
        return;
1482
1483
0
    nsdb->hashElems = 0;
1484
0
    nsdb->elementId = 0;
1485
0
    nsdb->defaultNsIndex = INT_MAX;
1486
1487
0
    if (nsdb->hash)
1488
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1489
0
}
1490
1491
/**
1492
 * xmlParserStartElement:
1493
 * @nsdb: namespace database
1494
 *
1495
 * Signal that a new element has started.
1496
 *
1497
 * Returns 0 on success, -1 if the element counter overflowed.
1498
 */
1499
static int
1500
665k
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1501
665k
    if (nsdb->elementId == UINT_MAX)
1502
0
        return(-1);
1503
665k
    nsdb->elementId++;
1504
1505
665k
    return(0);
1506
665k
}
1507
1508
/**
1509
 * xmlParserNsLookup:
1510
 * @ctxt: parser context
1511
 * @prefix: namespace prefix
1512
 * @bucketPtr: optional bucket (return value)
1513
 *
1514
 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1515
 * be set to the matching bucket, or the first empty bucket if no match
1516
 * was found.
1517
 *
1518
 * Returns the namespace index on success, INT_MAX if no namespace was
1519
 * found.
1520
 */
1521
static int
1522
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1523
792k
                  xmlParserNsBucket **bucketPtr) {
1524
792k
    xmlParserNsBucket *bucket, *tombstone;
1525
792k
    unsigned index, hashValue;
1526
1527
792k
    if (prefix->name == NULL)
1528
398k
        return(ctxt->nsdb->defaultNsIndex);
1529
1530
393k
    if (ctxt->nsdb->hashSize == 0)
1531
14.3k
        return(INT_MAX);
1532
1533
379k
    hashValue = prefix->hashValue;
1534
379k
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1535
379k
    bucket = &ctxt->nsdb->hash[index];
1536
379k
    tombstone = NULL;
1537
1538
516k
    while (bucket->hashValue) {
1539
375k
        if (bucket->index == INT_MAX) {
1540
58.9k
            if (tombstone == NULL)
1541
56.1k
                tombstone = bucket;
1542
316k
        } else if (bucket->hashValue == hashValue) {
1543
238k
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1544
238k
                if (bucketPtr != NULL)
1545
168k
                    *bucketPtr = bucket;
1546
238k
                return(bucket->index);
1547
238k
            }
1548
238k
        }
1549
1550
136k
        index++;
1551
136k
        bucket++;
1552
136k
        if (index == ctxt->nsdb->hashSize) {
1553
14.4k
            index = 0;
1554
14.4k
            bucket = ctxt->nsdb->hash;
1555
14.4k
        }
1556
136k
    }
1557
1558
141k
    if (bucketPtr != NULL)
1559
41.5k
        *bucketPtr = tombstone ? tombstone : bucket;
1560
141k
    return(INT_MAX);
1561
379k
}
1562
1563
/**
1564
 * xmlParserNsLookupUri:
1565
 * @ctxt: parser context
1566
 * @prefix: namespace prefix
1567
 *
1568
 * Lookup namespace URI with given prefix.
1569
 *
1570
 * Returns the namespace URI on success, NULL if no namespace was found.
1571
 */
1572
static const xmlChar *
1573
448k
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1574
448k
    const xmlChar *ret;
1575
448k
    int nsIndex;
1576
1577
448k
    if (prefix->name == ctxt->str_xml)
1578
4.25k
        return(ctxt->str_xml_ns);
1579
1580
    /*
1581
     * minNsIndex is used when building an entity tree. We must
1582
     * ignore namespaces declared outside the entity.
1583
     */
1584
444k
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1585
444k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1586
212k
        return(NULL);
1587
1588
231k
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1589
231k
    if (ret[0] == 0)
1590
6.40k
        ret = NULL;
1591
231k
    return(ret);
1592
444k
}
1593
1594
/**
1595
 * xmlParserNsLookupSax:
1596
 * @ctxt: parser context
1597
 * @prefix: namespace prefix
1598
 *
1599
 * Lookup extra data for the given prefix. This returns data stored
1600
 * with xmlParserNsUdpateSax.
1601
 *
1602
 * Returns the data on success, NULL if no namespace was found.
1603
 */
1604
void *
1605
13.5k
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1606
13.5k
    xmlHashedString hprefix;
1607
13.5k
    int nsIndex;
1608
1609
13.5k
    if (prefix == ctxt->str_xml)
1610
8.13k
        return(NULL);
1611
1612
5.41k
    hprefix.name = prefix;
1613
5.41k
    if (prefix != NULL)
1614
1.54k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1615
3.87k
    else
1616
3.87k
        hprefix.hashValue = 0;
1617
5.41k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1618
5.41k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1619
0
        return(NULL);
1620
1621
5.41k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1622
5.41k
}
1623
1624
/**
1625
 * xmlParserNsUpdateSax:
1626
 * @ctxt: parser context
1627
 * @prefix: namespace prefix
1628
 * @saxData: extra data for SAX handler
1629
 *
1630
 * Sets or updates extra data for the given prefix. This value will be
1631
 * returned by xmlParserNsLookupSax as long as the namespace with the
1632
 * given prefix is in scope.
1633
 *
1634
 * Returns the data on success, NULL if no namespace was found.
1635
 */
1636
int
1637
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1638
8.37k
                     void *saxData) {
1639
8.37k
    xmlHashedString hprefix;
1640
8.37k
    int nsIndex;
1641
1642
8.37k
    if (prefix == ctxt->str_xml)
1643
0
        return(-1);
1644
1645
8.37k
    hprefix.name = prefix;
1646
8.37k
    if (prefix != NULL)
1647
5.82k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1648
2.54k
    else
1649
2.54k
        hprefix.hashValue = 0;
1650
8.37k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1651
8.37k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1652
0
        return(-1);
1653
1654
8.37k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1655
8.37k
    return(0);
1656
8.37k
}
1657
1658
/**
1659
 * xmlParserNsGrow:
1660
 * @ctxt: parser context
1661
 *
1662
 * Grows the namespace tables.
1663
 *
1664
 * Returns 0 on success, -1 if a memory allocation failed.
1665
 */
1666
static int
1667
5.02k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1668
5.02k
    const xmlChar **table;
1669
5.02k
    xmlParserNsExtra *extra;
1670
5.02k
    int newSize;
1671
1672
5.02k
    if (ctxt->nsMax > INT_MAX / 2)
1673
0
        goto error;
1674
5.02k
    newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1675
1676
5.02k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1677
5.02k
    if (table == NULL)
1678
0
        goto error;
1679
5.02k
    ctxt->nsTab = table;
1680
1681
5.02k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1682
5.02k
    if (extra == NULL)
1683
0
        goto error;
1684
5.02k
    ctxt->nsdb->extra = extra;
1685
1686
5.02k
    ctxt->nsMax = newSize;
1687
5.02k
    return(0);
1688
1689
0
error:
1690
0
    xmlErrMemory(ctxt);
1691
0
    return(-1);
1692
5.02k
}
1693
1694
/**
1695
 * xmlParserNsPush:
1696
 * @ctxt: parser context
1697
 * @prefix: prefix with hash value
1698
 * @uri: uri with hash value
1699
 * @saxData: extra data for SAX handler
1700
 * @defAttr: whether the namespace comes from a default attribute
1701
 *
1702
 * Push a new namespace on the table.
1703
 *
1704
 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1705
 * -1 if a memory allocation failed.
1706
 */
1707
static int
1708
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1709
196k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1710
196k
    xmlParserNsBucket *bucket = NULL;
1711
196k
    xmlParserNsExtra *extra;
1712
196k
    const xmlChar **ns;
1713
196k
    unsigned hashValue, nsIndex, oldIndex;
1714
1715
196k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1716
444
        return(0);
1717
1718
196k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1719
0
        xmlErrMemory(ctxt);
1720
0
        return(-1);
1721
0
    }
1722
1723
    /*
1724
     * Default namespace and 'xml' namespace
1725
     */
1726
196k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1727
88.2k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1728
1729
88.2k
        if (oldIndex != INT_MAX) {
1730
62.3k
            extra = &ctxt->nsdb->extra[oldIndex];
1731
1732
62.3k
            if (extra->elementId == ctxt->nsdb->elementId) {
1733
7.51k
                if (defAttr == 0)
1734
6.59k
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1735
7.51k
                return(0);
1736
7.51k
            }
1737
1738
54.8k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1739
54.8k
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1740
0
                return(0);
1741
54.8k
        }
1742
1743
80.7k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1744
80.7k
        goto populate_entry;
1745
88.2k
    }
1746
1747
    /*
1748
     * Hash table lookup
1749
     */
1750
108k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1751
108k
    if (oldIndex != INT_MAX) {
1752
65.0k
        extra = &ctxt->nsdb->extra[oldIndex];
1753
1754
        /*
1755
         * Check for duplicate definitions on the same element.
1756
         */
1757
65.0k
        if (extra->elementId == ctxt->nsdb->elementId) {
1758
3.54k
            if (defAttr == 0)
1759
2.13k
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1760
3.54k
            return(0);
1761
3.54k
        }
1762
1763
61.5k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1764
61.5k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1765
0
            return(0);
1766
1767
61.5k
        bucket->index = ctxt->nsNr;
1768
61.5k
        goto populate_entry;
1769
61.5k
    }
1770
1771
    /*
1772
     * Insert new bucket
1773
     */
1774
1775
42.9k
    hashValue = prefix->hashValue;
1776
1777
    /*
1778
     * Grow hash table, 50% fill factor
1779
     */
1780
42.9k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1781
2.57k
        xmlParserNsBucket *newHash;
1782
2.57k
        unsigned newSize, i, index;
1783
1784
2.57k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1785
0
            xmlErrMemory(ctxt);
1786
0
            return(-1);
1787
0
        }
1788
2.57k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1789
2.57k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1790
2.57k
        if (newHash == NULL) {
1791
0
            xmlErrMemory(ctxt);
1792
0
            return(-1);
1793
0
        }
1794
2.57k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1795
1796
118k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1797
116k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1798
116k
            unsigned newIndex;
1799
1800
116k
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1801
113k
                continue;
1802
2.66k
            newIndex = hv & (newSize - 1);
1803
1804
3.07k
            while (newHash[newIndex].hashValue != 0) {
1805
407
                newIndex++;
1806
407
                if (newIndex == newSize)
1807
58
                    newIndex = 0;
1808
407
            }
1809
1810
2.66k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1811
2.66k
        }
1812
1813
2.57k
        xmlFree(ctxt->nsdb->hash);
1814
2.57k
        ctxt->nsdb->hash = newHash;
1815
2.57k
        ctxt->nsdb->hashSize = newSize;
1816
1817
        /*
1818
         * Relookup
1819
         */
1820
2.57k
        index = hashValue & (newSize - 1);
1821
1822
2.79k
        while (newHash[index].hashValue != 0) {
1823
217
            index++;
1824
217
            if (index == newSize)
1825
31
                index = 0;
1826
217
        }
1827
1828
2.57k
        bucket = &newHash[index];
1829
2.57k
    }
1830
1831
42.9k
    bucket->hashValue = hashValue;
1832
42.9k
    bucket->index = ctxt->nsNr;
1833
42.9k
    ctxt->nsdb->hashElems++;
1834
42.9k
    oldIndex = INT_MAX;
1835
1836
185k
populate_entry:
1837
185k
    nsIndex = ctxt->nsNr;
1838
1839
185k
    ns = &ctxt->nsTab[nsIndex * 2];
1840
185k
    ns[0] = prefix ? prefix->name : NULL;
1841
185k
    ns[1] = uri->name;
1842
1843
185k
    extra = &ctxt->nsdb->extra[nsIndex];
1844
185k
    extra->saxData = saxData;
1845
185k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1846
185k
    extra->uriHashValue = uri->hashValue;
1847
185k
    extra->elementId = ctxt->nsdb->elementId;
1848
185k
    extra->oldIndex = oldIndex;
1849
1850
185k
    ctxt->nsNr++;
1851
1852
185k
    return(1);
1853
42.9k
}
1854
1855
/**
1856
 * xmlParserNsPop:
1857
 * @ctxt: an XML parser context
1858
 * @nr:  the number to pop
1859
 *
1860
 * Pops the top @nr namespaces and restores the hash table.
1861
 *
1862
 * Returns the number of namespaces popped.
1863
 */
1864
static int
1865
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1866
123k
{
1867
123k
    int i;
1868
1869
    /* assert(nr <= ctxt->nsNr); */
1870
1871
306k
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1872
182k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1873
182k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1874
1875
182k
        if (prefix == NULL) {
1876
79.8k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1877
103k
        } else {
1878
103k
            xmlHashedString hprefix;
1879
103k
            xmlParserNsBucket *bucket = NULL;
1880
1881
103k
            hprefix.name = prefix;
1882
103k
            hprefix.hashValue = extra->prefixHashValue;
1883
103k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1884
            /* assert(bucket && bucket->hashValue); */
1885
103k
            bucket->index = extra->oldIndex;
1886
103k
        }
1887
182k
    }
1888
1889
123k
    ctxt->nsNr -= nr;
1890
123k
    return(nr);
1891
123k
}
1892
1893
static int
1894
3.80k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1895
3.80k
    const xmlChar **atts;
1896
3.80k
    unsigned *attallocs;
1897
3.80k
    int maxatts;
1898
1899
3.80k
    if (nr + 5 > ctxt->maxatts) {
1900
3.80k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1901
3.80k
  atts = (const xmlChar **) xmlMalloc(
1902
3.80k
             maxatts * sizeof(const xmlChar *));
1903
3.80k
  if (atts == NULL) goto mem_error;
1904
3.80k
  attallocs = xmlRealloc(ctxt->attallocs,
1905
3.80k
                               (maxatts / 5) * sizeof(attallocs[0]));
1906
3.80k
  if (attallocs == NULL) {
1907
0
            xmlFree(atts);
1908
0
            goto mem_error;
1909
0
        }
1910
3.80k
        if (ctxt->maxatts > 0)
1911
203
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1912
3.80k
        xmlFree(ctxt->atts);
1913
3.80k
  ctxt->atts = atts;
1914
3.80k
  ctxt->attallocs = attallocs;
1915
3.80k
  ctxt->maxatts = maxatts;
1916
3.80k
    }
1917
3.80k
    return(ctxt->maxatts);
1918
0
mem_error:
1919
0
    xmlErrMemory(ctxt);
1920
0
    return(-1);
1921
3.80k
}
1922
1923
/**
1924
 * inputPush:
1925
 * @ctxt:  an XML parser context
1926
 * @value:  the parser input
1927
 *
1928
 * Pushes a new parser input on top of the input stack
1929
 *
1930
 * Returns -1 in case of error, the index in the stack otherwise
1931
 */
1932
int
1933
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1934
38.7k
{
1935
38.7k
    char *directory = NULL;
1936
1937
38.7k
    if ((ctxt == NULL) || (value == NULL))
1938
0
        return(-1);
1939
1940
38.7k
    if (ctxt->inputNr >= ctxt->inputMax) {
1941
1.85k
        size_t newSize = ctxt->inputMax * 2;
1942
1.85k
        xmlParserInputPtr *tmp;
1943
1944
1.85k
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1945
1.85k
                                               newSize * sizeof(*tmp));
1946
1.85k
        if (tmp == NULL) {
1947
0
            xmlErrMemory(ctxt);
1948
0
            return (-1);
1949
0
        }
1950
1.85k
        ctxt->inputTab = tmp;
1951
1.85k
        ctxt->inputMax = newSize;
1952
1.85k
    }
1953
1954
38.7k
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1955
9.89k
        directory = xmlParserGetDirectory(value->filename);
1956
9.89k
        if (directory == NULL) {
1957
0
            xmlErrMemory(ctxt);
1958
0
            return(-1);
1959
0
        }
1960
9.89k
    }
1961
1962
38.7k
    if (ctxt->input_id >= INT_MAX) {
1963
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1964
0
        return(-1);
1965
0
    }
1966
1967
38.7k
    ctxt->inputTab[ctxt->inputNr] = value;
1968
38.7k
    ctxt->input = value;
1969
1970
38.7k
    if (ctxt->inputNr == 0) {
1971
20.2k
        xmlFree(ctxt->directory);
1972
20.2k
        ctxt->directory = directory;
1973
20.2k
    }
1974
1975
    /*
1976
     * Internally, the input ID is only used to detect parameter entity
1977
     * boundaries. But there are entity loaders in downstream code that
1978
     * detect the main document by checking for "input_id == 1".
1979
     */
1980
38.7k
    value->id = ctxt->input_id++;
1981
1982
38.7k
    return(ctxt->inputNr++);
1983
38.7k
}
1984
/**
1985
 * inputPop:
1986
 * @ctxt: an XML parser context
1987
 *
1988
 * Pops the top parser input from the input stack
1989
 *
1990
 * Returns the input just removed
1991
 */
1992
xmlParserInputPtr
1993
inputPop(xmlParserCtxtPtr ctxt)
1994
79.1k
{
1995
79.1k
    xmlParserInputPtr ret;
1996
1997
79.1k
    if (ctxt == NULL)
1998
0
        return(NULL);
1999
79.1k
    if (ctxt->inputNr <= 0)
2000
40.4k
        return (NULL);
2001
38.7k
    ctxt->inputNr--;
2002
38.7k
    if (ctxt->inputNr > 0)
2003
18.5k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
2004
20.2k
    else
2005
20.2k
        ctxt->input = NULL;
2006
38.7k
    ret = ctxt->inputTab[ctxt->inputNr];
2007
38.7k
    ctxt->inputTab[ctxt->inputNr] = NULL;
2008
38.7k
    return (ret);
2009
79.1k
}
2010
/**
2011
 * nodePush:
2012
 * @ctxt:  an XML parser context
2013
 * @value:  the element node
2014
 *
2015
 * DEPRECATED: Internal function, do not use.
2016
 *
2017
 * Pushes a new element node on top of the node stack
2018
 *
2019
 * Returns -1 in case of error, the index in the stack otherwise
2020
 */
2021
int
2022
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
2023
43.8k
{
2024
43.8k
    int maxDepth;
2025
2026
43.8k
    if (ctxt == NULL)
2027
0
        return(0);
2028
2029
43.8k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2030
43.8k
    if (ctxt->nodeNr > maxDepth) {
2031
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2032
0
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
2033
0
                ctxt->nodeNr);
2034
0
        xmlHaltParser(ctxt);
2035
0
        return(-1);
2036
0
    }
2037
43.8k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
2038
7.19k
        xmlNodePtr *tmp;
2039
2040
7.19k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
2041
7.19k
                                      ctxt->nodeMax * 2 *
2042
7.19k
                                      sizeof(ctxt->nodeTab[0]));
2043
7.19k
        if (tmp == NULL) {
2044
0
            xmlErrMemory(ctxt);
2045
0
            return (-1);
2046
0
        }
2047
7.19k
        ctxt->nodeTab = tmp;
2048
7.19k
  ctxt->nodeMax *= 2;
2049
7.19k
    }
2050
43.8k
    ctxt->nodeTab[ctxt->nodeNr] = value;
2051
43.8k
    ctxt->node = value;
2052
43.8k
    return (ctxt->nodeNr++);
2053
43.8k
}
2054
2055
/**
2056
 * nodePop:
2057
 * @ctxt: an XML parser context
2058
 *
2059
 * DEPRECATED: Internal function, do not use.
2060
 *
2061
 * Pops the top element node from the node stack
2062
 *
2063
 * Returns the node just removed
2064
 */
2065
xmlNodePtr
2066
nodePop(xmlParserCtxtPtr ctxt)
2067
254k
{
2068
254k
    xmlNodePtr ret;
2069
2070
254k
    if (ctxt == NULL) return(NULL);
2071
254k
    if (ctxt->nodeNr <= 0)
2072
213k
        return (NULL);
2073
40.4k
    ctxt->nodeNr--;
2074
40.4k
    if (ctxt->nodeNr > 0)
2075
37.3k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2076
3.03k
    else
2077
3.03k
        ctxt->node = NULL;
2078
40.4k
    ret = ctxt->nodeTab[ctxt->nodeNr];
2079
40.4k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2080
40.4k
    return (ret);
2081
254k
}
2082
2083
/**
2084
 * nameNsPush:
2085
 * @ctxt:  an XML parser context
2086
 * @value:  the element name
2087
 * @prefix:  the element prefix
2088
 * @URI:  the element namespace name
2089
 * @line:  the current line number for error messages
2090
 * @nsNr:  the number of namespaces pushed on the namespace table
2091
 *
2092
 * Pushes a new element name/prefix/URL on top of the name stack
2093
 *
2094
 * Returns -1 in case of error, the index in the stack otherwise
2095
 */
2096
static int
2097
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2098
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2099
449k
{
2100
449k
    xmlStartTag *tag;
2101
2102
449k
    if (ctxt->nameNr >= ctxt->nameMax) {
2103
19.6k
        const xmlChar * *tmp;
2104
19.6k
        xmlStartTag *tmp2;
2105
19.6k
        ctxt->nameMax *= 2;
2106
19.6k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2107
19.6k
                                    ctxt->nameMax *
2108
19.6k
                                    sizeof(ctxt->nameTab[0]));
2109
19.6k
        if (tmp == NULL) {
2110
0
      ctxt->nameMax /= 2;
2111
0
      goto mem_error;
2112
0
        }
2113
19.6k
  ctxt->nameTab = tmp;
2114
19.6k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2115
19.6k
                                    ctxt->nameMax *
2116
19.6k
                                    sizeof(ctxt->pushTab[0]));
2117
19.6k
        if (tmp2 == NULL) {
2118
0
      ctxt->nameMax /= 2;
2119
0
      goto mem_error;
2120
0
        }
2121
19.6k
  ctxt->pushTab = tmp2;
2122
429k
    } else if (ctxt->pushTab == NULL) {
2123
9.46k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2124
9.46k
                                            sizeof(ctxt->pushTab[0]));
2125
9.46k
        if (ctxt->pushTab == NULL)
2126
0
            goto mem_error;
2127
9.46k
    }
2128
449k
    ctxt->nameTab[ctxt->nameNr] = value;
2129
449k
    ctxt->name = value;
2130
449k
    tag = &ctxt->pushTab[ctxt->nameNr];
2131
449k
    tag->prefix = prefix;
2132
449k
    tag->URI = URI;
2133
449k
    tag->line = line;
2134
449k
    tag->nsNr = nsNr;
2135
449k
    return (ctxt->nameNr++);
2136
0
mem_error:
2137
0
    xmlErrMemory(ctxt);
2138
0
    return (-1);
2139
449k
}
2140
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the topmost element name from the name stack. The name
 * below it, if any, becomes the current name; otherwise ctxt->name
 * is set to NULL.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2166
2167
/**
2168
 * namePush:
2169
 * @ctxt:  an XML parser context
2170
 * @value:  the element name
2171
 *
2172
 * DEPRECATED: Internal function, do not use.
2173
 *
2174
 * Pushes a new element name on top of the name stack
2175
 *
2176
 * Returns -1 in case of error, the index in the stack otherwise
2177
 */
2178
int
2179
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2180
0
{
2181
0
    if (ctxt == NULL) return (-1);
2182
2183
0
    if (ctxt->nameNr >= ctxt->nameMax) {
2184
0
        const xmlChar * *tmp;
2185
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2186
0
                                    ctxt->nameMax * 2 *
2187
0
                                    sizeof(ctxt->nameTab[0]));
2188
0
        if (tmp == NULL) {
2189
0
      goto mem_error;
2190
0
        }
2191
0
  ctxt->nameTab = tmp;
2192
0
        ctxt->nameMax *= 2;
2193
0
    }
2194
0
    ctxt->nameTab[ctxt->nameNr] = value;
2195
0
    ctxt->name = value;
2196
0
    return (ctxt->nameNr++);
2197
0
mem_error:
2198
0
    xmlErrMemory(ctxt);
2199
0
    return (-1);
2200
0
}
2201
2202
/**
2203
 * namePop:
2204
 * @ctxt: an XML parser context
2205
 *
2206
 * DEPRECATED: Internal function, do not use.
2207
 *
2208
 * Pops the top element name from the name stack
2209
 *
2210
 * Returns the name just removed
2211
 */
2212
const xmlChar *
2213
namePop(xmlParserCtxtPtr ctxt)
2214
441k
{
2215
441k
    const xmlChar *ret;
2216
2217
441k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2218
0
        return (NULL);
2219
441k
    ctxt->nameNr--;
2220
441k
    if (ctxt->nameNr > 0)
2221
439k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2222
2.23k
    else
2223
2.23k
        ctxt->name = NULL;
2224
441k
    ret = ctxt->nameTab[ctxt->nameNr];
2225
441k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2226
441k
    return (ret);
2227
441k
}
2228
2229
666k
/*
 * Pushes @val on the stack of space values and makes ctxt->space point
 * to the new top entry. The stack is doubled in size when it is full.
 *
 * Returns -1 in case of memory error, the index in the stack otherwise.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int *slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            /* The table was not grown, restore the old capacity. */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = &ctxt->spaceTab[ctxt->spaceNr];
    *slot = val;
    ctxt->space = slot;
    return(ctxt->spaceNr++);
}
2247
2248
659k
/*
 * Pops the top entry of the stack of space values. ctxt->space is
 * moved to the entry below, or to the first table entry if the stack
 * becomes empty. The vacated entry is overwritten with -1.
 *
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2260
2261
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a parser context variable named
 * "ctxt" to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     reads the current xmlChar directly from the input buffer;
 *           currently defined identically to CUR.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   SKIPL(n) Skip n xmlChar while keeping the line and column counters
 *           up to date when newlines are crossed.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * NOTE: SHRINK, GROW, NEXT1 and COPY_BUF expand to a bare "if" statement
 * or a bare block instead of a "do { } while (0)" wrapper. Do not use
 * them as the unbraced body of an "if" that has an "else" branch.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true if the first n bytes at s are c1 ... cn.
 * Evaluation stops at the first mismatch.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SHRINK \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
	(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);						\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)

#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[i++] = (v);						\
    else i += xmlCopyCharMultiByte(&(b)[i],(v))
2372
2373
static int
2374
171M
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2375
171M
    int c = xmlCurrentChar(ctxt, len);
2376
2377
171M
    if (c == XML_INVALID_CHAR)
2378
8.48M
        c = 0xFFFD; /* replacement character */
2379
2380
171M
    return(c);
2381
171M
}
2382
2383
/**
2384
 * xmlSkipBlankChars:
2385
 * @ctxt:  the XML parser context
2386
 *
2387
 * DEPRECATED: Internal function, do not use.
2388
 *
2389
 * Skip whitespace in the input stream.
2390
 *
2391
 * Returns the number of space chars skipped
2392
 */
2393
int
2394
2.97M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2395
2.97M
    const xmlChar *cur;
2396
2.97M
    int res = 0;
2397
2398
    /*
2399
     * It's Okay to use CUR/NEXT here since all the blanks are on
2400
     * the ASCII range.
2401
     */
2402
2.97M
    cur = ctxt->input->cur;
2403
10.0M
    while (IS_BLANK_CH(*cur)) {
2404
10.0M
        if (*cur == '\n') {
2405
2.21M
            ctxt->input->line++; ctxt->input->col = 1;
2406
7.82M
        } else {
2407
7.82M
            ctxt->input->col++;
2408
7.82M
        }
2409
10.0M
        cur++;
2410
10.0M
        if (res < INT_MAX)
2411
10.0M
            res++;
2412
10.0M
        if (*cur == 0) {
2413
5.22k
            ctxt->input->cur = cur;
2414
5.22k
            xmlParserGrow(ctxt);
2415
5.22k
            cur = ctxt->input->cur;
2416
5.22k
        }
2417
10.0M
    }
2418
2.97M
    ctxt->input->cur = cur;
2419
2420
2.97M
    return(res);
2421
2.97M
}
2422
2423
static void
2424
17.6k
xmlPopPE(xmlParserCtxtPtr ctxt) {
2425
17.6k
    unsigned long consumed;
2426
17.6k
    xmlEntityPtr ent;
2427
2428
17.6k
    ent = ctxt->input->entity;
2429
2430
17.6k
    ent->flags &= ~XML_ENT_EXPANDING;
2431
2432
17.6k
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2433
1.45k
        int result;
2434
2435
        /*
2436
         * Read the rest of the stream in case of errors. We want
2437
         * to account for the whole entity size.
2438
         */
2439
1.45k
        do {
2440
1.45k
            ctxt->input->cur = ctxt->input->end;
2441
1.45k
            xmlParserShrink(ctxt);
2442
1.45k
            result = xmlParserGrow(ctxt);
2443
1.45k
        } while (result > 0);
2444
2445
1.45k
        consumed = ctxt->input->consumed;
2446
1.45k
        xmlSaturatedAddSizeT(&consumed,
2447
1.45k
                             ctxt->input->end - ctxt->input->base);
2448
2449
1.45k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2450
2451
        /*
2452
         * Add to sizeentities when parsing an external entity
2453
         * for the first time.
2454
         */
2455
1.45k
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2456
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2457
0
        }
2458
2459
1.45k
        ent->flags |= XML_ENT_CHECKED;
2460
1.45k
    }
2461
2462
17.6k
    xmlPopInput(ctxt);
2463
2464
17.6k
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2465
17.6k
}
2466
2467
/**
2468
 * xmlSkipBlankCharsPE:
2469
 * @ctxt:  the XML parser context
2470
 *
2471
 * Skip whitespace in the input stream, also handling parameter
2472
 * entities.
2473
 *
2474
 * Returns the number of space chars skipped
2475
 */
2476
static int
2477
636k
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2478
636k
    int res = 0;
2479
636k
    int inParam;
2480
636k
    int expandParam;
2481
2482
636k
    inParam = PARSER_IN_PE(ctxt);
2483
636k
    expandParam = PARSER_EXTERNAL(ctxt);
2484
2485
636k
    if (!inParam && !expandParam)
2486
395k
        return(xmlSkipBlankChars(ctxt));
2487
2488
450k
    while (PARSER_STOPPED(ctxt) == 0) {
2489
450k
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2490
192k
            NEXT;
2491
258k
        } else if (CUR == '%') {
2492
16.2k
            if ((expandParam == 0) ||
2493
16.2k
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2494
16.2k
                break;
2495
2496
            /*
2497
             * Expand parameter entity. We continue to consume
2498
             * whitespace at the start of the entity and possible
2499
             * even consume the whole entity and pop it. We might
2500
             * even pop multiple PEs in this loop.
2501
             */
2502
0
            xmlParsePEReference(ctxt);
2503
2504
0
            inParam = PARSER_IN_PE(ctxt);
2505
0
            expandParam = PARSER_EXTERNAL(ctxt);
2506
242k
        } else if (CUR == 0) {
2507
17.9k
            if (inParam == 0)
2508
306
                break;
2509
2510
17.6k
            xmlPopPE(ctxt);
2511
2512
17.6k
            inParam = PARSER_IN_PE(ctxt);
2513
17.6k
            expandParam = PARSER_EXTERNAL(ctxt);
2514
224k
        } else {
2515
224k
            break;
2516
224k
        }
2517
2518
        /*
2519
         * Also increase the counter when entering or exiting a PERef.
2520
         * The spec says: "When a parameter-entity reference is recognized
2521
         * in the DTD and included, its replacement text MUST be enlarged
2522
         * by the attachment of one leading and one following space (#x20)
2523
         * character."
2524
         */
2525
209k
        if (res < INT_MAX)
2526
209k
            res++;
2527
209k
    }
2528
2529
240k
    return(res);
2530
636k
}
2531
2532
/************************************************************************
2533
 *                  *
2534
 *    Commodity functions to handle entities      *
2535
 *                  *
2536
 ************************************************************************/
2537
2538
/**
2539
 * xmlPopInput:
2540
 * @ctxt:  an XML parser context
2541
 *
2542
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2543
 *          pop it and return the next char.
2544
 *
2545
 * Returns the current xmlChar in the parser context
2546
 */
2547
xmlChar
2548
17.6k
xmlPopInput(xmlParserCtxtPtr ctxt) {
2549
17.6k
    xmlParserInputPtr input;
2550
2551
17.6k
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2552
17.6k
    input = inputPop(ctxt);
2553
17.6k
    xmlFreeInputStream(input);
2554
17.6k
    if (*ctxt->input->cur == 0)
2555
310
        xmlParserGrow(ctxt);
2556
17.6k
    return(CUR);
2557
17.6k
}
2558
2559
/**
2560
 * xmlPushInput:
2561
 * @ctxt:  an XML parser context
2562
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2563
 *
2564
 * Push an input stream onto the stack.
2565
 *
2566
 * Returns -1 in case of error or the index in the input stack
2567
 */
2568
int
2569
18.5k
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2570
18.5k
    int maxDepth;
2571
18.5k
    int ret;
2572
2573
18.5k
    if ((ctxt == NULL) || (input == NULL))
2574
0
        return(-1);
2575
2576
18.5k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
2577
18.5k
    if (ctxt->inputNr > maxDepth) {
2578
2
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
2579
2
                       "Maximum entity nesting depth exceeded");
2580
2
        xmlHaltParser(ctxt);
2581
2
  return(-1);
2582
2
    }
2583
18.5k
    ret = inputPush(ctxt, input);
2584
18.5k
    if (ret >= 0)
2585
18.5k
        GROW;
2586
18.5k
    return(ret);
2587
18.5k
}
2588
2589
/**
2590
 * xmlParseCharRef:
2591
 * @ctxt:  an XML parser context
2592
 *
2593
 * DEPRECATED: Internal function, don't use.
2594
 *
2595
 * Parse a numeric character reference. Always consumes '&'.
2596
 *
2597
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2598
 *                  '&#x' [0-9a-fA-F]+ ';'
2599
 *
2600
 * [ WFC: Legal Character ]
2601
 * Characters referred to using character references must match the
2602
 * production for Char.
2603
 *
2604
 * Returns the value parsed (as an int), 0 in case of error
2605
 */
2606
int
2607
134k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2608
134k
    int val = 0;
2609
134k
    int count = 0;
2610
2611
    /*
2612
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2613
     */
2614
134k
    if ((RAW == '&') && (NXT(1) == '#') &&
2615
134k
        (NXT(2) == 'x')) {
2616
74.5k
  SKIP(3);
2617
74.5k
  GROW;
2618
364k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2619
329k
      if (count++ > 20) {
2620
1.77k
    count = 0;
2621
1.77k
    GROW;
2622
1.77k
      }
2623
329k
      if ((RAW >= '0') && (RAW <= '9'))
2624
64.2k
          val = val * 16 + (CUR - '0');
2625
264k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2626
18.8k
          val = val * 16 + (CUR - 'a') + 10;
2627
245k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2628
207k
          val = val * 16 + (CUR - 'A') + 10;
2629
38.8k
      else {
2630
38.8k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2631
38.8k
    val = 0;
2632
38.8k
    break;
2633
38.8k
      }
2634
290k
      if (val > 0x110000)
2635
103k
          val = 0x110000;
2636
2637
290k
      NEXT;
2638
290k
      count++;
2639
290k
  }
2640
74.5k
  if (RAW == ';') {
2641
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2642
35.6k
      ctxt->input->col++;
2643
35.6k
      ctxt->input->cur++;
2644
35.6k
  }
2645
74.5k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2646
60.0k
  SKIP(2);
2647
60.0k
  GROW;
2648
215k
  while (RAW != ';') { /* loop blocked by count */
2649
170k
      if (count++ > 20) {
2650
1.34k
    count = 0;
2651
1.34k
    GROW;
2652
1.34k
      }
2653
170k
      if ((RAW >= '0') && (RAW <= '9'))
2654
155k
          val = val * 10 + (CUR - '0');
2655
15.6k
      else {
2656
15.6k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2657
15.6k
    val = 0;
2658
15.6k
    break;
2659
15.6k
      }
2660
155k
      if (val > 0x110000)
2661
16.0k
          val = 0x110000;
2662
2663
155k
      NEXT;
2664
155k
      count++;
2665
155k
  }
2666
60.0k
  if (RAW == ';') {
2667
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2668
44.3k
      ctxt->input->col++;
2669
44.3k
      ctxt->input->cur++;
2670
44.3k
  }
2671
60.0k
    } else {
2672
0
        if (RAW == '&')
2673
0
            SKIP(1);
2674
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2675
0
    }
2676
2677
    /*
2678
     * [ WFC: Legal Character ]
2679
     * Characters referred to using character references must match the
2680
     * production for Char.
2681
     */
2682
134k
    if (val >= 0x110000) {
2683
4.46k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2684
4.46k
                "xmlParseCharRef: character reference out of bounds\n",
2685
4.46k
          val);
2686
130k
    } else if (IS_CHAR(val)) {
2687
70.7k
        return(val);
2688
70.7k
    } else {
2689
59.3k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2690
59.3k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2691
59.3k
                    val);
2692
59.3k
    }
2693
63.8k
    return(0);
2694
134k
}
2695
2696
/**
2697
 * xmlParseStringCharRef:
2698
 * @ctxt:  an XML parser context
2699
 * @str:  a pointer to an index in the string
2700
 *
2701
 * parse Reference declarations, variant parsing from a string rather
2702
 * than an an input flow.
2703
 *
2704
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2705
 *                  '&#x' [0-9a-fA-F]+ ';'
2706
 *
2707
 * [ WFC: Legal Character ]
2708
 * Characters referred to using character references must match the
2709
 * production for Char.
2710
 *
2711
 * Returns the value parsed (as an int), 0 in case of error, str will be
2712
 *         updated to the current value of the index
2713
 */
2714
static int
2715
131k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2716
131k
    const xmlChar *ptr;
2717
131k
    xmlChar cur;
2718
131k
    int val = 0;
2719
2720
131k
    if ((str == NULL) || (*str == NULL)) return(0);
2721
131k
    ptr = *str;
2722
131k
    cur = *ptr;
2723
131k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2724
69.7k
  ptr += 3;
2725
69.7k
  cur = *ptr;
2726
252k
  while (cur != ';') { /* Non input consuming loop */
2727
190k
      if ((cur >= '0') && (cur <= '9'))
2728
68.8k
          val = val * 16 + (cur - '0');
2729
121k
      else if ((cur >= 'a') && (cur <= 'f'))
2730
47.1k
          val = val * 16 + (cur - 'a') + 10;
2731
74.7k
      else if ((cur >= 'A') && (cur <= 'F'))
2732
66.5k
          val = val * 16 + (cur - 'A') + 10;
2733
8.18k
      else {
2734
8.18k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2735
8.18k
    val = 0;
2736
8.18k
    break;
2737
8.18k
      }
2738
182k
      if (val > 0x110000)
2739
5.10k
          val = 0x110000;
2740
2741
182k
      ptr++;
2742
182k
      cur = *ptr;
2743
182k
  }
2744
69.7k
  if (cur == ';')
2745
61.5k
      ptr++;
2746
69.7k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2747
61.4k
  ptr += 2;
2748
61.4k
  cur = *ptr;
2749
301k
  while (cur != ';') { /* Non input consuming loops */
2750
246k
      if ((cur >= '0') && (cur <= '9'))
2751
240k
          val = val * 10 + (cur - '0');
2752
6.53k
      else {
2753
6.53k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2754
6.53k
    val = 0;
2755
6.53k
    break;
2756
6.53k
      }
2757
240k
      if (val > 0x110000)
2758
13.8k
          val = 0x110000;
2759
2760
240k
      ptr++;
2761
240k
      cur = *ptr;
2762
240k
  }
2763
61.4k
  if (cur == ';')
2764
54.8k
      ptr++;
2765
61.4k
    } else {
2766
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2767
0
  return(0);
2768
0
    }
2769
131k
    *str = ptr;
2770
2771
    /*
2772
     * [ WFC: Legal Character ]
2773
     * Characters referred to using character references must match the
2774
     * production for Char.
2775
     */
2776
131k
    if (val >= 0x110000) {
2777
2.67k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2778
2.67k
                "xmlParseStringCharRef: character reference out of bounds\n",
2779
2.67k
                val);
2780
128k
    } else if (IS_CHAR(val)) {
2781
111k
        return(val);
2782
111k
    } else {
2783
16.9k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2784
16.9k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2785
16.9k
        val);
2786
16.9k
    }
2787
19.6k
    return(0);
2788
131k
}
2789
2790
/**
2791
 * xmlParserHandlePEReference:
2792
 * @ctxt:  the parser context
2793
 *
2794
 * DEPRECATED: Internal function, do not use.
2795
 *
2796
 * [69] PEReference ::= '%' Name ';'
2797
 *
2798
 * [ WFC: No Recursion ]
2799
 * A parsed entity must not contain a recursive
2800
 * reference to itself, either directly or indirectly.
2801
 *
2802
 * [ WFC: Entity Declared ]
2803
 * In a document without any DTD, a document with only an internal DTD
2804
 * subset which contains no parameter entity references, or a document
2805
 * with "standalone='yes'", ...  ... The declaration of a parameter
2806
 * entity must precede any reference to it...
2807
 *
2808
 * [ VC: Entity Declared ]
2809
 * In a document with an external subset or external parameter entities
2810
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2811
 * must precede any reference to it...
2812
 *
2813
 * [ WFC: In DTD ]
2814
 * Parameter-entity references may only appear in the DTD.
2815
 * NOTE: misleading but this is handled.
2816
 *
2817
 * A PEReference may have been detected in the current input stream
2818
 * the handling is done accordingly to
2819
 *      http://www.w3.org/TR/REC-xml#entproc
2820
 * i.e.
2821
 *   - Included in literal in entity values
2822
 *   - Included as Parameter Entity reference within DTDs
2823
 */
2824
void
2825
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2826
0
    xmlParsePEReference(ctxt);
2827
0
}
2828
2829
/**
2830
 * xmlStringLenDecodeEntities:
2831
 * @ctxt:  the parser context
2832
 * @str:  the input string
2833
 * @len: the string length
2834
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2835
 * @end:  an end marker xmlChar, 0 if none
2836
 * @end2:  an end marker xmlChar, 0 if none
2837
 * @end3:  an end marker xmlChar, 0 if none
2838
 *
2839
 * DEPRECATED: Internal function, don't use.
2840
 *
2841
 * Returns A newly allocated string with the substitution done. The caller
2842
 *      must deallocate it !
2843
 */
2844
xmlChar *
2845
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2846
                           int what ATTRIBUTE_UNUSED,
2847
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2848
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2849
0
        return(NULL);
2850
2851
0
    if ((str[len] != 0) ||
2852
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2853
0
        return(NULL);
2854
2855
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2856
0
}
2857
2858
/**
2859
 * xmlStringDecodeEntities:
2860
 * @ctxt:  the parser context
2861
 * @str:  the input string
2862
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2863
 * @end:  an end marker xmlChar, 0 if none
2864
 * @end2:  an end marker xmlChar, 0 if none
2865
 * @end3:  an end marker xmlChar, 0 if none
2866
 *
2867
 * DEPRECATED: Internal function, don't use.
2868
 *
2869
 * Returns A newly allocated string with the substitution done. The caller
2870
 *      must deallocate it !
2871
 */
2872
xmlChar *
2873
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2874
                        int what ATTRIBUTE_UNUSED,
2875
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2876
0
    if ((ctxt == NULL) || (str == NULL))
2877
0
        return(NULL);
2878
2879
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2880
0
        return(NULL);
2881
2882
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2883
0
}
2884
2885
/************************************************************************
2886
 *                  *
2887
 *    Commodity functions, cleanup needed ?     *
2888
 *                  *
2889
 ************************************************************************/
2890
2891
/**
2892
 * areBlanks:
2893
 * @ctxt:  an XML parser context
2894
 * @str:  a xmlChar *
2895
 * @len:  the size of @str
2896
 * @blank_chars: we know the chars are blanks
2897
 *
2898
 * Is this a sequence of blank chars that one can ignore ?
2899
 *
2900
 * Returns 1 if ignorable 0 otherwise.
2901
 */
2902
2903
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2904
87.4k
                     int blank_chars) {
2905
87.4k
    int i;
2906
87.4k
    xmlNodePtr lastChild;
2907
2908
    /*
2909
     * Don't spend time trying to differentiate them, the same callback is
2910
     * used !
2911
     */
2912
87.4k
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2913
87.4k
  return(0);
2914
2915
    /*
2916
     * Check for xml:space value.
2917
     */
2918
0
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2919
0
        (*(ctxt->space) == -2))
2920
0
  return(0);
2921
2922
    /*
2923
     * Check that the string is made of blanks
2924
     */
2925
0
    if (blank_chars == 0) {
2926
0
  for (i = 0;i < len;i++)
2927
0
      if (!(IS_BLANK_CH(str[i]))) return(0);
2928
0
    }
2929
2930
    /*
2931
     * Look if the element is mixed content in the DTD if available
2932
     */
2933
0
    if (ctxt->node == NULL) return(0);
2934
0
    if (ctxt->myDoc != NULL) {
2935
0
        xmlElementPtr elemDecl = NULL;
2936
0
        xmlDocPtr doc = ctxt->myDoc;
2937
0
        const xmlChar *prefix = NULL;
2938
2939
0
        if (ctxt->node->ns)
2940
0
            prefix = ctxt->node->ns->prefix;
2941
0
        if (doc->intSubset != NULL)
2942
0
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2943
0
                                      prefix);
2944
0
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2945
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2946
0
                                      prefix);
2947
0
        if (elemDecl != NULL) {
2948
0
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2949
0
                return(1);
2950
0
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2951
0
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2952
0
                return(0);
2953
0
        }
2954
0
    }
2955
2956
    /*
2957
     * Otherwise, heuristic :-\
2958
     */
2959
0
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2960
0
    if ((ctxt->node->children == NULL) &&
2961
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2962
2963
0
    lastChild = xmlGetLastChild(ctxt->node);
2964
0
    if (lastChild == NULL) {
2965
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2966
0
            (ctxt->node->content != NULL)) return(0);
2967
0
    } else if (xmlNodeIsText(lastChild))
2968
0
        return(0);
2969
0
    else if ((ctxt->node->children != NULL) &&
2970
0
             (xmlNodeIsText(ctxt->node->children)))
2971
0
        return(0);
2972
0
    return(1);
2973
0
}
2974
2975
/************************************************************************
2976
 *                  *
2977
 *    Extra stuff for namespace support     *
2978
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2979
 *                  *
2980
 ************************************************************************/
2981
2982
/**
2983
 * xmlSplitQName:
2984
 * @ctxt:  an XML parser context
2985
 * @name:  an XML parser context
2986
 * @prefixOut:  a xmlChar **
2987
 *
2988
 * DEPRECATED: Don't use.
2989
 *
2990
 * parse an UTF8 encoded XML qualified name string
2991
 *
2992
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2993
 *
2994
 * [NS 6] Prefix ::= NCName
2995
 *
2996
 * [NS 7] LocalPart ::= NCName
2997
 *
2998
 * Returns the local part, and prefix is updated
2999
 *   to get the Prefix if any.
3000
 */
3001
3002
xmlChar *
3003
0
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
3004
0
    xmlChar buf[XML_MAX_NAMELEN + 5];
3005
0
    xmlChar *buffer = NULL;
3006
0
    int len = 0;
3007
0
    int max = XML_MAX_NAMELEN;
3008
0
    xmlChar *ret = NULL;
3009
0
    xmlChar *prefix;
3010
0
    const xmlChar *cur = name;
3011
0
    int c;
3012
3013
0
    if (prefixOut == NULL) return(NULL);
3014
0
    *prefixOut = NULL;
3015
3016
0
    if (cur == NULL) return(NULL);
3017
3018
    /* nasty but well=formed */
3019
0
    if (cur[0] == ':')
3020
0
  return(xmlStrdup(name));
3021
3022
0
    c = *cur++;
3023
0
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3024
0
  buf[len++] = c;
3025
0
  c = *cur++;
3026
0
    }
3027
0
    if (len >= max) {
3028
  /*
3029
   * Okay someone managed to make a huge name, so he's ready to pay
3030
   * for the processing speed.
3031
   */
3032
0
  max = len * 2;
3033
3034
0
  buffer = xmlMalloc(max);
3035
0
  if (buffer == NULL) {
3036
0
      xmlErrMemory(ctxt);
3037
0
      return(NULL);
3038
0
  }
3039
0
  memcpy(buffer, buf, len);
3040
0
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3041
0
      if (len + 10 > max) {
3042
0
          xmlChar *tmp;
3043
3044
0
    max *= 2;
3045
0
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3046
0
    if (tmp == NULL) {
3047
0
        xmlFree(buffer);
3048
0
        xmlErrMemory(ctxt);
3049
0
        return(NULL);
3050
0
    }
3051
0
    buffer = tmp;
3052
0
      }
3053
0
      buffer[len++] = c;
3054
0
      c = *cur++;
3055
0
  }
3056
0
  buffer[len] = 0;
3057
0
    }
3058
3059
0
    if ((c == ':') && (*cur == 0)) {
3060
0
        if (buffer != NULL)
3061
0
      xmlFree(buffer);
3062
0
  return(xmlStrdup(name));
3063
0
    }
3064
3065
0
    if (buffer == NULL) {
3066
0
  ret = xmlStrndup(buf, len);
3067
0
        if (ret == NULL) {
3068
0
      xmlErrMemory(ctxt);
3069
0
      return(NULL);
3070
0
        }
3071
0
    } else {
3072
0
  ret = buffer;
3073
0
  buffer = NULL;
3074
0
  max = XML_MAX_NAMELEN;
3075
0
    }
3076
3077
3078
0
    if (c == ':') {
3079
0
  c = *cur;
3080
0
        prefix = ret;
3081
0
  if (c == 0) {
3082
0
      ret = xmlStrndup(BAD_CAST "", 0);
3083
0
            if (ret == NULL) {
3084
0
                xmlFree(prefix);
3085
0
                return(NULL);
3086
0
            }
3087
0
            *prefixOut = prefix;
3088
0
            return(ret);
3089
0
  }
3090
0
  len = 0;
3091
3092
  /*
3093
   * Check that the first character is proper to start
3094
   * a new name
3095
   */
3096
0
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3097
0
        ((c >= 0x41) && (c <= 0x5A)) ||
3098
0
        (c == '_') || (c == ':'))) {
3099
0
      int l;
3100
0
      int first = CUR_SCHAR(cur, l);
3101
3102
0
      if (!IS_LETTER(first) && (first != '_')) {
3103
0
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3104
0
          "Name %s is not XML Namespace compliant\n",
3105
0
          name);
3106
0
      }
3107
0
  }
3108
0
  cur++;
3109
3110
0
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3111
0
      buf[len++] = c;
3112
0
      c = *cur++;
3113
0
  }
3114
0
  if (len >= max) {
3115
      /*
3116
       * Okay someone managed to make a huge name, so he's ready to pay
3117
       * for the processing speed.
3118
       */
3119
0
      max = len * 2;
3120
3121
0
      buffer = xmlMalloc(max);
3122
0
      if (buffer == NULL) {
3123
0
          xmlErrMemory(ctxt);
3124
0
                xmlFree(prefix);
3125
0
    return(NULL);
3126
0
      }
3127
0
      memcpy(buffer, buf, len);
3128
0
      while (c != 0) { /* tested bigname2.xml */
3129
0
    if (len + 10 > max) {
3130
0
        xmlChar *tmp;
3131
3132
0
        max *= 2;
3133
0
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3134
0
        if (tmp == NULL) {
3135
0
      xmlErrMemory(ctxt);
3136
0
                        xmlFree(prefix);
3137
0
      xmlFree(buffer);
3138
0
      return(NULL);
3139
0
        }
3140
0
        buffer = tmp;
3141
0
    }
3142
0
    buffer[len++] = c;
3143
0
    c = *cur++;
3144
0
      }
3145
0
      buffer[len] = 0;
3146
0
  }
3147
3148
0
  if (buffer == NULL) {
3149
0
      ret = xmlStrndup(buf, len);
3150
0
            if (ret == NULL) {
3151
0
                xmlFree(prefix);
3152
0
                return(NULL);
3153
0
            }
3154
0
  } else {
3155
0
      ret = buffer;
3156
0
  }
3157
3158
0
        *prefixOut = prefix;
3159
0
    }
3160
3161
0
    return(ret);
3162
0
}
3163
3164
/************************************************************************
3165
 *                  *
3166
 *      The parser itself       *
3167
 *  Relates to http://www.w3.org/TR/REC-xml       *
3168
 *                  *
3169
 ************************************************************************/
3170
3171
/************************************************************************
3172
 *                  *
3173
 *  Routines to parse Name, NCName and NmToken      *
3174
 *                  *
3175
 ************************************************************************/
3176
3177
/*
3178
 * The two following functions are related to the change of accepted
3179
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3180
 * They correspond to the modified production [4] and the new production [4a]
3181
 * changes in that revision. Also note that the macros used for the
3182
 * productions Letter, Digit, CombiningChar and Extender are not needed
3183
 * anymore.
3184
 * We still keep compatibility to pre-revision5 parsing semantic if the
3185
 * new XML_PARSE_OLD10 option is given to the parser.
3186
 */
3187
static int
3188
2.10M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3189
2.10M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3190
        /*
3191
   * Use the new checks of production [4] [4a] amd [5] of the
3192
   * Update 5 of XML-1.0
3193
   */
3194
2.10M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3195
2.10M
      (((c >= 'a') && (c <= 'z')) ||
3196
2.10M
       ((c >= 'A') && (c <= 'Z')) ||
3197
2.10M
       (c == '_') || (c == ':') ||
3198
2.10M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3199
2.10M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3200
2.10M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3201
2.10M
       ((c >= 0x370) && (c <= 0x37D)) ||
3202
2.10M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3203
2.10M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3204
2.10M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3205
2.10M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3206
2.10M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3207
2.10M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3208
2.10M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3209
2.10M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3210
1.80M
      return(1);
3211
2.10M
    } else {
3212
0
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3213
0
      return(1);
3214
0
    }
3215
296k
    return(0);
3216
2.10M
}
3217
3218
static int
3219
48.2M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3220
48.2M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3221
        /*
3222
   * Use the new checks of production [4] [4a] amd [5] of the
3223
   * Update 5 of XML-1.0
3224
   */
3225
48.2M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3226
48.2M
      (((c >= 'a') && (c <= 'z')) ||
3227
48.2M
       ((c >= 'A') && (c <= 'Z')) ||
3228
48.2M
       ((c >= '0') && (c <= '9')) || /* !start */
3229
48.2M
       (c == '_') || (c == ':') ||
3230
48.2M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3231
48.2M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3232
48.2M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3233
48.2M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3234
48.2M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3235
48.2M
       ((c >= 0x370) && (c <= 0x37D)) ||
3236
48.2M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3237
48.2M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3238
48.2M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3239
48.2M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3240
48.2M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3241
48.2M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3242
48.2M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3243
48.2M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3244
48.2M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3245
46.4M
       return(1);
3246
48.2M
    } else {
3247
0
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3248
0
            (c == '.') || (c == '-') ||
3249
0
      (c == '_') || (c == ':') ||
3250
0
      (IS_COMBINING(c)) ||
3251
0
      (IS_EXTENDER(c)))
3252
0
      return(1);
3253
0
    }
3254
1.82M
    return(0);
3255
48.2M
}
3256
3257
static const xmlChar *
3258
209k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3259
209k
    const xmlChar *ret;
3260
209k
    int len = 0, l;
3261
209k
    int c;
3262
209k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3263
0
                    XML_MAX_TEXT_LENGTH :
3264
209k
                    XML_MAX_NAME_LENGTH;
3265
3266
    /*
3267
     * Handler for more complex cases
3268
     */
3269
209k
    c = xmlCurrentChar(ctxt, &l);
3270
209k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3271
        /*
3272
   * Use the new checks of production [4] [4a] amd [5] of the
3273
   * Update 5 of XML-1.0
3274
   */
3275
209k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3276
209k
      (!(((c >= 'a') && (c <= 'z')) ||
3277
203k
         ((c >= 'A') && (c <= 'Z')) ||
3278
203k
         (c == '_') || (c == ':') ||
3279
203k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3280
203k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3281
203k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3282
203k
         ((c >= 0x370) && (c <= 0x37D)) ||
3283
203k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3284
203k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3285
203k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3286
203k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3287
203k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3288
203k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3289
203k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3290
203k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3291
135k
      return(NULL);
3292
135k
  }
3293
74.6k
  len += l;
3294
74.6k
  NEXTL(l);
3295
74.6k
  c = xmlCurrentChar(ctxt, &l);
3296
5.37M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3297
5.37M
         (((c >= 'a') && (c <= 'z')) ||
3298
5.36M
          ((c >= 'A') && (c <= 'Z')) ||
3299
5.36M
          ((c >= '0') && (c <= '9')) || /* !start */
3300
5.36M
          (c == '_') || (c == ':') ||
3301
5.36M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3302
5.36M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3303
5.36M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3304
5.36M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3305
5.36M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3306
5.36M
          ((c >= 0x370) && (c <= 0x37D)) ||
3307
5.36M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3308
5.36M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3309
5.36M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3310
5.36M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3311
5.36M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3312
5.36M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3313
5.36M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3314
5.36M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3315
5.36M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3316
5.36M
    )) {
3317
5.30M
            if (len <= INT_MAX - l)
3318
5.30M
          len += l;
3319
5.30M
      NEXTL(l);
3320
5.30M
      c = xmlCurrentChar(ctxt, &l);
3321
5.30M
  }
3322
74.6k
    } else {
3323
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3324
0
      (!IS_LETTER(c) && (c != '_') &&
3325
0
       (c != ':'))) {
3326
0
      return(NULL);
3327
0
  }
3328
0
  len += l;
3329
0
  NEXTL(l);
3330
0
  c = xmlCurrentChar(ctxt, &l);
3331
3332
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3333
0
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3334
0
    (c == '.') || (c == '-') ||
3335
0
    (c == '_') || (c == ':') ||
3336
0
    (IS_COMBINING(c)) ||
3337
0
    (IS_EXTENDER(c)))) {
3338
0
            if (len <= INT_MAX - l)
3339
0
          len += l;
3340
0
      NEXTL(l);
3341
0
      c = xmlCurrentChar(ctxt, &l);
3342
0
  }
3343
0
    }
3344
74.6k
    if (len > maxLength) {
3345
48
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3346
48
        return(NULL);
3347
48
    }
3348
74.5k
    if (ctxt->input->cur - ctxt->input->base < len) {
3349
        /*
3350
         * There were a couple of bugs where PERefs lead to to a change
3351
         * of the buffer. Check the buffer size to avoid passing an invalid
3352
         * pointer to xmlDictLookup.
3353
         */
3354
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3355
0
                    "unexpected change of input buffer");
3356
0
        return (NULL);
3357
0
    }
3358
74.5k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3359
837
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3360
73.7k
    else
3361
73.7k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3362
74.5k
    if (ret == NULL)
3363
0
        xmlErrMemory(ctxt);
3364
74.5k
    return(ret);
3365
74.5k
}
3366
3367
/**
3368
 * xmlParseName:
3369
 * @ctxt:  an XML parser context
3370
 *
3371
 * DEPRECATED: Internal function, don't use.
3372
 *
3373
 * parse an XML name.
3374
 *
3375
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3376
 *                  CombiningChar | Extender
3377
 *
3378
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3379
 *
3380
 * [6] Names ::= Name (#x20 Name)*
3381
 *
3382
 * Returns the Name parsed or NULL
3383
 */
3384
3385
const xmlChar *
3386
1.11M
xmlParseName(xmlParserCtxtPtr ctxt) {
3387
1.11M
    const xmlChar *in;
3388
1.11M
    const xmlChar *ret;
3389
1.11M
    size_t count = 0;
3390
1.11M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3391
0
                       XML_MAX_TEXT_LENGTH :
3392
1.11M
                       XML_MAX_NAME_LENGTH;
3393
3394
1.11M
    GROW;
3395
3396
    /*
3397
     * Accelerator for simple ASCII names
3398
     */
3399
1.11M
    in = ctxt->input->cur;
3400
1.11M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3401
1.11M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3402
1.11M
  (*in == '_') || (*in == ':')) {
3403
942k
  in++;
3404
2.21M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3405
2.21M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3406
2.21M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3407
2.21M
         (*in == '_') || (*in == '-') ||
3408
2.21M
         (*in == ':') || (*in == '.'))
3409
1.27M
      in++;
3410
942k
  if ((*in > 0) && (*in < 0x80)) {
3411
905k
      count = in - ctxt->input->cur;
3412
905k
            if (count > maxLength) {
3413
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3414
0
                return(NULL);
3415
0
            }
3416
905k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3417
905k
      ctxt->input->cur = in;
3418
905k
      ctxt->input->col += count;
3419
905k
      if (ret == NULL)
3420
0
          xmlErrMemory(ctxt);
3421
905k
      return(ret);
3422
905k
  }
3423
942k
    }
3424
    /* accelerator for special cases */
3425
209k
    return(xmlParseNameComplex(ctxt));
3426
1.11M
}
3427
3428
static xmlHashedString
3429
446k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3430
446k
    xmlHashedString ret;
3431
446k
    int len = 0, l;
3432
446k
    int c;
3433
446k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3434
0
                    XML_MAX_TEXT_LENGTH :
3435
446k
                    XML_MAX_NAME_LENGTH;
3436
446k
    size_t startPosition = 0;
3437
3438
446k
    ret.name = NULL;
3439
446k
    ret.hashValue = 0;
3440
3441
    /*
3442
     * Handler for more complex cases
3443
     */
3444
446k
    startPosition = CUR_PTR - BASE_PTR;
3445
446k
    c = xmlCurrentChar(ctxt, &l);
3446
446k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3447
446k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3448
329k
  return(ret);
3449
329k
    }
3450
3451
38.3M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3452
38.3M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3453
38.2M
        if (len <= INT_MAX - l)
3454
38.2M
      len += l;
3455
38.2M
  NEXTL(l);
3456
38.2M
  c = xmlCurrentChar(ctxt, &l);
3457
38.2M
    }
3458
117k
    if (len > maxLength) {
3459
1.14k
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3460
1.14k
        return(ret);
3461
1.14k
    }
3462
116k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3463
116k
    if (ret.name == NULL)
3464
0
        xmlErrMemory(ctxt);
3465
116k
    return(ret);
3466
117k
}
3467
3468
/**
3469
 * xmlParseNCName:
3470
 * @ctxt:  an XML parser context
3471
 * @len:  length of the string parsed
3472
 *
3473
 * parse an XML name.
3474
 *
3475
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3476
 *                      CombiningChar | Extender
3477
 *
3478
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3479
 *
3480
 * Returns the Name parsed or NULL
3481
 */
3482
3483
static xmlHashedString
3484
1.65M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3485
1.65M
    const xmlChar *in, *e;
3486
1.65M
    xmlHashedString ret;
3487
1.65M
    size_t count = 0;
3488
1.65M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3489
0
                       XML_MAX_TEXT_LENGTH :
3490
1.65M
                       XML_MAX_NAME_LENGTH;
3491
3492
1.65M
    ret.name = NULL;
3493
3494
    /*
3495
     * Accelerator for simple ASCII names
3496
     */
3497
1.65M
    in = ctxt->input->cur;
3498
1.65M
    e = ctxt->input->end;
3499
1.65M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3500
1.65M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3501
1.65M
   (*in == '_')) && (in < e)) {
3502
1.25M
  in++;
3503
3.53M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3504
3.53M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3505
3.53M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3506
3.53M
          (*in == '_') || (*in == '-') ||
3507
3.53M
          (*in == '.')) && (in < e))
3508
2.28M
      in++;
3509
1.25M
  if (in >= e)
3510
759
      goto complex;
3511
1.25M
  if ((*in > 0) && (*in < 0x80)) {
3512
1.21M
      count = in - ctxt->input->cur;
3513
1.21M
            if (count > maxLength) {
3514
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3515
0
                return(ret);
3516
0
            }
3517
1.21M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3518
1.21M
      ctxt->input->cur = in;
3519
1.21M
      ctxt->input->col += count;
3520
1.21M
      if (ret.name == NULL) {
3521
0
          xmlErrMemory(ctxt);
3522
0
      }
3523
1.21M
      return(ret);
3524
1.21M
  }
3525
1.25M
    }
3526
446k
complex:
3527
446k
    return(xmlParseNCNameComplex(ctxt));
3528
1.65M
}
3529
3530
/**
3531
 * xmlParseNameAndCompare:
3532
 * @ctxt:  an XML parser context
3533
 *
3534
 * parse an XML name and compares for match
3535
 * (specialized for endtag parsing)
3536
 *
3537
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3538
 * and the name for mismatch
3539
 */
3540
3541
static const xmlChar *
3542
43.8k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3543
43.8k
    register const xmlChar *cmp = other;
3544
43.8k
    register const xmlChar *in;
3545
43.8k
    const xmlChar *ret;
3546
3547
43.8k
    GROW;
3548
3549
43.8k
    in = ctxt->input->cur;
3550
185k
    while (*in != 0 && *in == *cmp) {
3551
141k
  ++in;
3552
141k
  ++cmp;
3553
141k
    }
3554
43.8k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3555
  /* success */
3556
26.2k
  ctxt->input->col += in - ctxt->input->cur;
3557
26.2k
  ctxt->input->cur = in;
3558
26.2k
  return (const xmlChar*) 1;
3559
26.2k
    }
3560
    /* failure (or end of input buffer), check with full function */
3561
17.5k
    ret = xmlParseName (ctxt);
3562
    /* strings coming from the dictionary direct compare possible */
3563
17.5k
    if (ret == other) {
3564
2.28k
  return (const xmlChar*) 1;
3565
2.28k
    }
3566
15.3k
    return ret;
3567
17.5k
}
3568
3569
/**
3570
 * xmlParseStringName:
3571
 * @ctxt:  an XML parser context
3572
 * @str:  a pointer to the string pointer (IN/OUT)
3573
 *
3574
 * parse an XML name.
3575
 *
3576
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3577
 *                  CombiningChar | Extender
3578
 *
3579
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3580
 *
3581
 * [6] Names ::= Name (#x20 Name)*
3582
 *
3583
 * Returns the Name parsed or NULL. The @str pointer
3584
 * is updated to the current location in the string.
3585
 */
3586
3587
static xmlChar *
3588
1.67M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3589
1.67M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3590
1.67M
    xmlChar *ret;
3591
1.67M
    const xmlChar *cur = *str;
3592
1.67M
    int len = 0, l;
3593
1.67M
    int c;
3594
1.67M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3595
0
                    XML_MAX_TEXT_LENGTH :
3596
1.67M
                    XML_MAX_NAME_LENGTH;
3597
3598
1.67M
    c = CUR_SCHAR(cur, l);
3599
1.67M
    if (!xmlIsNameStartChar(ctxt, c)) {
3600
2.49k
  return(NULL);
3601
2.49k
    }
3602
3603
1.67M
    COPY_BUF(buf, len, c);
3604
1.67M
    cur += l;
3605
1.67M
    c = CUR_SCHAR(cur, l);
3606
3.66M
    while (xmlIsNameChar(ctxt, c)) {
3607
2.00M
  COPY_BUF(buf, len, c);
3608
2.00M
  cur += l;
3609
2.00M
  c = CUR_SCHAR(cur, l);
3610
2.00M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3611
      /*
3612
       * Okay someone managed to make a huge name, so he's ready to pay
3613
       * for the processing speed.
3614
       */
3615
6.61k
      xmlChar *buffer;
3616
6.61k
      int max = len * 2;
3617
3618
6.61k
      buffer = xmlMalloc(max);
3619
6.61k
      if (buffer == NULL) {
3620
0
          xmlErrMemory(ctxt);
3621
0
    return(NULL);
3622
0
      }
3623
6.61k
      memcpy(buffer, buf, len);
3624
1.56M
      while (xmlIsNameChar(ctxt, c)) {
3625
1.55M
    if (len + 10 > max) {
3626
3.90k
        xmlChar *tmp;
3627
3628
3.90k
        max *= 2;
3629
3.90k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3630
3.90k
        if (tmp == NULL) {
3631
0
      xmlErrMemory(ctxt);
3632
0
      xmlFree(buffer);
3633
0
      return(NULL);
3634
0
        }
3635
3.90k
        buffer = tmp;
3636
3.90k
    }
3637
1.55M
    COPY_BUF(buffer, len, c);
3638
1.55M
    cur += l;
3639
1.55M
    c = CUR_SCHAR(cur, l);
3640
1.55M
                if (len > maxLength) {
3641
74
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3642
74
                    xmlFree(buffer);
3643
74
                    return(NULL);
3644
74
                }
3645
1.55M
      }
3646
6.54k
      buffer[len] = 0;
3647
6.54k
      *str = cur;
3648
6.54k
      return(buffer);
3649
6.61k
  }
3650
2.00M
    }
3651
1.66M
    if (len > maxLength) {
3652
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3653
0
        return(NULL);
3654
0
    }
3655
1.66M
    *str = cur;
3656
1.66M
    ret = xmlStrndup(buf, len);
3657
1.66M
    if (ret == NULL)
3658
0
        xmlErrMemory(ctxt);
3659
1.66M
    return(ret);
3660
1.66M
}
3661
3662
/**
3663
 * xmlParseNmtoken:
3664
 * @ctxt:  an XML parser context
3665
 *
3666
 * DEPRECATED: Internal function, don't use.
3667
 *
3668
 * parse an XML Nmtoken.
3669
 *
3670
 * [7] Nmtoken ::= (NameChar)+
3671
 *
3672
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3673
 *
3674
 * Returns the Nmtoken parsed or NULL
3675
 */
3676
3677
xmlChar *
3678
48.3k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3679
48.3k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3680
48.3k
    xmlChar *ret;
3681
48.3k
    int len = 0, l;
3682
48.3k
    int c;
3683
48.3k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3684
0
                    XML_MAX_TEXT_LENGTH :
3685
48.3k
                    XML_MAX_NAME_LENGTH;
3686
3687
48.3k
    c = xmlCurrentChar(ctxt, &l);
3688
3689
251k
    while (xmlIsNameChar(ctxt, c)) {
3690
206k
  COPY_BUF(buf, len, c);
3691
206k
  NEXTL(l);
3692
206k
  c = xmlCurrentChar(ctxt, &l);
3693
206k
  if (len >= XML_MAX_NAMELEN) {
3694
      /*
3695
       * Okay someone managed to make a huge token, so he's ready to pay
3696
       * for the processing speed.
3697
       */
3698
2.69k
      xmlChar *buffer;
3699
2.69k
      int max = len * 2;
3700
3701
2.69k
      buffer = xmlMalloc(max);
3702
2.69k
      if (buffer == NULL) {
3703
0
          xmlErrMemory(ctxt);
3704
0
    return(NULL);
3705
0
      }
3706
2.69k
      memcpy(buffer, buf, len);
3707
4.43M
      while (xmlIsNameChar(ctxt, c)) {
3708
4.43M
    if (len + 10 > max) {
3709
6.26k
        xmlChar *tmp;
3710
3711
6.26k
        max *= 2;
3712
6.26k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3713
6.26k
        if (tmp == NULL) {
3714
0
      xmlErrMemory(ctxt);
3715
0
      xmlFree(buffer);
3716
0
      return(NULL);
3717
0
        }
3718
6.26k
        buffer = tmp;
3719
6.26k
    }
3720
4.43M
    COPY_BUF(buffer, len, c);
3721
4.43M
                if (len > maxLength) {
3722
203
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3723
203
                    xmlFree(buffer);
3724
203
                    return(NULL);
3725
203
                }
3726
4.43M
    NEXTL(l);
3727
4.43M
    c = xmlCurrentChar(ctxt, &l);
3728
4.43M
      }
3729
2.49k
      buffer[len] = 0;
3730
2.49k
      return(buffer);
3731
2.69k
  }
3732
206k
    }
3733
45.6k
    if (len == 0)
3734
8.61k
        return(NULL);
3735
37.0k
    if (len > maxLength) {
3736
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3737
0
        return(NULL);
3738
0
    }
3739
37.0k
    ret = xmlStrndup(buf, len);
3740
37.0k
    if (ret == NULL)
3741
0
        xmlErrMemory(ctxt);
3742
37.0k
    return(ret);
3743
37.0k
}
3744
3745
/**
3746
 * xmlExpandPEsInEntityValue:
3747
 * @ctxt:  parser context
3748
 * @buf:  string buffer
3749
 * @str:  entity value
3750
 * @length:  size of entity value
3751
 * @depth:  nesting depth
3752
 *
3753
 * Validate an entity value and expand parameter entities.
 *
 * Walks the @length bytes starting at @str and appends the result to @buf.
 * Runs of bytes that need no change are tracked with a chunk pointer and
 * copied with a single xmlSBufAddString call. When xmlUTF8MultibyteLen
 * returns 0 or IS_BYTE_CHAR fails, xmlSBufAddReplChar is called and one
 * byte is skipped. For a character reference, the value returned by
 * xmlParseStringCharRef is appended with xmlSBufAddChar. General entity
 * references are only checked for syntax and remain part of the copied
 * run. Parameter entity references are expanded by a recursive call, but
 * only if PARSER_EXTERNAL(ctxt) holds; otherwise XML_ERR_ENTITY_PE_INTERNAL
 * is raised.
 *
 * The function returns early on a NULL @str, when the incremented @depth
 * exceeds 20 (40 with XML_PARSE_HUGE), or after a malformed or rejected
 * reference. The loop also ends as soon as PARSER_STOPPED(ctxt) is true.
3754
 */
3755
static void
3756
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3757
45.4k
                          const xmlChar *str, int length, int depth) {
3758
45.4k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3759
45.4k
    const xmlChar *end, *chunk;
3760
45.4k
    int c, l;
3761
3762
45.4k
    if (str == NULL)
3763
0
        return;
3764
3765
45.4k
    depth += 1;
3766
45.4k
    if (depth > maxDepth) {
3767
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3768
0
                       "Maximum entity nesting depth exceeded");
3769
0
  return;
3770
0
    }
3771
3772
45.4k
    end = str + length;
3773
45.4k
    chunk = str; /* start of the run not yet copied to buf */
3774
3775
146M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3776
146M
        c = *str;
3777
3778
146M
        if (c >= 0x80) {
3779
142M
            l = xmlUTF8MultibyteLen(ctxt, str,
3780
142M
                    "invalid character in entity value\n");
3781
142M
            if (l == 0) {
3782
4.24M
                if (chunk < str)
3783
71.8k
                    xmlSBufAddString(buf, chunk, str - chunk);
3784
4.24M
                xmlSBufAddReplChar(buf);
3785
4.24M
                str += 1; /* skip only the offending byte */
3786
4.24M
                chunk = str;
3787
138M
            } else {
3788
138M
                str += l;
3789
138M
            }
3790
142M
        } else if (c == '&') {
3791
152k
            if (str[1] == '#') {
3792
79.6k
                if (chunk < str)
3793
61.3k
                    xmlSBufAddString(buf, chunk, str - chunk);
3794
3795
79.6k
                c = xmlParseStringCharRef(ctxt, &str);
3796
79.6k
                if (c == 0)
3797
19.5k
                    return;
3798
3799
60.0k
                xmlSBufAddChar(buf, c);
3800
3801
60.0k
                chunk = str;
3802
72.4k
            } else {
3803
72.4k
                xmlChar *name;
3804
3805
                /*
3806
                 * General entity references are checked for
3807
                 * syntactic validity.
3808
                 */
3809
72.4k
                str++; /* step over the ampersand */
3810
72.4k
                name = xmlParseStringName(ctxt, &str);
3811
3812
72.4k
                if ((name == NULL) || (*str++ != ';')) {
3813
5.71k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3814
5.71k
                            "EntityValue: '&' forbidden except for entities "
3815
5.71k
                            "references\n");
3816
5.71k
                    xmlFree(name);
3817
5.71k
                    return;
3818
5.71k
                }
3819
3820
66.6k
                xmlFree(name); /* name was only needed for the syntax check */
3821
66.6k
            }
3822
3.94M
        } else if (c == '%') {
3823
4.15k
            xmlEntityPtr ent;
3824
3825
4.15k
            if (chunk < str)
3826
3.51k
                xmlSBufAddString(buf, chunk, str - chunk);
3827
3828
4.15k
            ent = xmlParseStringPEReference(ctxt, &str);
3829
4.15k
            if (ent == NULL)
3830
3.50k
                return;
3831
3832
642
            if (!PARSER_EXTERNAL(ctxt)) {
3833
642
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3834
642
                return;
3835
642
            }
3836
3837
0
            if (ent->content == NULL) {
3838
                /*
3839
                 * Note: external parsed entities will not be loaded,
3840
                 * it is not required for a non-validating parser to
3841
                 * complete external PEReferences coming from the
3842
                 * internal subset
3843
                 */
3844
0
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3845
0
                    ((ctxt->replaceEntities) ||
3846
0
                     (ctxt->validate))) {
3847
0
                    xmlLoadEntityContent(ctxt, ent);
3848
0
                } else {
3849
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3850
0
                                  "not validating will not read content for "
3851
0
                                  "PE entity %s\n", ent->name, NULL);
3852
0
                }
3853
0
            }
3854
3855
            /*
3856
             * TODO: Skip if ent->content is still NULL.
3857
             */
3858
3859
0
            if (xmlParserEntityCheck(ctxt, ent->length))
3860
0
                return;
3861
3862
0
            if (ent->flags & XML_ENT_EXPANDING) {
3863
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3864
0
                xmlHaltParser(ctxt);
3865
0
                return;
3866
0
            }
3867
3868
0
            ent->flags |= XML_ENT_EXPANDING; /* a nested reference to ent is then reported as a loop */
3869
0
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3870
0
                                      depth);
3871
0
            ent->flags &= ~XML_ENT_EXPANDING;
3872
3873
0
            chunk = str; /* resume copying after the reference */
3874
3.93M
        } else {
3875
            /* Normal ASCII char */
3876
3.93M
            if (!IS_BYTE_CHAR(c)) {
3877
158k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3878
158k
                        "invalid character in entity value\n");
3879
158k
                if (chunk < str)
3880
5.22k
                    xmlSBufAddString(buf, chunk, str - chunk);
3881
158k
                xmlSBufAddReplChar(buf);
3882
158k
                str += 1;
3883
158k
                chunk = str;
3884
3.77M
            } else {
3885
3.77M
                str += 1;
3886
3.77M
            }
3887
3.93M
        }
3888
146M
    }
3889
3890
15.9k
    if (chunk < str) /* flush the final pending run */
3891
11.6k
        xmlSBufAddString(buf, chunk, str - chunk);
3892
15.9k
}
3893
3894
/**
3895
 * xmlParseEntityValue:
3896
 * @ctxt:  an XML parser context
3897
 * @orig:  if non-NULL store a copy of the original entity value
3898
 *
3899
 * DEPRECATED: Internal function, don't use.
3900
 *
3901
 * parse a value for ENTITY declarations
3902
 *
3903
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3904
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3905
 *
 * The input is first scanned byte by byte up to the matching quote
 * character; the raw bytes are then passed to xmlExpandPEsInEntityValue
 * with ctxt->inputNr as the starting depth. If @orig is non-NULL, *orig is
 * set to an xmlStrndup copy of the raw bytes once the closing quote was
 * found; it is not written on the paths that return NULL before that.
 * The expansion buffer is limited to XML_MAX_TEXT_LENGTH, or
 * XML_MAX_HUGE_LENGTH with XML_PARSE_HUGE.
 *
3906
 * Returns the EntityValue parsed with reference substituted or NULL
3907
 */
3908
xmlChar *
3909
45.8k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3910
45.8k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3911
0
                         XML_MAX_HUGE_LENGTH :
3912
45.8k
                         XML_MAX_TEXT_LENGTH;
3913
45.8k
    xmlSBuf buf;
3914
45.8k
    const xmlChar *start;
3915
45.8k
    int quote, length;
3916
3917
45.8k
    xmlSBufInit(&buf, maxLength);
3918
3919
45.8k
    GROW;
3920
3921
45.8k
    quote = CUR;
3922
45.8k
    if ((quote != '"') && (quote != '\'')) {
3923
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3924
0
  return(NULL);
3925
0
    }
3926
45.8k
    CUR_PTR++; /* step over the opening quote */
3927
3928
45.8k
    length = 0;
3929
3930
    /*
3931
     * Copy raw content of the entity into a buffer
3932
     */
3933
446M
    while (1) {
3934
446M
        int c;
3935
3936
446M
        if (PARSER_STOPPED(ctxt))
3937
1
            goto error;
3938
3939
446M
        if (CUR_PTR >= ctxt->input->end) {
3940
393
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3941
393
            goto error;
3942
393
        }
3943
3944
446M
        c = CUR;
3945
3946
446M
        if (c == 0) {
3947
36
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3948
36
                    "invalid character in entity value\n");
3949
36
            goto error;
3950
36
        }
3951
446M
        if (c == quote)
3952
45.4k
            break;
3953
446M
        NEXTL(1);
3954
446M
        length += 1; /* counts the bytes consumed since the opening quote */
3955
3956
        /*
3957
         * TODO: Check growth threshold
3958
         */
3959
446M
        if (ctxt->input->end - CUR_PTR < 10)
3960
18.2k
            GROW;
3961
446M
    }
3962
3963
45.4k
    start = CUR_PTR - length;
3964
3965
45.4k
    if (orig != NULL) {
3966
45.4k
        *orig = xmlStrndup(start, length);
3967
45.4k
        if (*orig == NULL)
3968
0
            xmlErrMemory(ctxt);
3969
45.4k
    }
3970
3971
45.4k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3972
3973
45.4k
    NEXTL(1); /* the loop stopped on the closing quote; presumably this steps over it */
3974
3975
45.4k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3976
3977
430
error:
3978
430
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3979
430
    return(NULL);
3980
45.8k
}
3981
3982
/**
3983
 * xmlCheckEntityInAttValue:
3984
 * @ctxt:  parser context
3985
 * @pent:  entity
3986
 * @depth:  nesting depth
3987
 *
3988
 * Check an entity reference in an attribute value for validity
3989
 * without expanding it.
 *
 * Scans pent->content for less-than signs and references. Referenced
 * entities other than predefined ones are checked by a recursive call if
 * they do not yet carry the required flags. Starting from pent->length,
 * the expandedSize of each such entity plus XML_ENT_FIXED_COST is added
 * with xmlSaturatedAdd; the total is stored in pent->expandedSize only
 * when ctxt->inSubset is 0. On the normal exit path pent is flagged
 * XML_ENT_VALIDATED, and also XML_ENT_CHECKED when ctxt->inSubset is 0.
 *
 * The function returns early, without setting these flags, when the
 * incremented @depth exceeds 20 (40 with XML_PARSE_HUGE) or when @pent is
 * already marked XML_ENT_EXPANDING; the latter also halts the parser.
3990
 */
3991
static void
3992
1.23k
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3993
1.23k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3994
1.23k
    const xmlChar *str;
3995
1.23k
    unsigned long expandedSize = pent->length;
3996
1.23k
    int c, flags;
3997
3998
1.23k
    depth += 1;
3999
1.23k
    if (depth > maxDepth) {
4000
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4001
0
                       "Maximum entity nesting depth exceeded");
4002
0
  return;
4003
0
    }
4004
4005
1.23k
    if (pent->flags & XML_ENT_EXPANDING) {
4006
29
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4007
29
        xmlHaltParser(ctxt);
4008
29
        return;
4009
29
    }
4010
4011
    /*
4012
     * If we're parsing a default attribute value in DTD content,
4013
     * the entity might reference other entities which weren't
4014
     * defined yet, so the check isn't reliable.
4015
     */
4016
1.21k
    if (ctxt->inSubset == 0)
4017
1.18k
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4018
30
    else
4019
30
        flags = XML_ENT_VALIDATED;
4020
4021
1.21k
    str = pent->content;
4022
1.21k
    if (str == NULL)
4023
0
        goto done;
4024
4025
    /*
4026
     * Note that entity values are already validated. We only check
4027
     * for illegal less-than signs and compute the expanded size
4028
     * of the entity. No special handling for multi-byte characters
4029
     * is needed.
4030
     */
4031
33.7M
    while (!PARSER_STOPPED(ctxt)) {
4032
33.7M
        c = *str;
4033
4034
33.7M
  if (c != '&') {
4035
33.7M
            if (c == 0)
4036
1.14k
                break;
4037
4038
33.7M
            if (c == '<')
4039
620
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4040
620
                        "'<' in entity '%s' is not allowed in attributes "
4041
620
                        "values\n", pent->name);
4042
4043
33.7M
            str += 1;
4044
33.7M
        } else if (str[1] == '#') {
4045
935
            int val;
4046
4047
935
      val = xmlParseStringCharRef(ctxt, &str);
4048
935
      if (val == 0) {
4049
6
                pent->content[0] = 0; /* empty the entity content after a bad reference */
4050
6
                break;
4051
6
            }
4052
9.57k
  } else {
4053
9.57k
            xmlChar *name;
4054
9.57k
            xmlEntityPtr ent;
4055
4056
9.57k
      name = xmlParseStringEntityRef(ctxt, &str);
4057
9.57k
      if (name == NULL) {
4058
7
                pent->content[0] = 0; /* empty the entity content after a bad reference */
4059
7
                break;
4060
7
            }
4061
4062
9.57k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4063
9.57k
            xmlFree(name);
4064
4065
9.57k
            if ((ent != NULL) &&
4066
9.57k
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4067
2.60k
                if ((ent->flags & flags) != flags) {
4068
598
                    pent->flags |= XML_ENT_EXPANDING; /* lets the nested call detect a loop back to pent */
4069
598
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
4070
598
                    pent->flags &= ~XML_ENT_EXPANDING;
4071
598
                }
4072
4073
2.60k
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4074
2.60k
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4075
2.60k
            }
4076
9.57k
        }
4077
33.7M
    }
4078
4079
1.21k
done:
4080
1.21k
    if (ctxt->inSubset == 0)
4081
1.18k
        pent->expandedSize = expandedSize;
4082
4083
1.21k
    pent->flags |= flags;
4084
1.21k
}
4085
4086
/**
4087
 * xmlExpandEntityInAttValue:
4088
 * @ctxt:  parser context
4089
 * @buf:  string buffer
4090
 * @str:  entity or attribute value
4091
 * @pent:  entity for entity value, NULL for attribute values
4092
 * @normalize:  whether to collapse whitespace
4093
 * @inSpace:  whitespace state
4094
 * @depth:  nesting depth
4095
 * @check:  whether to check for amplification
4096
 *
4097
 * Expand general entity references in an entity or attribute value.
4098
 * Perform attribute value normalization.
 *
 * @str is read up to its terminating 0 byte and the result is appended to
 * @buf. Bytes below 0x20 are appended as one space character and a plain
 * 0x20 byte is kept; with @normalize set, a whitespace byte seen while
 * *inSpace is non-zero is dropped. *inSpace is updated after every byte
 * and reference. Predefined entities are appended directly, other
 * entities with content are expanded by a recursive call, and references
 * for which the lookup returns NULL or an entity without content add
 * nothing to @buf.
 *
 * When @pent is non-NULL, a less-than sign raises XML_ERR_LT_IN_ATTRIBUTE
 * and ends the scan, and a malformed reference sets pent->content[0] to 0.
 * The function returns early on a NULL @str, when the incremented @depth
 * exceeds 20 (40 with XML_PARSE_HUGE), when @pent is already marked
 * XML_ENT_EXPANDING, or when @check is set and xmlParserEntityCheck
 * returns non-zero.
4099
 */
4100
static void
4101
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4102
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
4103
357k
                          int *inSpace, int depth, int check) {
4104
357k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4105
357k
    int c, chunkSize;
4106
4107
357k
    if (str == NULL)
4108
0
        return;
4109
4110
357k
    depth += 1;
4111
357k
    if (depth > maxDepth) {
4112
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4113
0
                       "Maximum entity nesting depth exceeded");
4114
0
  return;
4115
0
    }
4116
4117
357k
    if (pent != NULL) {
4118
357k
        if (pent->flags & XML_ENT_EXPANDING) {
4119
6
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4120
6
            xmlHaltParser(ctxt);
4121
6
            return;
4122
6
        }
4123
4124
357k
        if (check) {
4125
357k
            if (xmlParserEntityCheck(ctxt, pent->length))
4126
33
                return;
4127
357k
        }
4128
357k
    }
4129
4130
357k
    chunkSize = 0; /* number of bytes before str not yet copied to buf */
4131
4132
    /*
4133
     * Note that entity values are already validated. No special
4134
     * handling for multi-byte characters is needed.
4135
     */
4136
1.25G
    while (!PARSER_STOPPED(ctxt)) {
4137
1.25G
        c = *str;
4138
4139
1.25G
  if (c != '&') {
4140
1.24G
            if (c == 0)
4141
325k
                break;
4142
4143
            /*
4144
             * If this function is called without an entity, it is used to
4145
             * expand entities in an attribute content where less-than was
4146
             * already unescaped and is allowed.
4147
             */
4148
1.24G
            if ((pent != NULL) && (c == '<')) {
4149
32.0k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4150
32.0k
                        "'<' in entity '%s' is not allowed in attributes "
4151
32.0k
                        "values\n", pent->name);
4152
32.0k
                break;
4153
32.0k
            }
4154
4155
1.24G
            if (c <= 0x20) {
4156
1.34M
                if ((normalize) && (*inSpace)) {
4157
                    /* Skip char */
4158
0
                    if (chunkSize > 0) {
4159
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4160
0
                        chunkSize = 0;
4161
0
                    }
4162
1.34M
                } else if (c < 0x20) {
4163
1.33M
                    if (chunkSize > 0) {
4164
163k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4165
163k
                        chunkSize = 0;
4166
163k
                    }
4167
4168
1.33M
                    xmlSBufAddCString(buf, " ", 1);
4169
1.33M
                } else {
4170
14.2k
                    chunkSize += 1; /* c is 0x20 here and stays in the pending run */
4171
14.2k
                }
4172
4173
1.34M
                *inSpace = 1;
4174
1.24G
            } else {
4175
1.24G
                chunkSize += 1;
4176
1.24G
                *inSpace = 0;
4177
1.24G
            }
4178
4179
1.24G
            str += 1;
4180
1.24G
        } else if (str[1] == '#') {
4181
50.5k
            int val;
4182
4183
50.5k
            if (chunkSize > 0) {
4184
22.0k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4185
22.0k
                chunkSize = 0;
4186
22.0k
            }
4187
4188
50.5k
      val = xmlParseStringCharRef(ctxt, &str);
4189
50.5k
      if (val == 0) {
4190
8
                if (pent != NULL)
4191
8
                    pent->content[0] = 0; /* empty the entity content after a bad reference */
4192
8
                break;
4193
8
            }
4194
4195
50.5k
            if (val == ' ') {
4196
2.88k
                if ((!normalize) || (!*inSpace))
4197
2.88k
                    xmlSBufAddCString(buf, " ", 1);
4198
2.88k
                *inSpace = 1;
4199
47.6k
            } else {
4200
47.6k
                xmlSBufAddChar(buf, val);
4201
47.6k
                *inSpace = 0;
4202
47.6k
            }
4203
1.58M
  } else {
4204
1.58M
            xmlChar *name;
4205
1.58M
            xmlEntityPtr ent;
4206
4207
1.58M
            if (chunkSize > 0) {
4208
1.54M
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4209
1.54M
                chunkSize = 0;
4210
1.54M
            }
4211
4212
1.58M
      name = xmlParseStringEntityRef(ctxt, &str);
4213
1.58M
            if (name == NULL) {
4214
5
                if (pent != NULL)
4215
5
                    pent->content[0] = 0; /* empty the entity content after a bad reference */
4216
5
                break;
4217
5
            }
4218
4219
1.58M
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4220
1.58M
            xmlFree(name);
4221
4222
1.58M
      if ((ent != NULL) &&
4223
1.58M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4224
1.46M
    if (ent->content == NULL) {
4225
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4226
0
          "predefined entity has no content\n");
4227
0
                    break;
4228
0
                }
4229
4230
1.46M
                xmlSBufAddString(buf, ent->content, ent->length);
4231
4232
1.46M
                *inSpace = 0;
4233
1.46M
      } else if ((ent != NULL) && (ent->content != NULL)) {
4234
104k
                if (pent != NULL)
4235
104k
                    pent->flags |= XML_ENT_EXPANDING; /* lets the nested call detect a loop back to pent */
4236
104k
    xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4237
104k
                                          normalize, inSpace, depth, check);
4238
104k
                if (pent != NULL)
4239
104k
                    pent->flags &= ~XML_ENT_EXPANDING;
4240
104k
      }
4241
1.58M
        }
4242
1.25G
    }
4243
4244
357k
    if (chunkSize > 0) /* flush the final pending run */
4245
344k
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4246
357k
}
4247
4248
/**
4249
 * xmlExpandEntitiesInAttValue:
4250
 * @ctxt:  parser context
4251
 * @str:  entity or attribute value
4252
 * @normalize:  whether to collapse whitespace
4253
 *
4254
 * Expand general entity references in an entity or attribute value.
4255
 * Perform attribute value normalization.
 *
 * Calls xmlExpandEntityInAttValue with a NULL entity, ctxt->inputNr as the
 * starting depth and the amplification check disabled. The whitespace
 * state starts at 1. With @normalize set, if that state is still non-zero
 * after the expansion and the buffer is not empty, the buffer size is
 * reduced by one. The buffer is limited to XML_MAX_TEXT_LENGTH, or
 * XML_MAX_HUGE_LENGTH with XML_PARSE_HUGE.
4256
 *
4257
 * Returns the expanded attribute value.
4258
 */
4259
xmlChar *
4260
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4261
0
                            int normalize) {
4262
0
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4263
0
                         XML_MAX_HUGE_LENGTH :
4264
0
                         XML_MAX_TEXT_LENGTH;
4265
0
    xmlSBuf buf;
4266
0
    int inSpace = 1;
4267
4268
0
    xmlSBufInit(&buf, maxLength);
4269
4270
0
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4271
0
                              ctxt->inputNr, /* check */ 0);
4272
4273
0
    if ((normalize) && (inSpace) && (buf.size > 0))
4274
0
        buf.size--; /* drops the last byte, presumably a trailing space */
4275
4276
0
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4277
0
}
4278
4279
/**
4280
 * xmlParseAttValueInternal:
4281
 * @ctxt:  an XML parser context
4282
 * @attlen:  attribute len result
4283
 * @alloc:  whether the attribute was reallocated as a new string
4284
 * @normalize:  if 1 then further non-CDATA normalization must be done
 * @isNamespace:  if non-zero, entities are expanded even when
 *                ctxt->replaceEntities is 0
4285
 *
4286
 * parse a value for an attribute.
4287
 * NOTE: if no normalization is needed, the routine will return pointers
4288
 *       directly from the data buffer.
4289
 *
4290
 * 3.3.3 Attribute-Value Normalization:
4291
 * Before the value of an attribute is passed to the application or
4292
 * checked for validity, the XML processor must normalize it as follows:
4293
 * - a character reference is processed by appending the referenced
4294
 *   character to the attribute value
4295
 * - an entity reference is processed by recursively processing the
4296
 *   replacement text of the entity
4297
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4298
 *   appending #x20 to the normalized value, except that only a single
4299
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4300
 *   parsed entity or the literal entity value of an internal parsed entity
4301
 * - other characters are processed by appending them to the normalized value
4302
 * If the declared value is not CDATA, then the XML processor must further
4303
 * process the normalized attribute value by discarding any leading and
4304
 * trailing space (#x20) characters, and by replacing sequences of space
4305
 * (#x20) characters by a single space (#x20) character.
4306
 * All attributes for which no declaration has been read should be treated
4307
 * by a non-validating parser as if declared CDATA.
4308
 *
 * Result selection: if buf.mem is still NULL after the scan and @alloc is
 * non-NULL, the returned pointer is CUR_PTR minus the pending chunk size,
 * *alloc is set to 0 and *attlen to the chunk size. Otherwise the pending
 * chunk is appended and the result of xmlSBufFinish is returned; if that
 * result is non-NULL, *alloc is set to 1 and *attlen to buf.size where the
 * respective pointer is non-NULL.
 *
 * When entities are not replaced, a non-predefined entity reference is
 * written back as a reference and an ampersand coming from a character
 * reference or predefined entity is written as the five bytes of its
 * numeric reference.
 *
4309
 * Returns the AttValue parsed or NULL. The value has to be freed by the
4310
 *     caller if it was copied, this can be detected by val[*attlen] == 0.
4311
 */
4312
static xmlChar *
4313
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4314
686k
                         int normalize, int isNamespace) {
4315
686k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4316
0
                         XML_MAX_HUGE_LENGTH :
4317
686k
                         XML_MAX_TEXT_LENGTH;
4318
686k
    xmlSBuf buf;
4319
686k
    xmlChar *ret;
4320
686k
    int c, l, quote, flags, chunkSize;
4321
686k
    int inSpace = 1;
4322
686k
    int replaceEntities;
4323
4324
    /* Always expand namespace URIs */
4325
686k
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4326
4327
686k
    xmlSBufInit(&buf, maxLength);
4328
4329
686k
    GROW;
4330
4331
686k
    quote = CUR;
4332
686k
    if ((quote != '"') && (quote != '\'')) {
4333
10.5k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4334
10.5k
  return(NULL);
4335
10.5k
    }
4336
675k
    NEXTL(1);
4337
4338
675k
    if (ctxt->inSubset == 0)
4339
629k
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4340
46.4k
    else
4341
46.4k
        flags = XML_ENT_VALIDATED;
4342
4343
675k
    inSpace = 1;
4344
675k
    chunkSize = 0; /* number of bytes before CUR_PTR not yet copied to buf */
4345
4346
284M
    while (1) {
4347
284M
        if (PARSER_STOPPED(ctxt))
4348
76
            goto error;
4349
4350
284M
        if (CUR_PTR >= ctxt->input->end) {
4351
1.80k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4352
1.80k
                           "AttValue: ' expected\n");
4353
1.80k
            goto error;
4354
1.80k
        }
4355
4356
        /*
4357
         * TODO: Check growth threshold
4358
         */
4359
284M
        if (ctxt->input->end - CUR_PTR < 10)
4360
53.8k
            GROW;
4361
4362
284M
        c = CUR;
4363
4364
284M
        if (c >= 0x80) {
4365
266M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4366
266M
                    "invalid character in attribute value\n");
4367
266M
            if (l == 0) {
4368
5.34M
                if (chunkSize > 0) {
4369
194k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4370
194k
                    chunkSize = 0;
4371
194k
                }
4372
5.34M
                xmlSBufAddReplChar(&buf);
4373
5.34M
                NEXTL(1);
4374
261M
            } else {
4375
261M
                chunkSize += l;
4376
261M
                NEXTL(l);
4377
261M
            }
4378
4379
266M
            inSpace = 0;
4380
266M
        } else if (c != '&') {
4381
17.3M
            if (c > 0x20) {
4382
9.09M
                if (c == quote)
4383
669k
                    break;
4384
4385
8.42M
                if (c == '<')
4386
164k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4387
4388
8.42M
                chunkSize += 1;
4389
8.42M
                inSpace = 0;
4390
8.42M
            } else if (!IS_BYTE_CHAR(c)) {
4391
2.72M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4392
2.72M
                        "invalid character in attribute value\n");
4393
2.72M
                if (chunkSize > 0) {
4394
121k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4395
121k
                    chunkSize = 0;
4396
121k
                }
4397
2.72M
                xmlSBufAddReplChar(&buf);
4398
2.72M
                inSpace = 0;
4399
5.49M
            } else {
4400
                /* Whitespace */
4401
5.49M
                if ((normalize) && (inSpace)) {
4402
                    /* Skip char */
4403
281k
                    if (chunkSize > 0) {
4404
23.4k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4405
23.4k
                        chunkSize = 0;
4406
23.4k
                    }
4407
5.21M
                } else if (c < 0x20) {
4408
                    /* Convert to space */
4409
4.81M
                    if (chunkSize > 0) {
4410
162k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4411
162k
                        chunkSize = 0;
4412
162k
                    }
4413
4414
4.81M
                    xmlSBufAddCString(&buf, " ", 1);
4415
4.81M
                } else {
4416
400k
                    chunkSize += 1;
4417
400k
                }
4418
4419
5.49M
                inSpace = 1;
4420
4421
5.49M
                if ((c == 0xD) && (NXT(1) == 0xA))
4422
4.88k
                    CUR_PTR++; /* a 0xD 0xA pair is consumed as one whitespace character */
4423
5.49M
            }
4424
4425
16.6M
            NEXTL(1);
4426
16.6M
        } else if (NXT(1) == '#') {
4427
43.9k
            int val;
4428
4429
43.9k
            if (chunkSize > 0) {
4430
25.3k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4431
25.3k
                chunkSize = 0;
4432
25.3k
            }
4433
4434
43.9k
            val = xmlParseCharRef(ctxt);
4435
43.9k
            if (val == 0)
4436
3.81k
                goto error;
4437
4438
40.0k
            if ((val == '&') && (!replaceEntities)) {
4439
                /*
4440
                 * The reparsing will be done in xmlNodeParseContent()
4441
                 * called from SAX2.c
4442
                 */
4443
3.63k
                xmlSBufAddCString(&buf, "&#38;", 5);
4444
3.63k
                inSpace = 0;
4445
36.4k
            } else if (val == ' ') {
4446
2.59k
                if ((!normalize) || (!inSpace))
4447
2.17k
                    xmlSBufAddCString(&buf, " ", 1);
4448
2.59k
                inSpace = 1;
4449
33.8k
            } else {
4450
33.8k
                xmlSBufAddChar(&buf, val);
4451
33.8k
                inSpace = 0;
4452
33.8k
            }
4453
646k
        } else {
4454
646k
            const xmlChar *name;
4455
646k
            xmlEntityPtr ent;
4456
4457
646k
            if (chunkSize > 0) {
4458
498k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4459
498k
                chunkSize = 0;
4460
498k
            }
4461
4462
646k
            name = xmlParseEntityRefInternal(ctxt);
4463
646k
            if (name == NULL) {
4464
                /*
4465
                 * Probably a literal '&' which wasn't escaped.
4466
                 * TODO: Handle gracefully in recovery mode.
4467
                 */
4468
51.7k
                continue;
4469
51.7k
            }
4470
4471
594k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4472
594k
            if (ent == NULL)
4473
76.5k
                continue; /* the lookup returned no entity: nothing is appended */
4474
4475
518k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4476
215k
                if ((ent->content[0] == '&') && (!replaceEntities))
4477
10.0k
                    xmlSBufAddCString(&buf, "&#38;", 5);
4478
205k
                else
4479
205k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4480
215k
                inSpace = 0;
4481
302k
            } else if (replaceEntities) {
4482
252k
                xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4483
252k
                                          normalize, &inSpace, ctxt->inputNr,
4484
252k
                                          /* check */ 1);
4485
252k
            } else {
4486
50.0k
                if ((ent->flags & flags) != flags)
4487
641
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4488
4489
50.0k
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4490
27
                    ent->content[0] = 0; /* empty the entity content once the check fails */
4491
27
                    goto error;
4492
27
                }
4493
4494
                /*
4495
                 * Just output the reference
4496
                 */
4497
49.9k
                xmlSBufAddCString(&buf, "&", 1);
4498
49.9k
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4499
49.9k
                xmlSBufAddCString(&buf, ";", 1);
4500
4501
49.9k
                inSpace = 0;
4502
49.9k
            }
4503
518k
  }
4504
284M
    }
4505
4506
669k
    if ((buf.mem == NULL) && (alloc != NULL)) {
4507
555k
        ret = (xmlChar *) CUR_PTR - chunkSize; /* points into the input buffer, no copy is made */
4508
4509
555k
        if (attlen != NULL)
4510
555k
            *attlen = chunkSize;
4511
555k
        if ((normalize) && (inSpace) && (chunkSize > 0))
4512
336
            *attlen -= 1; /* NOTE(review): attlen is not NULL-checked on this line, unlike the assignment above */
4513
555k
        *alloc = 0;
4514
4515
        /* Report potential error */
4516
555k
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4517
555k
    } else {
4518
114k
        if (chunkSize > 0)
4519
74.0k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4520
4521
114k
        if ((normalize) && (inSpace) && (buf.size > 0))
4522
2.10k
            buf.size--; /* drops the last byte, presumably a trailing space */
4523
4524
114k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4525
4526
114k
        if (ret != NULL) {
4527
114k
            if (attlen != NULL)
4528
68.5k
                *attlen = buf.size;
4529
114k
            if (alloc != NULL)
4530
68.5k
                *alloc = 1;
4531
114k
        }
4532
114k
    }
4533
4534
669k
    NEXTL(1); /* the loop stopped on the closing quote; presumably this steps over it */
4535
4536
669k
    return(ret);
4537
4538
5.71k
error:
4539
5.71k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4540
5.71k
    return(NULL);
4541
675k
}
4542
4543
/**
4544
 * xmlParseAttValue:
4545
 * @ctxt:  an XML parser context
4546
 *
4547
 * DEPRECATED: Internal function, don't use.
4548
 *
4549
 * parse a value for an attribute
4550
 * Note: the parser won't do substitution of entities here, this
4551
 * will be handled later in xmlStringGetNodeList
4552
 *
4553
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4554
 *                   "'" ([^<&'] | Reference)* "'"
4555
 *
4556
 * 3.3.3 Attribute-Value Normalization:
4557
 * Before the value of an attribute is passed to the application or
4558
 * checked for validity, the XML processor must normalize it as follows:
4559
 * - a character reference is processed by appending the referenced
4560
 *   character to the attribute value
4561
 * - an entity reference is processed by recursively processing the
4562
 *   replacement text of the entity
4563
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4564
 *   appending #x20 to the normalized value, except that only a single
4565
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4566
 *   parsed entity or the literal entity value of an internal parsed entity
4567
 * - other characters are processed by appending them to the normalized value
4568
 * If the declared value is not CDATA, then the XML processor must further
4569
 * process the normalized attribute value by discarding any leading and
4570
 * trailing space (#x20) characters, and by replacing sequences of space
4571
 * (#x20) characters by a single space (#x20) character.
4572
 * All attributes for which no declaration has been read should be treated
4573
 * by a non-validating parser as if declared CDATA.
4574
 *
 * Thin wrapper: returns NULL if @ctxt or ctxt->input is NULL, otherwise
 * forwards to xmlParseAttValueInternal with NULL length and alloc
 * pointers, normalize 0 and isNamespace 0.
 *
4575
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4576
 */
4577
4578
4579
xmlChar *
4580
47.8k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4581
47.8k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4582
47.8k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4583
47.8k
}
4584
4585
/**
4586
 * xmlParseSystemLiteral:
4587
 * @ctxt:  an XML parser context
4588
 *
4589
 * DEPRECATED: Internal function, don't use.
4590
 *
4591
 * parse an XML Literal
4592
 *
4593
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4594
 *
4595
 * Returns the SystemLiteral parsed or NULL
4596
 */
4597
4598
xmlChar *
4599
9.07k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4600
9.07k
    xmlChar *buf = NULL;
4601
9.07k
    int len = 0;
4602
9.07k
    int size = XML_PARSER_BUFFER_SIZE;
4603
9.07k
    int cur, l;
4604
9.07k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4605
0
                    XML_MAX_TEXT_LENGTH :
4606
9.07k
                    XML_MAX_NAME_LENGTH;
4607
9.07k
    xmlChar stop;
4608
4609
9.07k
    if (RAW == '"') {
4610
5.15k
        NEXT;
4611
5.15k
  stop = '"';
4612
5.15k
    } else if (RAW == '\'') {
4613
3.04k
        NEXT;
4614
3.04k
  stop = '\'';
4615
3.04k
    } else {
4616
877
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4617
877
  return(NULL);
4618
877
    }
4619
4620
8.19k
    buf = xmlMalloc(size);
4621
8.19k
    if (buf == NULL) {
4622
0
        xmlErrMemory(ctxt);
4623
0
  return(NULL);
4624
0
    }
4625
8.19k
    cur = xmlCurrentCharRecover(ctxt, &l);
4626
1.13M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4627
1.12M
  if (len + 5 >= size) {
4628
1.48k
      xmlChar *tmp;
4629
4630
1.48k
      size *= 2;
4631
1.48k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4632
1.48k
      if (tmp == NULL) {
4633
0
          xmlFree(buf);
4634
0
    xmlErrMemory(ctxt);
4635
0
    return(NULL);
4636
0
      }
4637
1.48k
      buf = tmp;
4638
1.48k
  }
4639
1.12M
  COPY_BUF(buf, len, cur);
4640
1.12M
        if (len > maxLength) {
4641
5
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4642
5
            xmlFree(buf);
4643
5
            return(NULL);
4644
5
        }
4645
1.12M
  NEXTL(l);
4646
1.12M
  cur = xmlCurrentCharRecover(ctxt, &l);
4647
1.12M
    }
4648
8.19k
    buf[len] = 0;
4649
8.19k
    if (!IS_CHAR(cur)) {
4650
403
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4651
7.79k
    } else {
4652
7.79k
  NEXT;
4653
7.79k
    }
4654
8.19k
    return(buf);
4655
8.19k
}
4656
4657
/**
4658
 * xmlParsePubidLiteral:
4659
 * @ctxt:  an XML parser context
4660
 *
4661
 * DEPRECATED: Internal function, don't use.
4662
 *
4663
 * parse an XML public literal
4664
 *
4665
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4666
 *
4667
 * Returns the PubidLiteral parsed or NULL.
4668
 */
4669
4670
xmlChar *
4671
4.68k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4672
4.68k
    xmlChar *buf = NULL;
4673
4.68k
    int len = 0;
4674
4.68k
    int size = XML_PARSER_BUFFER_SIZE;
4675
4.68k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4676
0
                    XML_MAX_TEXT_LENGTH :
4677
4.68k
                    XML_MAX_NAME_LENGTH;
4678
4.68k
    xmlChar cur;
4679
4.68k
    xmlChar stop;
4680
4681
4.68k
    if (RAW == '"') {
4682
3.51k
        NEXT;
4683
3.51k
  stop = '"';
4684
3.51k
    } else if (RAW == '\'') {
4685
804
        NEXT;
4686
804
  stop = '\'';
4687
804
    } else {
4688
371
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4689
371
  return(NULL);
4690
371
    }
4691
4.31k
    buf = xmlMalloc(size);
4692
4.31k
    if (buf == NULL) {
4693
0
  xmlErrMemory(ctxt);
4694
0
  return(NULL);
4695
0
    }
4696
4.31k
    cur = CUR;
4697
299k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4698
299k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4699
294k
  if (len + 1 >= size) {
4700
340
      xmlChar *tmp;
4701
4702
340
      size *= 2;
4703
340
      tmp = (xmlChar *) xmlRealloc(buf, size);
4704
340
      if (tmp == NULL) {
4705
0
    xmlErrMemory(ctxt);
4706
0
    xmlFree(buf);
4707
0
    return(NULL);
4708
0
      }
4709
340
      buf = tmp;
4710
340
  }
4711
294k
  buf[len++] = cur;
4712
294k
        if (len > maxLength) {
4713
1
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4714
1
            xmlFree(buf);
4715
1
            return(NULL);
4716
1
        }
4717
294k
  NEXT;
4718
294k
  cur = CUR;
4719
294k
    }
4720
4.31k
    buf[len] = 0;
4721
4.31k
    if (cur != stop) {
4722
781
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4723
3.53k
    } else {
4724
3.53k
  NEXTL(1);
4725
3.53k
    }
4726
4.31k
    return(buf);
4727
4.31k
}
4728
4729
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4730
4731
/*
 * Lookup table for the inner scanning loop of xmlParseCharDataInternal.
 *
 * An entry is non-zero (it holds its own index) for every byte that loop
 * may step over without a closer look: TAB and the range 0x20 - 0x7F
 * except '&', '<' and ']'. Every other byte, including LF, CR and all
 * bytes >= 0x80, maps to zero and stops the loop.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x1F: control characters, only TAB (0x09) is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,   /* 0x00 - 0x07 */
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,   /* 0x08 - 0x0F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,   /* 0x10 - 0x17 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,   /* 0x18 - 0x1F */
    /* 0x20 - 0x7F: everything except '&', '<' and ']' */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,   /* 0x26 '&' stops */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,   /* 0x28 - 0x2F */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,   /* 0x30 - 0x37 */
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,   /* 0x3C '<' stops */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,   /* 0x40 - 0x47 */
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,   /* 0x48 - 0x4F */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,   /* 0x50 - 0x57 */
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,   /* 0x5D ']' stops */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,   /* 0x60 - 0x67 */
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,   /* 0x68 - 0x6F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,   /* 0x70 - 0x77 */
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F    /* 0x78 - 0x7F */
    /*
     * 0x80 - 0xFF (non-ascii): no initializers are given, so the
     * remaining 128 entries are implicitly initialized to zero.
     */
};
4768
4769
/**
4770
 * xmlParseCharDataInternal:
4771
 * @ctxt:  an XML parser context
4772
 * @partial:  buffer may contain partial UTF-8 sequences
4773
 *
4774
 * Parse character data. Always makes progress if the first char isn't
4775
 * '<' or '&'.
4776
 *
4777
 * The right angle bracket (>) may be represented using the string "&gt;",
4778
 * and must, for compatibility, be escaped using "&gt;" or a character
4779
 * reference when it appears in the string "]]>" in content, when that
4780
 * string is not marking the end of a CDATA section.
4781
 *
4782
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4783
 */
4784
static void
4785
9.40M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4786
9.40M
    const xmlChar *in;
4787
9.40M
    int nbchar = 0;
4788
9.40M
    int line = ctxt->input->line;
4789
9.40M
    int col = ctxt->input->col;
4790
9.40M
    int ccol;
4791
4792
9.40M
    GROW;
4793
    /*
4794
     * Accelerated common case where input don't need to be
4795
     * modified before passing it to the handler.
4796
     */
4797
9.40M
    in = ctxt->input->cur;
4798
9.43M
    do {
4799
9.47M
get_more_space:
4800
10.5M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4801
9.47M
        if (*in == 0xA) {
4802
797k
            do {
4803
797k
                ctxt->input->line++; ctxt->input->col = 1;
4804
797k
                in++;
4805
797k
            } while (*in == 0xA);
4806
40.9k
            goto get_more_space;
4807
40.9k
        }
4808
9.43M
        if (*in == '<') {
4809
42.3k
            nbchar = in - ctxt->input->cur;
4810
42.3k
            if (nbchar > 0) {
4811
42.3k
                const xmlChar *tmp = ctxt->input->cur;
4812
42.3k
                ctxt->input->cur = in;
4813
4814
42.3k
                if ((ctxt->sax != NULL) &&
4815
42.3k
                    (ctxt->disableSAX == 0) &&
4816
42.3k
                    (ctxt->sax->ignorableWhitespace !=
4817
5.75k
                     ctxt->sax->characters)) {
4818
0
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4819
0
                        if (ctxt->sax->ignorableWhitespace != NULL)
4820
0
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4821
0
                                                   tmp, nbchar);
4822
0
                    } else {
4823
0
                        if (ctxt->sax->characters != NULL)
4824
0
                            ctxt->sax->characters(ctxt->userData,
4825
0
                                                  tmp, nbchar);
4826
0
                        if (*ctxt->space == -1)
4827
0
                            *ctxt->space = -2;
4828
0
                    }
4829
42.3k
                } else if ((ctxt->sax != NULL) &&
4830
42.3k
                           (ctxt->disableSAX == 0) &&
4831
42.3k
                           (ctxt->sax->characters != NULL)) {
4832
5.75k
                    ctxt->sax->characters(ctxt->userData,
4833
5.75k
                                          tmp, nbchar);
4834
5.75k
                }
4835
42.3k
            }
4836
42.3k
            return;
4837
42.3k
        }
4838
4839
9.49M
get_more:
4840
9.49M
        ccol = ctxt->input->col;
4841
19.2M
        while (test_char_data[*in]) {
4842
9.74M
            in++;
4843
9.74M
            ccol++;
4844
9.74M
        }
4845
9.49M
        ctxt->input->col = ccol;
4846
9.49M
        if (*in == 0xA) {
4847
362k
            do {
4848
362k
                ctxt->input->line++; ctxt->input->col = 1;
4849
362k
                in++;
4850
362k
            } while (*in == 0xA);
4851
68.9k
            goto get_more;
4852
68.9k
        }
4853
9.42M
        if (*in == ']') {
4854
37.4k
            if ((in[1] == ']') && (in[2] == '>')) {
4855
3.50k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4856
3.50k
                ctxt->input->cur = in + 1;
4857
3.50k
                return;
4858
3.50k
            }
4859
33.8k
            in++;
4860
33.8k
            ctxt->input->col++;
4861
33.8k
            goto get_more;
4862
37.4k
        }
4863
9.38M
        nbchar = in - ctxt->input->cur;
4864
9.38M
        if (nbchar > 0) {
4865
1.52M
            if ((ctxt->sax != NULL) &&
4866
1.52M
                (ctxt->disableSAX == 0) &&
4867
1.52M
                (ctxt->sax->ignorableWhitespace !=
4868
13.2k
                 ctxt->sax->characters) &&
4869
1.52M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4870
0
                const xmlChar *tmp = ctxt->input->cur;
4871
0
                ctxt->input->cur = in;
4872
4873
0
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4874
0
                    if (ctxt->sax->ignorableWhitespace != NULL)
4875
0
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4876
0
                                                       tmp, nbchar);
4877
0
                } else {
4878
0
                    if (ctxt->sax->characters != NULL)
4879
0
                        ctxt->sax->characters(ctxt->userData,
4880
0
                                              tmp, nbchar);
4881
0
                    if (*ctxt->space == -1)
4882
0
                        *ctxt->space = -2;
4883
0
                }
4884
0
                line = ctxt->input->line;
4885
0
                col = ctxt->input->col;
4886
1.52M
            } else if ((ctxt->sax != NULL) &&
4887
1.52M
                       (ctxt->disableSAX == 0)) {
4888
13.2k
                if (ctxt->sax->characters != NULL)
4889
13.2k
                    ctxt->sax->characters(ctxt->userData,
4890
13.2k
                                          ctxt->input->cur, nbchar);
4891
13.2k
                line = ctxt->input->line;
4892
13.2k
                col = ctxt->input->col;
4893
13.2k
            }
4894
1.52M
        }
4895
9.38M
        ctxt->input->cur = in;
4896
9.38M
        if (*in == 0xD) {
4897
76.3k
            in++;
4898
76.3k
            if (*in == 0xA) {
4899
30.6k
                ctxt->input->cur = in;
4900
30.6k
                in++;
4901
30.6k
                ctxt->input->line++; ctxt->input->col = 1;
4902
30.6k
                continue; /* while */
4903
30.6k
            }
4904
45.7k
            in--;
4905
45.7k
        }
4906
9.35M
        if (*in == '<') {
4907
212k
            return;
4908
212k
        }
4909
9.14M
        if (*in == '&') {
4910
113k
            return;
4911
113k
        }
4912
9.03M
        SHRINK;
4913
9.03M
        GROW;
4914
9.03M
        in = ctxt->input->cur;
4915
9.06M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4916
9.06M
             (*in == 0x09) || (*in == 0x0a));
4917
9.03M
    ctxt->input->line = line;
4918
9.03M
    ctxt->input->col = col;
4919
9.03M
    xmlParseCharDataComplex(ctxt, partial);
4920
9.03M
}
4921
4922
/**
4923
 * xmlParseCharDataComplex:
4924
 * @ctxt:  an XML parser context
4925
 * @cdata:  int indicating whether we are within a CDATA section
4926
 *
4927
 * Always makes progress if the first char isn't '<' or '&'.
4928
 *
4929
 * parse a CharData section.this is the fallback function
4930
 * of xmlParseCharData() when the parsing requires handling
4931
 * of non-ASCII characters.
4932
 */
4933
static void
4934
9.03M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4935
9.03M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4936
9.03M
    int nbchar = 0;
4937
9.03M
    int cur, l;
4938
4939
9.03M
    cur = xmlCurrentCharRecover(ctxt, &l);
4940
73.7M
    while ((cur != '<') && /* checked */
4941
73.7M
           (cur != '&') &&
4942
73.7M
     (IS_CHAR(cur))) {
4943
64.7M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4944
6.27k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4945
6.27k
  }
4946
64.7M
  COPY_BUF(buf, nbchar, cur);
4947
  /* move current position before possible calling of ctxt->sax->characters */
4948
64.7M
  NEXTL(l);
4949
64.7M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4950
458k
      buf[nbchar] = 0;
4951
4952
      /*
4953
       * OK the segment is to be consumed as chars.
4954
       */
4955
458k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4956
84.3k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4957
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4958
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4959
0
                                     buf, nbchar);
4960
84.3k
    } else {
4961
84.3k
        if (ctxt->sax->characters != NULL)
4962
84.3k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4963
84.3k
        if ((ctxt->sax->characters !=
4964
84.3k
             ctxt->sax->ignorableWhitespace) &&
4965
84.3k
      (*ctxt->space == -1))
4966
0
      *ctxt->space = -2;
4967
84.3k
    }
4968
84.3k
      }
4969
458k
      nbchar = 0;
4970
458k
            SHRINK;
4971
458k
  }
4972
64.7M
  cur = xmlCurrentCharRecover(ctxt, &l);
4973
64.7M
    }
4974
9.03M
    if (nbchar != 0) {
4975
1.89M
        buf[nbchar] = 0;
4976
  /*
4977
   * OK the segment is to be consumed as chars.
4978
   */
4979
1.89M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4980
3.09k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4981
0
    if (ctxt->sax->ignorableWhitespace != NULL)
4982
0
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4983
3.09k
      } else {
4984
3.09k
    if (ctxt->sax->characters != NULL)
4985
3.09k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4986
3.09k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4987
3.09k
        (*ctxt->space == -1))
4988
0
        *ctxt->space = -2;
4989
3.09k
      }
4990
3.09k
  }
4991
1.89M
    }
4992
    /*
4993
     * cur == 0 can mean
4994
     *
4995
     * - End of buffer.
4996
     * - An actual 0 character.
4997
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4998
     */
4999
9.03M
    if (ctxt->input->cur < ctxt->input->end) {
5000
9.02M
        if ((cur == 0) && (CUR != 0)) {
5001
679
            if (partial == 0) {
5002
679
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5003
679
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
5004
679
                NEXTL(1);
5005
679
            }
5006
9.02M
        } else if ((cur != '<') && (cur != '&')) {
5007
            /* Generate the error and skip the offending character */
5008
8.74M
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5009
8.74M
                              "PCDATA invalid Char value %d\n", cur);
5010
8.74M
            NEXTL(l);
5011
8.74M
        }
5012
9.02M
    }
5013
9.03M
}
5014
5015
/**
5016
 * xmlParseCharData:
5017
 * @ctxt:  an XML parser context
5018
 * @cdata:  unused
5019
 *
5020
 * DEPRECATED: Internal function, don't use.
5021
 */
5022
void
5023
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
5024
0
    xmlParseCharDataInternal(ctxt, 0);
5025
0
}
5026
5027
/**
5028
 * xmlParseExternalID:
5029
 * @ctxt:  an XML parser context
5030
 * @publicID:  a xmlChar** receiving PubidLiteral
5031
 * @strict: indicate whether we should restrict parsing to only
5032
 *          production [75], see NOTE below
5033
 *
5034
 * DEPRECATED: Internal function, don't use.
5035
 *
5036
 * Parse an External ID or a Public ID
5037
 *
5038
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
5039
 *       'PUBLIC' S PubidLiteral S SystemLiteral
5040
 *
5041
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5042
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
5043
 *
5044
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5045
 *
5046
 * Returns the function returns SystemLiteral and in the second
5047
 *                case publicID receives PubidLiteral, is strict is off
5048
 *                it is possible to return NULL and have publicID set.
5049
 */
5050
5051
xmlChar *
5052
17.9k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5053
17.9k
    xmlChar *URI = NULL;
5054
5055
17.9k
    *publicID = NULL;
5056
17.9k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5057
6.05k
        SKIP(6);
5058
6.05k
  if (SKIP_BLANKS == 0) {
5059
370
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5060
370
                     "Space required after 'SYSTEM'\n");
5061
370
  }
5062
6.05k
  URI = xmlParseSystemLiteral(ctxt);
5063
6.05k
  if (URI == NULL) {
5064
313
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5065
313
        }
5066
11.8k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5067
4.68k
        SKIP(6);
5068
4.68k
  if (SKIP_BLANKS == 0) {
5069
756
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5070
756
        "Space required after 'PUBLIC'\n");
5071
756
  }
5072
4.68k
  *publicID = xmlParsePubidLiteral(ctxt);
5073
4.68k
  if (*publicID == NULL) {
5074
372
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5075
372
  }
5076
4.68k
  if (strict) {
5077
      /*
5078
       * We don't handle [83] so "S SystemLiteral" is required.
5079
       */
5080
2.71k
      if (SKIP_BLANKS == 0) {
5081
496
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5082
496
      "Space required after the Public Identifier\n");
5083
496
      }
5084
2.71k
  } else {
5085
      /*
5086
       * We handle [83] so we return immediately, if
5087
       * "S SystemLiteral" is not detected. We skip blanks if no
5088
             * system literal was found, but this is harmless since we must
5089
             * be at the end of a NotationDecl.
5090
       */
5091
1.97k
      if (SKIP_BLANKS == 0) return(NULL);
5092
548
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
5093
548
  }
5094
3.02k
  URI = xmlParseSystemLiteral(ctxt);
5095
3.02k
  if (URI == NULL) {
5096
569
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5097
569
        }
5098
3.02k
    }
5099
16.2k
    return(URI);
5100
17.9k
}
5101
5102
/**
5103
 * xmlParseCommentComplex:
5104
 * @ctxt:  an XML parser context
5105
 * @buf:  the already parsed part of the buffer
5106
 * @len:  number of bytes in the buffer
5107
 * @size:  allocated size of the buffer
5108
 *
5109
 * Skip an XML (SGML) comment <!-- .... -->
5110
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5111
 *  must not occur within comments. "
5112
 * This is the slow routine in case the accelerator for ascii didn't work
5113
 *
5114
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5115
 */
5116
static void
5117
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5118
28.2k
                       size_t len, size_t size) {
5119
28.2k
    int q, ql;
5120
28.2k
    int r, rl;
5121
28.2k
    int cur, l;
5122
28.2k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5123
0
                       XML_MAX_HUGE_LENGTH :
5124
28.2k
                       XML_MAX_TEXT_LENGTH;
5125
5126
28.2k
    if (buf == NULL) {
5127
6.93k
        len = 0;
5128
6.93k
  size = XML_PARSER_BUFFER_SIZE;
5129
6.93k
  buf = xmlMalloc(size);
5130
6.93k
  if (buf == NULL) {
5131
0
      xmlErrMemory(ctxt);
5132
0
      return;
5133
0
  }
5134
6.93k
    }
5135
28.2k
    q = xmlCurrentCharRecover(ctxt, &ql);
5136
28.2k
    if (q == 0)
5137
1.16k
        goto not_terminated;
5138
27.0k
    if (!IS_CHAR(q)) {
5139
6.47k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5140
6.47k
                          "xmlParseComment: invalid xmlChar value %d\n",
5141
6.47k
                    q);
5142
6.47k
  xmlFree (buf);
5143
6.47k
  return;
5144
6.47k
    }
5145
20.6k
    NEXTL(ql);
5146
20.6k
    r = xmlCurrentCharRecover(ctxt, &rl);
5147
20.6k
    if (r == 0)
5148
558
        goto not_terminated;
5149
20.0k
    if (!IS_CHAR(r)) {
5150
1.70k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5151
1.70k
                          "xmlParseComment: invalid xmlChar value %d\n",
5152
1.70k
                    r);
5153
1.70k
  xmlFree (buf);
5154
1.70k
  return;
5155
1.70k
    }
5156
18.3k
    NEXTL(rl);
5157
18.3k
    cur = xmlCurrentCharRecover(ctxt, &l);
5158
18.3k
    if (cur == 0)
5159
297
        goto not_terminated;
5160
33.0M
    while (IS_CHAR(cur) && /* checked */
5161
33.0M
           ((cur != '>') ||
5162
33.0M
      (r != '-') || (q != '-'))) {
5163
33.0M
  if ((r == '-') && (q == '-')) {
5164
107k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5165
107k
  }
5166
33.0M
  if (len + 5 >= size) {
5167
3.19k
      xmlChar *new_buf;
5168
3.19k
            size_t new_size;
5169
5170
3.19k
      new_size = size * 2;
5171
3.19k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
5172
3.19k
      if (new_buf == NULL) {
5173
0
    xmlFree (buf);
5174
0
    xmlErrMemory(ctxt);
5175
0
    return;
5176
0
      }
5177
3.19k
      buf = new_buf;
5178
3.19k
            size = new_size;
5179
3.19k
  }
5180
33.0M
  COPY_BUF(buf, len, q);
5181
33.0M
        if (len > maxLength) {
5182
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5183
0
                         "Comment too big found", NULL);
5184
0
            xmlFree (buf);
5185
0
            return;
5186
0
        }
5187
5188
33.0M
  q = r;
5189
33.0M
  ql = rl;
5190
33.0M
  r = cur;
5191
33.0M
  rl = l;
5192
5193
33.0M
  NEXTL(l);
5194
33.0M
  cur = xmlCurrentCharRecover(ctxt, &l);
5195
5196
33.0M
    }
5197
18.0k
    buf[len] = 0;
5198
18.0k
    if (cur == 0) {
5199
2.48k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5200
2.48k
                       "Comment not terminated \n<!--%.50s\n", buf);
5201
15.5k
    } else if (!IS_CHAR(cur)) {
5202
9.94k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5203
9.94k
                          "xmlParseComment: invalid xmlChar value %d\n",
5204
9.94k
                    cur);
5205
9.94k
    } else {
5206
5.62k
        NEXT;
5207
5.62k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5208
5.62k
      (!ctxt->disableSAX))
5209
657
      ctxt->sax->comment(ctxt->userData, buf);
5210
5.62k
    }
5211
18.0k
    xmlFree(buf);
5212
18.0k
    return;
5213
2.01k
not_terminated:
5214
2.01k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5215
2.01k
       "Comment not terminated\n", NULL);
5216
2.01k
    xmlFree(buf);
5217
2.01k
}
5218
5219
/**
5220
 * xmlParseComment:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * Parse an XML (SGML) comment. Always consumes '<!'.
5226
 *
5227
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5228
 *  must not occur within comments. "
5229
 *
5230
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5231
 */
5232
void
5233
39.6k
xmlParseComment(xmlParserCtxtPtr ctxt) {
5234
39.6k
    xmlChar *buf = NULL;
5235
39.6k
    size_t size = XML_PARSER_BUFFER_SIZE;
5236
39.6k
    size_t len = 0;
5237
39.6k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
0
                       XML_MAX_HUGE_LENGTH :
5239
39.6k
                       XML_MAX_TEXT_LENGTH;
5240
39.6k
    const xmlChar *in;
5241
39.6k
    size_t nbchar = 0;
5242
39.6k
    int ccol;
5243
5244
    /*
5245
     * Check that there is a comment right here.
5246
     */
5247
39.6k
    if ((RAW != '<') || (NXT(1) != '!'))
5248
0
        return;
5249
39.6k
    SKIP(2);
5250
39.6k
    if ((RAW != '-') || (NXT(1) != '-'))
5251
6
        return;
5252
39.6k
    SKIP(2);
5253
39.6k
    GROW;
5254
5255
    /*
5256
     * Accelerated common case where input don't need to be
5257
     * modified before passing it to the handler.
5258
     */
5259
39.6k
    in = ctxt->input->cur;
5260
39.9k
    do {
5261
39.9k
  if (*in == 0xA) {
5262
271k
      do {
5263
271k
    ctxt->input->line++; ctxt->input->col = 1;
5264
271k
    in++;
5265
271k
      } while (*in == 0xA);
5266
2.14k
  }
5267
757k
get_more:
5268
757k
        ccol = ctxt->input->col;
5269
3.28M
  while (((*in > '-') && (*in <= 0x7F)) ||
5270
3.28M
         ((*in >= 0x20) && (*in < '-')) ||
5271
3.28M
         (*in == 0x09)) {
5272
2.52M
        in++;
5273
2.52M
        ccol++;
5274
2.52M
  }
5275
757k
  ctxt->input->col = ccol;
5276
757k
  if (*in == 0xA) {
5277
270k
      do {
5278
270k
    ctxt->input->line++; ctxt->input->col = 1;
5279
270k
    in++;
5280
270k
      } while (*in == 0xA);
5281
7.91k
      goto get_more;
5282
7.91k
  }
5283
749k
  nbchar = in - ctxt->input->cur;
5284
  /*
5285
   * save current set of data
5286
   */
5287
749k
  if (nbchar > 0) {
5288
728k
            if (buf == NULL) {
5289
24.4k
                if ((*in == '-') && (in[1] == '-'))
5290
4.21k
                    size = nbchar + 1;
5291
20.1k
                else
5292
20.1k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5293
24.4k
                buf = xmlMalloc(size);
5294
24.4k
                if (buf == NULL) {
5295
0
                    xmlErrMemory(ctxt);
5296
0
                    return;
5297
0
                }
5298
24.4k
                len = 0;
5299
704k
            } else if (len + nbchar + 1 >= size) {
5300
4.37k
                xmlChar *new_buf;
5301
4.37k
                size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5302
4.37k
                new_buf = (xmlChar *) xmlRealloc(buf, size);
5303
4.37k
                if (new_buf == NULL) {
5304
0
                    xmlFree (buf);
5305
0
                    xmlErrMemory(ctxt);
5306
0
                    return;
5307
0
                }
5308
4.37k
                buf = new_buf;
5309
4.37k
            }
5310
728k
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5311
728k
            len += nbchar;
5312
728k
            buf[len] = 0;
5313
728k
  }
5314
749k
        if (len > maxLength) {
5315
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5316
0
                         "Comment too big found", NULL);
5317
0
            xmlFree (buf);
5318
0
            return;
5319
0
        }
5320
749k
  ctxt->input->cur = in;
5321
749k
  if (*in == 0xA) {
5322
0
      in++;
5323
0
      ctxt->input->line++; ctxt->input->col = 1;
5324
0
  }
5325
749k
  if (*in == 0xD) {
5326
9.01k
      in++;
5327
9.01k
      if (*in == 0xA) {
5328
2.68k
    ctxt->input->cur = in;
5329
2.68k
    in++;
5330
2.68k
    ctxt->input->line++; ctxt->input->col = 1;
5331
2.68k
    goto get_more;
5332
2.68k
      }
5333
6.33k
      in--;
5334
6.33k
  }
5335
746k
  SHRINK;
5336
746k
  GROW;
5337
746k
  in = ctxt->input->cur;
5338
746k
  if (*in == '-') {
5339
718k
      if (in[1] == '-') {
5340
664k
          if (in[2] == '>') {
5341
11.3k
        SKIP(3);
5342
11.3k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5343
11.3k
            (!ctxt->disableSAX)) {
5344
2.05k
      if (buf != NULL)
5345
545
          ctxt->sax->comment(ctxt->userData, buf);
5346
1.50k
      else
5347
1.50k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5348
2.05k
        }
5349
11.3k
        if (buf != NULL)
5350
3.10k
            xmlFree(buf);
5351
11.3k
        return;
5352
11.3k
    }
5353
652k
    if (buf != NULL) {
5354
650k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5355
650k
                          "Double hyphen within comment: "
5356
650k
                                      "<!--%.50s\n",
5357
650k
              buf);
5358
650k
    } else
5359
1.90k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5360
1.90k
                          "Double hyphen within comment\n", NULL);
5361
652k
    in++;
5362
652k
    ctxt->input->col++;
5363
652k
      }
5364
706k
      in++;
5365
706k
      ctxt->input->col++;
5366
706k
      goto get_more;
5367
718k
  }
5368
746k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5369
28.2k
    xmlParseCommentComplex(ctxt, buf, len, size);
5370
28.2k
}
5371
5372
5373
/**
5374
 * xmlParsePITarget:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * parse the name of a PI
5380
 *
5381
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5382
 *
5383
 * Returns the PITarget name or NULL
5384
 */
5385
5386
const xmlChar *
5387
37.1k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5388
37.1k
    const xmlChar *name;
5389
5390
37.1k
    name = xmlParseName(ctxt);
5391
37.1k
    if ((name != NULL) &&
5392
37.1k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5393
37.1k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5394
37.1k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5395
8.59k
  int i;
5396
8.59k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5397
8.59k
      (name[2] == 'l') && (name[3] == 0)) {
5398
4.42k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5399
4.42k
     "XML declaration allowed only at the start of the document\n");
5400
4.42k
      return(name);
5401
4.42k
  } else if (name[3] == 0) {
5402
1.58k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5403
1.58k
      return(name);
5404
1.58k
  }
5405
7.15k
  for (i = 0;;i++) {
5406
7.15k
      if (xmlW3CPIs[i] == NULL) break;
5407
4.94k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5408
372
          return(name);
5409
4.94k
  }
5410
2.20k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5411
2.20k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5412
2.20k
          NULL, NULL);
5413
2.20k
    }
5414
30.7k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5415
822
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5416
822
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5417
822
    }
5418
30.7k
    return(name);
5419
37.1k
}
5420
5421
#ifdef LIBXML_CATALOG_ENABLED
5422
/**
5423
 * xmlParseCatalogPI:
5424
 * @ctxt:  an XML parser context
5425
 * @catalog:  the PI value string
5426
 *
5427
 * parse an XML Catalog Processing Instruction.
5428
 *
5429
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5430
 *
5431
 * Occurs only if allowed by the user and if happening in the Misc
5432
 * part of the document before any doctype information
5433
 * This will add the given catalog to the parsing context in order
5434
 * to be used if there is a resolution need further down in the document
5435
 */
5436
5437
static void
5438
4.04k
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5439
4.04k
    xmlChar *URL = NULL;
5440
4.04k
    const xmlChar *tmp, *base;
5441
4.04k
    xmlChar marker;
5442
5443
4.04k
    tmp = catalog;
5444
4.04k
    while (IS_BLANK_CH(*tmp)) tmp++;
5445
4.04k
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5446
839
  goto error;
5447
3.20k
    tmp += 7;
5448
3.20k
    while (IS_BLANK_CH(*tmp)) tmp++;
5449
3.20k
    if (*tmp != '=') {
5450
406
  return;
5451
406
    }
5452
2.79k
    tmp++;
5453
8.49k
    while (IS_BLANK_CH(*tmp)) tmp++;
5454
2.79k
    marker = *tmp;
5455
2.79k
    if ((marker != '\'') && (marker != '"'))
5456
677
  goto error;
5457
2.11k
    tmp++;
5458
2.11k
    base = tmp;
5459
13.3k
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5460
2.11k
    if (*tmp == 0)
5461
294
  goto error;
5462
1.82k
    URL = xmlStrndup(base, tmp - base);
5463
1.82k
    tmp++;
5464
14.0k
    while (IS_BLANK_CH(*tmp)) tmp++;
5465
1.82k
    if (*tmp != 0)
5466
395
  goto error;
5467
5468
1.42k
    if (URL != NULL) {
5469
        /*
5470
         * Unfortunately, the catalog API doesn't report OOM errors.
5471
         * xmlGetLastError isn't very helpful since we don't know
5472
         * where the last error came from. We'd have to reset it
5473
         * before this call and restore it afterwards.
5474
         */
5475
1.42k
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5476
1.42k
  xmlFree(URL);
5477
1.42k
    }
5478
1.42k
    return;
5479
5480
2.20k
error:
5481
2.20k
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5482
2.20k
            "Catalog PI syntax error: %s\n",
5483
2.20k
      catalog, NULL);
5484
2.20k
    if (URL != NULL)
5485
395
  xmlFree(URL);
5486
2.20k
}
5487
#endif
5488
5489
/**
5490
 * xmlParsePI:
5491
 * @ctxt:  an XML parser context
5492
 *
5493
 * DEPRECATED: Internal function, don't use.
5494
 *
5495
 * parse an XML Processing Instruction.
5496
 *
5497
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5498
 *
5499
 * The processing is transferred to SAX once parsed.
5500
 */
5501
5502
void
5503
37.1k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5504
37.1k
    xmlChar *buf = NULL;
5505
37.1k
    size_t len = 0;
5506
37.1k
    size_t size = XML_PARSER_BUFFER_SIZE;
5507
37.1k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5508
0
                       XML_MAX_HUGE_LENGTH :
5509
37.1k
                       XML_MAX_TEXT_LENGTH;
5510
37.1k
    int cur, l;
5511
37.1k
    const xmlChar *target;
5512
5513
37.1k
    if ((RAW == '<') && (NXT(1) == '?')) {
5514
  /*
5515
   * this is a Processing Instruction.
5516
   */
5517
37.1k
  SKIP(2);
5518
5519
  /*
5520
   * Parse the target name and check for special support like
5521
   * namespace.
5522
   */
5523
37.1k
        target = xmlParsePITarget(ctxt);
5524
37.1k
  if (target != NULL) {
5525
32.8k
      if ((RAW == '?') && (NXT(1) == '>')) {
5526
7.22k
    SKIP(2);
5527
5528
    /*
5529
     * SAX: PI detected.
5530
     */
5531
7.22k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5532
7.22k
        (ctxt->sax->processingInstruction != NULL))
5533
3.32k
        ctxt->sax->processingInstruction(ctxt->userData,
5534
3.32k
                                         target, NULL);
5535
7.22k
    return;
5536
7.22k
      }
5537
25.6k
      buf = xmlMalloc(size);
5538
25.6k
      if (buf == NULL) {
5539
0
    xmlErrMemory(ctxt);
5540
0
    return;
5541
0
      }
5542
25.6k
      if (SKIP_BLANKS == 0) {
5543
12.8k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5544
12.8k
        "ParsePI: PI %s space expected\n", target);
5545
12.8k
      }
5546
25.6k
      cur = xmlCurrentCharRecover(ctxt, &l);
5547
15.0M
      while (IS_CHAR(cur) && /* checked */
5548
15.0M
       ((cur != '?') || (NXT(1) != '>'))) {
5549
15.0M
    if (len + 5 >= size) {
5550
8.13k
        xmlChar *tmp;
5551
8.13k
                    size_t new_size = size * 2;
5552
8.13k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5553
8.13k
        if (tmp == NULL) {
5554
0
      xmlErrMemory(ctxt);
5555
0
      xmlFree(buf);
5556
0
      return;
5557
0
        }
5558
8.13k
        buf = tmp;
5559
8.13k
                    size = new_size;
5560
8.13k
    }
5561
15.0M
    COPY_BUF(buf, len, cur);
5562
15.0M
                if (len > maxLength) {
5563
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5564
0
                                      "PI %s too big found", target);
5565
0
                    xmlFree(buf);
5566
0
                    return;
5567
0
                }
5568
15.0M
    NEXTL(l);
5569
15.0M
    cur = xmlCurrentCharRecover(ctxt, &l);
5570
15.0M
      }
5571
25.6k
      buf[len] = 0;
5572
25.6k
      if (cur != '?') {
5573
12.2k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5574
12.2k
          "ParsePI: PI %s never end ...\n", target);
5575
13.4k
      } else {
5576
13.4k
    SKIP(2);
5577
5578
13.4k
#ifdef LIBXML_CATALOG_ENABLED
5579
13.4k
    if ((ctxt->inSubset == 0) &&
5580
13.4k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5581
4.04k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5582
5583
4.04k
        if (((ctxt->options & XML_PARSE_NO_CATALOG_PI) == 0) &&
5584
4.04k
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5585
4.04k
       (allow == XML_CATA_ALLOW_ALL)))
5586
4.04k
      xmlParseCatalogPI(ctxt, buf);
5587
4.04k
    }
5588
13.4k
#endif
5589
5590
    /*
5591
     * SAX: PI detected.
5592
     */
5593
13.4k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5594
13.4k
        (ctxt->sax->processingInstruction != NULL))
5595
2.28k
        ctxt->sax->processingInstruction(ctxt->userData,
5596
2.28k
                                         target, buf);
5597
13.4k
      }
5598
25.6k
      xmlFree(buf);
5599
25.6k
  } else {
5600
4.32k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5601
4.32k
  }
5602
37.1k
    }
5603
37.1k
}
5604
5605
/**
5606
 * xmlParseNotationDecl:
5607
 * @ctxt:  an XML parser context
5608
 *
5609
 * DEPRECATED: Internal function, don't use.
5610
 *
5611
 * Parse a notation declaration. Always consumes '<!'.
5612
 *
5613
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5614
 *
5615
 * Hence there is actually 3 choices:
5616
 *     'PUBLIC' S PubidLiteral
5617
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5618
 * and 'SYSTEM' S SystemLiteral
5619
 *
5620
 * See the NOTE on xmlParseExternalID().
5621
 */
5622
5623
void
5624
3.30k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5625
3.30k
    const xmlChar *name;
5626
3.30k
    xmlChar *Pubid;
5627
3.30k
    xmlChar *Systemid;
5628
5629
3.30k
    if ((CUR != '<') || (NXT(1) != '!'))
5630
0
        return;
5631
3.30k
    SKIP(2);
5632
5633
3.30k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5634
3.28k
  int inputid = ctxt->input->id;
5635
3.28k
  SKIP(8);
5636
3.28k
  if (SKIP_BLANKS_PE == 0) {
5637
28
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5638
28
         "Space required after '<!NOTATION'\n");
5639
28
      return;
5640
28
  }
5641
5642
3.25k
        name = xmlParseName(ctxt);
5643
3.25k
  if (name == NULL) {
5644
69
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5645
69
      return;
5646
69
  }
5647
3.18k
  if (xmlStrchr(name, ':') != NULL) {
5648
135
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5649
135
         "colons are forbidden from notation names '%s'\n",
5650
135
         name, NULL, NULL);
5651
135
  }
5652
3.18k
  if (SKIP_BLANKS_PE == 0) {
5653
145
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5654
145
         "Space required after the NOTATION name'\n");
5655
145
      return;
5656
145
  }
5657
5658
  /*
5659
   * Parse the IDs.
5660
   */
5661
3.04k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5662
3.04k
  SKIP_BLANKS_PE;
5663
5664
3.04k
  if (RAW == '>') {
5665
2.02k
      if (inputid != ctxt->input->id) {
5666
200
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5667
200
                         "Notation declaration doesn't start and stop"
5668
200
                               " in the same entity\n");
5669
200
      }
5670
2.02k
      NEXT;
5671
2.02k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5672
2.02k
    (ctxt->sax->notationDecl != NULL))
5673
1.41k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5674
2.02k
  } else {
5675
1.01k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5676
1.01k
  }
5677
3.04k
  if (Systemid != NULL) xmlFree(Systemid);
5678
3.04k
  if (Pubid != NULL) xmlFree(Pubid);
5679
3.04k
    }
5680
3.30k
}
5681
5682
/**
5683
 * xmlParseEntityDecl:
5684
 * @ctxt:  an XML parser context
5685
 *
5686
 * DEPRECATED: Internal function, don't use.
5687
 *
5688
 * Parse an entity declaration. Always consumes '<!'.
5689
 *
5690
 * [70] EntityDecl ::= GEDecl | PEDecl
5691
 *
5692
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5693
 *
5694
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5695
 *
5696
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5697
 *
5698
 * [74] PEDef ::= EntityValue | ExternalID
5699
 *
5700
 * [76] NDataDecl ::= S 'NDATA' S Name
5701
 *
5702
 * [ VC: Notation Declared ]
5703
 * The Name must match the declared name of a notation.
5704
 */
5705
5706
void
5707
54.1k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5708
54.1k
    const xmlChar *name = NULL;
5709
54.1k
    xmlChar *value = NULL;
5710
54.1k
    xmlChar *URI = NULL, *literal = NULL;
5711
54.1k
    const xmlChar *ndata = NULL;
5712
54.1k
    int isParameter = 0;
5713
54.1k
    xmlChar *orig = NULL;
5714
5715
54.1k
    if ((CUR != '<') || (NXT(1) != '!'))
5716
0
        return;
5717
54.1k
    SKIP(2);
5718
5719
    /* GROW; done in the caller */
5720
54.1k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5721
54.0k
  int inputid = ctxt->input->id;
5722
54.0k
  SKIP(6);
5723
54.0k
  if (SKIP_BLANKS_PE == 0) {
5724
20.7k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5725
20.7k
         "Space required after '<!ENTITY'\n");
5726
20.7k
  }
5727
5728
54.0k
  if (RAW == '%') {
5729
8.29k
      NEXT;
5730
8.29k
      if (SKIP_BLANKS_PE == 0) {
5731
2.18k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5732
2.18k
             "Space required after '%%'\n");
5733
2.18k
      }
5734
8.29k
      isParameter = 1;
5735
8.29k
  }
5736
5737
54.0k
        name = xmlParseName(ctxt);
5738
54.0k
  if (name == NULL) {
5739
304
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5740
304
                     "xmlParseEntityDecl: no name\n");
5741
304
            return;
5742
304
  }
5743
53.7k
  if (xmlStrchr(name, ':') != NULL) {
5744
2.69k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5745
2.69k
         "colons are forbidden from entities names '%s'\n",
5746
2.69k
         name, NULL, NULL);
5747
2.69k
  }
5748
53.7k
  if (SKIP_BLANKS_PE == 0) {
5749
25.6k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5750
25.6k
         "Space required after the entity name\n");
5751
25.6k
  }
5752
5753
  /*
5754
   * handle the various case of definitions...
5755
   */
5756
53.7k
  if (isParameter) {
5757
8.26k
      if ((RAW == '"') || (RAW == '\'')) {
5758
5.31k
          value = xmlParseEntityValue(ctxt, &orig);
5759
5.31k
    if (value) {
5760
5.28k
        if ((ctxt->sax != NULL) &&
5761
5.28k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5762
2.06k
      ctxt->sax->entityDecl(ctxt->userData, name,
5763
2.06k
                        XML_INTERNAL_PARAMETER_ENTITY,
5764
2.06k
            NULL, NULL, value);
5765
5.28k
    }
5766
5.31k
      } else {
5767
2.95k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5768
2.95k
    if ((URI == NULL) && (literal == NULL)) {
5769
113
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5770
113
    }
5771
2.95k
    if (URI) {
5772
2.64k
                    if (xmlStrchr(URI, '#')) {
5773
693
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5774
1.95k
                    } else {
5775
1.95k
                        if ((ctxt->sax != NULL) &&
5776
1.95k
                            (!ctxt->disableSAX) &&
5777
1.95k
                            (ctxt->sax->entityDecl != NULL))
5778
442
                            ctxt->sax->entityDecl(ctxt->userData, name,
5779
442
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5780
442
                                        literal, URI, NULL);
5781
1.95k
                    }
5782
2.64k
    }
5783
2.95k
      }
5784
45.5k
  } else {
5785
45.5k
      if ((RAW == '"') || (RAW == '\'')) {
5786
40.5k
          value = xmlParseEntityValue(ctxt, &orig);
5787
40.5k
    if ((ctxt->sax != NULL) &&
5788
40.5k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5789
4.91k
        ctxt->sax->entityDecl(ctxt->userData, name,
5790
4.91k
        XML_INTERNAL_GENERAL_ENTITY,
5791
4.91k
        NULL, NULL, value);
5792
    /*
5793
     * For expat compatibility in SAX mode.
5794
     */
5795
40.5k
    if ((ctxt->myDoc == NULL) ||
5796
40.5k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5797
3.48k
        if (ctxt->myDoc == NULL) {
5798
469
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5799
469
      if (ctxt->myDoc == NULL) {
5800
0
          xmlErrMemory(ctxt);
5801
0
          goto done;
5802
0
      }
5803
469
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5804
469
        }
5805
3.48k
        if (ctxt->myDoc->intSubset == NULL) {
5806
469
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5807
469
              BAD_CAST "fake", NULL, NULL);
5808
469
                        if (ctxt->myDoc->intSubset == NULL) {
5809
0
                            xmlErrMemory(ctxt);
5810
0
                            goto done;
5811
0
                        }
5812
469
                    }
5813
5814
3.48k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5815
3.48k
                    NULL, NULL, value);
5816
3.48k
    }
5817
40.5k
      } else {
5818
4.98k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5819
4.98k
    if ((URI == NULL) && (literal == NULL)) {
5820
569
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5821
569
    }
5822
4.98k
    if (URI) {
5823
4.27k
                    if (xmlStrchr(URI, '#')) {
5824
707
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5825
707
                    }
5826
4.27k
    }
5827
4.98k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5828
255
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5829
255
           "Space required before 'NDATA'\n");
5830
255
    }
5831
4.98k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5832
749
        SKIP(5);
5833
749
        if (SKIP_BLANKS_PE == 0) {
5834
188
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5835
188
               "Space required after 'NDATA'\n");
5836
188
        }
5837
749
        ndata = xmlParseName(ctxt);
5838
749
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5839
749
            (ctxt->sax->unparsedEntityDecl != NULL))
5840
189
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5841
189
            literal, URI, ndata);
5842
4.23k
    } else {
5843
4.23k
        if ((ctxt->sax != NULL) &&
5844
4.23k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5845
2.07k
      ctxt->sax->entityDecl(ctxt->userData, name,
5846
2.07k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5847
2.07k
            literal, URI, NULL);
5848
        /*
5849
         * For expat compatibility in SAX mode.
5850
         * assuming the entity replacement was asked for
5851
         */
5852
4.23k
        if ((ctxt->replaceEntities != 0) &&
5853
4.23k
      ((ctxt->myDoc == NULL) ||
5854
0
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5855
0
      if (ctxt->myDoc == NULL) {
5856
0
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5857
0
          if (ctxt->myDoc == NULL) {
5858
0
              xmlErrMemory(ctxt);
5859
0
        goto done;
5860
0
          }
5861
0
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5862
0
      }
5863
5864
0
      if (ctxt->myDoc->intSubset == NULL) {
5865
0
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5866
0
            BAD_CAST "fake", NULL, NULL);
5867
0
                            if (ctxt->myDoc->intSubset == NULL) {
5868
0
                                xmlErrMemory(ctxt);
5869
0
                                goto done;
5870
0
                            }
5871
0
                        }
5872
0
      xmlSAX2EntityDecl(ctxt, name,
5873
0
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5874
0
                  literal, URI, NULL);
5875
0
        }
5876
4.23k
    }
5877
4.98k
      }
5878
45.5k
  }
5879
53.7k
  SKIP_BLANKS_PE;
5880
53.7k
  if (RAW != '>') {
5881
725
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5882
725
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5883
725
      xmlHaltParser(ctxt);
5884
53.0k
  } else {
5885
53.0k
      if (inputid != ctxt->input->id) {
5886
432
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5887
432
                         "Entity declaration doesn't start and stop in"
5888
432
                               " the same entity\n");
5889
432
      }
5890
53.0k
      NEXT;
5891
53.0k
  }
5892
53.7k
  if (orig != NULL) {
5893
      /*
5894
       * Ugly mechanism to save the raw entity value.
5895
       */
5896
45.4k
      xmlEntityPtr cur = NULL;
5897
5898
45.4k
      if (isParameter) {
5899
5.28k
          if ((ctxt->sax != NULL) &&
5900
5.28k
        (ctxt->sax->getParameterEntity != NULL))
5901
5.28k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5902
40.1k
      } else {
5903
40.1k
          if ((ctxt->sax != NULL) &&
5904
40.1k
        (ctxt->sax->getEntity != NULL))
5905
40.1k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5906
40.1k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5907
28.0k
        cur = xmlSAX2GetEntity(ctxt, name);
5908
28.0k
    }
5909
40.1k
      }
5910
45.4k
            if ((cur != NULL) && (cur->orig == NULL)) {
5911
4.77k
    cur->orig = orig;
5912
4.77k
                orig = NULL;
5913
4.77k
      }
5914
45.4k
  }
5915
5916
53.7k
done:
5917
53.7k
  if (value != NULL) xmlFree(value);
5918
53.7k
  if (URI != NULL) xmlFree(URI);
5919
53.7k
  if (literal != NULL) xmlFree(literal);
5920
53.7k
        if (orig != NULL) xmlFree(orig);
5921
53.7k
    }
5922
54.1k
}
5923
5924
/**
5925
 * xmlParseDefaultDecl:
5926
 * @ctxt:  an XML parser context
5927
 * @value:  Receive a possible fixed default value for the attribute
5928
 *
5929
 * DEPRECATED: Internal function, don't use.
5930
 *
5931
 * Parse an attribute default declaration
5932
 *
5933
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5934
 *
5935
 * [ VC: Required Attribute ]
5936
 * if the default declaration is the keyword #REQUIRED, then the
5937
 * attribute must be specified for all elements of the type in the
5938
 * attribute-list declaration.
5939
 *
5940
 * [ VC: Attribute Default Legal ]
5941
 * The declared default value must meet the lexical constraints of
5942
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5943
 *
5944
 * [ VC: Fixed Attribute Default ]
5945
 * if an attribute has a default value declared with the #FIXED
5946
 * keyword, instances of that attribute must match the default value.
5947
 *
5948
 * [ WFC: No < in Attribute Values ]
5949
 * handled in xmlParseAttValue()
5950
 *
5951
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5952
 *          or XML_ATTRIBUTE_FIXED.
5953
 */
5954
5955
int
5956
49.1k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5957
49.1k
    int val;
5958
49.1k
    xmlChar *ret;
5959
5960
49.1k
    *value = NULL;
5961
49.1k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5962
698
  SKIP(9);
5963
698
  return(XML_ATTRIBUTE_REQUIRED);
5964
698
    }
5965
48.4k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5966
621
  SKIP(8);
5967
621
  return(XML_ATTRIBUTE_IMPLIED);
5968
621
    }
5969
47.8k
    val = XML_ATTRIBUTE_NONE;
5970
47.8k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5971
545
  SKIP(6);
5972
545
  val = XML_ATTRIBUTE_FIXED;
5973
545
  if (SKIP_BLANKS_PE == 0) {
5974
248
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5975
248
         "Space required after '#FIXED'\n");
5976
248
  }
5977
545
    }
5978
47.8k
    ret = xmlParseAttValue(ctxt);
5979
47.8k
    if (ret == NULL) {
5980
2.19k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5981
2.19k
           "Attribute default value declaration error\n");
5982
2.19k
    } else
5983
45.6k
        *value = ret;
5984
47.8k
    return(val);
5985
48.4k
}
5986
5987
/**
5988
 * xmlParseNotationType:
5989
 * @ctxt:  an XML parser context
5990
 *
5991
 * DEPRECATED: Internal function, don't use.
5992
 *
5993
 * parse an Notation attribute type.
5994
 *
5995
 * Note: the leading 'NOTATION' S part has already being parsed...
5996
 *
5997
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5998
 *
5999
 * [ VC: Notation Attributes ]
6000
 * Values of this type must match one of the notation names included
6001
 * in the declaration; all notation names in the declaration must be declared.
6002
 *
6003
 * Returns: the notation attribute tree built while parsing
6004
 */
6005
6006
xmlEnumerationPtr
6007
2.88k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
6008
2.88k
    const xmlChar *name;
6009
2.88k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6010
6011
2.88k
    if (RAW != '(') {
6012
76
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
6013
76
  return(NULL);
6014
76
    }
6015
3.27k
    do {
6016
3.27k
        NEXT;
6017
3.27k
  SKIP_BLANKS_PE;
6018
3.27k
        name = xmlParseName(ctxt);
6019
3.27k
  if (name == NULL) {
6020
53
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6021
53
         "Name expected in NOTATION declaration\n");
6022
53
            xmlFreeEnumeration(ret);
6023
53
      return(NULL);
6024
53
  }
6025
3.22k
  tmp = ret;
6026
4.20k
  while (tmp != NULL) {
6027
1.34k
      if (xmlStrEqual(name, tmp->name)) {
6028
373
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6029
373
    "standalone: attribute notation value token %s duplicated\n",
6030
373
         name, NULL);
6031
373
    if (!xmlDictOwns(ctxt->dict, name))
6032
0
        xmlFree((xmlChar *) name);
6033
373
    break;
6034
373
      }
6035
976
      tmp = tmp->next;
6036
976
  }
6037
3.22k
  if (tmp == NULL) {
6038
2.85k
      cur = xmlCreateEnumeration(name);
6039
2.85k
      if (cur == NULL) {
6040
0
                xmlErrMemory(ctxt);
6041
0
                xmlFreeEnumeration(ret);
6042
0
                return(NULL);
6043
0
            }
6044
2.85k
      if (last == NULL) ret = last = cur;
6045
84
      else {
6046
84
    last->next = cur;
6047
84
    last = cur;
6048
84
      }
6049
2.85k
  }
6050
3.22k
  SKIP_BLANKS_PE;
6051
3.22k
    } while (RAW == '|');
6052
2.75k
    if (RAW != ')') {
6053
38
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
6054
38
        xmlFreeEnumeration(ret);
6055
38
  return(NULL);
6056
38
    }
6057
2.71k
    NEXT;
6058
2.71k
    return(ret);
6059
2.75k
}
6060
6061
/**
6062
 * xmlParseEnumerationType:
6063
 * @ctxt:  an XML parser context
6064
 *
6065
 * DEPRECATED: Internal function, don't use.
6066
 *
6067
 * parse an Enumeration attribute type.
6068
 *
6069
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
6070
 *
6071
 * [ VC: Enumeration ]
6072
 * Values of this type must match one of the Nmtoken tokens in
6073
 * the declaration
6074
 *
6075
 * Returns: the enumeration attribute tree built while parsing
6076
 */
6077
6078
xmlEnumerationPtr
6079
12.6k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
6080
12.6k
    xmlChar *name;
6081
12.6k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6082
6083
12.6k
    if (RAW != '(') {
6084
507
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
6085
507
  return(NULL);
6086
507
    }
6087
14.5k
    do {
6088
14.5k
        NEXT;
6089
14.5k
  SKIP_BLANKS_PE;
6090
14.5k
        name = xmlParseNmtoken(ctxt);
6091
14.5k
  if (name == NULL) {
6092
89
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
6093
89
      return(ret);
6094
89
  }
6095
14.4k
  tmp = ret;
6096
23.8k
  while (tmp != NULL) {
6097
9.80k
      if (xmlStrEqual(name, tmp->name)) {
6098
419
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6099
419
    "standalone: attribute enumeration value token %s duplicated\n",
6100
419
         name, NULL);
6101
419
    if (!xmlDictOwns(ctxt->dict, name))
6102
419
        xmlFree(name);
6103
419
    break;
6104
419
      }
6105
9.38k
      tmp = tmp->next;
6106
9.38k
  }
6107
14.4k
  if (tmp == NULL) {
6108
14.0k
      cur = xmlCreateEnumeration(name);
6109
14.0k
      if (!xmlDictOwns(ctxt->dict, name))
6110
14.0k
    xmlFree(name);
6111
14.0k
      if (cur == NULL) {
6112
0
                xmlErrMemory(ctxt);
6113
0
                xmlFreeEnumeration(ret);
6114
0
                return(NULL);
6115
0
            }
6116
14.0k
      if (last == NULL) ret = last = cur;
6117
2.00k
      else {
6118
2.00k
    last->next = cur;
6119
2.00k
    last = cur;
6120
2.00k
      }
6121
14.0k
  }
6122
14.4k
  SKIP_BLANKS_PE;
6123
14.4k
    } while (RAW == '|');
6124
12.0k
    if (RAW != ')') {
6125
338
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
6126
338
  return(ret);
6127
338
    }
6128
11.6k
    NEXT;
6129
11.6k
    return(ret);
6130
12.0k
}
6131
6132
/**
6133
 * xmlParseEnumeratedType:
6134
 * @ctxt:  an XML parser context
6135
 * @tree:  the enumeration tree built while parsing
6136
 *
6137
 * DEPRECATED: Internal function, don't use.
6138
 *
6139
 * parse an Enumerated attribute type.
6140
 *
6141
 * [57] EnumeratedType ::= NotationType | Enumeration
6142
 *
6143
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6144
 *
6145
 *
6146
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6147
 */
6148
6149
int
6150
15.5k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6151
15.5k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6152
2.88k
  SKIP(8);
6153
2.88k
  if (SKIP_BLANKS_PE == 0) {
6154
3
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6155
3
         "Space required after 'NOTATION'\n");
6156
3
      return(0);
6157
3
  }
6158
2.88k
  *tree = xmlParseNotationType(ctxt);
6159
2.88k
  if (*tree == NULL) return(0);
6160
2.71k
  return(XML_ATTRIBUTE_NOTATION);
6161
2.88k
    }
6162
12.6k
    *tree = xmlParseEnumerationType(ctxt);
6163
12.6k
    if (*tree == NULL) return(0);
6164
12.0k
    return(XML_ATTRIBUTE_ENUMERATION);
6165
12.6k
}
6166
6167
/**
6168
 * xmlParseAttributeType:
6169
 * @ctxt:  an XML parser context
6170
 * @tree:  the enumeration tree built while parsing
6171
 *
6172
 * DEPRECATED: Internal function, don't use.
6173
 *
6174
 * parse the Attribute list def for an element
6175
 *
6176
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6177
 *
6178
 * [55] StringType ::= 'CDATA'
6179
 *
6180
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6181
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6182
 *
6183
 * Validity constraints for attribute values syntax are checked in
6184
 * xmlValidateAttributeValue()
6185
 *
6186
 * [ VC: ID ]
6187
 * Values of type ID must match the Name production. A name must not
6188
 * appear more than once in an XML document as a value of this type;
6189
 * i.e., ID values must uniquely identify the elements which bear them.
6190
 *
6191
 * [ VC: One ID per Element Type ]
6192
 * No element type may have more than one ID attribute specified.
6193
 *
6194
 * [ VC: ID Attribute Default ]
6195
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6196
 *
6197
 * [ VC: IDREF ]
6198
 * Values of type IDREF must match the Name production, and values
6199
 * of type IDREFS must match Names; each IDREF Name must match the value
6200
 * of an ID attribute on some element in the XML document; i.e. IDREF
6201
 * values must match the value of some ID attribute.
6202
 *
6203
 * [ VC: Entity Name ]
6204
 * Values of type ENTITY must match the Name production, values
6205
 * of type ENTITIES must match Names; each Entity Name must match the
6206
 * name of an unparsed entity declared in the DTD.
6207
 *
6208
 * [ VC: Name Token ]
6209
 * Values of type NMTOKEN must match the Nmtoken production; values
6210
 * of type NMTOKENS must match Nmtokens.
6211
 *
6212
 * Returns the attribute type
6213
 */
6214
int
6215
50.3k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6216
50.3k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6217
5.81k
  SKIP(5);
6218
5.81k
  return(XML_ATTRIBUTE_CDATA);
6219
44.4k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6220
5.47k
  SKIP(6);
6221
5.47k
  return(XML_ATTRIBUTE_IDREFS);
6222
39.0k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6223
1.01k
  SKIP(5);
6224
1.01k
  return(XML_ATTRIBUTE_IDREF);
6225
38.0k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6226
20.1k
        SKIP(2);
6227
20.1k
  return(XML_ATTRIBUTE_ID);
6228
20.1k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6229
527
  SKIP(6);
6230
527
  return(XML_ATTRIBUTE_ENTITY);
6231
17.3k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6232
579
  SKIP(8);
6233
579
  return(XML_ATTRIBUTE_ENTITIES);
6234
16.7k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6235
157
  SKIP(8);
6236
157
  return(XML_ATTRIBUTE_NMTOKENS);
6237
16.5k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6238
1.06k
  SKIP(7);
6239
1.06k
  return(XML_ATTRIBUTE_NMTOKEN);
6240
1.06k
     }
6241
15.5k
     return(xmlParseEnumeratedType(ctxt, tree));
6242
50.3k
}
6243
6244
/**
6245
 * xmlParseAttributeListDecl:
6246
 * @ctxt:  an XML parser context
6247
 *
6248
 * DEPRECATED: Internal function, don't use.
6249
 *
6250
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6251
 *
6252
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6253
 *
6254
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6255
 *
6256
 */
6257
void
6258
11.9k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6259
11.9k
    const xmlChar *elemName;
6260
11.9k
    const xmlChar *attrName;
6261
11.9k
    xmlEnumerationPtr tree;
6262
6263
11.9k
    if ((CUR != '<') || (NXT(1) != '!'))
6264
0
        return;
6265
11.9k
    SKIP(2);
6266
6267
11.9k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6268
11.9k
  int inputid = ctxt->input->id;
6269
6270
11.9k
  SKIP(7);
6271
11.9k
  if (SKIP_BLANKS_PE == 0) {
6272
901
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6273
901
                     "Space required after '<!ATTLIST'\n");
6274
901
  }
6275
11.9k
        elemName = xmlParseName(ctxt);
6276
11.9k
  if (elemName == NULL) {
6277
165
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6278
165
         "ATTLIST: no name for Element\n");
6279
165
      return;
6280
165
  }
6281
11.7k
  SKIP_BLANKS_PE;
6282
11.7k
  GROW;
6283
59.4k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6284
52.0k
      int type;
6285
52.0k
      int def;
6286
52.0k
      xmlChar *defaultValue = NULL;
6287
6288
52.0k
      GROW;
6289
52.0k
            tree = NULL;
6290
52.0k
      attrName = xmlParseName(ctxt);
6291
52.0k
      if (attrName == NULL) {
6292
1.19k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6293
1.19k
             "ATTLIST: no name for Attribute\n");
6294
1.19k
    break;
6295
1.19k
      }
6296
50.8k
      GROW;
6297
50.8k
      if (SKIP_BLANKS_PE == 0) {
6298
558
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6299
558
            "Space required after the attribute name\n");
6300
558
    break;
6301
558
      }
6302
6303
50.3k
      type = xmlParseAttributeType(ctxt, &tree);
6304
50.3k
      if (type <= 0) {
6305
747
          break;
6306
747
      }
6307
6308
49.5k
      GROW;
6309
49.5k
      if (SKIP_BLANKS_PE == 0) {
6310
431
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6311
431
             "Space required after the attribute type\n");
6312
431
          if (tree != NULL)
6313
362
        xmlFreeEnumeration(tree);
6314
431
    break;
6315
431
      }
6316
6317
49.1k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6318
49.1k
      if (def <= 0) {
6319
0
                if (defaultValue != NULL)
6320
0
        xmlFree(defaultValue);
6321
0
          if (tree != NULL)
6322
0
        xmlFreeEnumeration(tree);
6323
0
          break;
6324
0
      }
6325
49.1k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6326
39.8k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6327
6328
49.1k
      GROW;
6329
49.1k
            if (RAW != '>') {
6330
41.9k
    if (SKIP_BLANKS_PE == 0) {
6331
1.49k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6332
1.49k
      "Space required after the attribute default value\n");
6333
1.49k
        if (defaultValue != NULL)
6334
79
      xmlFree(defaultValue);
6335
1.49k
        if (tree != NULL)
6336
195
      xmlFreeEnumeration(tree);
6337
1.49k
        break;
6338
1.49k
    }
6339
41.9k
      }
6340
47.6k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6341
47.6k
    (ctxt->sax->attributeDecl != NULL))
6342
38.4k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6343
38.4k
                          type, def, defaultValue, tree);
6344
9.14k
      else if (tree != NULL)
6345
1.96k
    xmlFreeEnumeration(tree);
6346
6347
47.6k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6348
47.6k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6349
47.6k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6350
45.5k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6351
45.5k
      }
6352
47.6k
      if (ctxt->sax2) {
6353
47.6k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6354
47.6k
      }
6355
47.6k
      if (defaultValue != NULL)
6356
45.5k
          xmlFree(defaultValue);
6357
47.6k
      GROW;
6358
47.6k
  }
6359
11.7k
  if (RAW == '>') {
6360
7.41k
      if (inputid != ctxt->input->id) {
6361
20
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6362
20
                               "Attribute list declaration doesn't start and"
6363
20
                               " stop in the same entity\n");
6364
20
      }
6365
7.41k
      NEXT;
6366
7.41k
  }
6367
11.7k
    }
6368
11.9k
}
6369
6370
/**
6371
 * xmlParseElementMixedContentDecl:
6372
 * @ctxt:  an XML parser context
6373
 * @inputchk:  the input used for the current entity, needed for boundary checks
6374
 *
6375
 * DEPRECATED: Internal function, don't use.
6376
 *
6377
 * parse the declaration for a Mixed Element content
6378
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6379
 *
6380
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6381
 *                '(' S? '#PCDATA' S? ')'
6382
 *
6383
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6384
 *
6385
 * [ VC: No Duplicate Types ]
6386
 * The same name must not appear more than once in a single
6387
 * mixed-content declaration.
6388
 *
6389
 * returns: the list of the xmlElementContentPtr describing the element choices
6390
 */
6391
xmlElementContentPtr
6392
1.37k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6393
1.37k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6394
1.37k
    const xmlChar *elem = NULL;
6395
6396
1.37k
    GROW;
6397
1.37k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6398
1.37k
  SKIP(7);
6399
1.37k
  SKIP_BLANKS_PE;
6400
1.37k
  if (RAW == ')') {
6401
410
      if (ctxt->input->id != inputchk) {
6402
68
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6403
68
                               "Element content declaration doesn't start and"
6404
68
                               " stop in the same entity\n");
6405
68
      }
6406
410
      NEXT;
6407
410
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6408
410
      if (ret == NULL)
6409
0
                goto mem_error;
6410
410
      if (RAW == '*') {
6411
156
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6412
156
    NEXT;
6413
156
      }
6414
410
      return(ret);
6415
410
  }
6416
969
  if ((RAW == '(') || (RAW == '|')) {
6417
787
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6418
787
      if (ret == NULL)
6419
0
                goto mem_error;
6420
787
  }
6421
2.38k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6422
1.48k
      NEXT;
6423
1.48k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6424
1.48k
            if (n == NULL)
6425
0
                goto mem_error;
6426
1.48k
      if (elem == NULL) {
6427
785
    n->c1 = cur;
6428
785
    if (cur != NULL)
6429
785
        cur->parent = n;
6430
785
    ret = cur = n;
6431
785
      } else {
6432
702
          cur->c2 = n;
6433
702
    n->parent = cur;
6434
702
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6435
702
                if (n->c1 == NULL)
6436
0
                    goto mem_error;
6437
702
    n->c1->parent = n;
6438
702
    cur = n;
6439
702
      }
6440
1.48k
      SKIP_BLANKS_PE;
6441
1.48k
      elem = xmlParseName(ctxt);
6442
1.48k
      if (elem == NULL) {
6443
67
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6444
67
      "xmlParseElementMixedContentDecl : Name expected\n");
6445
67
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6446
67
    return(NULL);
6447
67
      }
6448
1.42k
      SKIP_BLANKS_PE;
6449
1.42k
      GROW;
6450
1.42k
  }
6451
902
  if ((RAW == ')') && (NXT(1) == '*')) {
6452
704
      if (elem != NULL) {
6453
704
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6454
704
                                   XML_ELEMENT_CONTENT_ELEMENT);
6455
704
    if (cur->c2 == NULL)
6456
0
                    goto mem_error;
6457
704
    cur->c2->parent = cur;
6458
704
            }
6459
704
            if (ret != NULL)
6460
704
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6461
704
      if (ctxt->input->id != inputchk) {
6462
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6463
0
                               "Element content declaration doesn't start and"
6464
0
                               " stop in the same entity\n");
6465
0
      }
6466
704
      SKIP(2);
6467
704
  } else {
6468
198
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6469
198
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6470
198
      return(NULL);
6471
198
  }
6472
6473
902
    } else {
6474
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6475
0
    }
6476
704
    return(ret);
6477
6478
0
mem_error:
6479
0
    xmlErrMemory(ctxt);
6480
0
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6481
0
    return(NULL);
6482
1.37k
}
6483
6484
/**
6485
 * xmlParseElementChildrenContentDeclPriv:
6486
 * @ctxt:  an XML parser context
6487
 * @inputchk:  the input used for the current entity, needed for boundary checks
6488
 * @depth: the level of recursion
6489
 *
6490
 * parse the declaration for a Mixed Element content
6491
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6492
 *
6493
 *
6494
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6495
 *
6496
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6497
 *
6498
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6499
 *
6500
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6501
 *
6502
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6503
 * TODO Parameter-entity replacement text must be properly nested
6504
 *  with parenthesized groups. That is to say, if either of the
6505
 *  opening or closing parentheses in a choice, seq, or Mixed
6506
 *  construct is contained in the replacement text for a parameter
6507
 *  entity, both must be contained in the same replacement text. For
6508
 *  interoperability, if a parameter-entity reference appears in a
6509
 *  choice, seq, or Mixed construct, its replacement text should not
6510
 *  be empty, and neither the first nor last non-blank character of
6511
 *  the replacement text should be a connector (| or ,).
6512
 *
6513
 * Returns the tree of xmlElementContentPtr describing the element
6514
 *          hierarchy.
6515
 */
6516
static xmlElementContentPtr
6517
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6518
19.0k
                                       int depth) {
6519
19.0k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6520
19.0k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6521
19.0k
    const xmlChar *elem;
6522
19.0k
    xmlChar type = 0;
6523
6524
19.0k
    if (depth > maxDepth) {
6525
2
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6526
2
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6527
2
                "use XML_PARSE_HUGE\n", depth);
6528
2
  return(NULL);
6529
2
    }
6530
19.0k
    SKIP_BLANKS_PE;
6531
19.0k
    GROW;
6532
19.0k
    if (RAW == '(') {
6533
8.07k
  int inputid = ctxt->input->id;
6534
6535
        /* Recurse on first child */
6536
8.07k
  NEXT;
6537
8.07k
  SKIP_BLANKS_PE;
6538
8.07k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6539
8.07k
                                                           depth + 1);
6540
8.07k
        if (cur == NULL)
6541
5.60k
            return(NULL);
6542
2.46k
  SKIP_BLANKS_PE;
6543
2.46k
  GROW;
6544
10.9k
    } else {
6545
10.9k
  elem = xmlParseName(ctxt);
6546
10.9k
  if (elem == NULL) {
6547
378
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6548
378
      return(NULL);
6549
378
  }
6550
10.5k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6551
10.5k
  if (cur == NULL) {
6552
0
      xmlErrMemory(ctxt);
6553
0
      return(NULL);
6554
0
  }
6555
10.5k
  GROW;
6556
10.5k
  if (RAW == '?') {
6557
1.19k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6558
1.19k
      NEXT;
6559
9.35k
  } else if (RAW == '*') {
6560
399
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6561
399
      NEXT;
6562
8.95k
  } else if (RAW == '+') {
6563
438
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6564
438
      NEXT;
6565
8.51k
  } else {
6566
8.51k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6567
8.51k
  }
6568
10.5k
  GROW;
6569
10.5k
    }
6570
13.0k
    SKIP_BLANKS_PE;
6571
25.4k
    while ((RAW != ')') && (PARSER_STOPPED(ctxt) == 0)) {
6572
        /*
6573
   * Each loop we parse one separator and one element.
6574
   */
6575
16.0k
        if (RAW == ',') {
6576
1.88k
      if (type == 0) type = CUR;
6577
6578
      /*
6579
       * Detect "Name | Name , Name" error
6580
       */
6581
1.28k
      else if (type != CUR) {
6582
3
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6583
3
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6584
3
                      type);
6585
3
    if ((last != NULL) && (last != ret))
6586
3
        xmlFreeDocElementContent(ctxt->myDoc, last);
6587
3
    if (ret != NULL)
6588
3
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6589
3
    return(NULL);
6590
3
      }
6591
1.88k
      NEXT;
6592
6593
1.88k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6594
1.88k
      if (op == NULL) {
6595
0
                xmlErrMemory(ctxt);
6596
0
    if ((last != NULL) && (last != ret))
6597
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6598
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6599
0
    return(NULL);
6600
0
      }
6601
1.88k
      if (last == NULL) {
6602
598
    op->c1 = ret;
6603
598
    if (ret != NULL)
6604
598
        ret->parent = op;
6605
598
    ret = cur = op;
6606
1.28k
      } else {
6607
1.28k
          cur->c2 = op;
6608
1.28k
    if (op != NULL)
6609
1.28k
        op->parent = cur;
6610
1.28k
    op->c1 = last;
6611
1.28k
    if (last != NULL)
6612
1.28k
        last->parent = op;
6613
1.28k
    cur =op;
6614
1.28k
    last = NULL;
6615
1.28k
      }
6616
14.1k
  } else if (RAW == '|') {
6617
12.7k
      if (type == 0) type = CUR;
6618
6619
      /*
6620
       * Detect "Name , Name | Name" error
6621
       */
6622
5.08k
      else if (type != CUR) {
6623
3
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6624
3
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6625
3
          type);
6626
3
    if ((last != NULL) && (last != ret))
6627
3
        xmlFreeDocElementContent(ctxt->myDoc, last);
6628
3
    if (ret != NULL)
6629
3
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6630
3
    return(NULL);
6631
3
      }
6632
12.7k
      NEXT;
6633
6634
12.7k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6635
12.7k
      if (op == NULL) {
6636
0
                xmlErrMemory(ctxt);
6637
0
    if ((last != NULL) && (last != ret))
6638
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6639
0
    if (ret != NULL)
6640
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6641
0
    return(NULL);
6642
0
      }
6643
12.7k
      if (last == NULL) {
6644
7.65k
    op->c1 = ret;
6645
7.65k
    if (ret != NULL)
6646
7.65k
        ret->parent = op;
6647
7.65k
    ret = cur = op;
6648
7.65k
      } else {
6649
5.07k
          cur->c2 = op;
6650
5.07k
    if (op != NULL)
6651
5.07k
        op->parent = cur;
6652
5.07k
    op->c1 = last;
6653
5.07k
    if (last != NULL)
6654
5.07k
        last->parent = op;
6655
5.07k
    cur =op;
6656
5.07k
    last = NULL;
6657
5.07k
      }
6658
12.7k
  } else {
6659
1.44k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6660
1.44k
      if ((last != NULL) && (last != ret))
6661
818
          xmlFreeDocElementContent(ctxt->myDoc, last);
6662
1.44k
      if (ret != NULL)
6663
1.44k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6664
1.44k
      return(NULL);
6665
1.44k
  }
6666
14.6k
  GROW;
6667
14.6k
  SKIP_BLANKS_PE;
6668
14.6k
  GROW;
6669
14.6k
  if (RAW == '(') {
6670
4.21k
      int inputid = ctxt->input->id;
6671
      /* Recurse on second child */
6672
4.21k
      NEXT;
6673
4.21k
      SKIP_BLANKS_PE;
6674
4.21k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6675
4.21k
                                                          depth + 1);
6676
4.21k
            if (last == NULL) {
6677
2.14k
    if (ret != NULL)
6678
2.14k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6679
2.14k
    return(NULL);
6680
2.14k
            }
6681
2.06k
      SKIP_BLANKS_PE;
6682
10.4k
  } else {
6683
10.4k
      elem = xmlParseName(ctxt);
6684
10.4k
      if (elem == NULL) {
6685
68
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6686
68
    if (ret != NULL)
6687
68
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6688
68
    return(NULL);
6689
68
      }
6690
10.3k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6691
10.3k
      if (last == NULL) {
6692
0
                xmlErrMemory(ctxt);
6693
0
    if (ret != NULL)
6694
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6695
0
    return(NULL);
6696
0
      }
6697
10.3k
      if (RAW == '?') {
6698
434
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6699
434
    NEXT;
6700
9.89k
      } else if (RAW == '*') {
6701
301
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6702
301
    NEXT;
6703
9.59k
      } else if (RAW == '+') {
6704
371
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6705
371
    NEXT;
6706
9.22k
      } else {
6707
9.22k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6708
9.22k
      }
6709
10.3k
  }
6710
12.3k
  SKIP_BLANKS_PE;
6711
12.3k
  GROW;
6712
12.3k
    }
6713
9.34k
    if ((cur != NULL) && (last != NULL)) {
6714
5.21k
        cur->c2 = last;
6715
5.21k
  if (last != NULL)
6716
5.21k
      last->parent = cur;
6717
5.21k
    }
6718
9.34k
    if (ctxt->input->id != inputchk) {
6719
23
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6720
23
                       "Element content declaration doesn't start and stop in"
6721
23
                       " the same entity\n");
6722
23
    }
6723
9.34k
    NEXT;
6724
9.34k
    if (RAW == '?') {
6725
1.99k
  if (ret != NULL) {
6726
1.99k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6727
1.99k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6728
659
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6729
1.33k
      else
6730
1.33k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6731
1.99k
  }
6732
1.99k
  NEXT;
6733
7.34k
    } else if (RAW == '*') {
6734
3.91k
  if (ret != NULL) {
6735
3.91k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6736
3.91k
      cur = ret;
6737
      /*
6738
       * Some normalization:
6739
       * (a | b* | c?)* == (a | b | c)*
6740
       */
6741
15.6k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6742
11.7k
    if ((cur->c1 != NULL) &&
6743
11.7k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6744
11.7k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6745
647
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6746
11.7k
    if ((cur->c2 != NULL) &&
6747
11.7k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6748
11.7k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6749
908
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6750
11.7k
    cur = cur->c2;
6751
11.7k
      }
6752
3.91k
  }
6753
3.91k
  NEXT;
6754
3.91k
    } else if (RAW == '+') {
6755
1.44k
  if (ret != NULL) {
6756
1.44k
      int found = 0;
6757
6758
1.44k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6759
1.44k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6760
371
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6761
1.07k
      else
6762
1.07k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6763
      /*
6764
       * Some normalization:
6765
       * (a | b*)+ == (a | b)*
6766
       * (a | b?)+ == (a | b)*
6767
       */
6768
6.48k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6769
5.04k
    if ((cur->c1 != NULL) &&
6770
5.04k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6771
5.04k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6772
458
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6773
458
        found = 1;
6774
458
    }
6775
5.04k
    if ((cur->c2 != NULL) &&
6776
5.04k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6777
5.04k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6778
768
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6779
768
        found = 1;
6780
768
    }
6781
5.04k
    cur = cur->c2;
6782
5.04k
      }
6783
1.44k
      if (found)
6784
740
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6785
1.44k
  }
6786
1.44k
  NEXT;
6787
1.44k
    }
6788
9.34k
    return(ret);
6789
13.0k
}
6790
6791
/**
6792
 * xmlParseElementChildrenContentDecl:
6793
 * @ctxt:  an XML parser context
6794
 * @inputchk:  the input used for the current entity, needed for boundary checks
6795
 *
6796
 * DEPRECATED: Internal function, don't use.
6797
 *
6798
 * parse the declaration for a Mixed Element content
6799
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6800
 *
6801
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6802
 *
6803
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6804
 *
6805
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6806
 *
6807
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6808
 *
6809
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6810
 * TODO Parameter-entity replacement text must be properly nested
6811
 *  with parenthesized groups. That is to say, if either of the
6812
 *  opening or closing parentheses in a choice, seq, or Mixed
6813
 *  construct is contained in the replacement text for a parameter
6814
 *  entity, both must be contained in the same replacement text. For
6815
 *  interoperability, if a parameter-entity reference appears in a
6816
 *  choice, seq, or Mixed construct, its replacement text should not
6817
 *  be empty, and neither the first nor last non-blank character of
6818
 *  the replacement text should be a connector (| or ,).
6819
 *
6820
 * Returns the tree of xmlElementContentPtr describing the element
6821
 *          hierarchy.
6822
 */
6823
xmlElementContentPtr
6824
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6825
    /* stub left for API/ABI compat */
6826
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6827
0
}
6828
6829
/**
6830
 * xmlParseElementContentDecl:
6831
 * @ctxt:  an XML parser context
6832
 * @name:  the name of the element being defined.
6833
 * @result:  the Element Content pointer will be stored here if any
6834
 *
6835
 * DEPRECATED: Internal function, don't use.
6836
 *
6837
 * parse the declaration for an Element content either Mixed or Children,
6838
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6839
 *
6840
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6841
 *
6842
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6843
 */
6844
6845
int
6846
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6847
8.10k
                           xmlElementContentPtr *result) {
6848
6849
8.10k
    xmlElementContentPtr tree = NULL;
6850
8.10k
    int inputid = ctxt->input->id;
6851
8.10k
    int res;
6852
6853
8.10k
    *result = NULL;
6854
6855
8.10k
    if (RAW != '(') {
6856
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6857
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6858
0
  return(-1);
6859
0
    }
6860
8.10k
    NEXT;
6861
8.10k
    GROW;
6862
8.10k
    SKIP_BLANKS_PE;
6863
8.10k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6864
1.37k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6865
1.37k
  res = XML_ELEMENT_TYPE_MIXED;
6866
6.72k
    } else {
6867
6.72k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6868
6.72k
  res = XML_ELEMENT_TYPE_ELEMENT;
6869
6.72k
    }
6870
8.10k
    SKIP_BLANKS_PE;
6871
8.10k
    *result = tree;
6872
8.10k
    return(res);
6873
8.10k
}
6874
6875
/**
6876
 * xmlParseElementDecl:
6877
 * @ctxt:  an XML parser context
6878
 *
6879
 * DEPRECATED: Internal function, don't use.
6880
 *
6881
 * Parse an element declaration. Always consumes '<!'.
6882
 *
6883
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6884
 *
6885
 * [ VC: Unique Element Type Declaration ]
6886
 * No element type may be declared more than once
6887
 *
6888
 * Returns the type of the element, or -1 in case of error
6889
 */
6890
int
6891
10.0k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6892
10.0k
    const xmlChar *name;
6893
10.0k
    int ret = -1;
6894
10.0k
    xmlElementContentPtr content  = NULL;
6895
6896
10.0k
    if ((CUR != '<') || (NXT(1) != '!'))
6897
0
        return(ret);
6898
10.0k
    SKIP(2);
6899
6900
    /* GROW; done in the caller */
6901
10.0k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6902
9.99k
  int inputid = ctxt->input->id;
6903
6904
9.99k
  SKIP(7);
6905
9.99k
  if (SKIP_BLANKS_PE == 0) {
6906
550
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6907
550
               "Space required after 'ELEMENT'\n");
6908
550
      return(-1);
6909
550
  }
6910
9.44k
        name = xmlParseName(ctxt);
6911
9.44k
  if (name == NULL) {
6912
510
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6913
510
         "xmlParseElementDecl: no name for Element\n");
6914
510
      return(-1);
6915
510
  }
6916
8.93k
  if (SKIP_BLANKS_PE == 0) {
6917
1.23k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6918
1.23k
         "Space required after the element name\n");
6919
1.23k
  }
6920
8.93k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6921
457
      SKIP(5);
6922
      /*
6923
       * Element must always be empty.
6924
       */
6925
457
      ret = XML_ELEMENT_TYPE_EMPTY;
6926
8.47k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6927
8.47k
             (NXT(2) == 'Y')) {
6928
226
      SKIP(3);
6929
      /*
6930
       * Element is a generic container.
6931
       */
6932
226
      ret = XML_ELEMENT_TYPE_ANY;
6933
8.25k
  } else if (RAW == '(') {
6934
8.10k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6935
8.10k
  } else {
6936
      /*
6937
       * [ WFC: PEs in Internal Subset ] error handling.
6938
       */
6939
151
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6940
151
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6941
151
      return(-1);
6942
151
  }
6943
6944
8.78k
  SKIP_BLANKS_PE;
6945
6946
8.78k
  if (RAW != '>') {
6947
2.20k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6948
2.20k
      if (content != NULL) {
6949
226
    xmlFreeDocElementContent(ctxt->myDoc, content);
6950
226
      }
6951
6.58k
  } else {
6952
6.58k
      if (inputid != ctxt->input->id) {
6953
64
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6954
64
                               "Element declaration doesn't start and stop in"
6955
64
                               " the same entity\n");
6956
64
      }
6957
6958
6.58k
      NEXT;
6959
6.58k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6960
6.58k
    (ctxt->sax->elementDecl != NULL)) {
6961
2.12k
    if (content != NULL)
6962
1.81k
        content->parent = NULL;
6963
2.12k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6964
2.12k
                           content);
6965
2.12k
    if ((content != NULL) && (content->parent == NULL)) {
6966
        /*
6967
         * this is a trick: if xmlAddElementDecl is called,
6968
         * instead of copying the full tree it is plugged directly
6969
         * if called from the parser. Avoid duplicating the
6970
         * interfaces or change the API/ABI
6971
         */
6972
1.51k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6973
1.51k
    }
6974
4.45k
      } else if (content != NULL) {
6975
3.89k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6976
3.89k
      }
6977
6.58k
  }
6978
8.78k
    }
6979
8.81k
    return(ret);
6980
10.0k
}
6981
6982
/**
6983
 * xmlParseConditionalSections
6984
 * @ctxt:  an XML parser context
6985
 *
6986
 * Parse a conditional section. Always consumes '<!['.
6987
 *
6988
 * [61] conditionalSect ::= includeSect | ignoreSect
6989
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6990
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6991
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6992
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6993
 */
6994
6995
static void
6996
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6997
0
    int *inputIds = NULL;
6998
0
    size_t inputIdsSize = 0;
6999
0
    size_t depth = 0;
7000
7001
0
    while (PARSER_STOPPED(ctxt) == 0) {
7002
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7003
0
            int id = ctxt->input->id;
7004
7005
0
            SKIP(3);
7006
0
            SKIP_BLANKS_PE;
7007
7008
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
7009
0
                SKIP(7);
7010
0
                SKIP_BLANKS_PE;
7011
0
                if (RAW != '[') {
7012
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7013
0
                    xmlHaltParser(ctxt);
7014
0
                    goto error;
7015
0
                }
7016
0
                if (ctxt->input->id != id) {
7017
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7018
0
                                   "All markup of the conditional section is"
7019
0
                                   " not in the same entity\n");
7020
0
                }
7021
0
                NEXT;
7022
7023
0
                if (inputIdsSize <= depth) {
7024
0
                    int *tmp;
7025
7026
0
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
7027
0
                    tmp = (int *) xmlRealloc(inputIds,
7028
0
                            inputIdsSize * sizeof(int));
7029
0
                    if (tmp == NULL) {
7030
0
                        xmlErrMemory(ctxt);
7031
0
                        goto error;
7032
0
                    }
7033
0
                    inputIds = tmp;
7034
0
                }
7035
0
                inputIds[depth] = id;
7036
0
                depth++;
7037
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
7038
0
                size_t ignoreDepth = 0;
7039
7040
0
                SKIP(6);
7041
0
                SKIP_BLANKS_PE;
7042
0
                if (RAW != '[') {
7043
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7044
0
                    xmlHaltParser(ctxt);
7045
0
                    goto error;
7046
0
                }
7047
0
                if (ctxt->input->id != id) {
7048
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7049
0
                                   "All markup of the conditional section is"
7050
0
                                   " not in the same entity\n");
7051
0
                }
7052
0
                NEXT;
7053
7054
0
                while (PARSER_STOPPED(ctxt) == 0) {
7055
0
                    if (RAW == 0) {
7056
0
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
7057
0
                        goto error;
7058
0
                    }
7059
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7060
0
                        SKIP(3);
7061
0
                        ignoreDepth++;
7062
                        /* Check for integer overflow */
7063
0
                        if (ignoreDepth == 0) {
7064
0
                            xmlErrMemory(ctxt);
7065
0
                            goto error;
7066
0
                        }
7067
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
7068
0
                               (NXT(2) == '>')) {
7069
0
                        SKIP(3);
7070
0
                        if (ignoreDepth == 0)
7071
0
                            break;
7072
0
                        ignoreDepth--;
7073
0
                    } else {
7074
0
                        NEXT;
7075
0
                    }
7076
0
                }
7077
7078
0
                if (ctxt->input->id != id) {
7079
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7080
0
                                   "All markup of the conditional section is"
7081
0
                                   " not in the same entity\n");
7082
0
                }
7083
0
            } else {
7084
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
7085
0
                xmlHaltParser(ctxt);
7086
0
                goto error;
7087
0
            }
7088
0
        } else if ((depth > 0) &&
7089
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
7090
0
            depth--;
7091
0
            if (ctxt->input->id != inputIds[depth]) {
7092
0
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7093
0
                               "All markup of the conditional section is not"
7094
0
                               " in the same entity\n");
7095
0
            }
7096
0
            SKIP(3);
7097
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7098
0
            xmlParseMarkupDecl(ctxt);
7099
0
        } else {
7100
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7101
0
            xmlHaltParser(ctxt);
7102
0
            goto error;
7103
0
        }
7104
7105
0
        if (depth == 0)
7106
0
            break;
7107
7108
0
        SKIP_BLANKS_PE;
7109
0
        SHRINK;
7110
0
        GROW;
7111
0
    }
7112
7113
0
error:
7114
0
    xmlFree(inputIds);
7115
0
}
7116
7117
/**
7118
 * xmlParseMarkupDecl:
7119
 * @ctxt:  an XML parser context
7120
 *
7121
 * DEPRECATED: Internal function, don't use.
7122
 *
7123
 * Parse markup declarations. Always consumes '<!' or '<?'.
7124
 *
7125
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7126
 *                     NotationDecl | PI | Comment
7127
 *
7128
 * [ VC: Proper Declaration/PE Nesting ]
7129
 * Parameter-entity replacement text must be properly nested with
7130
 * markup declarations. That is to say, if either the first character
7131
 * or the last character of a markup declaration (markupdecl above) is
7132
 * contained in the replacement text for a parameter-entity reference,
7133
 * both must be contained in the same replacement text.
7134
 *
7135
 * [ WFC: PEs in Internal Subset ]
7136
 * In the internal DTD subset, parameter-entity references can occur
7137
 * only where markup declarations can occur, not within markup declarations.
7138
 * (This does not apply to references that occur in external parameter
7139
 * entities or to the external subset.)
7140
 */
7141
void
7142
93.1k
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7143
93.1k
    GROW;
7144
93.1k
    if (CUR == '<') {
7145
93.1k
        if (NXT(1) == '!') {
7146
84.5k
      switch (NXT(2)) {
7147
64.2k
          case 'E':
7148
64.2k
        if (NXT(3) == 'L')
7149
10.0k
      xmlParseElementDecl(ctxt);
7150
54.2k
        else if (NXT(3) == 'N')
7151
54.1k
      xmlParseEntityDecl(ctxt);
7152
63
                    else
7153
63
                        SKIP(2);
7154
64.2k
        break;
7155
11.9k
          case 'A':
7156
11.9k
        xmlParseAttributeListDecl(ctxt);
7157
11.9k
        break;
7158
3.30k
          case 'N':
7159
3.30k
        xmlParseNotationDecl(ctxt);
7160
3.30k
        break;
7161
4.39k
          case '-':
7162
4.39k
        xmlParseComment(ctxt);
7163
4.39k
        break;
7164
595
    default:
7165
        /* there is an error but it will be detected later */
7166
595
                    SKIP(2);
7167
595
        break;
7168
84.5k
      }
7169
84.5k
  } else if (NXT(1) == '?') {
7170
8.68k
      xmlParsePI(ctxt);
7171
8.68k
  }
7172
93.1k
    }
7173
93.1k
}
7174
7175
/**
7176
 * xmlParseTextDecl:
7177
 * @ctxt:  an XML parser context
7178
 *
7179
 * DEPRECATED: Internal function, don't use.
7180
 *
7181
 * parse an XML declaration header for external entities
7182
 *
7183
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7184
 */
7185
7186
void
7187
0
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7188
0
    xmlChar *version;
7189
7190
    /*
7191
     * We know that '<?xml' is here.
7192
     */
7193
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7194
0
  SKIP(5);
7195
0
    } else {
7196
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7197
0
  return;
7198
0
    }
7199
7200
0
    if (SKIP_BLANKS == 0) {
7201
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7202
0
           "Space needed after '<?xml'\n");
7203
0
    }
7204
7205
    /*
7206
     * We may have the VersionInfo here.
7207
     */
7208
0
    version = xmlParseVersionInfo(ctxt);
7209
0
    if (version == NULL) {
7210
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7211
0
        if (version == NULL) {
7212
0
            xmlErrMemory(ctxt);
7213
0
            return;
7214
0
        }
7215
0
    } else {
7216
0
  if (SKIP_BLANKS == 0) {
7217
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7218
0
               "Space needed here\n");
7219
0
  }
7220
0
    }
7221
0
    ctxt->input->version = version;
7222
7223
    /*
7224
     * We must have the encoding declaration
7225
     */
7226
0
    xmlParseEncodingDecl(ctxt);
7227
7228
0
    SKIP_BLANKS;
7229
0
    if ((RAW == '?') && (NXT(1) == '>')) {
7230
0
        SKIP(2);
7231
0
    } else if (RAW == '>') {
7232
        /* Deprecated old WD ... */
7233
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7234
0
  NEXT;
7235
0
    } else {
7236
0
        int c;
7237
7238
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7239
0
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7240
0
            NEXT;
7241
0
            if (c == '>')
7242
0
                break;
7243
0
        }
7244
0
    }
7245
0
}
7246
7247
/**
7248
 * xmlParseExternalSubset:
7249
 * @ctxt:  an XML parser context
7250
 * @ExternalID: the external identifier
7251
 * @SystemID: the system identifier (or URL)
7252
 *
7253
 * parse Markup declarations from an external subset
7254
 *
7255
 * [30] extSubset ::= textDecl? extSubsetDecl
7256
 *
7257
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7258
 */
7259
void
7260
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7261
0
                       const xmlChar *SystemID) {
7262
0
    int oldInputNr;
7263
7264
0
    xmlCtxtInitializeLate(ctxt);
7265
7266
0
    xmlDetectEncoding(ctxt);
7267
7268
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7269
0
  xmlParseTextDecl(ctxt);
7270
0
    }
7271
0
    if (ctxt->myDoc == NULL) {
7272
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7273
0
  if (ctxt->myDoc == NULL) {
7274
0
      xmlErrMemory(ctxt);
7275
0
      return;
7276
0
  }
7277
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7278
0
    }
7279
0
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) &&
7280
0
        (xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID) == NULL)) {
7281
0
        xmlErrMemory(ctxt);
7282
0
    }
7283
7284
0
    ctxt->inSubset = 2;
7285
0
    oldInputNr = ctxt->inputNr;
7286
7287
0
    SKIP_BLANKS_PE;
7288
0
    while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) &&
7289
0
           (!PARSER_STOPPED(ctxt))) {
7290
0
  GROW;
7291
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7292
0
            xmlParseConditionalSections(ctxt);
7293
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7294
0
            xmlParseMarkupDecl(ctxt);
7295
0
        } else {
7296
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7297
0
            xmlHaltParser(ctxt);
7298
0
            return;
7299
0
        }
7300
0
        SKIP_BLANKS_PE;
7301
0
        SHRINK;
7302
0
    }
7303
7304
0
    while (ctxt->inputNr > oldInputNr)
7305
0
        xmlPopPE(ctxt);
7306
7307
0
    if (RAW != 0) {
7308
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7309
0
    }
7310
0
}
7311
7312
/**
7313
 * xmlParseReference:
7314
 * @ctxt:  an XML parser context
7315
 *
7316
 * DEPRECATED: Internal function, don't use.
7317
 *
7318
 * parse and handle entity references in content, depending on the SAX
7319
 * interface, this may end-up in a call to character() if this is a
7320
 * CharRef, a predefined entity, if there is no reference() callback.
7321
 * or if the parser was asked to switch to that mode.
7322
 *
7323
 * Always consumes '&'.
7324
 *
7325
 * [67] Reference ::= EntityRef | CharRef
7326
 */
7327
void
7328
307k
xmlParseReference(xmlParserCtxtPtr ctxt) {
7329
307k
    xmlEntityPtr ent = NULL;
7330
307k
    const xmlChar *name;
7331
307k
    xmlChar *val;
7332
7333
307k
    if (RAW != '&')
7334
0
        return;
7335
7336
    /*
7337
     * Simple case of a CharRef
7338
     */
7339
307k
    if (NXT(1) == '#') {
7340
90.6k
  int i = 0;
7341
90.6k
  xmlChar out[16];
7342
90.6k
  int value = xmlParseCharRef(ctxt);
7343
7344
90.6k
  if (value == 0)
7345
59.9k
      return;
7346
7347
        /*
7348
         * Just encode the value in UTF-8
7349
         */
7350
30.6k
        COPY_BUF(out, i, value);
7351
30.6k
        out[i] = 0;
7352
30.6k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7353
30.6k
            (!ctxt->disableSAX))
7354
1.07k
            ctxt->sax->characters(ctxt->userData, out, i);
7355
30.6k
  return;
7356
90.6k
    }
7357
7358
    /*
7359
     * We are seeing an entity reference
7360
     */
7361
216k
    name = xmlParseEntityRefInternal(ctxt);
7362
216k
    if (name == NULL)
7363
124k
        return;
7364
92.5k
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7365
92.5k
    if (ent == NULL) {
7366
        /*
7367
         * Create a reference for undeclared entities.
7368
         */
7369
47.0k
        if ((ctxt->replaceEntities == 0) &&
7370
47.0k
            (ctxt->sax != NULL) &&
7371
47.0k
            (ctxt->disableSAX == 0) &&
7372
47.0k
            (ctxt->sax->reference != NULL)) {
7373
448
            ctxt->sax->reference(ctxt->userData, name);
7374
448
        }
7375
47.0k
        return;
7376
47.0k
    }
7377
45.5k
    if (!ctxt->wellFormed)
7378
42.9k
  return;
7379
7380
    /* special case of predefined entities */
7381
2.58k
    if ((ent->name == NULL) ||
7382
2.58k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7383
316
  val = ent->content;
7384
316
  if (val == NULL) return;
7385
  /*
7386
   * inline the entity.
7387
   */
7388
316
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7389
316
      (!ctxt->disableSAX))
7390
316
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7391
316
  return;
7392
316
    }
7393
7394
    /*
7395
     * Some users try to parse entities on their own and used to set
7396
     * the renamed "checked" member. Fix the flags to cover this
7397
     * case.
7398
     */
7399
2.26k
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7400
0
        ent->flags |= XML_ENT_PARSED;
7401
7402
    /*
7403
     * The first reference to the entity trigger a parsing phase
7404
     * where the ent->children is filled with the result from
7405
     * the parsing.
7406
     * Note: external parsed entities will not be loaded, it is not
7407
     * required for a non-validating parser, unless the parsing option
7408
     * of validating, or substituting entities were given. Doing so is
7409
     * far more secure as the parser will only process data coming from
7410
     * the document entity by default.
7411
     *
7412
     * FIXME: This doesn't work correctly since entities can be
7413
     * expanded with different namespace declarations in scope.
7414
     * For example:
7415
     *
7416
     * <!DOCTYPE doc [
7417
     *   <!ENTITY ent "<ns:elem/>">
7418
     * ]>
7419
     * <doc>
7420
     *   <decl1 xmlns:ns="urn:ns1">
7421
     *     &ent;
7422
     *   </decl1>
7423
     *   <decl2 xmlns:ns="urn:ns2">
7424
     *     &ent;
7425
     *   </decl2>
7426
     * </doc>
7427
     *
7428
     * Proposed fix:
7429
     *
7430
     * - Ignore current namespace declarations when parsing the
7431
     *   entity. If a prefix can't be resolved, don't report an error
7432
     *   but mark it as unresolved.
7433
     * - Try to resolve these prefixes when expanding the entity.
7434
     *   This will require a specialized version of xmlStaticCopyNode
7435
     *   which can also make use of the namespace hash table to avoid
7436
     *   quadratic behavior.
7437
     *
7438
     * Alternatively, we could simply reparse the entity on each
7439
     * expansion like we already do with custom SAX callbacks.
7440
     * External entity content should be cached in this case.
7441
     */
7442
2.26k
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7443
2.26k
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7444
448
         ((ctxt->replaceEntities) ||
7445
1.81k
          (ctxt->validate)))) {
7446
1.81k
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7447
556
            xmlCtxtParseEntity(ctxt, ent);
7448
1.26k
        } else if (ent->children == NULL) {
7449
            /*
7450
             * Probably running in SAX mode and the callbacks don't
7451
             * build the entity content. Parse the entity again.
7452
             *
7453
             * This will also be triggered in normal tree builder mode
7454
             * if an entity happens to be empty, causing unnecessary
7455
             * reloads. It's hard to come up with a reliable check in
7456
             * which mode we're running.
7457
             */
7458
328
            xmlCtxtParseEntity(ctxt, ent);
7459
328
        }
7460
1.81k
    }
7461
7462
    /*
7463
     * We also check for amplification if entities aren't substituted.
7464
     * They might be expanded later.
7465
     */
7466
2.26k
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7467
10
        return;
7468
7469
2.25k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7470
425
        return;
7471
7472
1.83k
    if (ctxt->replaceEntities == 0) {
7473
  /*
7474
   * Create a reference
7475
   */
7476
1.83k
        if (ctxt->sax->reference != NULL)
7477
1.83k
      ctxt->sax->reference(ctxt->userData, ent->name);
7478
1.83k
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7479
0
        xmlNodePtr copy, cur;
7480
7481
        /*
7482
         * Seems we are generating the DOM content, copy the tree
7483
   */
7484
0
        cur = ent->children;
7485
7486
        /*
7487
         * Handle first text node with SAX to coalesce text efficiently
7488
         */
7489
0
        if ((cur->type == XML_TEXT_NODE) ||
7490
0
            (cur->type == XML_CDATA_SECTION_NODE)) {
7491
0
            int len = xmlStrlen(cur->content);
7492
7493
0
            if ((cur->type == XML_TEXT_NODE) ||
7494
0
                (ctxt->sax->cdataBlock == NULL)) {
7495
0
                if (ctxt->sax->characters != NULL)
7496
0
                    ctxt->sax->characters(ctxt, cur->content, len);
7497
0
            } else {
7498
0
                if (ctxt->sax->cdataBlock != NULL)
7499
0
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7500
0
            }
7501
7502
0
            cur = cur->next;
7503
0
        }
7504
7505
0
        while (cur != NULL) {
7506
0
            xmlNodePtr last;
7507
7508
            /*
7509
             * Handle last text node with SAX to coalesce text efficiently
7510
             */
7511
0
            if ((cur->next == NULL) &&
7512
0
                ((cur->type == XML_TEXT_NODE) ||
7513
0
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7514
0
                int len = xmlStrlen(cur->content);
7515
7516
0
                if ((cur->type == XML_TEXT_NODE) ||
7517
0
                    (ctxt->sax->cdataBlock == NULL)) {
7518
0
                    if (ctxt->sax->characters != NULL)
7519
0
                        ctxt->sax->characters(ctxt, cur->content, len);
7520
0
                } else {
7521
0
                    if (ctxt->sax->cdataBlock != NULL)
7522
0
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7523
0
                }
7524
7525
0
                break;
7526
0
            }
7527
7528
            /*
7529
             * Reset coalesce buffer stats only for non-text nodes.
7530
             */
7531
0
            ctxt->nodemem = 0;
7532
0
            ctxt->nodelen = 0;
7533
7534
0
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7535
7536
0
            if (copy == NULL) {
7537
0
                xmlErrMemory(ctxt);
7538
0
                break;
7539
0
            }
7540
7541
0
            if (ctxt->parseMode == XML_PARSE_READER) {
7542
                /* Needed for reader */
7543
0
                copy->extra = cur->extra;
7544
                /* Maybe needed for reader */
7545
0
                copy->_private = cur->_private;
7546
0
            }
7547
7548
0
            copy->parent = ctxt->node;
7549
0
            last = ctxt->node->last;
7550
0
            if (last == NULL) {
7551
0
                ctxt->node->children = copy;
7552
0
            } else {
7553
0
                last->next = copy;
7554
0
                copy->prev = last;
7555
0
            }
7556
0
            ctxt->node->last = copy;
7557
7558
0
            cur = cur->next;
7559
0
        }
7560
0
    }
7561
1.83k
}
7562
7563
static void
7564
156k
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7565
    /*
7566
     * [ WFC: Entity Declared ]
7567
     * In a document without any DTD, a document with only an
7568
     * internal DTD subset which contains no parameter entity
7569
     * references, or a document with "standalone='yes'", the
7570
     * Name given in the entity reference must match that in an
7571
     * entity declaration, except that well-formed documents
7572
     * need not declare any of the following entities: amp, lt,
7573
     * gt, apos, quot.
7574
     * The declaration of a parameter entity must precede any
7575
     * reference to it.
7576
     * Similarly, the declaration of a general entity must
7577
     * precede any reference to it which appears in a default
7578
     * value in an attribute-list declaration. Note that if
7579
     * entities are declared in the external subset or in
7580
     * external parameter entities, a non-validating processor
7581
     * is not obligated to read and process their declarations;
7582
     * for such documents, the rule that an entity must be
7583
     * declared is a well-formedness constraint only if
7584
     * standalone='yes'.
7585
     */
7586
156k
    if ((ctxt->standalone == 1) ||
7587
156k
        ((ctxt->hasExternalSubset == 0) &&
7588
155k
         (ctxt->hasPErefs == 0))) {
7589
135k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7590
135k
                          "Entity '%s' not defined\n", name);
7591
135k
    } else if (ctxt->validate) {
7592
        /*
7593
         * [ VC: Entity Declared ]
7594
         * In a document with an external subset or external
7595
         * parameter entities with "standalone='no'", ...
7596
         * ... The declaration of a parameter entity must
7597
         * precede any reference to it...
7598
         */
7599
0
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7600
0
                         "Entity '%s' not defined\n", name, NULL);
7601
20.7k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7602
20.7k
               ((ctxt->replaceEntities) &&
7603
20.7k
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7604
        /*
7605
         * Also raise a non-fatal error
7606
         *
7607
         * - if the external subset is loaded and all entity declarations
7608
         *   should be available, or
7609
         * - entity substition was requested without restricting
7610
         *   external entity access.
7611
         */
7612
0
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7613
0
                     "Entity '%s' not defined\n", name);
7614
20.7k
    } else {
7615
20.7k
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7616
20.7k
                      "Entity '%s' not defined\n", name, NULL);
7617
20.7k
    }
7618
7619
156k
    ctxt->valid = 0;
7620
156k
}
7621
7622
static xmlEntityPtr
7623
2.28M
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7624
2.28M
    xmlEntityPtr ent = NULL;
7625
7626
    /*
7627
     * Predefined entities override any extra definition
7628
     */
7629
2.28M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7630
2.28M
        ent = xmlGetPredefinedEntity(name);
7631
2.28M
        if (ent != NULL)
7632
1.70M
            return(ent);
7633
2.28M
    }
7634
7635
    /*
7636
     * Ask first SAX for entity resolution, otherwise try the
7637
     * entities which may have stored in the parser context.
7638
     */
7639
577k
    if (ctxt->sax != NULL) {
7640
577k
  if (ctxt->sax->getEntity != NULL)
7641
577k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7642
577k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7643
577k
      (ctxt->options & XML_PARSE_OLDSAX))
7644
0
      ent = xmlGetPredefinedEntity(name);
7645
577k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7646
577k
      (ctxt->userData==ctxt)) {
7647
3.29k
      ent = xmlSAX2GetEntity(ctxt, name);
7648
3.29k
  }
7649
577k
    }
7650
7651
577k
    if (ent == NULL) {
7652
146k
        xmlHandleUndeclaredEntity(ctxt, name);
7653
146k
    }
7654
7655
    /*
7656
     * [ WFC: Parsed Entity ]
7657
     * An entity reference must not contain the name of an
7658
     * unparsed entity
7659
     */
7660
430k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7661
276
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7662
276
     "Entity reference to unparsed entity %s\n", name);
7663
276
        ent = NULL;
7664
276
    }
7665
7666
    /*
7667
     * [ WFC: No External Entity References ]
7668
     * Attribute values cannot contain direct or indirect
7669
     * entity references to external entities.
7670
     */
7671
430k
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7672
1.57k
        if (inAttr) {
7673
417
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7674
417
                 "Attribute references external entity '%s'\n", name);
7675
417
            ent = NULL;
7676
417
        }
7677
1.57k
    }
7678
7679
577k
    return(ent);
7680
2.28M
}
7681
7682
/**
7683
 * xmlParseEntityRefInternal:
7684
 * @ctxt:  an XML parser context
7685
 * @inAttr:  whether we are in an attribute value
7686
 *
7687
 * Parse an entity reference. Always consumes '&'.
7688
 *
7689
 * [68] EntityRef ::= '&' Name ';'
7690
 *
7691
 * Returns the name, or NULL in case of error.
7692
 */
7693
static const xmlChar *
7694
863k
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7695
863k
    const xmlChar *name;
7696
7697
863k
    GROW;
7698
7699
863k
    if (RAW != '&')
7700
0
        return(NULL);
7701
863k
    NEXT;
7702
863k
    name = xmlParseName(ctxt);
7703
863k
    if (name == NULL) {
7704
116k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7705
116k
           "xmlParseEntityRef: no name\n");
7706
116k
        return(NULL);
7707
116k
    }
7708
746k
    if (RAW != ';') {
7709
59.4k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7710
59.4k
  return(NULL);
7711
59.4k
    }
7712
687k
    NEXT;
7713
7714
687k
    return(name);
7715
746k
}
7716
7717
/**
7718
 * xmlParseEntityRef:
7719
 * @ctxt:  an XML parser context
7720
 *
7721
 * DEPRECATED: Internal function, don't use.
7722
 *
7723
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7724
 */
7725
xmlEntityPtr
7726
0
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7727
0
    const xmlChar *name;
7728
7729
0
    if (ctxt == NULL)
7730
0
        return(NULL);
7731
7732
0
    name = xmlParseEntityRefInternal(ctxt);
7733
0
    if (name == NULL)
7734
0
        return(NULL);
7735
7736
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7737
0
}
7738
7739
/**
7740
 * xmlParseStringEntityRef:
7741
 * @ctxt:  an XML parser context
7742
 * @str:  a pointer to an index in the string
7743
 *
7744
 * parse ENTITY references declarations, but this version parses it from
7745
 * a string value.
7746
 *
7747
 * [68] EntityRef ::= '&' Name ';'
7748
 *
7749
 * [ WFC: Entity Declared ]
7750
 * In a document without any DTD, a document with only an internal DTD
7751
 * subset which contains no parameter entity references, or a document
7752
 * with "standalone='yes'", the Name given in the entity reference
7753
 * must match that in an entity declaration, except that well-formed
7754
 * documents need not declare any of the following entities: amp, lt,
7755
 * gt, apos, quot.  The declaration of a parameter entity must precede
7756
 * any reference to it.  Similarly, the declaration of a general entity
7757
 * must precede any reference to it which appears in a default value in an
7758
 * attribute-list declaration. Note that if entities are declared in the
7759
 * external subset or in external parameter entities, a non-validating
7760
 * processor is not obligated to read and process their declarations;
7761
 * for such documents, the rule that an entity must be declared is a
7762
 * well-formedness constraint only if standalone='yes'.
7763
 *
7764
 * [ WFC: Parsed Entity ]
7765
 * An entity reference must not contain the name of an unparsed entity
7766
 *
7767
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7768
 * is updated to the current location in the string.
7769
 */
7770
static xmlChar *
7771
1.59M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7772
1.59M
    xmlChar *name;
7773
1.59M
    const xmlChar *ptr;
7774
1.59M
    xmlChar cur;
7775
7776
1.59M
    if ((str == NULL) || (*str == NULL))
7777
0
        return(NULL);
7778
1.59M
    ptr = *str;
7779
1.59M
    cur = *ptr;
7780
1.59M
    if (cur != '&')
7781
0
  return(NULL);
7782
7783
1.59M
    ptr++;
7784
1.59M
    name = xmlParseStringName(ctxt, &ptr);
7785
1.59M
    if (name == NULL) {
7786
7
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7787
7
           "xmlParseStringEntityRef: no name\n");
7788
7
  *str = ptr;
7789
7
  return(NULL);
7790
7
    }
7791
1.59M
    if (*ptr != ';') {
7792
5
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7793
5
        xmlFree(name);
7794
5
  *str = ptr;
7795
5
  return(NULL);
7796
5
    }
7797
1.59M
    ptr++;
7798
7799
1.59M
    *str = ptr;
7800
1.59M
    return(name);
7801
1.59M
}
7802
7803
/**
7804
 * xmlParsePEReference:
7805
 * @ctxt:  an XML parser context
7806
 *
7807
 * DEPRECATED: Internal function, don't use.
7808
 *
7809
 * Parse a parameter entity reference. Always consumes '%'.
7810
 *
7811
 * The entity content is handled directly by pushing it's content as
7812
 * a new input stream.
7813
 *
7814
 * [69] PEReference ::= '%' Name ';'
7815
 *
7816
 * [ WFC: No Recursion ]
7817
 * A parsed entity must not contain a recursive
7818
 * reference to itself, either directly or indirectly.
7819
 *
7820
 * [ WFC: Entity Declared ]
7821
 * In a document without any DTD, a document with only an internal DTD
7822
 * subset which contains no parameter entity references, or a document
7823
 * with "standalone='yes'", ...  ... The declaration of a parameter
7824
 * entity must precede any reference to it...
7825
 *
7826
 * [ VC: Entity Declared ]
7827
 * In a document with an external subset or external parameter entities
7828
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7829
 * must precede any reference to it...
7830
 *
7831
 * [ WFC: In DTD ]
7832
 * Parameter-entity references may only appear in the DTD.
7833
 * NOTE: misleading but this is handled.
7834
 */
7835
void
7836
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7837
32.8k
{
7838
32.8k
    const xmlChar *name;
7839
32.8k
    xmlEntityPtr entity = NULL;
7840
32.8k
    xmlParserInputPtr input;
7841
7842
32.8k
    if (RAW != '%')
7843
0
        return;
7844
32.8k
    NEXT;
7845
32.8k
    name = xmlParseName(ctxt);
7846
32.8k
    if (name == NULL) {
7847
4.61k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7848
4.61k
  return;
7849
4.61k
    }
7850
28.2k
    if (RAW != ';') {
7851
1.93k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7852
1.93k
        return;
7853
1.93k
    }
7854
7855
26.3k
    NEXT;
7856
7857
    /* Must be set before xmlHandleUndeclaredEntity */
7858
26.3k
    ctxt->hasPErefs = 1;
7859
7860
    /*
7861
     * Request the entity from SAX
7862
     */
7863
26.3k
    if ((ctxt->sax != NULL) &&
7864
26.3k
  (ctxt->sax->getParameterEntity != NULL))
7865
26.3k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7866
7867
26.3k
    if (entity == NULL) {
7868
7.95k
        xmlHandleUndeclaredEntity(ctxt, name);
7869
18.3k
    } else {
7870
  /*
7871
   * Internal checking in case the entity quest barfed
7872
   */
7873
18.3k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7874
18.3k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7875
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7876
0
      "Internal: %%%s; is not a parameter entity\n",
7877
0
        name, NULL);
7878
18.3k
  } else {
7879
18.3k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7880
18.3k
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7881
681
     ((ctxt->loadsubset == 0) &&
7882
681
      (ctxt->replaceEntities == 0) &&
7883
681
      (ctxt->validate == 0))))
7884
681
    return;
7885
7886
17.6k
            if (entity->flags & XML_ENT_EXPANDING) {
7887
2
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7888
2
                xmlHaltParser(ctxt);
7889
2
                return;
7890
2
            }
7891
7892
17.6k
      input = xmlNewEntityInputStream(ctxt, entity);
7893
17.6k
      if (xmlPushInput(ctxt, input) < 0) {
7894
0
                xmlFreeInputStream(input);
7895
0
    return;
7896
0
            }
7897
7898
17.6k
            entity->flags |= XML_ENT_EXPANDING;
7899
7900
17.6k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7901
0
                xmlDetectEncoding(ctxt);
7902
7903
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7904
0
                    (IS_BLANK_CH(NXT(5)))) {
7905
0
                    xmlParseTextDecl(ctxt);
7906
0
                }
7907
0
            }
7908
17.6k
  }
7909
18.3k
    }
7910
26.3k
}
7911
7912
/**
7913
 * xmlLoadEntityContent:
7914
 * @ctxt:  an XML parser context
7915
 * @entity: an unloaded system entity
7916
 *
7917
 * Load the content of an entity.
7918
 *
7919
 * Returns 0 in case of success and -1 in case of failure
7920
 */
7921
static int
7922
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7923
0
    xmlParserInputPtr oldinput, input = NULL;
7924
0
    xmlParserInputPtr *oldinputTab;
7925
0
    const xmlChar *oldencoding;
7926
0
    xmlChar *content = NULL;
7927
0
    xmlResourceType rtype;
7928
0
    size_t length, i;
7929
0
    int oldinputNr, oldinputMax;
7930
0
    int ret = -1;
7931
0
    int res;
7932
7933
0
    if ((ctxt == NULL) || (entity == NULL) ||
7934
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7935
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7936
0
  (entity->content != NULL)) {
7937
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7938
0
              "xmlLoadEntityContent parameter error");
7939
0
        return(-1);
7940
0
    }
7941
7942
0
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7943
0
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7944
0
    else
7945
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7946
7947
0
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7948
0
                            (char *) entity->ExternalID, rtype);
7949
0
    if (input == NULL)
7950
0
        return(-1);
7951
7952
0
    oldinput = ctxt->input;
7953
0
    oldinputNr = ctxt->inputNr;
7954
0
    oldinputMax = ctxt->inputMax;
7955
0
    oldinputTab = ctxt->inputTab;
7956
0
    oldencoding = ctxt->encoding;
7957
7958
0
    ctxt->input = NULL;
7959
0
    ctxt->inputNr = 0;
7960
0
    ctxt->inputMax = 1;
7961
0
    ctxt->encoding = NULL;
7962
0
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7963
0
    if (ctxt->inputTab == NULL) {
7964
0
        xmlErrMemory(ctxt);
7965
0
        xmlFreeInputStream(input);
7966
0
        goto error;
7967
0
    }
7968
7969
0
    xmlBufResetInput(input->buf->buffer, input);
7970
7971
0
    if (inputPush(ctxt, input) < 0) {
7972
0
        xmlFreeInputStream(input);
7973
0
        goto error;
7974
0
    }
7975
7976
0
    xmlDetectEncoding(ctxt);
7977
7978
    /*
7979
     * Parse a possible text declaration first
7980
     */
7981
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7982
0
  xmlParseTextDecl(ctxt);
7983
        /*
7984
         * An XML-1.0 document can't reference an entity not XML-1.0
7985
         */
7986
0
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7987
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7988
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7989
0
                           "Version mismatch between document and entity\n");
7990
0
        }
7991
0
    }
7992
7993
0
    length = input->cur - input->base;
7994
0
    xmlBufShrink(input->buf->buffer, length);
7995
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7996
7997
0
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7998
0
        ;
7999
8000
0
    xmlBufResetInput(input->buf->buffer, input);
8001
8002
0
    if (res < 0) {
8003
0
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
8004
0
        goto error;
8005
0
    }
8006
8007
0
    length = xmlBufUse(input->buf->buffer);
8008
0
    if (length > INT_MAX) {
8009
0
        xmlErrMemory(ctxt);
8010
0
        goto error;
8011
0
    }
8012
8013
0
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
8014
0
    if (content == NULL) {
8015
0
        xmlErrMemory(ctxt);
8016
0
        goto error;
8017
0
    }
8018
8019
0
    for (i = 0; i < length; ) {
8020
0
        int clen = length - i;
8021
0
        int c = xmlGetUTF8Char(content + i, &clen);
8022
8023
0
        if ((c < 0) || (!IS_CHAR(c))) {
8024
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8025
0
                              "xmlLoadEntityContent: invalid char value %d\n",
8026
0
                              content[i]);
8027
0
            goto error;
8028
0
        }
8029
0
        i += clen;
8030
0
    }
8031
8032
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
8033
0
    entity->content = content;
8034
0
    entity->length = length;
8035
0
    content = NULL;
8036
0
    ret = 0;
8037
8038
0
error:
8039
0
    while (ctxt->inputNr > 0)
8040
0
        xmlFreeInputStream(inputPop(ctxt));
8041
0
    xmlFree(ctxt->inputTab);
8042
0
    xmlFree((xmlChar *) ctxt->encoding);
8043
8044
0
    ctxt->input = oldinput;
8045
0
    ctxt->inputNr = oldinputNr;
8046
0
    ctxt->inputMax = oldinputMax;
8047
0
    ctxt->inputTab = oldinputTab;
8048
0
    ctxt->encoding = oldencoding;
8049
8050
0
    xmlFree(content);
8051
8052
0
    return(ret);
8053
0
}
8054
8055
/**
8056
 * xmlParseStringPEReference:
8057
 * @ctxt:  an XML parser context
8058
 * @str:  a pointer to an index in the string
8059
 *
8060
 * parse PEReference declarations
8061
 *
8062
 * [69] PEReference ::= '%' Name ';'
8063
 *
8064
 * [ WFC: No Recursion ]
8065
 * A parsed entity must not contain a recursive
8066
 * reference to itself, either directly or indirectly.
8067
 *
8068
 * [ WFC: Entity Declared ]
8069
 * In a document without any DTD, a document with only an internal DTD
8070
 * subset which contains no parameter entity references, or a document
8071
 * with "standalone='yes'", ...  ... The declaration of a parameter
8072
 * entity must precede any reference to it...
8073
 *
8074
 * [ VC: Entity Declared ]
8075
 * In a document with an external subset or external parameter entities
8076
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8077
 * must precede any reference to it...
8078
 *
8079
 * [ WFC: In DTD ]
8080
 * Parameter-entity references may only appear in the DTD.
8081
 * NOTE: misleading but this is handled.
8082
 *
8083
 * Returns the string of the entity content.
8084
 *         str is updated to the current value of the index
8085
 */
8086
static xmlEntityPtr
8087
4.15k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8088
4.15k
    const xmlChar *ptr;
8089
4.15k
    xmlChar cur;
8090
4.15k
    xmlChar *name;
8091
4.15k
    xmlEntityPtr entity = NULL;
8092
8093
4.15k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8094
4.15k
    ptr = *str;
8095
4.15k
    cur = *ptr;
8096
4.15k
    if (cur != '%')
8097
0
        return(NULL);
8098
4.15k
    ptr++;
8099
4.15k
    name = xmlParseStringName(ctxt, &ptr);
8100
4.15k
    if (name == NULL) {
8101
958
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8102
958
           "xmlParseStringPEReference: no name\n");
8103
958
  *str = ptr;
8104
958
  return(NULL);
8105
958
    }
8106
3.19k
    cur = *ptr;
8107
3.19k
    if (cur != ';') {
8108
716
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8109
716
  xmlFree(name);
8110
716
  *str = ptr;
8111
716
  return(NULL);
8112
716
    }
8113
2.47k
    ptr++;
8114
8115
    /* Must be set before xmlHandleUndeclaredEntity */
8116
2.47k
    ctxt->hasPErefs = 1;
8117
8118
    /*
8119
     * Request the entity from SAX
8120
     */
8121
2.47k
    if ((ctxt->sax != NULL) &&
8122
2.47k
  (ctxt->sax->getParameterEntity != NULL))
8123
2.47k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8124
8125
2.47k
    if (entity == NULL) {
8126
1.83k
        xmlHandleUndeclaredEntity(ctxt, name);
8127
1.83k
    } else {
8128
  /*
8129
   * Internal checking in case the entity quest barfed
8130
   */
8131
642
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8132
642
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8133
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8134
0
        "%%%s; is not a parameter entity\n",
8135
0
        name, NULL);
8136
0
  }
8137
642
    }
8138
8139
2.47k
    xmlFree(name);
8140
2.47k
    *str = ptr;
8141
2.47k
    return(entity);
8142
3.19k
}
8143
8144
/**
8145
 * xmlParseDocTypeDecl:
8146
 * @ctxt:  an XML parser context
8147
 *
8148
 * DEPRECATED: Internal function, don't use.
8149
 *
8150
 * parse a DOCTYPE declaration
8151
 *
8152
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8153
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8154
 *
8155
 * [ VC: Root Element Type ]
8156
 * The Name in the document type declaration must match the element
8157
 * type of the root element.
8158
 */
8159
8160
void
8161
6.97k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8162
6.97k
    const xmlChar *name = NULL;
8163
6.97k
    xmlChar *ExternalID = NULL;
8164
6.97k
    xmlChar *URI = NULL;
8165
8166
    /*
8167
     * We know that '<!DOCTYPE' has been detected.
8168
     */
8169
6.97k
    SKIP(9);
8170
8171
6.97k
    SKIP_BLANKS;
8172
8173
    /*
8174
     * Parse the DOCTYPE name.
8175
     */
8176
6.97k
    name = xmlParseName(ctxt);
8177
6.97k
    if (name == NULL) {
8178
527
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8179
527
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8180
527
    }
8181
6.97k
    ctxt->intSubName = name;
8182
8183
6.97k
    SKIP_BLANKS;
8184
8185
    /*
8186
     * Check for SystemID and ExternalID
8187
     */
8188
6.97k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8189
8190
6.97k
    if ((URI != NULL) || (ExternalID != NULL)) {
8191
277
        ctxt->hasExternalSubset = 1;
8192
277
    }
8193
6.97k
    ctxt->extSubURI = URI;
8194
6.97k
    ctxt->extSubSystem = ExternalID;
8195
8196
6.97k
    SKIP_BLANKS;
8197
8198
    /*
8199
     * Create and update the internal subset.
8200
     */
8201
6.97k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8202
6.97k
  (!ctxt->disableSAX))
8203
5.77k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8204
8205
    /*
8206
     * Is there any internal subset declarations ?
8207
     * they are handled separately in xmlParseInternalSubset()
8208
     */
8209
6.97k
    if (RAW == '[')
8210
6.50k
  return;
8211
8212
    /*
8213
     * We should be at the end of the DOCTYPE declaration.
8214
     */
8215
466
    if (RAW != '>') {
8216
306
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8217
306
    }
8218
466
    NEXT;
8219
466
}
8220
8221
/**
8222
 * xmlParseInternalSubset:
8223
 * @ctxt:  an XML parser context
8224
 *
8225
 * parse the internal subset declaration
8226
 *
8227
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8228
 */
8229
8230
static void
8231
6.59k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8232
    /*
8233
     * Is there any DTD definition ?
8234
     */
8235
6.59k
    if (RAW == '[') {
8236
6.59k
        int oldInputNr = ctxt->inputNr;
8237
8238
6.59k
        NEXT;
8239
  /*
8240
   * Parse the succession of Markup declarations and
8241
   * PEReferences.
8242
   * Subsequence (markupdecl | PEReference | S)*
8243
   */
8244
6.59k
  SKIP_BLANKS;
8245
132k
  while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) &&
8246
132k
               (PARSER_STOPPED(ctxt) == 0)) {
8247
8248
            /*
8249
             * Conditional sections are allowed from external entities included
8250
             * by PE References in the internal subset.
8251
             */
8252
129k
            if ((PARSER_EXTERNAL(ctxt)) &&
8253
129k
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8254
0
                xmlParseConditionalSections(ctxt);
8255
129k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8256
93.1k
          xmlParseMarkupDecl(ctxt);
8257
93.1k
            } else if (RAW == '%') {
8258
32.8k
          xmlParsePEReference(ctxt);
8259
32.8k
            } else {
8260
3.60k
    xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8261
3.60k
                break;
8262
3.60k
            }
8263
126k
      SKIP_BLANKS_PE;
8264
126k
            SHRINK;
8265
126k
            GROW;
8266
126k
  }
8267
8268
6.65k
        while (ctxt->inputNr > oldInputNr)
8269
60
            xmlPopPE(ctxt);
8270
8271
6.59k
  if (RAW == ']') {
8272
2.27k
      NEXT;
8273
2.27k
      SKIP_BLANKS;
8274
2.27k
  }
8275
6.59k
    }
8276
8277
    /*
8278
     * We should be at the end of the DOCTYPE declaration.
8279
     */
8280
6.59k
    if ((ctxt->wellFormed) && (RAW != '>')) {
8281
108
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8282
108
  return;
8283
108
    }
8284
6.49k
    NEXT;
8285
6.49k
}
8286
8287
#ifdef LIBXML_SAX1_ENABLED
8288
/**
8289
 * xmlParseAttribute:
8290
 * @ctxt:  an XML parser context
8291
 * @value:  a xmlChar ** used to store the value of the attribute
8292
 *
8293
 * DEPRECATED: Internal function, don't use.
8294
 *
8295
 * parse an attribute
8296
 *
8297
 * [41] Attribute ::= Name Eq AttValue
8298
 *
8299
 * [ WFC: No External Entity References ]
8300
 * Attribute values cannot contain direct or indirect entity references
8301
 * to external entities.
8302
 *
8303
 * [ WFC: No < in Attribute Values ]
8304
 * The replacement text of any entity referred to directly or indirectly in
8305
 * an attribute value (other than "&lt;") must not contain a <.
8306
 *
8307
 * [ VC: Attribute Value Type ]
8308
 * The attribute must have been declared; the value must be of the type
8309
 * declared for it.
8310
 *
8311
 * [25] Eq ::= S? '=' S?
8312
 *
8313
 * With namespace:
8314
 *
8315
 * [NS 11] Attribute ::= QName Eq AttValue
8316
 *
8317
 * Also the case QName == xmlns:??? is handled independently as a namespace
8318
 * definition.
8319
 *
8320
 * Returns the attribute name, and the value in *value.
8321
 */
8322
8323
const xmlChar *
8324
0
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8325
0
    const xmlChar *name;
8326
0
    xmlChar *val;
8327
8328
0
    *value = NULL;
8329
0
    GROW;
8330
0
    name = xmlParseName(ctxt);
8331
0
    if (name == NULL) {
8332
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8333
0
                 "error parsing attribute name\n");
8334
0
        return(NULL);
8335
0
    }
8336
8337
    /*
8338
     * read the value
8339
     */
8340
0
    SKIP_BLANKS;
8341
0
    if (RAW == '=') {
8342
0
        NEXT;
8343
0
  SKIP_BLANKS;
8344
0
  val = xmlParseAttValue(ctxt);
8345
0
    } else {
8346
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8347
0
         "Specification mandates value for attribute %s\n", name);
8348
0
  return(name);
8349
0
    }
8350
8351
    /*
8352
     * Check that xml:lang conforms to the specification
8353
     * No more registered as an error, just generate a warning now
8354
     * since this was deprecated in XML second edition
8355
     */
8356
0
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8357
0
  if (!xmlCheckLanguageID(val)) {
8358
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8359
0
              "Malformed value for xml:lang : %s\n",
8360
0
        val, NULL);
8361
0
  }
8362
0
    }
8363
8364
    /*
8365
     * Check that xml:space conforms to the specification
8366
     */
8367
0
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8368
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8369
0
      *(ctxt->space) = 0;
8370
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8371
0
      *(ctxt->space) = 1;
8372
0
  else {
8373
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8374
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8375
0
                                 val, NULL);
8376
0
  }
8377
0
    }
8378
8379
0
    *value = val;
8380
0
    return(name);
8381
0
}
8382
8383
/**
8384
 * xmlParseStartTag:
8385
 * @ctxt:  an XML parser context
8386
 *
8387
 * DEPRECATED: Internal function, don't use.
8388
 *
8389
 * Parse a start tag. Always consumes '<'.
8390
 *
8391
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8392
 *
8393
 * [ WFC: Unique Att Spec ]
8394
 * No attribute name may appear more than once in the same start-tag or
8395
 * empty-element tag.
8396
 *
8397
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8398
 *
8399
 * [ WFC: Unique Att Spec ]
8400
 * No attribute name may appear more than once in the same start-tag or
8401
 * empty-element tag.
8402
 *
8403
 * With namespace:
8404
 *
8405
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8406
 *
8407
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8408
 *
8409
 * Returns the element name parsed
8410
 */
8411
8412
const xmlChar *
8413
0
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8414
0
    const xmlChar *name;
8415
0
    const xmlChar *attname;
8416
0
    xmlChar *attvalue;
8417
0
    const xmlChar **atts = ctxt->atts;
8418
0
    int nbatts = 0;
8419
0
    int maxatts = ctxt->maxatts;
8420
0
    int i;
8421
8422
0
    if (RAW != '<') return(NULL);
8423
0
    NEXT1;
8424
8425
0
    name = xmlParseName(ctxt);
8426
0
    if (name == NULL) {
8427
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8428
0
       "xmlParseStartTag: invalid element name\n");
8429
0
        return(NULL);
8430
0
    }
8431
8432
    /*
8433
     * Now parse the attributes, it ends up with the ending
8434
     *
8435
     * (S Attribute)* S?
8436
     */
8437
0
    SKIP_BLANKS;
8438
0
    GROW;
8439
8440
0
    while (((RAW != '>') &&
8441
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8442
0
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8443
0
  attname = xmlParseAttribute(ctxt, &attvalue);
8444
0
        if (attname == NULL)
8445
0
      break;
8446
0
        if (attvalue != NULL) {
8447
      /*
8448
       * [ WFC: Unique Att Spec ]
8449
       * No attribute name may appear more than once in the same
8450
       * start-tag or empty-element tag.
8451
       */
8452
0
      for (i = 0; i < nbatts;i += 2) {
8453
0
          if (xmlStrEqual(atts[i], attname)) {
8454
0
        xmlErrAttributeDup(ctxt, NULL, attname);
8455
0
        xmlFree(attvalue);
8456
0
        goto failed;
8457
0
    }
8458
0
      }
8459
      /*
8460
       * Add the pair to atts
8461
       */
8462
0
      if (atts == NULL) {
8463
0
          maxatts = 22; /* allow for 10 attrs by default */
8464
0
          atts = (const xmlChar **)
8465
0
           xmlMalloc(maxatts * sizeof(xmlChar *));
8466
0
    if (atts == NULL) {
8467
0
        xmlErrMemory(ctxt);
8468
0
        if (attvalue != NULL)
8469
0
      xmlFree(attvalue);
8470
0
        goto failed;
8471
0
    }
8472
0
    ctxt->atts = atts;
8473
0
    ctxt->maxatts = maxatts;
8474
0
      } else if (nbatts + 4 > maxatts) {
8475
0
          const xmlChar **n;
8476
8477
0
          maxatts *= 2;
8478
0
          n = (const xmlChar **) xmlRealloc((void *) atts,
8479
0
               maxatts * sizeof(const xmlChar *));
8480
0
    if (n == NULL) {
8481
0
        xmlErrMemory(ctxt);
8482
0
        if (attvalue != NULL)
8483
0
      xmlFree(attvalue);
8484
0
        goto failed;
8485
0
    }
8486
0
    atts = n;
8487
0
    ctxt->atts = atts;
8488
0
    ctxt->maxatts = maxatts;
8489
0
      }
8490
0
      atts[nbatts++] = attname;
8491
0
      atts[nbatts++] = attvalue;
8492
0
      atts[nbatts] = NULL;
8493
0
      atts[nbatts + 1] = NULL;
8494
0
  } else {
8495
0
      if (attvalue != NULL)
8496
0
    xmlFree(attvalue);
8497
0
  }
8498
8499
0
failed:
8500
8501
0
  GROW
8502
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8503
0
      break;
8504
0
  if (SKIP_BLANKS == 0) {
8505
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8506
0
         "attributes construct error\n");
8507
0
  }
8508
0
  SHRINK;
8509
0
        GROW;
8510
0
    }
8511
8512
    /*
8513
     * SAX: Start of Element !
8514
     */
8515
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8516
0
  (!ctxt->disableSAX)) {
8517
0
  if (nbatts > 0)
8518
0
      ctxt->sax->startElement(ctxt->userData, name, atts);
8519
0
  else
8520
0
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8521
0
    }
8522
8523
0
    if (atts != NULL) {
8524
        /* Free only the content strings */
8525
0
        for (i = 1;i < nbatts;i+=2)
8526
0
      if (atts[i] != NULL)
8527
0
         xmlFree((xmlChar *) atts[i]);
8528
0
    }
8529
0
    return(name);
8530
0
}
8531
8532
/**
8533
 * xmlParseEndTag1:
8534
 * @ctxt:  an XML parser context
8535
 * @line:  line of the start tag
8536
 * @nsNr:  number of namespaces on the start tag
8537
 *
8538
 * Parse an end tag. Always consumes '</'.
8539
 *
8540
 * [42] ETag ::= '</' Name S? '>'
8541
 *
8542
 * With namespace
8543
 *
8544
 * [NS 9] ETag ::= '</' QName S? '>'
8545
 */
8546
8547
static void
8548
0
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8549
0
    const xmlChar *name;
8550
8551
0
    GROW;
8552
0
    if ((RAW != '<') || (NXT(1) != '/')) {
8553
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8554
0
           "xmlParseEndTag: '</' not found\n");
8555
0
  return;
8556
0
    }
8557
0
    SKIP(2);
8558
8559
0
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8560
8561
    /*
8562
     * We should definitely be at the ending "S? '>'" part
8563
     */
8564
0
    GROW;
8565
0
    SKIP_BLANKS;
8566
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8567
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8568
0
    } else
8569
0
  NEXT1;
8570
8571
    /*
8572
     * [ WFC: Element Type Match ]
8573
     * The Name in an element's end-tag must match the element type in the
8574
     * start-tag.
8575
     *
8576
     */
8577
0
    if (name != (xmlChar*)1) {
8578
0
        if (name == NULL) name = BAD_CAST "unparsable";
8579
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8580
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
8581
0
                    ctxt->name, line, name);
8582
0
    }
8583
8584
    /*
8585
     * SAX: End of Tag
8586
     */
8587
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8588
0
  (!ctxt->disableSAX))
8589
0
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8590
8591
0
    namePop(ctxt);
8592
0
    spacePop(ctxt);
8593
0
}
8594
8595
/**
8596
 * xmlParseEndTag:
8597
 * @ctxt:  an XML parser context
8598
 *
8599
 * DEPRECATED: Internal function, don't use.
8600
 *
8601
 * parse an end of tag
8602
 *
8603
 * [42] ETag ::= '</' Name S? '>'
8604
 *
8605
 * With namespace
8606
 *
8607
 * [NS 9] ETag ::= '</' QName S? '>'
8608
 */
8609
8610
void
8611
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8612
0
    xmlParseEndTag1(ctxt, 0);
8613
0
}
8614
#endif /* LIBXML_SAX1_ENABLED */
8615
8616
/************************************************************************
8617
 *                  *
8618
 *          SAX 2 specific operations       *
8619
 *                  *
8620
 ************************************************************************/
8621
8622
/**
8623
 * xmlParseQNameHashed:
8624
 * @ctxt:  an XML parser context
8625
 * @prefix:  pointer to store the prefix part
8626
 *
8627
 * parse an XML Namespace QName
8628
 *
8629
 * [6]  QName  ::= (Prefix ':')? LocalPart
8630
 * [7]  Prefix  ::= NCName
8631
 * [8]  LocalPart  ::= NCName
8632
 *
8633
 * Returns the Name parsed or NULL
8634
 */
8635
8636
static xmlHashedString
8637
1.44M
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8638
1.44M
    xmlHashedString l, p;
8639
1.44M
    int start, isNCName = 0;
8640
8641
1.44M
    l.name = NULL;
8642
1.44M
    p.name = NULL;
8643
8644
1.44M
    GROW;
8645
1.44M
    start = CUR_PTR - BASE_PTR;
8646
8647
1.44M
    l = xmlParseNCName(ctxt);
8648
1.44M
    if (l.name != NULL) {
8649
1.11M
        isNCName = 1;
8650
1.11M
        if (CUR == ':') {
8651
217k
            NEXT;
8652
217k
            p = l;
8653
217k
            l = xmlParseNCName(ctxt);
8654
217k
        }
8655
1.11M
    }
8656
1.44M
    if ((l.name == NULL) || (CUR == ':')) {
8657
336k
        xmlChar *tmp;
8658
8659
336k
        l.name = NULL;
8660
336k
        p.name = NULL;
8661
336k
        if ((isNCName == 0) && (CUR != ':'))
8662
302k
            return(l);
8663
33.7k
        tmp = xmlParseNmtoken(ctxt);
8664
33.7k
        if (tmp != NULL)
8665
25.0k
            xmlFree(tmp);
8666
33.7k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8667
33.7k
                                CUR_PTR - (BASE_PTR + start));
8668
33.7k
        if (l.name == NULL) {
8669
0
            xmlErrMemory(ctxt);
8670
0
            return(l);
8671
0
        }
8672
33.7k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8673
33.7k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8674
33.7k
    }
8675
8676
1.13M
    *prefix = p;
8677
1.13M
    return(l);
8678
1.44M
}
8679
8680
/**
8681
 * xmlParseQName:
8682
 * @ctxt:  an XML parser context
8683
 * @prefix:  pointer to store the prefix part
8684
 *
8685
 * parse an XML Namespace QName
8686
 *
8687
 * [6]  QName  ::= (Prefix ':')? LocalPart
8688
 * [7]  Prefix  ::= NCName
8689
 * [8]  LocalPart  ::= NCName
8690
 *
8691
 * Returns the Name parsed or NULL
8692
 */
8693
8694
static const xmlChar *
8695
11.6k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8696
11.6k
    xmlHashedString n, p;
8697
8698
11.6k
    n = xmlParseQNameHashed(ctxt, &p);
8699
11.6k
    if (n.name == NULL)
8700
5.19k
        return(NULL);
8701
6.40k
    *prefix = p.name;
8702
6.40k
    return(n.name);
8703
11.6k
}
8704
8705
/**
8706
 * xmlParseQNameAndCompare:
8707
 * @ctxt:  an XML parser context
8708
 * @name:  the localname
8709
 * @prefix:  the prefix, if any.
8710
 *
8711
 * parse an XML name and compares for match
8712
 * (specialized for endtag parsing)
8713
 *
8714
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8715
 * and the name for mismatch
8716
 */
8717
8718
static const xmlChar *
8719
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8720
20.6k
                        xmlChar const *prefix) {
8721
20.6k
    const xmlChar *cmp;
8722
20.6k
    const xmlChar *in;
8723
20.6k
    const xmlChar *ret;
8724
20.6k
    const xmlChar *prefix2;
8725
8726
20.6k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8727
8728
20.6k
    GROW;
8729
20.6k
    in = ctxt->input->cur;
8730
8731
20.6k
    cmp = prefix;
8732
43.8k
    while (*in != 0 && *in == *cmp) {
8733
23.2k
  ++in;
8734
23.2k
  ++cmp;
8735
23.2k
    }
8736
20.6k
    if ((*cmp == 0) && (*in == ':')) {
8737
10.8k
        in++;
8738
10.8k
  cmp = name;
8739
84.8k
  while (*in != 0 && *in == *cmp) {
8740
73.9k
      ++in;
8741
73.9k
      ++cmp;
8742
73.9k
  }
8743
10.8k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8744
      /* success */
8745
9.01k
            ctxt->input->col += in - ctxt->input->cur;
8746
9.01k
      ctxt->input->cur = in;
8747
9.01k
      return((const xmlChar*) 1);
8748
9.01k
  }
8749
10.8k
    }
8750
    /*
8751
     * all strings coms from the dictionary, equality can be done directly
8752
     */
8753
11.6k
    ret = xmlParseQName (ctxt, &prefix2);
8754
11.6k
    if (ret == NULL)
8755
5.19k
        return(NULL);
8756
6.40k
    if ((ret == name) && (prefix == prefix2))
8757
655
  return((const xmlChar*) 1);
8758
5.74k
    return ret;
8759
6.40k
}
8760
8761
/**
8762
 * xmlParseAttribute2:
8763
 * @ctxt:  an XML parser context
8764
 * @pref:  the element prefix
8765
 * @elem:  the element name
8766
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
8767
 * @value:  a xmlChar ** used to store the value of the attribute
8768
 * @len:  an int * to save the length of the attribute
8769
 * @alloc:  an int * to indicate if the attribute was allocated
8770
 *
8771
 * parse an attribute in the new SAX2 framework.
8772
 *
8773
 * Returns the attribute name, and the value in *value, .
8774
 */
8775
8776
static xmlHashedString
8777
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8778
                   const xmlChar * pref, const xmlChar * elem,
8779
                   xmlHashedString * hprefix, xmlChar ** value,
8780
                   int *len, int *alloc)
8781
762k
{
8782
762k
    xmlHashedString hname;
8783
762k
    const xmlChar *prefix, *name;
8784
762k
    xmlChar *val = NULL, *internal_val = NULL;
8785
762k
    int normalize = 0;
8786
762k
    int isNamespace;
8787
8788
762k
    *value = NULL;
8789
762k
    GROW;
8790
762k
    hname = xmlParseQNameHashed(ctxt, hprefix);
8791
762k
    if (hname.name == NULL) {
8792
79.4k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8793
79.4k
                       "error parsing attribute name\n");
8794
79.4k
        return(hname);
8795
79.4k
    }
8796
683k
    name = hname.name;
8797
683k
    if (hprefix->name != NULL)
8798
143k
        prefix = hprefix->name;
8799
539k
    else
8800
539k
        prefix = NULL;
8801
8802
    /*
8803
     * get the type if needed
8804
     */
8805
683k
    if (ctxt->attsSpecial != NULL) {
8806
71.4k
        int type;
8807
8808
71.4k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
8809
71.4k
                                                 pref, elem,
8810
71.4k
                                                 prefix, name);
8811
71.4k
        if (type != 0)
8812
8.60k
            normalize = 1;
8813
71.4k
    }
8814
8815
    /*
8816
     * read the value
8817
     */
8818
683k
    SKIP_BLANKS;
8819
683k
    if (RAW == '=') {
8820
638k
        NEXT;
8821
638k
        SKIP_BLANKS;
8822
638k
        isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8823
638k
                       (prefix == ctxt->str_xmlns));
8824
638k
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize,
8825
638k
                                       isNamespace);
8826
638k
        if (val == NULL)
8827
14.0k
            goto error;
8828
638k
    } else {
8829
44.6k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8830
44.6k
                          "Specification mandates value for attribute %s\n",
8831
44.6k
                          name);
8832
44.6k
        goto error;
8833
44.6k
    }
8834
8835
624k
    if (prefix == ctxt->str_xml) {
8836
        /*
8837
         * Check that xml:lang conforms to the specification
8838
         * No more registered as an error, just generate a warning now
8839
         * since this was deprecated in XML second edition
8840
         */
8841
19.0k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8842
0
            internal_val = xmlStrndup(val, *len);
8843
0
            if (internal_val == NULL)
8844
0
                goto mem_error;
8845
0
            if (!xmlCheckLanguageID(internal_val)) {
8846
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8847
0
                              "Malformed value for xml:lang : %s\n",
8848
0
                              internal_val, NULL);
8849
0
            }
8850
0
        }
8851
8852
        /*
8853
         * Check that xml:space conforms to the specification
8854
         */
8855
19.0k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8856
1.96k
            internal_val = xmlStrndup(val, *len);
8857
1.96k
            if (internal_val == NULL)
8858
0
                goto mem_error;
8859
1.96k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8860
480
                *(ctxt->space) = 0;
8861
1.48k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8862
749
                *(ctxt->space) = 1;
8863
734
            else {
8864
734
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8865
734
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8866
734
                              internal_val, NULL);
8867
734
            }
8868
1.96k
        }
8869
19.0k
        if (internal_val) {
8870
1.96k
            xmlFree(internal_val);
8871
1.96k
        }
8872
19.0k
    }
8873
8874
624k
    *value = val;
8875
624k
    return (hname);
8876
8877
0
mem_error:
8878
0
    xmlErrMemory(ctxt);
8879
58.7k
error:
8880
58.7k
    if ((val != NULL) && (*alloc != 0))
8881
0
        xmlFree(val);
8882
58.7k
    return(hname);
8883
0
}
8884
8885
/**
8886
 * xmlAttrHashInsert:
8887
 * @ctxt: parser context
8888
 * @size: size of the hash table
8889
 * @name: attribute name
8890
 * @uri: namespace uri
8891
 * @hashValue: combined hash value of name and uri
8892
 * @aindex: attribute index (this is a multiple of 5)
8893
 *
8894
 * Inserts a new attribute into the hash table.
8895
 *
8896
 * Returns INT_MAX if no existing attribute was found, the attribute
8897
 * index if an attribute was found, -1 if a memory allocation failed.
8898
 */
8899
static int
8900
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8901
542k
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8902
542k
    xmlAttrHashBucket *table = ctxt->attrHash;
8903
542k
    xmlAttrHashBucket *bucket;
8904
542k
    unsigned hindex;
8905
8906
542k
    hindex = hashValue & (size - 1);
8907
542k
    bucket = &table[hindex];
8908
8909
581k
    while (bucket->index >= 0) {
8910
417k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8911
8912
417k
        if (name == atts[0]) {
8913
382k
            int nsIndex = (int) (ptrdiff_t) atts[2];
8914
8915
382k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8916
382k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8917
5.96k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8918
379k
                return(bucket->index);
8919
382k
        }
8920
8921
38.8k
        hindex++;
8922
38.8k
        bucket++;
8923
38.8k
        if (hindex >= size) {
8924
9.78k
            hindex = 0;
8925
9.78k
            bucket = table;
8926
9.78k
        }
8927
38.8k
    }
8928
8929
163k
    bucket->index = aindex;
8930
8931
163k
    return(INT_MAX);
8932
542k
}
8933
8934
/**
8935
 * xmlParseStartTag2:
8936
 * @ctxt:  an XML parser context
8937
 *
8938
 * Parse a start tag. Always consumes '<'.
8939
 *
8940
 * This routine is called when running SAX2 parsing
8941
 *
8942
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8943
 *
8944
 * [ WFC: Unique Att Spec ]
8945
 * No attribute name may appear more than once in the same start-tag or
8946
 * empty-element tag.
8947
 *
8948
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8949
 *
8950
 * [ WFC: Unique Att Spec ]
8951
 * No attribute name may appear more than once in the same start-tag or
8952
 * empty-element tag.
8953
 *
8954
 * With namespace:
8955
 *
8956
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8957
 *
8958
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8959
 *
8960
 * Returns the element name parsed
8961
 */
8962
8963
static const xmlChar *
8964
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8965
665k
                  const xmlChar **URI, int *nbNsPtr) {
8966
665k
    xmlHashedString hlocalname;
8967
665k
    xmlHashedString hprefix;
8968
665k
    xmlHashedString hattname;
8969
665k
    xmlHashedString haprefix;
8970
665k
    const xmlChar *localname;
8971
665k
    const xmlChar *prefix;
8972
665k
    const xmlChar *attname;
8973
665k
    const xmlChar *aprefix;
8974
665k
    const xmlChar *uri;
8975
665k
    xmlChar *attvalue = NULL;
8976
665k
    const xmlChar **atts = ctxt->atts;
8977
665k
    unsigned attrHashSize = 0;
8978
665k
    int maxatts = ctxt->maxatts;
8979
665k
    int nratts, nbatts, nbdef;
8980
665k
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8981
665k
    int alloc = 0;
8982
8983
665k
    if (RAW != '<') return(NULL);
8984
665k
    NEXT1;
8985
8986
665k
    nbatts = 0;
8987
665k
    nratts = 0;
8988
665k
    nbdef = 0;
8989
665k
    nbNs = 0;
8990
665k
    nbTotalDef = 0;
8991
665k
    attval = 0;
8992
8993
665k
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8994
0
        xmlErrMemory(ctxt);
8995
0
        return(NULL);
8996
0
    }
8997
8998
665k
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8999
665k
    if (hlocalname.name == NULL) {
9000
217k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9001
217k
           "StartTag: invalid element name\n");
9002
217k
        return(NULL);
9003
217k
    }
9004
448k
    localname = hlocalname.name;
9005
448k
    prefix = hprefix.name;
9006
9007
    /*
9008
     * Now parse the attributes, it ends up with the ending
9009
     *
9010
     * (S Attribute)* S?
9011
     */
9012
448k
    SKIP_BLANKS;
9013
448k
    GROW;
9014
9015
    /*
9016
     * The ctxt->atts array will be ultimately passed to the SAX callback
9017
     * containing five xmlChar pointers for each attribute:
9018
     *
9019
     * [0] attribute name
9020
     * [1] attribute prefix
9021
     * [2] namespace URI
9022
     * [3] attribute value
9023
     * [4] end of attribute value
9024
     *
9025
     * To save memory, we reuse this array temporarily and store integers
9026
     * in these pointer variables.
9027
     *
9028
     * [0] attribute name
9029
     * [1] attribute prefix
9030
     * [2] hash value of attribute prefix, and later namespace index
9031
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
9032
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
9033
     *
9034
     * The ctxt->attallocs array contains an additional unsigned int for
9035
     * each attribute, containing the hash value of the attribute name
9036
     * and the alloc flag in bit 31.
9037
     */
9038
9039
927k
    while (((RAW != '>') &&
9040
927k
     ((RAW != '/') || (NXT(1) != '>')) &&
9041
927k
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
9042
762k
  int len = -1;
9043
9044
762k
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
9045
762k
                                          &haprefix, &attvalue, &len,
9046
762k
                                          &alloc);
9047
762k
        if (hattname.name == NULL)
9048
79.4k
      break;
9049
683k
        if (attvalue == NULL)
9050
58.7k
            goto next_attr;
9051
624k
        attname = hattname.name;
9052
624k
        aprefix = haprefix.name;
9053
624k
  if (len < 0) len = xmlStrlen(attvalue);
9054
9055
624k
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9056
77.8k
            xmlHashedString huri;
9057
77.8k
            xmlURIPtr parsedUri;
9058
9059
77.8k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9060
77.8k
            uri = huri.name;
9061
77.8k
            if (uri == NULL) {
9062
0
                xmlErrMemory(ctxt);
9063
0
                goto next_attr;
9064
0
            }
9065
77.8k
            if (*uri != 0) {
9066
74.7k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9067
0
                    xmlErrMemory(ctxt);
9068
0
                    goto next_attr;
9069
0
                }
9070
74.7k
                if (parsedUri == NULL) {
9071
59.2k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9072
59.2k
                             "xmlns: '%s' is not a valid URI\n",
9073
59.2k
                                       uri, NULL, NULL);
9074
59.2k
                } else {
9075
15.4k
                    if (parsedUri->scheme == NULL) {
9076
7.58k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9077
7.58k
                                  "xmlns: URI %s is not absolute\n",
9078
7.58k
                                  uri, NULL, NULL);
9079
7.58k
                    }
9080
15.4k
                    xmlFreeURI(parsedUri);
9081
15.4k
                }
9082
74.7k
                if (uri == ctxt->str_xml_ns) {
9083
487
                    if (attname != ctxt->str_xml) {
9084
487
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9085
487
                     "xml namespace URI cannot be the default namespace\n",
9086
487
                                 NULL, NULL, NULL);
9087
487
                    }
9088
487
                    goto next_attr;
9089
487
                }
9090
74.2k
                if ((len == 29) &&
9091
74.2k
                    (xmlStrEqual(uri,
9092
2.14k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9093
1.15k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9094
1.15k
                         "reuse of the xmlns namespace name is forbidden\n",
9095
1.15k
                             NULL, NULL, NULL);
9096
1.15k
                    goto next_attr;
9097
1.15k
                }
9098
74.2k
            }
9099
9100
76.1k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9101
69.6k
                nbNs++;
9102
546k
        } else if (aprefix == ctxt->str_xmlns) {
9103
54.1k
            xmlHashedString huri;
9104
54.1k
            xmlURIPtr parsedUri;
9105
9106
54.1k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9107
54.1k
            uri = huri.name;
9108
54.1k
            if (uri == NULL) {
9109
0
                xmlErrMemory(ctxt);
9110
0
                goto next_attr;
9111
0
            }
9112
9113
54.1k
            if (attname == ctxt->str_xml) {
9114
1.88k
                if (uri != ctxt->str_xml_ns) {
9115
1.82k
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9116
1.82k
                             "xml namespace prefix mapped to wrong URI\n",
9117
1.82k
                             NULL, NULL, NULL);
9118
1.82k
                }
9119
                /*
9120
                 * Do not keep a namespace definition node
9121
                 */
9122
1.88k
                goto next_attr;
9123
1.88k
            }
9124
52.2k
            if (uri == ctxt->str_xml_ns) {
9125
272
                if (attname != ctxt->str_xml) {
9126
272
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9127
272
                             "xml namespace URI mapped to wrong prefix\n",
9128
272
                             NULL, NULL, NULL);
9129
272
                }
9130
272
                goto next_attr;
9131
272
            }
9132
51.9k
            if (attname == ctxt->str_xmlns) {
9133
303
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9134
303
                         "redefinition of the xmlns prefix is forbidden\n",
9135
303
                         NULL, NULL, NULL);
9136
303
                goto next_attr;
9137
303
            }
9138
51.6k
            if ((len == 29) &&
9139
51.6k
                (xmlStrEqual(uri,
9140
758
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9141
208
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9142
208
                         "reuse of the xmlns namespace name is forbidden\n",
9143
208
                         NULL, NULL, NULL);
9144
208
                goto next_attr;
9145
208
            }
9146
51.4k
            if ((uri == NULL) || (uri[0] == 0)) {
9147
1.28k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9148
1.28k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9149
1.28k
                              attname, NULL, NULL);
9150
1.28k
                goto next_attr;
9151
50.1k
            } else {
9152
50.1k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9153
0
                    xmlErrMemory(ctxt);
9154
0
                    goto next_attr;
9155
0
                }
9156
50.1k
                if (parsedUri == NULL) {
9157
18.9k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9158
18.9k
                         "xmlns:%s: '%s' is not a valid URI\n",
9159
18.9k
                                       attname, uri, NULL);
9160
31.1k
                } else {
9161
31.1k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9162
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9163
0
                                  "xmlns:%s: URI %s is not absolute\n",
9164
0
                                  attname, uri, NULL);
9165
0
                    }
9166
31.1k
                    xmlFreeURI(parsedUri);
9167
31.1k
                }
9168
50.1k
            }
9169
9170
50.1k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9171
48.0k
                nbNs++;
9172
492k
        } else {
9173
            /*
9174
             * Populate attributes array, see above for repurposing
9175
             * of xmlChar pointers.
9176
             */
9177
492k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9178
3.30k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9179
0
                    goto next_attr;
9180
0
                }
9181
3.30k
                maxatts = ctxt->maxatts;
9182
3.30k
                atts = ctxt->atts;
9183
3.30k
            }
9184
492k
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9185
492k
                                        ((unsigned) alloc << 31);
9186
492k
            atts[nbatts++] = attname;
9187
492k
            atts[nbatts++] = aprefix;
9188
492k
            atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9189
492k
            if (alloc) {
9190
28.2k
                atts[nbatts++] = attvalue;
9191
28.2k
                attvalue += len;
9192
28.2k
                atts[nbatts++] = attvalue;
9193
464k
            } else {
9194
                /*
9195
                 * attvalue points into the input buffer which can be
9196
                 * reallocated. Store differences to input->base instead.
9197
                 * The pointers will be reconstructed later.
9198
                 */
9199
464k
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9200
464k
                attvalue += len;
9201
464k
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9202
464k
            }
9203
            /*
9204
             * tag if some deallocation is needed
9205
             */
9206
492k
            if (alloc != 0) attval = 1;
9207
492k
            attvalue = NULL; /* moved into atts */
9208
492k
        }
9209
9210
683k
next_attr:
9211
683k
        if ((attvalue != NULL) && (alloc != 0)) {
9212
40.3k
            xmlFree(attvalue);
9213
40.3k
            attvalue = NULL;
9214
40.3k
        }
9215
9216
683k
  GROW
9217
683k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9218
74.7k
      break;
9219
608k
  if (SKIP_BLANKS == 0) {
9220
129k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9221
129k
         "attributes construct error\n");
9222
129k
      break;
9223
129k
  }
9224
479k
        GROW;
9225
479k
    }
9226
9227
    /*
9228
     * Namespaces from default attributes
9229
     */
9230
448k
    if (ctxt->attsDefault != NULL) {
9231
84.4k
        xmlDefAttrsPtr defaults;
9232
9233
84.4k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9234
84.4k
  if (defaults != NULL) {
9235
214k
      for (i = 0; i < defaults->nbAttrs; i++) {
9236
187k
                xmlDefAttr *attr = &defaults->attrs[i];
9237
9238
187k
          attname = attr->name.name;
9239
187k
    aprefix = attr->prefix.name;
9240
9241
187k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9242
12.0k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9243
9244
12.0k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9245
11.1k
                        nbNs++;
9246
174k
    } else if (aprefix == ctxt->str_xmlns) {
9247
58.3k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9248
9249
58.3k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9250
58.3k
                                      NULL, 1) > 0)
9251
56.4k
                        nbNs++;
9252
116k
    } else {
9253
116k
                    nbTotalDef += 1;
9254
116k
                }
9255
187k
      }
9256
27.2k
  }
9257
84.4k
    }
9258
9259
    /*
9260
     * Resolve attribute namespaces
9261
     */
9262
940k
    for (i = 0; i < nbatts; i += 5) {
9263
492k
        attname = atts[i];
9264
492k
        aprefix = atts[i+1];
9265
9266
        /*
9267
  * The default namespace does not apply to attribute names.
9268
  */
9269
492k
  if (aprefix == NULL) {
9270
417k
            nsIndex = NS_INDEX_EMPTY;
9271
417k
        } else if (aprefix == ctxt->str_xml) {
9272
19.0k
            nsIndex = NS_INDEX_XML;
9273
55.9k
        } else {
9274
55.9k
            haprefix.name = aprefix;
9275
55.9k
            haprefix.hashValue = (size_t) atts[i+2];
9276
55.9k
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9277
9278
55.9k
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9279
16.7k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9280
16.7k
        "Namespace prefix %s for %s on %s is not defined\n",
9281
16.7k
        aprefix, attname, localname);
9282
16.7k
                nsIndex = NS_INDEX_EMPTY;
9283
16.7k
            }
9284
55.9k
        }
9285
9286
492k
        atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex;
9287
492k
    }
9288
9289
    /*
9290
     * Maximum number of attributes including default attributes.
9291
     */
9292
448k
    maxAtts = nratts + nbTotalDef;
9293
9294
    /*
9295
     * Verify that attribute names are unique.
9296
     */
9297
448k
    if (maxAtts > 1) {
9298
39.1k
        attrHashSize = 4;
9299
88.3k
        while (attrHashSize / 2 < (unsigned) maxAtts)
9300
49.2k
            attrHashSize *= 2;
9301
9302
39.1k
        if (attrHashSize > ctxt->attrHashMax) {
9303
1.35k
            xmlAttrHashBucket *tmp;
9304
9305
1.35k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9306
1.35k
            if (tmp == NULL) {
9307
0
                xmlErrMemory(ctxt);
9308
0
                goto done;
9309
0
            }
9310
9311
1.35k
            ctxt->attrHash = tmp;
9312
1.35k
            ctxt->attrHashMax = attrHashSize;
9313
1.35k
        }
9314
9315
39.1k
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9316
9317
480k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9318
441k
            const xmlChar *nsuri;
9319
441k
            unsigned hashValue, nameHashValue, uriHashValue;
9320
441k
            int res;
9321
9322
441k
            attname = atts[i];
9323
441k
            aprefix = atts[i+1];
9324
441k
            nsIndex = (ptrdiff_t) atts[i+2];
9325
            /* Hash values always have bit 31 set, see dict.c */
9326
441k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9327
9328
441k
            if (nsIndex == NS_INDEX_EMPTY) {
9329
                /*
9330
                 * Prefix with empty namespace means an undeclared
9331
                 * prefix which was already reported above.
9332
                 */
9333
401k
                if (aprefix != NULL)
9334
10.1k
                    continue;
9335
391k
                nsuri = NULL;
9336
391k
                uriHashValue = URI_HASH_EMPTY;
9337
391k
            } else if (nsIndex == NS_INDEX_XML) {
9338
2.86k
                nsuri = ctxt->str_xml_ns;
9339
2.86k
                uriHashValue = URI_HASH_XML;
9340
36.1k
            } else {
9341
36.1k
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9342
36.1k
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9343
36.1k
            }
9344
9345
430k
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9346
430k
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9347
430k
                                    hashValue, i);
9348
430k
            if (res < 0)
9349
0
                continue;
9350
9351
            /*
9352
             * [ WFC: Unique Att Spec ]
9353
             * No attribute name may appear more than once in the same
9354
             * start-tag or empty-element tag.
9355
             * As extended by the Namespace in XML REC.
9356
             */
9357
430k
            if (res < INT_MAX) {
9358
363k
                if (aprefix == atts[res+1]) {
9359
362k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9360
362k
                } else {
9361
939
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9362
939
                             "Namespaced Attribute %s in '%s' redefined\n",
9363
939
                             attname, nsuri, NULL);
9364
939
                }
9365
363k
            }
9366
430k
        }
9367
39.1k
    }
9368
9369
    /*
9370
     * Default attributes
9371
     */
9372
448k
    if (ctxt->attsDefault != NULL) {
9373
84.4k
        xmlDefAttrsPtr defaults;
9374
9375
84.4k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9376
84.4k
  if (defaults != NULL) {
9377
214k
      for (i = 0; i < defaults->nbAttrs; i++) {
9378
187k
                xmlDefAttr *attr = &defaults->attrs[i];
9379
187k
                const xmlChar *nsuri = NULL;
9380
187k
                unsigned hashValue, uriHashValue = 0;
9381
187k
                int res;
9382
9383
187k
          attname = attr->name.name;
9384
187k
    aprefix = attr->prefix.name;
9385
9386
187k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9387
12.0k
                    continue;
9388
174k
    if (aprefix == ctxt->str_xmlns)
9389
58.3k
                    continue;
9390
9391
116k
                if (aprefix == NULL) {
9392
42.0k
                    nsIndex = NS_INDEX_EMPTY;
9393
42.0k
                    nsuri = NULL;
9394
42.0k
                    uriHashValue = URI_HASH_EMPTY;
9395
116k
                } if (aprefix == ctxt->str_xml) {
9396
7.02k
                    nsIndex = NS_INDEX_XML;
9397
7.02k
                    nsuri = ctxt->str_xml_ns;
9398
7.02k
                    uriHashValue = URI_HASH_XML;
9399
109k
                } else if (aprefix != NULL) {
9400
67.5k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9401
67.5k
                    if ((nsIndex == INT_MAX) ||
9402
67.5k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9403
63.9k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9404
63.9k
                                 "Namespace prefix %s for %s on %s is not "
9405
63.9k
                                 "defined\n",
9406
63.9k
                                 aprefix, attname, localname);
9407
63.9k
                        nsIndex = NS_INDEX_EMPTY;
9408
63.9k
                        nsuri = NULL;
9409
63.9k
                        uriHashValue = URI_HASH_EMPTY;
9410
63.9k
                    } else {
9411
3.53k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9412
3.53k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9413
3.53k
                    }
9414
67.5k
                }
9415
9416
                /*
9417
                 * Check whether the attribute exists
9418
                 */
9419
116k
                if (maxAtts > 1) {
9420
111k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9421
111k
                                                   uriHashValue);
9422
111k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9423
111k
                                            hashValue, nbatts);
9424
111k
                    if (res < 0)
9425
0
                        continue;
9426
111k
                    if (res < INT_MAX) {
9427
15.4k
                        if (aprefix == atts[res+1])
9428
4.64k
                            continue;
9429
10.7k
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9430
10.7k
                                 "Namespaced Attribute %s in '%s' redefined\n",
9431
10.7k
                                 attname, nsuri, NULL);
9432
10.7k
                    }
9433
111k
                }
9434
9435
112k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9436
9437
112k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9438
506
                    if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9439
0
                        localname = NULL;
9440
0
                        goto done;
9441
0
                    }
9442
506
                    maxatts = ctxt->maxatts;
9443
506
                    atts = ctxt->atts;
9444
506
                }
9445
9446
112k
                atts[nbatts++] = attname;
9447
112k
                atts[nbatts++] = aprefix;
9448
112k
                atts[nbatts++] = (const xmlChar *) (ptrdiff_t) nsIndex;
9449
112k
                atts[nbatts++] = attr->value.name;
9450
112k
                atts[nbatts++] = attr->valueEnd;
9451
112k
                if ((ctxt->standalone == 1) && (attr->external != 0)) {
9452
0
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9453
0
                            "standalone: attribute %s on %s defaulted "
9454
0
                            "from external subset\n",
9455
0
                            attname, localname);
9456
0
                }
9457
112k
                nbdef++;
9458
112k
      }
9459
27.2k
  }
9460
84.4k
    }
9461
9462
    /*
9463
     * Reconstruct attribute pointers
9464
     */
9465
1.05M
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9466
        /* namespace URI */
9467
604k
        nsIndex = (ptrdiff_t) atts[i+2];
9468
604k
        if (nsIndex == INT_MAX)
9469
535k
            atts[i+2] = NULL;
9470
68.4k
        else if (nsIndex == INT_MAX - 1)
9471
25.7k
            atts[i+2] = ctxt->str_xml_ns;
9472
42.6k
        else
9473
42.6k
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9474
9475
604k
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9476
464k
            atts[i+3] = BASE_PTR + (ptrdiff_t) atts[i+3];  /* value */
9477
464k
            atts[i+4] = BASE_PTR + (ptrdiff_t) atts[i+4];  /* valuend */
9478
464k
        }
9479
604k
    }
9480
9481
448k
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9482
448k
    if ((prefix != NULL) && (uri == NULL)) {
9483
32.0k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9484
32.0k
           "Namespace prefix %s on %s is not defined\n",
9485
32.0k
     prefix, localname, NULL);
9486
32.0k
    }
9487
448k
    *pref = prefix;
9488
448k
    *URI = uri;
9489
9490
    /*
9491
     * SAX callback
9492
     */
9493
448k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9494
448k
  (!ctxt->disableSAX)) {
9495
43.0k
  if (nbNs > 0)
9496
4.08k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9497
4.08k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9498
4.08k
        nbatts / 5, nbdef, atts);
9499
38.9k
  else
9500
38.9k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9501
38.9k
                          0, NULL, nbatts / 5, nbdef, atts);
9502
43.0k
    }
9503
9504
448k
done:
9505
    /*
9506
     * Free allocated attribute values
9507
     */
9508
448k
    if (attval != 0) {
9509
344k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9510
317k
      if (ctxt->attallocs[j] & 0x80000000)
9511
28.2k
          xmlFree((xmlChar *) atts[i+3]);
9512
26.6k
    }
9513
9514
448k
    *nbNsPtr = nbNs;
9515
448k
    return(localname);
9516
448k
}
9517
9518
/**
9519
 * xmlParseEndTag2:
9520
 * @ctxt:  an XML parser context
9521
 * @line:  line of the start tag
9522
 * @nsNr:  number of namespaces on the start tag
9523
 *
9524
 * Parse an end tag. Always consumes '</'.
9525
 *
9526
 * [42] ETag ::= '</' Name S? '>'
9527
 *
9528
 * With namespace
9529
 *
9530
 * [NS 9] ETag ::= '</' QName S? '>'
9531
 */
9532
9533
static void
9534
64.5k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9535
64.5k
    const xmlChar *name;
9536
9537
64.5k
    GROW;
9538
64.5k
    if ((RAW != '<') || (NXT(1) != '/')) {
9539
131
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9540
131
  return;
9541
131
    }
9542
64.4k
    SKIP(2);
9543
9544
64.4k
    if (tag->prefix == NULL)
9545
43.8k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9546
20.6k
    else
9547
20.6k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9548
9549
    /*
9550
     * We should definitely be at the ending "S? '>'" part
9551
     */
9552
64.4k
    GROW;
9553
64.4k
    SKIP_BLANKS;
9554
64.4k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9555
23.1k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9556
23.1k
    } else
9557
41.2k
  NEXT1;
9558
9559
    /*
9560
     * [ WFC: Element Type Match ]
9561
     * The Name in an element's end-tag must match the element type in the
9562
     * start-tag.
9563
     *
9564
     */
9565
64.4k
    if (name != (xmlChar*)1) {
9566
26.2k
        if (name == NULL) name = BAD_CAST "unparsable";
9567
26.2k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9568
26.2k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9569
26.2k
                    ctxt->name, tag->line, name);
9570
26.2k
    }
9571
9572
    /*
9573
     * SAX: End of Tag
9574
     */
9575
64.4k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9576
64.4k
  (!ctxt->disableSAX))
9577
4.43k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9578
4.43k
                                tag->URI);
9579
9580
64.4k
    spacePop(ctxt);
9581
64.4k
    if (tag->nsNr != 0)
9582
4.21k
  xmlParserNsPop(ctxt, tag->nsNr);
9583
64.4k
}
9584
9585
/**
9586
 * xmlParseCDSect:
9587
 * @ctxt:  an XML parser context
9588
 *
9589
 * DEPRECATED: Internal function, don't use.
9590
 *
9591
 * Parse escaped pure raw content. Always consumes '<!['.
9592
 *
9593
 * [18] CDSect ::= CDStart CData CDEnd
9594
 *
9595
 * [19] CDStart ::= '<![CDATA['
9596
 *
9597
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9598
 *
9599
 * [21] CDEnd ::= ']]>'
9600
 */
9601
void
9602
18.4k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9603
18.4k
    xmlChar *buf = NULL;
9604
18.4k
    int len = 0;
9605
18.4k
    int size = XML_PARSER_BUFFER_SIZE;
9606
18.4k
    int r, rl;
9607
18.4k
    int s, sl;
9608
18.4k
    int cur, l;
9609
18.4k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9610
0
                    XML_MAX_HUGE_LENGTH :
9611
18.4k
                    XML_MAX_TEXT_LENGTH;
9612
9613
18.4k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9614
0
        return;
9615
18.4k
    SKIP(3);
9616
9617
18.4k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9618
0
        return;
9619
18.4k
    SKIP(6);
9620
9621
18.4k
    r = xmlCurrentCharRecover(ctxt, &rl);
9622
18.4k
    if (!IS_CHAR(r)) {
9623
2.86k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9624
2.86k
        goto out;
9625
2.86k
    }
9626
15.6k
    NEXTL(rl);
9627
15.6k
    s = xmlCurrentCharRecover(ctxt, &sl);
9628
15.6k
    if (!IS_CHAR(s)) {
9629
1.57k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9630
1.57k
        goto out;
9631
1.57k
    }
9632
14.0k
    NEXTL(sl);
9633
14.0k
    cur = xmlCurrentCharRecover(ctxt, &l);
9634
14.0k
    buf = xmlMalloc(size);
9635
14.0k
    if (buf == NULL) {
9636
0
  xmlErrMemory(ctxt);
9637
0
        goto out;
9638
0
    }
9639
48.0M
    while (IS_CHAR(cur) &&
9640
48.0M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9641
47.9M
  if (len + 5 >= size) {
9642
3.50k
      xmlChar *tmp;
9643
9644
3.50k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9645
3.50k
      if (tmp == NULL) {
9646
0
    xmlErrMemory(ctxt);
9647
0
                goto out;
9648
0
      }
9649
3.50k
      buf = tmp;
9650
3.50k
      size *= 2;
9651
3.50k
  }
9652
47.9M
  COPY_BUF(buf, len, r);
9653
47.9M
        if (len > maxLength) {
9654
3
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9655
3
                           "CData section too big found\n");
9656
3
            goto out;
9657
3
        }
9658
47.9M
  r = s;
9659
47.9M
  rl = sl;
9660
47.9M
  s = cur;
9661
47.9M
  sl = l;
9662
47.9M
  NEXTL(l);
9663
47.9M
  cur = xmlCurrentCharRecover(ctxt, &l);
9664
47.9M
    }
9665
14.0k
    buf[len] = 0;
9666
14.0k
    if (cur != '>') {
9667
2.95k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9668
2.95k
                       "CData section not finished\n%.50s\n", buf);
9669
2.95k
        goto out;
9670
2.95k
    }
9671
11.0k
    NEXTL(l);
9672
9673
    /*
9674
     * OK the buffer is to be consumed as cdata.
9675
     */
9676
11.0k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9677
2.36k
  if (ctxt->sax->cdataBlock != NULL)
9678
2.36k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9679
0
  else if (ctxt->sax->characters != NULL)
9680
0
      ctxt->sax->characters(ctxt->userData, buf, len);
9681
2.36k
    }
9682
9683
18.4k
out:
9684
18.4k
    xmlFree(buf);
9685
18.4k
}
9686
9687
/**
9688
 * xmlParseContentInternal:
9689
 * @ctxt:  an XML parser context
9690
 *
9691
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9692
 * unexpected EOF to the caller.
9693
 */
9694
9695
static void
9696
8.39k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9697
8.39k
    int oldNameNr = ctxt->nameNr;
9698
8.39k
    int oldSpaceNr = ctxt->spaceNr;
9699
8.39k
    int oldNodeNr = ctxt->nodeNr;
9700
9701
8.39k
    GROW;
9702
10.5M
    while ((ctxt->input->cur < ctxt->input->end) &&
9703
10.5M
     (PARSER_STOPPED(ctxt) == 0)) {
9704
10.5M
  const xmlChar *cur = ctxt->input->cur;
9705
9706
  /*
9707
   * First case : a Processing Instruction.
9708
   */
9709
10.5M
  if ((*cur == '<') && (cur[1] == '?')) {
9710
22.6k
      xmlParsePI(ctxt);
9711
22.6k
  }
9712
9713
  /*
9714
   * Second case : a CDSection
9715
   */
9716
  /* 2.6.0 test was *cur not RAW */
9717
10.4M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9718
18.4k
      xmlParseCDSect(ctxt);
9719
18.4k
  }
9720
9721
  /*
9722
   * Third case :  a comment
9723
   */
9724
10.4M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9725
10.4M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9726
32.4k
      xmlParseComment(ctxt);
9727
32.4k
  }
9728
9729
  /*
9730
   * Fourth case :  a sub-element.
9731
   */
9732
10.4M
  else if (*cur == '<') {
9733
720k
            if (NXT(1) == '/') {
9734
64.4k
                if (ctxt->nameNr <= oldNameNr)
9735
190
                    break;
9736
64.2k
          xmlParseElementEnd(ctxt);
9737
656k
            } else {
9738
656k
          xmlParseElementStart(ctxt);
9739
656k
            }
9740
720k
  }
9741
9742
  /*
9743
   * Fifth case : a reference. If if has not been resolved,
9744
   *    parsing returns it's Name, create the node
9745
   */
9746
9747
9.71M
  else if (*cur == '&') {
9748
307k
      xmlParseReference(ctxt);
9749
307k
  }
9750
9751
  /*
9752
   * Last case, text. Note that References are handled directly.
9753
   */
9754
9.40M
  else {
9755
9.40M
      xmlParseCharDataInternal(ctxt, 0);
9756
9.40M
  }
9757
9758
10.5M
  SHRINK;
9759
10.5M
  GROW;
9760
10.5M
    }
9761
9762
8.39k
    if ((ctxt->nameNr > oldNameNr) &&
9763
8.39k
        (ctxt->input->cur >= ctxt->input->end) &&
9764
8.39k
        (ctxt->wellFormed)) {
9765
333
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9766
333
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9767
333
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9768
333
                "Premature end of data in tag %s line %d\n",
9769
333
                name, line, NULL);
9770
333
    }
9771
9772
    /*
9773
     * Clean up in error case
9774
     */
9775
9776
30.3k
    while (ctxt->nodeNr > oldNodeNr)
9777
21.9k
        nodePop(ctxt);
9778
9779
138k
    while (ctxt->nameNr > oldNameNr) {
9780
130k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9781
9782
130k
        if (tag->nsNr != 0)
9783
34.1k
            xmlParserNsPop(ctxt, tag->nsNr);
9784
9785
130k
        namePop(ctxt);
9786
130k
    }
9787
9788
138k
    while (ctxt->spaceNr > oldSpaceNr)
9789
130k
        spacePop(ctxt);
9790
8.39k
}
9791
9792
/**
9793
 * xmlParseContent:
9794
 * @ctxt:  an XML parser context
9795
 *
9796
 * Parse XML element content. This is useful if you're only interested
9797
 * in custom SAX callbacks. If you want a node list, use
9798
 * xmlCtxtParseContent.
9799
 */
9800
void
9801
0
xmlParseContent(xmlParserCtxtPtr ctxt) {
9802
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9803
0
        return;
9804
9805
0
    xmlCtxtInitializeLate(ctxt);
9806
9807
0
    xmlParseContentInternal(ctxt);
9808
9809
0
    if (ctxt->input->cur < ctxt->input->end)
9810
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9811
0
}
9812
9813
/**
9814
 * xmlParseElement:
9815
 * @ctxt:  an XML parser context
9816
 *
9817
 * DEPRECATED: Internal function, don't use.
9818
 *
9819
 * parse an XML element
9820
 *
9821
 * [39] element ::= EmptyElemTag | STag content ETag
9822
 *
9823
 * [ WFC: Element Type Match ]
9824
 * The Name in an element's end-tag must match the element type in the
9825
 * start-tag.
9826
 *
9827
 */
9828
9829
void
9830
9.76k
xmlParseElement(xmlParserCtxtPtr ctxt) {
9831
9.76k
    if (xmlParseElementStart(ctxt) != 0)
9832
2.22k
        return;
9833
9834
7.53k
    xmlParseContentInternal(ctxt);
9835
9836
7.53k
    if (ctxt->input->cur >= ctxt->input->end) {
9837
7.23k
        if (ctxt->wellFormed) {
9838
616
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9839
616
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9840
616
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9841
616
                    "Premature end of data in tag %s line %d\n",
9842
616
                    name, line, NULL);
9843
616
        }
9844
7.23k
        return;
9845
7.23k
    }
9846
9847
303
    xmlParseElementEnd(ctxt);
9848
303
}
9849
9850
/**
9851
 * xmlParseElementStart:
9852
 * @ctxt:  an XML parser context
9853
 *
9854
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9855
 * opening tag was parsed, 1 if an empty element was parsed.
9856
 *
9857
 * Always consumes '<'.
9858
 */
9859
static int
9860
665k
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9861
665k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9862
665k
    const xmlChar *name;
9863
665k
    const xmlChar *prefix = NULL;
9864
665k
    const xmlChar *URI = NULL;
9865
665k
    xmlParserNodeInfo node_info;
9866
665k
    int line;
9867
665k
    xmlNodePtr cur;
9868
665k
    int nbNs = 0;
9869
9870
665k
    if (ctxt->nameNr > maxDepth) {
9871
48
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9872
48
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9873
48
                ctxt->nameNr);
9874
48
  xmlHaltParser(ctxt);
9875
48
  return(-1);
9876
48
    }
9877
9878
    /* Capture start position */
9879
665k
    if (ctxt->record_info) {
9880
0
        node_info.begin_pos = ctxt->input->consumed +
9881
0
                          (CUR_PTR - ctxt->input->base);
9882
0
  node_info.begin_line = ctxt->input->line;
9883
0
    }
9884
9885
665k
    if (ctxt->spaceNr == 0)
9886
0
  spacePush(ctxt, -1);
9887
665k
    else if (*ctxt->space == -2)
9888
0
  spacePush(ctxt, -1);
9889
665k
    else
9890
665k
  spacePush(ctxt, *ctxt->space);
9891
9892
665k
    line = ctxt->input->line;
9893
665k
#ifdef LIBXML_SAX1_ENABLED
9894
665k
    if (ctxt->sax2)
9895
665k
#endif /* LIBXML_SAX1_ENABLED */
9896
665k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9897
0
#ifdef LIBXML_SAX1_ENABLED
9898
0
    else
9899
0
  name = xmlParseStartTag(ctxt);
9900
665k
#endif /* LIBXML_SAX1_ENABLED */
9901
665k
    if (name == NULL) {
9902
217k
  spacePop(ctxt);
9903
217k
        return(-1);
9904
217k
    }
9905
448k
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9906
448k
    cur = ctxt->node;
9907
9908
448k
#ifdef LIBXML_VALID_ENABLED
9909
    /*
9910
     * [ VC: Root Element Type ]
9911
     * The Name in the document type declaration must match the element
9912
     * type of the root element.
9913
     */
9914
448k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9915
448k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9916
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9917
448k
#endif /* LIBXML_VALID_ENABLED */
9918
9919
    /*
9920
     * Check for an Empty Element.
9921
     */
9922
448k
    if ((RAW == '/') && (NXT(1) == '>')) {
9923
23.9k
        SKIP(2);
9924
23.9k
  if (ctxt->sax2) {
9925
23.9k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9926
23.9k
    (!ctxt->disableSAX))
9927
4.94k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9928
23.9k
#ifdef LIBXML_SAX1_ENABLED
9929
23.9k
  } else {
9930
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9931
0
    (!ctxt->disableSAX))
9932
0
    ctxt->sax->endElement(ctxt->userData, name);
9933
0
#endif /* LIBXML_SAX1_ENABLED */
9934
0
  }
9935
23.9k
  namePop(ctxt);
9936
23.9k
  spacePop(ctxt);
9937
23.9k
  if (nbNs > 0)
9938
7.24k
      xmlParserNsPop(ctxt, nbNs);
9939
23.9k
  if (cur != NULL && ctxt->record_info) {
9940
0
            node_info.node = cur;
9941
0
            node_info.end_pos = ctxt->input->consumed +
9942
0
                                (CUR_PTR - ctxt->input->base);
9943
0
            node_info.end_line = ctxt->input->line;
9944
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9945
0
  }
9946
23.9k
  return(1);
9947
23.9k
    }
9948
424k
    if (RAW == '>') {
9949
202k
        NEXT1;
9950
202k
        if (cur != NULL && ctxt->record_info) {
9951
0
            node_info.node = cur;
9952
0
            node_info.end_pos = 0;
9953
0
            node_info.end_line = 0;
9954
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9955
0
        }
9956
222k
    } else {
9957
222k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9958
222k
         "Couldn't find end of Start Tag %s line %d\n",
9959
222k
                    name, line, NULL);
9960
9961
  /*
9962
   * end of parsing of this node.
9963
   */
9964
222k
  nodePop(ctxt);
9965
222k
  namePop(ctxt);
9966
222k
  spacePop(ctxt);
9967
222k
  if (nbNs > 0)
9968
78.1k
      xmlParserNsPop(ctxt, nbNs);
9969
222k
  return(-1);
9970
222k
    }
9971
9972
202k
    return(0);
9973
424k
}
9974
9975
/**
9976
 * xmlParseElementEnd:
9977
 * @ctxt:  an XML parser context
9978
 *
9979
 * Parse the end of an XML element. Always consumes '</'.
9980
 */
9981
static void
9982
64.5k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9983
64.5k
    xmlNodePtr cur = ctxt->node;
9984
9985
64.5k
    if (ctxt->nameNr <= 0) {
9986
0
        if ((RAW == '<') && (NXT(1) == '/'))
9987
0
            SKIP(2);
9988
0
        return;
9989
0
    }
9990
9991
    /*
9992
     * parse the end of tag: '</' should be here.
9993
     */
9994
64.5k
    if (ctxt->sax2) {
9995
64.5k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9996
64.5k
  namePop(ctxt);
9997
64.5k
    }
9998
0
#ifdef LIBXML_SAX1_ENABLED
9999
0
    else
10000
0
  xmlParseEndTag1(ctxt, 0);
10001
64.5k
#endif /* LIBXML_SAX1_ENABLED */
10002
10003
    /*
10004
     * Capture end position
10005
     */
10006
64.5k
    if (cur != NULL && ctxt->record_info) {
10007
0
        xmlParserNodeInfoPtr node_info;
10008
10009
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10010
0
        if (node_info != NULL) {
10011
0
            node_info->end_pos = ctxt->input->consumed +
10012
0
                                 (CUR_PTR - ctxt->input->base);
10013
0
            node_info->end_line = ctxt->input->line;
10014
0
        }
10015
0
    }
10016
64.5k
}
10017
10018
/**
10019
 * xmlParseVersionNum:
10020
 * @ctxt:  an XML parser context
10021
 *
10022
 * DEPRECATED: Internal function, don't use.
10023
 *
10024
 * parse the XML version value.
10025
 *
10026
 * [26] VersionNum ::= '1.' [0-9]+
10027
 *
10028
 * In practice allow [0-9].[0-9]+ at that level
10029
 *
10030
 * Returns the string giving the XML version number, or NULL
10031
 */
10032
xmlChar *
10033
2.03k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10034
2.03k
    xmlChar *buf = NULL;
10035
2.03k
    int len = 0;
10036
2.03k
    int size = 10;
10037
2.03k
    xmlChar cur;
10038
10039
2.03k
    buf = xmlMalloc(size);
10040
2.03k
    if (buf == NULL) {
10041
0
  xmlErrMemory(ctxt);
10042
0
  return(NULL);
10043
0
    }
10044
2.03k
    cur = CUR;
10045
2.03k
    if (!((cur >= '0') && (cur <= '9'))) {
10046
82
  xmlFree(buf);
10047
82
  return(NULL);
10048
82
    }
10049
1.95k
    buf[len++] = cur;
10050
1.95k
    NEXT;
10051
1.95k
    cur=CUR;
10052
1.95k
    if (cur != '.') {
10053
23
  xmlFree(buf);
10054
23
  return(NULL);
10055
23
    }
10056
1.93k
    buf[len++] = cur;
10057
1.93k
    NEXT;
10058
1.93k
    cur=CUR;
10059
398k
    while ((cur >= '0') && (cur <= '9')) {
10060
396k
  if (len + 1 >= size) {
10061
149
      xmlChar *tmp;
10062
10063
149
      size *= 2;
10064
149
      tmp = (xmlChar *) xmlRealloc(buf, size);
10065
149
      if (tmp == NULL) {
10066
0
          xmlFree(buf);
10067
0
    xmlErrMemory(ctxt);
10068
0
    return(NULL);
10069
0
      }
10070
149
      buf = tmp;
10071
149
  }
10072
396k
  buf[len++] = cur;
10073
396k
  NEXT;
10074
396k
  cur=CUR;
10075
396k
    }
10076
1.93k
    buf[len] = 0;
10077
1.93k
    return(buf);
10078
1.93k
}
10079
10080
/**
10081
 * xmlParseVersionInfo:
10082
 * @ctxt:  an XML parser context
10083
 *
10084
 * DEPRECATED: Internal function, don't use.
10085
 *
10086
 * parse the XML version.
10087
 *
10088
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10089
 *
10090
 * [25] Eq ::= S? '=' S?
10091
 *
10092
 * Returns the version string, e.g. "1.0"
10093
 */
10094
10095
xmlChar *
10096
3.44k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10097
3.44k
    xmlChar *version = NULL;
10098
10099
3.44k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10100
2.12k
  SKIP(7);
10101
2.12k
  SKIP_BLANKS;
10102
2.12k
  if (RAW != '=') {
10103
25
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10104
25
      return(NULL);
10105
25
        }
10106
2.09k
  NEXT;
10107
2.09k
  SKIP_BLANKS;
10108
2.09k
  if (RAW == '"') {
10109
1.77k
      NEXT;
10110
1.77k
      version = xmlParseVersionNum(ctxt);
10111
1.77k
      if (RAW != '"') {
10112
88
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10113
88
      } else
10114
1.68k
          NEXT;
10115
1.77k
  } else if (RAW == '\''){
10116
263
      NEXT;
10117
263
      version = xmlParseVersionNum(ctxt);
10118
263
      if (RAW != '\'') {
10119
21
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10120
21
      } else
10121
242
          NEXT;
10122
263
  } else {
10123
62
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10124
62
  }
10125
2.09k
    }
10126
3.41k
    return(version);
10127
3.44k
}
10128
10129
/**
10130
 * xmlParseEncName:
10131
 * @ctxt:  an XML parser context
10132
 *
10133
 * DEPRECATED: Internal function, don't use.
10134
 *
10135
 * parse the XML encoding name
10136
 *
10137
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10138
 *
10139
 * Returns the encoding name value or NULL
10140
 */
10141
xmlChar *
10142
2.40k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10143
2.40k
    xmlChar *buf = NULL;
10144
2.40k
    int len = 0;
10145
2.40k
    int size = 10;
10146
2.40k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10147
0
                    XML_MAX_TEXT_LENGTH :
10148
2.40k
                    XML_MAX_NAME_LENGTH;
10149
2.40k
    xmlChar cur;
10150
10151
2.40k
    cur = CUR;
10152
2.40k
    if (((cur >= 'a') && (cur <= 'z')) ||
10153
2.40k
        ((cur >= 'A') && (cur <= 'Z'))) {
10154
2.38k
  buf = xmlMalloc(size);
10155
2.38k
  if (buf == NULL) {
10156
0
      xmlErrMemory(ctxt);
10157
0
      return(NULL);
10158
0
  }
10159
10160
2.38k
  buf[len++] = cur;
10161
2.38k
  NEXT;
10162
2.38k
  cur = CUR;
10163
68.5k
  while (((cur >= 'a') && (cur <= 'z')) ||
10164
68.5k
         ((cur >= 'A') && (cur <= 'Z')) ||
10165
68.5k
         ((cur >= '0') && (cur <= '9')) ||
10166
68.5k
         (cur == '.') || (cur == '_') ||
10167
68.5k
         (cur == '-')) {
10168
66.1k
      if (len + 1 >= size) {
10169
431
          xmlChar *tmp;
10170
10171
431
    size *= 2;
10172
431
    tmp = (xmlChar *) xmlRealloc(buf, size);
10173
431
    if (tmp == NULL) {
10174
0
        xmlErrMemory(ctxt);
10175
0
        xmlFree(buf);
10176
0
        return(NULL);
10177
0
    }
10178
431
    buf = tmp;
10179
431
      }
10180
66.1k
      buf[len++] = cur;
10181
66.1k
            if (len > maxLength) {
10182
1
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10183
1
                xmlFree(buf);
10184
1
                return(NULL);
10185
1
            }
10186
66.1k
      NEXT;
10187
66.1k
      cur = CUR;
10188
66.1k
        }
10189
2.38k
  buf[len] = 0;
10190
2.38k
    } else {
10191
17
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10192
17
    }
10193
2.40k
    return(buf);
10194
2.40k
}
10195
10196
/**
10197
 * xmlParseEncodingDecl:
10198
 * @ctxt:  an XML parser context
10199
 *
10200
 * DEPRECATED: Internal function, don't use.
10201
 *
10202
 * parse the XML encoding declaration
10203
 *
10204
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10205
 *
10206
 * this setups the conversion filters.
10207
 *
10208
 * Returns the encoding value or NULL
10209
 */
10210
10211
const xmlChar *
10212
3.42k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10213
3.42k
    xmlChar *encoding = NULL;
10214
10215
3.42k
    SKIP_BLANKS;
10216
3.42k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10217
916
        return(NULL);
10218
10219
2.50k
    SKIP(8);
10220
2.50k
    SKIP_BLANKS;
10221
2.50k
    if (RAW != '=') {
10222
47
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10223
47
        return(NULL);
10224
47
    }
10225
2.45k
    NEXT;
10226
2.45k
    SKIP_BLANKS;
10227
2.45k
    if (RAW == '"') {
10228
2.22k
        NEXT;
10229
2.22k
        encoding = xmlParseEncName(ctxt);
10230
2.22k
        if (RAW != '"') {
10231
144
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10232
144
            xmlFree((xmlChar *) encoding);
10233
144
            return(NULL);
10234
144
        } else
10235
2.08k
            NEXT;
10236
2.22k
    } else if (RAW == '\''){
10237
178
        NEXT;
10238
178
        encoding = xmlParseEncName(ctxt);
10239
178
        if (RAW != '\'') {
10240
20
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10241
20
            xmlFree((xmlChar *) encoding);
10242
20
            return(NULL);
10243
20
        } else
10244
158
            NEXT;
10245
178
    } else {
10246
54
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10247
54
    }
10248
10249
2.29k
    if (encoding == NULL)
10250
54
        return(NULL);
10251
10252
2.24k
    xmlSetDeclaredEncoding(ctxt, encoding);
10253
10254
2.24k
    return(ctxt->encoding);
10255
2.29k
}
10256
10257
/**
10258
 * xmlParseSDDecl:
10259
 * @ctxt:  an XML parser context
10260
 *
10261
 * DEPRECATED: Internal function, don't use.
10262
 *
10263
 * parse the XML standalone declaration
10264
 *
10265
 * [32] SDDecl ::= S 'standalone' Eq
10266
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10267
 *
10268
 * [ VC: Standalone Document Declaration ]
10269
 * TODO The standalone document declaration must have the value "no"
10270
 * if any external markup declarations contain declarations of:
10271
 *  - attributes with default values, if elements to which these
10272
 *    attributes apply appear in the document without specifications
10273
 *    of values for these attributes, or
10274
 *  - entities (other than amp, lt, gt, apos, quot), if references
10275
 *    to those entities appear in the document, or
10276
 *  - attributes with values subject to normalization, where the
10277
 *    attribute appears in the document with a value which will change
10278
 *    as a result of normalization, or
10279
 *  - element types with element content, if white space occurs directly
10280
 *    within any instance of those types.
10281
 *
10282
 * Returns:
10283
 *   1 if standalone="yes"
10284
 *   0 if standalone="no"
10285
 *  -2 if standalone attribute is missing or invalid
10286
 *    (A standalone value of -2 means that the XML declaration was found,
10287
 *     but no value was specified for the standalone attribute).
10288
 */
10289
10290
int
10291
1.81k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10292
1.81k
    int standalone = -2;
10293
10294
1.81k
    SKIP_BLANKS;
10295
1.81k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10296
311
  SKIP(10);
10297
311
        SKIP_BLANKS;
10298
311
  if (RAW != '=') {
10299
21
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10300
21
      return(standalone);
10301
21
        }
10302
290
  NEXT;
10303
290
  SKIP_BLANKS;
10304
290
        if (RAW == '\''){
10305
146
      NEXT;
10306
146
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10307
126
          standalone = 0;
10308
126
                SKIP(2);
10309
126
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10310
20
                 (NXT(2) == 's')) {
10311
7
          standalone = 1;
10312
7
    SKIP(3);
10313
13
            } else {
10314
13
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10315
13
      }
10316
146
      if (RAW != '\'') {
10317
23
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10318
23
      } else
10319
123
          NEXT;
10320
146
  } else if (RAW == '"'){
10321
137
      NEXT;
10322
137
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10323
69
          standalone = 0;
10324
69
    SKIP(2);
10325
69
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10326
68
                 (NXT(2) == 's')) {
10327
55
          standalone = 1;
10328
55
                SKIP(3);
10329
55
            } else {
10330
13
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10331
13
      }
10332
137
      if (RAW != '"') {
10333
19
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10334
19
      } else
10335
118
          NEXT;
10336
137
  } else {
10337
7
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10338
7
        }
10339
290
    }
10340
1.79k
    return(standalone);
10341
1.81k
}
10342
10343
/**
10344
 * xmlParseXMLDecl:
10345
 * @ctxt:  an XML parser context
10346
 *
10347
 * DEPRECATED: Internal function, don't use.
10348
 *
10349
 * parse an XML declaration header
10350
 *
10351
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10352
 */
10353
10354
void
10355
3.44k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10356
3.44k
    xmlChar *version;
10357
10358
    /*
10359
     * This value for standalone indicates that the document has an
10360
     * XML declaration but it does not have a standalone attribute.
10361
     * It will be overwritten later if a standalone attribute is found.
10362
     */
10363
10364
3.44k
    ctxt->standalone = -2;
10365
10366
    /*
10367
     * We know that '<?xml' is here.
10368
     */
10369
3.44k
    SKIP(5);
10370
10371
3.44k
    if (!IS_BLANK_CH(RAW)) {
10372
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10373
0
                 "Blank needed after '<?xml'\n");
10374
0
    }
10375
3.44k
    SKIP_BLANKS;
10376
10377
    /*
10378
     * We must have the VersionInfo here.
10379
     */
10380
3.44k
    version = xmlParseVersionInfo(ctxt);
10381
3.44k
    if (version == NULL) {
10382
1.51k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10383
1.93k
    } else {
10384
1.93k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10385
      /*
10386
       * Changed here for XML-1.0 5th edition
10387
       */
10388
1.28k
      if (ctxt->options & XML_PARSE_OLD10) {
10389
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10390
0
                "Unsupported version '%s'\n",
10391
0
                version);
10392
1.28k
      } else {
10393
1.28k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10394
1.12k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10395
1.12k
                      "Unsupported version '%s'\n",
10396
1.12k
          version, NULL);
10397
1.12k
    } else {
10398
158
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10399
158
              "Unsupported version '%s'\n",
10400
158
              version);
10401
158
    }
10402
1.28k
      }
10403
1.28k
  }
10404
1.93k
  if (ctxt->version != NULL)
10405
0
      xmlFree((void *) ctxt->version);
10406
1.93k
  ctxt->version = version;
10407
1.93k
    }
10408
10409
    /*
10410
     * We may have the encoding declaration
10411
     */
10412
3.44k
    if (!IS_BLANK_CH(RAW)) {
10413
1.51k
        if ((RAW == '?') && (NXT(1) == '>')) {
10414
23
      SKIP(2);
10415
23
      return;
10416
23
  }
10417
1.48k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10418
1.48k
    }
10419
3.42k
    xmlParseEncodingDecl(ctxt);
10420
10421
    /*
10422
     * We may have the standalone status.
10423
     */
10424
3.42k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10425
1.91k
        if ((RAW == '?') && (NXT(1) == '>')) {
10426
1.60k
      SKIP(2);
10427
1.60k
      return;
10428
1.60k
  }
10429
311
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10430
311
    }
10431
10432
    /*
10433
     * We can grow the input buffer freely at that point
10434
     */
10435
1.81k
    GROW;
10436
10437
1.81k
    SKIP_BLANKS;
10438
1.81k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10439
10440
1.81k
    SKIP_BLANKS;
10441
1.81k
    if ((RAW == '?') && (NXT(1) == '>')) {
10442
258
        SKIP(2);
10443
1.55k
    } else if (RAW == '>') {
10444
        /* Deprecated old WD ... */
10445
173
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10446
173
  NEXT;
10447
1.38k
    } else {
10448
1.38k
        int c;
10449
10450
1.38k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10451
1.17M
        while ((PARSER_STOPPED(ctxt) == 0) &&
10452
1.17M
               ((c = CUR) != 0)) {
10453
1.17M
            NEXT;
10454
1.17M
            if (c == '>')
10455
691
                break;
10456
1.17M
        }
10457
1.38k
    }
10458
1.81k
}
10459
10460
/**
10461
 * xmlCtxtGetVersion:
10462
 * ctxt:  parser context
10463
 *
10464
 * Available since 2.14.0.
10465
 *
10466
 * Returns the version from the XML declaration.
10467
 */
10468
const xmlChar *
10469
0
xmlCtxtGetVersion(xmlParserCtxtPtr ctxt) {
10470
0
    if (ctxt == NULL)
10471
0
        return(NULL);
10472
10473
0
    return(ctxt->version);
10474
0
}
10475
10476
/**
10477
 * xmlCtxtGetStandalone:
10478
 * ctxt:  parser context
10479
 *
10480
 * Available since 2.14.0.
10481
 *
10482
 * Returns the value from the standalone document declaration.
10483
 */
10484
int
10485
0
xmlCtxtGetStandalone(xmlParserCtxtPtr ctxt) {
10486
0
    if (ctxt == NULL)
10487
0
        return(0);
10488
10489
0
    return(ctxt->standalone);
10490
0
}
10491
10492
/**
10493
 * xmlParseMisc:
10494
 * @ctxt:  an XML parser context
10495
 *
10496
 * DEPRECATED: Internal function, don't use.
10497
 *
10498
 * parse an XML Misc* optional field.
10499
 *
10500
 * [27] Misc ::= Comment | PI |  S
10501
 */
10502
10503
void
10504
33.7k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10505
42.3k
    while (PARSER_STOPPED(ctxt) == 0) {
10506
41.4k
        SKIP_BLANKS;
10507
41.4k
        GROW;
10508
41.4k
        if ((RAW == '<') && (NXT(1) == '?')) {
10509
5.81k
      xmlParsePI(ctxt);
10510
35.5k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10511
2.83k
      xmlParseComment(ctxt);
10512
32.7k
        } else {
10513
32.7k
            break;
10514
32.7k
        }
10515
41.4k
    }
10516
33.7k
}
10517
10518
static void
10519
16.9k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10520
16.9k
    xmlDocPtr doc;
10521
10522
    /*
10523
     * SAX: end of the document processing.
10524
     */
10525
16.9k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10526
16.9k
        ctxt->sax->endDocument(ctxt->userData);
10527
10528
16.9k
    doc = ctxt->myDoc;
10529
16.9k
    if (doc != NULL) {
10530
15.5k
        if (ctxt->wellFormed) {
10531
36
            doc->properties |= XML_DOC_WELLFORMED;
10532
36
            if (ctxt->valid)
10533
35
                doc->properties |= XML_DOC_DTDVALID;
10534
36
            if (ctxt->nsWellFormed)
10535
32
                doc->properties |= XML_DOC_NSVALID;
10536
36
        }
10537
10538
15.5k
        if (ctxt->options & XML_PARSE_OLD10)
10539
0
            doc->properties |= XML_DOC_OLD10;
10540
10541
        /*
10542
         * Remove locally kept entity definitions if the tree was not built
10543
         */
10544
15.5k
  if (xmlStrEqual(doc->version, SAX_COMPAT_MODE)) {
10545
469
            xmlFreeDoc(doc);
10546
469
            ctxt->myDoc = NULL;
10547
469
        }
10548
15.5k
    }
10549
16.9k
}
10550
10551
/**
10552
 * xmlParseDocument:
10553
 * @ctxt:  an XML parser context
10554
 *
10555
 * Parse an XML document and invoke the SAX handlers. This is useful
10556
 * if you're only interested in custom SAX callbacks. If you want a
10557
 * document tree, use xmlCtxtParseDocument.
10558
 *
10559
 * Returns 0, -1 in case of error.
10560
 */
10561
10562
int
10563
20.2k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10564
20.2k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10565
0
        return(-1);
10566
10567
20.2k
    GROW;
10568
10569
    /*
10570
     * SAX: detecting the level.
10571
     */
10572
20.2k
    xmlCtxtInitializeLate(ctxt);
10573
10574
20.2k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10575
20.2k
        ctxt->sax->setDocumentLocator(ctxt->userData,
10576
20.2k
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10577
20.2k
    }
10578
10579
20.2k
    xmlDetectEncoding(ctxt);
10580
10581
20.2k
    if (CUR == 0) {
10582
3.25k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10583
3.25k
  return(-1);
10584
3.25k
    }
10585
10586
16.9k
    GROW;
10587
16.9k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10588
10589
  /*
10590
   * Note that we will switch encoding on the fly.
10591
   */
10592
3.44k
  xmlParseXMLDecl(ctxt);
10593
3.44k
  SKIP_BLANKS;
10594
13.5k
    } else {
10595
13.5k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10596
13.5k
        if (ctxt->version == NULL) {
10597
0
            xmlErrMemory(ctxt);
10598
0
            return(-1);
10599
0
        }
10600
13.5k
    }
10601
16.9k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10602
15.0k
        ctxt->sax->startDocument(ctxt->userData);
10603
16.9k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10604
16.9k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10605
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10606
0
    }
10607
10608
    /*
10609
     * The Misc part of the Prolog
10610
     */
10611
16.9k
    xmlParseMisc(ctxt);
10612
10613
    /*
10614
     * Then possibly doc type declaration(s) and more Misc
10615
     * (doctypedecl Misc*)?
10616
     */
10617
16.9k
    GROW;
10618
16.9k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10619
10620
6.97k
  ctxt->inSubset = 1;
10621
6.97k
  xmlParseDocTypeDecl(ctxt);
10622
6.97k
  if (RAW == '[') {
10623
6.59k
      xmlParseInternalSubset(ctxt);
10624
6.59k
  }
10625
10626
  /*
10627
   * Create and update the external subset.
10628
   */
10629
6.97k
  ctxt->inSubset = 2;
10630
6.97k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10631
6.97k
      (!ctxt->disableSAX))
10632
1.72k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10633
1.72k
                                ctxt->extSubSystem, ctxt->extSubURI);
10634
6.97k
  ctxt->inSubset = 0;
10635
10636
6.97k
        xmlCleanSpecialAttr(ctxt);
10637
10638
6.97k
  xmlParseMisc(ctxt);
10639
6.97k
    }
10640
10641
    /*
10642
     * Time to start parsing the tree itself
10643
     */
10644
16.9k
    GROW;
10645
16.9k
    if (RAW != '<') {
10646
7.20k
        if (ctxt->wellFormed)
10647
630
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10648
630
                           "Start tag expected, '<' not found\n");
10649
9.76k
    } else {
10650
9.76k
  xmlParseElement(ctxt);
10651
10652
  /*
10653
   * The Misc part at the end
10654
   */
10655
9.76k
  xmlParseMisc(ctxt);
10656
10657
9.76k
        if (ctxt->input->cur < ctxt->input->end) {
10658
1.20k
            if (ctxt->wellFormed)
10659
12
          xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10660
8.56k
        } else if ((ctxt->input->buf != NULL) &&
10661
8.56k
                   (ctxt->input->buf->encoder != NULL) &&
10662
8.56k
                   (ctxt->input->buf->error == 0) &&
10663
8.56k
                   (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
10664
90
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
10665
90
                           "Truncated multi-byte sequence at EOF\n");
10666
90
        }
10667
9.76k
    }
10668
10669
16.9k
    ctxt->instate = XML_PARSER_EOF;
10670
16.9k
    xmlFinishDocument(ctxt);
10671
10672
16.9k
    if (! ctxt->wellFormed) {
10673
16.9k
  ctxt->valid = 0;
10674
16.9k
  return(-1);
10675
16.9k
    }
10676
10677
36
    return(0);
10678
16.9k
}
10679
10680
/**
10681
 * xmlParseExtParsedEnt:
10682
 * @ctxt:  an XML parser context
10683
 *
10684
 * parse a general parsed entity
10685
 * An external general parsed entity is well-formed if it matches the
10686
 * production labeled extParsedEnt.
10687
 *
10688
 * [78] extParsedEnt ::= TextDecl? content
10689
 *
10690
 * Returns 0, -1 in case of error. the parser context is augmented
10691
 *                as a result of the parsing.
10692
 */
10693
10694
int
10695
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10696
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10697
0
        return(-1);
10698
10699
0
    xmlCtxtInitializeLate(ctxt);
10700
10701
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10702
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10703
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10704
0
    }
10705
10706
0
    xmlDetectEncoding(ctxt);
10707
10708
0
    if (CUR == 0) {
10709
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10710
0
    }
10711
10712
    /*
10713
     * Check for the XMLDecl in the Prolog.
10714
     */
10715
0
    GROW;
10716
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10717
10718
  /*
10719
   * Note that we will switch encoding on the fly.
10720
   */
10721
0
  xmlParseXMLDecl(ctxt);
10722
0
  SKIP_BLANKS;
10723
0
    } else {
10724
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10725
0
    }
10726
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10727
0
        ctxt->sax->startDocument(ctxt->userData);
10728
10729
    /*
10730
     * Doing validity checking on chunk doesn't make sense
10731
     */
10732
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10733
0
    ctxt->validate = 0;
10734
0
    ctxt->depth = 0;
10735
10736
0
    xmlParseContentInternal(ctxt);
10737
10738
0
    if (ctxt->input->cur < ctxt->input->end)
10739
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10740
10741
    /*
10742
     * SAX: end of the document processing.
10743
     */
10744
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10745
0
        ctxt->sax->endDocument(ctxt->userData);
10746
10747
0
    if (! ctxt->wellFormed) return(-1);
10748
0
    return(0);
10749
0
}
10750
10751
#ifdef LIBXML_PUSH_ENABLED
10752
/************************************************************************
10753
 *                  *
10754
 *    Progressive parsing interfaces        *
10755
 *                  *
10756
 ************************************************************************/
10757
10758
/**
10759
 * xmlParseLookupChar:
10760
 * @ctxt:  an XML parser context
10761
 * @c:  character
10762
 *
10763
 * Check whether the input buffer contains a character.
10764
 */
10765
static int
10766
0
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10767
0
    const xmlChar *cur;
10768
10769
0
    if (ctxt->checkIndex == 0) {
10770
0
        cur = ctxt->input->cur + 1;
10771
0
    } else {
10772
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10773
0
    }
10774
10775
0
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10776
0
        size_t index = ctxt->input->end - ctxt->input->cur;
10777
10778
0
        if (index > LONG_MAX) {
10779
0
            ctxt->checkIndex = 0;
10780
0
            return(1);
10781
0
        }
10782
0
        ctxt->checkIndex = index;
10783
0
        return(0);
10784
0
    } else {
10785
0
        ctxt->checkIndex = 0;
10786
0
        return(1);
10787
0
    }
10788
0
}
10789
10790
/**
10791
 * xmlParseLookupString:
10792
 * @ctxt:  an XML parser context
10793
 * @startDelta: delta to apply at the start
10794
 * @str:  string
10795
 * @strLen:  length of string
10796
 *
10797
 * Check whether the input buffer contains a string.
10798
 */
10799
static const xmlChar *
10800
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10801
0
                     const char *str, size_t strLen) {
10802
0
    const xmlChar *cur, *term;
10803
10804
0
    if (ctxt->checkIndex == 0) {
10805
0
        cur = ctxt->input->cur + startDelta;
10806
0
    } else {
10807
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10808
0
    }
10809
10810
0
    term = BAD_CAST strstr((const char *) cur, str);
10811
0
    if (term == NULL) {
10812
0
        const xmlChar *end = ctxt->input->end;
10813
0
        size_t index;
10814
10815
        /* Rescan (strLen - 1) characters. */
10816
0
        if ((size_t) (end - cur) < strLen)
10817
0
            end = cur;
10818
0
        else
10819
0
            end -= strLen - 1;
10820
0
        index = end - ctxt->input->cur;
10821
0
        if (index > LONG_MAX) {
10822
0
            ctxt->checkIndex = 0;
10823
0
            return(ctxt->input->end - strLen);
10824
0
        }
10825
0
        ctxt->checkIndex = index;
10826
0
    } else {
10827
0
        ctxt->checkIndex = 0;
10828
0
    }
10829
10830
0
    return(term);
10831
0
}
10832
10833
/**
10834
 * xmlParseLookupCharData:
10835
 * @ctxt:  an XML parser context
10836
 *
10837
 * Check whether the input buffer contains terminated char data.
10838
 */
10839
static int
10840
0
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10841
0
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10842
0
    const xmlChar *end = ctxt->input->end;
10843
0
    size_t index;
10844
10845
0
    while (cur < end) {
10846
0
        if ((*cur == '<') || (*cur == '&')) {
10847
0
            ctxt->checkIndex = 0;
10848
0
            return(1);
10849
0
        }
10850
0
        cur++;
10851
0
    }
10852
10853
0
    index = cur - ctxt->input->cur;
10854
0
    if (index > LONG_MAX) {
10855
0
        ctxt->checkIndex = 0;
10856
0
        return(1);
10857
0
    }
10858
0
    ctxt->checkIndex = index;
10859
0
    return(0);
10860
0
}
10861
10862
/**
10863
 * xmlParseLookupGt:
10864
 * @ctxt:  an XML parser context
10865
 *
10866
 * Check whether there's enough data in the input buffer to finish parsing
10867
 * a start tag. This has to take quotes into account.
10868
 */
10869
static int
10870
0
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10871
0
    const xmlChar *cur;
10872
0
    const xmlChar *end = ctxt->input->end;
10873
0
    int state = ctxt->endCheckState;
10874
0
    size_t index;
10875
10876
0
    if (ctxt->checkIndex == 0)
10877
0
        cur = ctxt->input->cur + 1;
10878
0
    else
10879
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10880
10881
0
    while (cur < end) {
10882
0
        if (state) {
10883
0
            if (*cur == state)
10884
0
                state = 0;
10885
0
        } else if (*cur == '\'' || *cur == '"') {
10886
0
            state = *cur;
10887
0
        } else if (*cur == '>') {
10888
0
            ctxt->checkIndex = 0;
10889
0
            ctxt->endCheckState = 0;
10890
0
            return(1);
10891
0
        }
10892
0
        cur++;
10893
0
    }
10894
10895
0
    index = cur - ctxt->input->cur;
10896
0
    if (index > LONG_MAX) {
10897
0
        ctxt->checkIndex = 0;
10898
0
        ctxt->endCheckState = 0;
10899
0
        return(1);
10900
0
    }
10901
0
    ctxt->checkIndex = index;
10902
0
    ctxt->endCheckState = state;
10903
0
    return(0);
10904
0
}
10905
10906
/**
10907
 * xmlParseLookupInternalSubset:
10908
 * @ctxt:  an XML parser context
10909
 *
10910
 * Check whether there's enough data in the input buffer to finish parsing
10911
 * the internal subset.
10912
 */
10913
static int
10914
0
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10915
    /*
10916
     * Sorry, but progressive parsing of the internal subset is not
10917
     * supported. We first check that the full content of the internal
10918
     * subset is available and parsing is launched only at that point.
10919
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10920
     * not in a ']]>' sequence which are conditional sections.
10921
     */
10922
0
    const xmlChar *cur, *start;
10923
0
    const xmlChar *end = ctxt->input->end;
10924
0
    int state = ctxt->endCheckState;
10925
0
    size_t index;
10926
10927
0
    if (ctxt->checkIndex == 0) {
10928
0
        cur = ctxt->input->cur + 1;
10929
0
    } else {
10930
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10931
0
    }
10932
0
    start = cur;
10933
10934
0
    while (cur < end) {
10935
0
        if (state == '-') {
10936
0
            if ((*cur == '-') &&
10937
0
                (cur[1] == '-') &&
10938
0
                (cur[2] == '>')) {
10939
0
                state = 0;
10940
0
                cur += 3;
10941
0
                start = cur;
10942
0
                continue;
10943
0
            }
10944
0
        }
10945
0
        else if (state == ']') {
10946
0
            if (*cur == '>') {
10947
0
                ctxt->checkIndex = 0;
10948
0
                ctxt->endCheckState = 0;
10949
0
                return(1);
10950
0
            }
10951
0
            if (IS_BLANK_CH(*cur)) {
10952
0
                state = ' ';
10953
0
            } else if (*cur != ']') {
10954
0
                state = 0;
10955
0
                start = cur;
10956
0
                continue;
10957
0
            }
10958
0
        }
10959
0
        else if (state == ' ') {
10960
0
            if (*cur == '>') {
10961
0
                ctxt->checkIndex = 0;
10962
0
                ctxt->endCheckState = 0;
10963
0
                return(1);
10964
0
            }
10965
0
            if (!IS_BLANK_CH(*cur)) {
10966
0
                state = 0;
10967
0
                start = cur;
10968
0
                continue;
10969
0
            }
10970
0
        }
10971
0
        else if (state != 0) {
10972
0
            if (*cur == state) {
10973
0
                state = 0;
10974
0
                start = cur + 1;
10975
0
            }
10976
0
        }
10977
0
        else if (*cur == '<') {
10978
0
            if ((cur[1] == '!') &&
10979
0
                (cur[2] == '-') &&
10980
0
                (cur[3] == '-')) {
10981
0
                state = '-';
10982
0
                cur += 4;
10983
                /* Don't treat <!--> as comment */
10984
0
                start = cur;
10985
0
                continue;
10986
0
            }
10987
0
        }
10988
0
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10989
0
            state = *cur;
10990
0
        }
10991
10992
0
        cur++;
10993
0
    }
10994
10995
    /*
10996
     * Rescan the three last characters to detect "<!--" and "-->"
10997
     * split across chunks.
10998
     */
10999
0
    if ((state == 0) || (state == '-')) {
11000
0
        if (cur - start < 3)
11001
0
            cur = start;
11002
0
        else
11003
0
            cur -= 3;
11004
0
    }
11005
0
    index = cur - ctxt->input->cur;
11006
0
    if (index > LONG_MAX) {
11007
0
        ctxt->checkIndex = 0;
11008
0
        ctxt->endCheckState = 0;
11009
0
        return(1);
11010
0
    }
11011
0
    ctxt->checkIndex = index;
11012
0
    ctxt->endCheckState = state;
11013
0
    return(0);
11014
0
}
11015
11016
/**
 * xmlParseTryOrFinish:
 * @ctxt:  an XML parser context
 * @terminate:  last chunk indicator
 *
 * Try to progress on parsing
 *
 * Runs the push parser state machine on the data currently buffered in
 * ctxt->input. Each iteration dispatches on ctxt->instate: a state either
 * parses a complete construct and selects the next state, or jumps to
 * "done" when the buffer doesn't hold enough data yet. When @terminate is
 * non-zero, the "enough data" checks are skipped and whatever is buffered
 * gets parsed.
 *
 * Returns the local counter, which this function only increases (by 5)
 * when an XML declaration is parsed; it is zero otherwise.
 */
static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
    int ret = 0;
    size_t avail;
    xmlChar cur, next;

    if (ctxt->input == NULL)
        return(0);

    /* Discard consumed data once more than 4096 bytes lie behind us. */
    if ((ctxt->input != NULL) &&
        (ctxt->input->cur - ctxt->input->base > 4096)) {
        xmlParserShrink(ctxt);
    }

    /* The loop ends as soon as the parser has been stopped. */
    while (ctxt->disableSAX == 0) {
        avail = ctxt->input->end - ctxt->input->cur;
        if (avail < 1)
            goto done;
        switch (ctxt->instate) {
            case XML_PARSER_EOF:
                /*
                 * Document parsing is done !
                 */
                goto done;
            case XML_PARSER_START:
                /*
                 * Very first chars read from the document flow.
                 */
                if ((!terminate) && (avail < 4))
                    goto done;

                /*
                 * We need more bytes to detect EBCDIC code pages.
                 * See xmlDetectEBCDIC.
                 */
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
                    (!terminate) && (avail < 200))
                    goto done;

                xmlDetectEncoding(ctxt);
                ctxt->instate = XML_PARSER_XML_DECL;
                break;

            case XML_PARSER_XML_DECL:
                if ((!terminate) && (avail < 2))
                    goto done;
                cur = ctxt->input->cur[0];
                next = ctxt->input->cur[1];
                if ((cur == '<') && (next == '?')) {
                    /* PI or XML decl */
                    if ((!terminate) &&
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
                        goto done;
                    if ((ctxt->input->cur[2] == 'x') &&
                        (ctxt->input->cur[3] == 'm') &&
                        (ctxt->input->cur[4] == 'l') &&
                        (IS_BLANK_CH(ctxt->input->cur[5]))) {
                        ret += 5;
                        xmlParseXMLDecl(ctxt);
                    } else {
                        /* A PI, not a declaration: use the default version. */
                        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
                        if (ctxt->version == NULL) {
                            xmlErrMemory(ctxt);
                            break;
                        }
                    }
                } else {
                    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
                    if (ctxt->version == NULL) {
                        xmlErrMemory(ctxt);
                        break;
                    }
                }
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
                    ctxt->sax->setDocumentLocator(ctxt->userData,
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
                }
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
                    (!ctxt->disableSAX))
                    ctxt->sax->startDocument(ctxt->userData);
                ctxt->instate = XML_PARSER_MISC;
                break;
            case XML_PARSER_START_TAG: {
                const xmlChar *name;
                const xmlChar *prefix = NULL;
                const xmlChar *URI = NULL;
                int line = ctxt->input->line;
                int nbNs = 0;

                if ((!terminate) && (avail < 2))
                    goto done;
                cur = ctxt->input->cur[0];
                if (cur != '<') {
                    xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
                                   "Start tag expected, '<' not found");
                    ctxt->instate = XML_PARSER_EOF;
                    xmlFinishDocument(ctxt);
                    goto done;
                }
                /* Wait until the whole start tag is buffered. */
                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
                    goto done;
                /* Push the space-handling value for the new element. */
                if (ctxt->spaceNr == 0)
                    spacePush(ctxt, -1);
                else if (*ctxt->space == -2)
                    spacePush(ctxt, -1);
                else
                    spacePush(ctxt, *ctxt->space);
#ifdef LIBXML_SAX1_ENABLED
                if (ctxt->sax2)
#endif /* LIBXML_SAX1_ENABLED */
                    name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
#ifdef LIBXML_SAX1_ENABLED
                else
                    name = xmlParseStartTag(ctxt);
#endif /* LIBXML_SAX1_ENABLED */
                if (name == NULL) {
                    spacePop(ctxt);
                    ctxt->instate = XML_PARSER_EOF;
                    xmlFinishDocument(ctxt);
                    goto done;
                }
#ifdef LIBXML_VALID_ENABLED
                /*
                 * [ VC: Root Element Type ]
                 * The Name in the document type declaration must match
                 * the element type of the root element.
                 */
                if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
                    ctxt->node && (ctxt->node == ctxt->myDoc->children))
                    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
#endif /* LIBXML_VALID_ENABLED */

                /*
                 * Check for an Empty Element.
                 */
                if ((RAW == '/') && (NXT(1) == '>')) {
                    SKIP(2);

                    if (ctxt->sax2) {
                        if ((ctxt->sax != NULL) &&
                            (ctxt->sax->endElementNs != NULL) &&
                            (!ctxt->disableSAX))
                            ctxt->sax->endElementNs(ctxt->userData, name,
                                                    prefix, URI);
                        if (nbNs > 0)
                            xmlParserNsPop(ctxt, nbNs);
#ifdef LIBXML_SAX1_ENABLED
                    } else {
                        if ((ctxt->sax != NULL) &&
                            (ctxt->sax->endElement != NULL) &&
                            (!ctxt->disableSAX))
                            ctxt->sax->endElement(ctxt->userData, name);
#endif /* LIBXML_SAX1_ENABLED */
                    }
                    spacePop(ctxt);
                } else if (RAW == '>') {
                    NEXT;
                    /* The element stays open: remember it on the name stack. */
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
                } else {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
                         "Couldn't find end of Start Tag %s\n",
                         name);
                    nodePop(ctxt);
                    spacePop(ctxt);
                    if (nbNs > 0)
                        xmlParserNsPop(ctxt, nbNs);
                }

                /* With no element left open, only the epilog can follow. */
                if (ctxt->nameNr == 0)
                    ctxt->instate = XML_PARSER_EPILOG;
                else
                    ctxt->instate = XML_PARSER_CONTENT;
                break;
            }
            case XML_PARSER_CONTENT: {
                cur = ctxt->input->cur[0];

                if (cur == '<') {
                    if ((!terminate) && (avail < 2))
                        goto done;
                    next = ctxt->input->cur[1];

                    if (next == '/') {
                        ctxt->instate = XML_PARSER_END_TAG;
                        break;
                    } else if (next == '?') {
                        if ((!terminate) &&
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
                            goto done;
                        xmlParsePI(ctxt);
                        ctxt->instate = XML_PARSER_CONTENT;
                        break;
                    } else if (next == '!') {
                        if ((!terminate) && (avail < 3))
                            goto done;
                        next = ctxt->input->cur[2];

                        if (next == '-') {
                            if ((!terminate) && (avail < 4))
                                goto done;
                            if (ctxt->input->cur[3] == '-') {
                                if ((!terminate) &&
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
                                    goto done;
                                xmlParseComment(ctxt);
                                ctxt->instate = XML_PARSER_CONTENT;
                                break;
                            }
                        } else if (next == '[') {
                            if ((!terminate) && (avail < 9))
                                goto done;
                            if ((ctxt->input->cur[2] == '[') &&
                                (ctxt->input->cur[3] == 'C') &&
                                (ctxt->input->cur[4] == 'D') &&
                                (ctxt->input->cur[5] == 'A') &&
                                (ctxt->input->cur[6] == 'T') &&
                                (ctxt->input->cur[7] == 'A') &&
                                (ctxt->input->cur[8] == '[')) {
                                if ((!terminate) &&
                                    (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
                                    goto done;
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
                                xmlParseCDSect(ctxt);
                                ctxt->instate = XML_PARSER_CONTENT;
                                break;
                            }
                        }
                    }
                } else if (cur == '&') {
                    if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
                        goto done;
                    xmlParseReference(ctxt);
                    break;
                } else {
                    /* TODO Avoid the extra copy, handle directly !!! */
                    /*
                     * Goal of the following test is:
                     *  - minimize calls to the SAX 'character' callback
                     *    when they are mergeable
                     *  - handle a problem for isBlank when we only parse
                     *    a sequence of blank chars and the next one is
                     *    not available to check against '<' presence.
                     *  - tries to homogenize the differences in SAX
                     *    callbacks between the push and pull versions
                     *    of the parser.
                     */
                    if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
                        if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
                            goto done;
                    }
                    ctxt->checkIndex = 0;
                    xmlParseCharDataInternal(ctxt, !terminate);
                    break;
                }

                /*
                 * Reached for any '<' that none of the branches above
                 * consumed: hand it to the start tag state.
                 */
                ctxt->instate = XML_PARSER_START_TAG;
                break;
            }
            case XML_PARSER_END_TAG:
                if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
                    goto done;
                if (ctxt->sax2) {
                    xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
                    nameNsPop(ctxt);
                }
#ifdef LIBXML_SAX1_ENABLED
                else
                    xmlParseEndTag1(ctxt, 0);
#endif /* LIBXML_SAX1_ENABLED */
                if (ctxt->nameNr == 0) {
                    ctxt->instate = XML_PARSER_EPILOG;
                } else {
                    ctxt->instate = XML_PARSER_CONTENT;
                }
                break;
            case XML_PARSER_MISC:
            case XML_PARSER_PROLOG:
            case XML_PARSER_EPILOG:
                SKIP_BLANKS;
                /* Skipping blanks moved the cursor: recompute what is left. */
                avail = ctxt->input->end - ctxt->input->cur;
                if (avail < 1)
                    goto done;
                if (ctxt->input->cur[0] == '<') {
                    if ((!terminate) && (avail < 2))
                        goto done;
                    next = ctxt->input->cur[1];
                    if (next == '?') {
                        if ((!terminate) &&
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
                            goto done;
                        xmlParsePI(ctxt);
                        break;
                    } else if (next == '!') {
                        if ((!terminate) && (avail < 3))
                            goto done;

                        if (ctxt->input->cur[2] == '-') {
                            if ((!terminate) && (avail < 4))
                                goto done;
                            if (ctxt->input->cur[3] == '-') {
                                if ((!terminate) &&
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
                                    goto done;
                                xmlParseComment(ctxt);
                                break;
                            }
                        } else if (ctxt->instate == XML_PARSER_MISC) {
                            /* A DOCTYPE is only looked for in the MISC state. */
                            if ((!terminate) && (avail < 9))
                                goto done;
                            if ((ctxt->input->cur[2] == 'D') &&
                                (ctxt->input->cur[3] == 'O') &&
                                (ctxt->input->cur[4] == 'C') &&
                                (ctxt->input->cur[5] == 'T') &&
                                (ctxt->input->cur[6] == 'Y') &&
                                (ctxt->input->cur[7] == 'P') &&
                                (ctxt->input->cur[8] == 'E')) {
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
                                    goto done;
                                ctxt->inSubset = 1;
                                xmlParseDocTypeDecl(ctxt);
                                if (RAW == '[') {
                                    /* An internal subset follows. */
                                    ctxt->instate = XML_PARSER_DTD;
                                } else {
                                    /*
                                     * Create and update the external subset.
                                     */
                                    ctxt->inSubset = 2;
                                    if ((ctxt->sax != NULL) &&
                                        (!ctxt->disableSAX) &&
                                        (ctxt->sax->externalSubset != NULL))
                                        ctxt->sax->externalSubset(
                                                ctxt->userData,
                                                ctxt->intSubName,
                                                ctxt->extSubSystem,
                                                ctxt->extSubURI);
                                    ctxt->inSubset = 0;
                                    xmlCleanSpecialAttr(ctxt);
                                    ctxt->instate = XML_PARSER_PROLOG;
                                }
                                break;
                            }
                        }
                    }
                }

                /*
                 * Nothing above consumed the input: in the epilog this is
                 * extra content after the root element, otherwise it must
                 * be the start of the root element.
                 */
                if (ctxt->instate == XML_PARSER_EPILOG) {
                    if (ctxt->errNo == XML_ERR_OK)
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
                    ctxt->instate = XML_PARSER_EOF;
                    xmlFinishDocument(ctxt);
                } else {
                    ctxt->instate = XML_PARSER_START_TAG;
                }
                break;
            case XML_PARSER_DTD: {
                /* The internal subset is only parsed once fully buffered. */
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
                    goto done;
                xmlParseInternalSubset(ctxt);
                ctxt->inSubset = 2;
                if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                    (ctxt->sax->externalSubset != NULL))
                    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
                            ctxt->extSubSystem, ctxt->extSubURI);
                ctxt->inSubset = 0;
                xmlCleanSpecialAttr(ctxt);
                ctxt->instate = XML_PARSER_PROLOG;
                break;
            }
            default:
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
                        "PP: internal error\n");
                ctxt->instate = XML_PARSER_EOF;
                break;
        }
    }
done:
    return(ret);
}
11403
11404
/**
11405
 * xmlParseChunk:
11406
 * @ctxt:  an XML parser context
11407
 * @chunk:  chunk of memory
11408
 * @size:  size of chunk in bytes
11409
 * @terminate:  last chunk indicator
11410
 *
11411
 * Parse a chunk of memory in push parser mode.
11412
 *
11413
 * Assumes that the parser context was initialized with
11414
 * xmlCreatePushParserCtxt.
11415
 *
11416
 * The last chunk, which will often be empty, must be marked with
11417
 * the @terminate flag. With the default SAX callbacks, the resulting
11418
 * document will be available in ctxt->myDoc. This pointer will not
11419
 * be freed by the library.
11420
 *
11421
 * If the document isn't well-formed, ctxt->myDoc is set to NULL.
11422
 * The push parser doesn't support recovery mode.
11423
 *
11424
 * Returns an xmlParserErrors code (0 on success).
11425
 */
11426
int
11427
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11428
0
              int terminate) {
11429
0
    size_t curBase;
11430
0
    size_t maxLength;
11431
0
    size_t pos;
11432
0
    int end_in_lf = 0;
11433
0
    int res;
11434
11435
0
    if ((ctxt == NULL) || (size < 0))
11436
0
        return(XML_ERR_ARGUMENT);
11437
0
    if ((chunk == NULL) && (size > 0))
11438
0
        return(XML_ERR_ARGUMENT);
11439
0
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11440
0
        return(XML_ERR_ARGUMENT);
11441
0
    if (ctxt->disableSAX != 0)
11442
0
        return(ctxt->errNo);
11443
11444
0
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11445
0
    if (ctxt->instate == XML_PARSER_START)
11446
0
        xmlCtxtInitializeLate(ctxt);
11447
0
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11448
0
        (chunk[size - 1] == '\r')) {
11449
0
  end_in_lf = 1;
11450
0
  size--;
11451
0
    }
11452
11453
    /*
11454
     * Also push an empty chunk to make sure that the raw buffer
11455
     * will be flushed if there is an encoder.
11456
     */
11457
0
    pos = ctxt->input->cur - ctxt->input->base;
11458
0
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11459
0
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11460
0
    if (res < 0) {
11461
0
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11462
0
        xmlHaltParser(ctxt);
11463
0
        return(ctxt->errNo);
11464
0
    }
11465
11466
0
    xmlParseTryOrFinish(ctxt, terminate);
11467
11468
0
    curBase = ctxt->input->cur - ctxt->input->base;
11469
0
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11470
0
                XML_MAX_HUGE_LENGTH :
11471
0
                XML_MAX_LOOKUP_LIMIT;
11472
0
    if (curBase > maxLength) {
11473
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11474
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11475
0
        xmlHaltParser(ctxt);
11476
0
    }
11477
11478
0
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11479
0
        return(ctxt->errNo);
11480
11481
0
    if (end_in_lf == 1) {
11482
0
  pos = ctxt->input->cur - ctxt->input->base;
11483
0
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11484
0
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11485
0
        if (res < 0) {
11486
0
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11487
0
            xmlHaltParser(ctxt);
11488
0
            return(ctxt->errNo);
11489
0
        }
11490
0
    }
11491
0
    if (terminate) {
11492
  /*
11493
   * Check for termination
11494
   */
11495
0
        if ((ctxt->instate != XML_PARSER_EOF) &&
11496
0
            (ctxt->instate != XML_PARSER_EPILOG)) {
11497
0
            if (ctxt->nameNr > 0) {
11498
0
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11499
0
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11500
0
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11501
0
                        "Premature end of data in tag %s line %d\n",
11502
0
                        name, line, NULL);
11503
0
            } else if (ctxt->instate == XML_PARSER_START) {
11504
0
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11505
0
            } else {
11506
0
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11507
0
                               "Start tag expected, '<' not found\n");
11508
0
            }
11509
0
        } else if ((ctxt->input->buf->encoder != NULL) &&
11510
0
                   (ctxt->input->buf->error == 0) &&
11511
0
                   (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
11512
0
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
11513
0
                           "Truncated multi-byte sequence at EOF\n");
11514
0
        }
11515
0
  if (ctxt->instate != XML_PARSER_EOF) {
11516
0
            ctxt->instate = XML_PARSER_EOF;
11517
0
            xmlFinishDocument(ctxt);
11518
0
  }
11519
0
    }
11520
0
    if (ctxt->wellFormed == 0)
11521
0
  return((xmlParserErrors) ctxt->errNo);
11522
0
    else
11523
0
        return(0);
11524
0
}
11525
11526
/************************************************************************
11527
 *                  *
11528
 *    I/O front end functions to the parser     *
11529
 *                  *
11530
 ************************************************************************/
11531
11532
/**
11533
 * xmlCreatePushParserCtxt:
11534
 * @sax:  a SAX handler (optional)
11535
 * @user_data:  user data for SAX callbacks (optional)
11536
 * @chunk:  initial chunk (optional, deprecated)
11537
 * @size:  size of initial chunk in bytes
11538
 * @filename:  file name or URI (optional)
11539
 *
11540
 * Create a parser context for using the XML parser in push mode.
11541
 * See xmlParseChunk.
11542
 *
11543
 * Passing an initial chunk is useless and deprecated.
11544
 *
11545
 * @filename is used as base URI to fetch external entities and for
11546
 * error reports.
11547
 *
11548
 * Returns the new parser context or NULL if a memory allocation
11549
 * failed.
11550
 */
11551
11552
xmlParserCtxtPtr
11553
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11554
0
                        const char *chunk, int size, const char *filename) {
11555
0
    xmlParserCtxtPtr ctxt;
11556
0
    xmlParserInputPtr input;
11557
11558
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11559
0
    if (ctxt == NULL)
11560
0
  return(NULL);
11561
11562
0
    ctxt->options &= ~XML_PARSE_NODICT;
11563
0
    ctxt->dictNames = 1;
11564
11565
0
    input = xmlNewPushInput(filename, chunk, size);
11566
0
    if (input == NULL) {
11567
0
  xmlFreeParserCtxt(ctxt);
11568
0
  return(NULL);
11569
0
    }
11570
0
    if (inputPush(ctxt, input) < 0) {
11571
0
        xmlFreeInputStream(input);
11572
0
        xmlFreeParserCtxt(ctxt);
11573
0
        return(NULL);
11574
0
    }
11575
11576
0
    return(ctxt);
11577
0
}
11578
#endif /* LIBXML_PUSH_ENABLED */
11579
11580
/**
11581
 * xmlStopParser:
11582
 * @ctxt:  an XML parser context
11583
 *
11584
 * Blocks further parser processing
11585
 */
11586
void
11587
0
xmlStopParser(xmlParserCtxtPtr ctxt) {
11588
0
    if (ctxt == NULL)
11589
0
        return;
11590
0
    xmlHaltParser(ctxt);
11591
0
    if (ctxt->errNo != XML_ERR_NO_MEMORY)
11592
0
        ctxt->errNo = XML_ERR_USER_STOP;
11593
0
}
11594
11595
/**
11596
 * xmlCreateIOParserCtxt:
11597
 * @sax:  a SAX handler (optional)
11598
 * @user_data:  user data for SAX callbacks (optional)
11599
 * @ioread:  an I/O read function
11600
 * @ioclose:  an I/O close function (optional)
11601
 * @ioctx:  an I/O handler
11602
 * @enc:  the charset encoding if known (deprecated)
11603
 *
11604
 * Create a parser context for using the XML parser with an existing
11605
 * I/O stream
11606
 *
11607
 * Returns the new parser context or NULL
11608
 */
11609
xmlParserCtxtPtr
11610
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11611
                      xmlInputReadCallback ioread,
11612
                      xmlInputCloseCallback ioclose,
11613
0
                      void *ioctx, xmlCharEncoding enc) {
11614
0
    xmlParserCtxtPtr ctxt;
11615
0
    xmlParserInputPtr input;
11616
0
    const char *encoding;
11617
11618
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11619
0
    if (ctxt == NULL)
11620
0
  return(NULL);
11621
11622
0
    encoding = xmlGetCharEncodingName(enc);
11623
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11624
0
                                  encoding, 0);
11625
0
    if (input == NULL) {
11626
0
  xmlFreeParserCtxt(ctxt);
11627
0
        return (NULL);
11628
0
    }
11629
0
    if (inputPush(ctxt, input) < 0) {
11630
0
        xmlFreeInputStream(input);
11631
0
        xmlFreeParserCtxt(ctxt);
11632
0
        return(NULL);
11633
0
    }
11634
11635
0
    return(ctxt);
11636
0
}
11637
11638
#ifdef LIBXML_VALID_ENABLED
11639
/************************************************************************
11640
 *                  *
11641
 *    Front ends when parsing a DTD       *
11642
 *                  *
11643
 ************************************************************************/
11644
11645
/**
11646
 * xmlIOParseDTD:
11647
 * @sax:  the SAX handler block or NULL
11648
 * @input:  an Input Buffer
11649
 * @enc:  the charset encoding if known
11650
 *
11651
 * Load and parse a DTD
11652
 *
11653
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11654
 * @input will be freed by the function in any case.
11655
 */
11656
11657
xmlDtdPtr
11658
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11659
0
        xmlCharEncoding enc) {
11660
0
    xmlDtdPtr ret = NULL;
11661
0
    xmlParserCtxtPtr ctxt;
11662
0
    xmlParserInputPtr pinput = NULL;
11663
11664
0
    if (input == NULL)
11665
0
  return(NULL);
11666
11667
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11668
0
    if (ctxt == NULL) {
11669
0
        xmlFreeParserInputBuffer(input);
11670
0
  return(NULL);
11671
0
    }
11672
11673
    /*
11674
     * generate a parser input from the I/O handler
11675
     */
11676
11677
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11678
0
    if (pinput == NULL) {
11679
0
        xmlFreeParserInputBuffer(input);
11680
0
  xmlFreeParserCtxt(ctxt);
11681
0
  return(NULL);
11682
0
    }
11683
11684
    /*
11685
     * plug some encoding conversion routines here.
11686
     */
11687
0
    if (xmlPushInput(ctxt, pinput) < 0) {
11688
0
        xmlFreeInputStream(pinput);
11689
0
  xmlFreeParserCtxt(ctxt);
11690
0
  return(NULL);
11691
0
    }
11692
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11693
0
        xmlSwitchEncoding(ctxt, enc);
11694
0
    }
11695
11696
    /*
11697
     * let's parse that entity knowing it's an external subset.
11698
     */
11699
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11700
0
    if (ctxt->myDoc == NULL) {
11701
0
  xmlErrMemory(ctxt);
11702
0
  return(NULL);
11703
0
    }
11704
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11705
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11706
0
                                 BAD_CAST "none", BAD_CAST "none");
11707
11708
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11709
11710
0
    if (ctxt->myDoc != NULL) {
11711
0
  if (ctxt->wellFormed) {
11712
0
      ret = ctxt->myDoc->extSubset;
11713
0
      ctxt->myDoc->extSubset = NULL;
11714
0
      if (ret != NULL) {
11715
0
    xmlNodePtr tmp;
11716
11717
0
    ret->doc = NULL;
11718
0
    tmp = ret->children;
11719
0
    while (tmp != NULL) {
11720
0
        tmp->doc = NULL;
11721
0
        tmp = tmp->next;
11722
0
    }
11723
0
      }
11724
0
  } else {
11725
0
      ret = NULL;
11726
0
  }
11727
0
        xmlFreeDoc(ctxt->myDoc);
11728
0
        ctxt->myDoc = NULL;
11729
0
    }
11730
0
    xmlFreeParserCtxt(ctxt);
11731
11732
0
    return(ret);
11733
0
}
11734
11735
/**
11736
 * xmlSAXParseDTD:
11737
 * @sax:  the SAX handler block
11738
 * @ExternalID:  a NAME* containing the External ID of the DTD
11739
 * @SystemID:  a NAME* containing the URL to the DTD
11740
 *
11741
 * DEPRECATED: Don't use.
11742
 *
11743
 * Load and parse an external subset.
11744
 *
11745
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11746
 */
11747
11748
xmlDtdPtr
11749
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11750
0
                          const xmlChar *SystemID) {
11751
0
    xmlDtdPtr ret = NULL;
11752
0
    xmlParserCtxtPtr ctxt;
11753
0
    xmlParserInputPtr input = NULL;
11754
0
    xmlChar* systemIdCanonic;
11755
11756
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11757
11758
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11759
0
    if (ctxt == NULL) {
11760
0
  return(NULL);
11761
0
    }
11762
11763
    /*
11764
     * Canonicalise the system ID
11765
     */
11766
0
    systemIdCanonic = xmlCanonicPath(SystemID);
11767
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11768
0
  xmlFreeParserCtxt(ctxt);
11769
0
  return(NULL);
11770
0
    }
11771
11772
    /*
11773
     * Ask the Entity resolver to load the damn thing
11774
     */
11775
11776
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11777
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11778
0
                                   systemIdCanonic);
11779
0
    if (input == NULL) {
11780
0
  xmlFreeParserCtxt(ctxt);
11781
0
  if (systemIdCanonic != NULL)
11782
0
      xmlFree(systemIdCanonic);
11783
0
  return(NULL);
11784
0
    }
11785
11786
    /*
11787
     * plug some encoding conversion routines here.
11788
     */
11789
0
    if (xmlPushInput(ctxt, input) < 0) {
11790
0
        xmlFreeInputStream(input);
11791
0
  xmlFreeParserCtxt(ctxt);
11792
0
  if (systemIdCanonic != NULL)
11793
0
      xmlFree(systemIdCanonic);
11794
0
  return(NULL);
11795
0
    }
11796
11797
0
    xmlDetectEncoding(ctxt);
11798
11799
0
    if (input->filename == NULL)
11800
0
  input->filename = (char *) systemIdCanonic;
11801
0
    else
11802
0
  xmlFree(systemIdCanonic);
11803
11804
    /*
11805
     * let's parse that entity knowing it's an external subset.
11806
     */
11807
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11808
0
    if (ctxt->myDoc == NULL) {
11809
0
  xmlErrMemory(ctxt);
11810
0
  xmlFreeParserCtxt(ctxt);
11811
0
  return(NULL);
11812
0
    }
11813
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11814
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11815
0
                                 ExternalID, SystemID);
11816
0
    if (ctxt->myDoc->extSubset == NULL) {
11817
0
        xmlFreeDoc(ctxt->myDoc);
11818
0
        xmlFreeParserCtxt(ctxt);
11819
0
        return(NULL);
11820
0
    }
11821
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11822
11823
0
    if (ctxt->myDoc != NULL) {
11824
0
  if (ctxt->wellFormed) {
11825
0
      ret = ctxt->myDoc->extSubset;
11826
0
      ctxt->myDoc->extSubset = NULL;
11827
0
      if (ret != NULL) {
11828
0
    xmlNodePtr tmp;
11829
11830
0
    ret->doc = NULL;
11831
0
    tmp = ret->children;
11832
0
    while (tmp != NULL) {
11833
0
        tmp->doc = NULL;
11834
0
        tmp = tmp->next;
11835
0
    }
11836
0
      }
11837
0
  } else {
11838
0
      ret = NULL;
11839
0
  }
11840
0
        xmlFreeDoc(ctxt->myDoc);
11841
0
        ctxt->myDoc = NULL;
11842
0
    }
11843
0
    xmlFreeParserCtxt(ctxt);
11844
11845
0
    return(ret);
11846
0
}
11847
11848
11849
/**
11850
 * xmlParseDTD:
11851
 * @ExternalID:  a NAME* containing the External ID of the DTD
11852
 * @SystemID:  a NAME* containing the URL to the DTD
11853
 *
11854
 * Load and parse an external subset.
11855
 *
11856
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11857
 */
11858
11859
xmlDtdPtr
11860
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11861
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11862
0
}
11863
#endif /* LIBXML_VALID_ENABLED */
11864
11865
/************************************************************************
11866
 *                  *
11867
 *    Front ends when parsing an Entity     *
11868
 *                  *
11869
 ************************************************************************/
11870
11871
static xmlNodePtr
11872
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11873
863
                            int hasTextDecl, int buildTree) {
11874
863
    xmlNodePtr root = NULL;
11875
863
    xmlNodePtr list = NULL;
11876
863
    xmlChar *rootName = BAD_CAST "#root";
11877
863
    int result;
11878
11879
863
    if (buildTree) {
11880
863
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11881
863
        if (root == NULL) {
11882
0
            xmlErrMemory(ctxt);
11883
0
            goto error;
11884
0
        }
11885
863
    }
11886
11887
863
    if (xmlPushInput(ctxt, input) < 0)
11888
2
        goto error;
11889
11890
861
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11891
861
    spacePush(ctxt, -1);
11892
11893
861
    if (buildTree)
11894
861
        nodePush(ctxt, root);
11895
11896
861
    if (hasTextDecl) {
11897
0
        xmlDetectEncoding(ctxt);
11898
11899
        /*
11900
         * Parse a possible text declaration first
11901
         */
11902
0
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11903
0
            (IS_BLANK_CH(NXT(5)))) {
11904
0
            xmlParseTextDecl(ctxt);
11905
            /*
11906
             * An XML-1.0 document can't reference an entity not XML-1.0
11907
             */
11908
0
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11909
0
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11910
0
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11911
0
                               "Version mismatch between document and "
11912
0
                               "entity\n");
11913
0
            }
11914
0
        }
11915
0
    }
11916
11917
861
    xmlParseContentInternal(ctxt);
11918
11919
861
    if (ctxt->input->cur < ctxt->input->end)
11920
101
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11921
11922
861
    if ((ctxt->wellFormed) ||
11923
861
        ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
11924
456
        if (root != NULL) {
11925
456
            xmlNodePtr cur;
11926
11927
            /*
11928
             * Unlink newly created node list.
11929
             */
11930
456
            list = root->children;
11931
456
            root->children = NULL;
11932
456
            root->last = NULL;
11933
1.41k
            for (cur = list; cur != NULL; cur = cur->next)
11934
956
                cur->parent = NULL;
11935
456
        }
11936
456
    }
11937
11938
    /*
11939
     * Read the rest of the stream in case of errors. We want
11940
     * to account for the whole entity size.
11941
     */
11942
861
    do {
11943
861
        ctxt->input->cur = ctxt->input->end;
11944
861
        xmlParserShrink(ctxt);
11945
861
        result = xmlParserGrow(ctxt);
11946
861
    } while (result > 0);
11947
11948
861
    if (buildTree)
11949
861
        nodePop(ctxt);
11950
11951
861
    namePop(ctxt);
11952
861
    spacePop(ctxt);
11953
11954
    /* xmlPopInput would free the stream */
11955
861
    inputPop(ctxt);
11956
11957
863
error:
11958
863
    xmlFreeNode(root);
11959
11960
863
    return(list);
11961
861
}
11962
11963
static void
11964
884
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
11965
884
    xmlParserInputPtr input;
11966
884
    xmlNodePtr list;
11967
884
    unsigned long consumed;
11968
884
    int isExternal;
11969
884
    int buildTree;
11970
884
    int oldMinNsIndex;
11971
884
    int oldNodelen, oldNodemem;
11972
11973
884
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
11974
884
    buildTree = (ctxt->node != NULL);
11975
11976
    /*
11977
     * Recursion check
11978
     */
11979
884
    if (ent->flags & XML_ENT_EXPANDING) {
11980
21
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
11981
21
        xmlHaltParser(ctxt);
11982
21
        goto error;
11983
21
    }
11984
11985
    /*
11986
     * Load entity
11987
     */
11988
863
    input = xmlNewEntityInputStream(ctxt, ent);
11989
863
    if (input == NULL)
11990
0
        goto error;
11991
11992
    /*
11993
     * When building a tree, we need to limit the scope of namespace
11994
     * declarations, so that entities don't reference xmlNs structs
11995
     * from the parent of a reference.
11996
     */
11997
863
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
11998
863
    if (buildTree)
11999
863
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
12000
12001
863
    oldNodelen = ctxt->nodelen;
12002
863
    oldNodemem = ctxt->nodemem;
12003
863
    ctxt->nodelen = 0;
12004
863
    ctxt->nodemem = 0;
12005
12006
    /*
12007
     * Parse content
12008
     *
12009
     * This initiates a recursive call chain:
12010
     *
12011
     * - xmlCtxtParseContentInternal
12012
     * - xmlParseContentInternal
12013
     * - xmlParseReference
12014
     * - xmlCtxtParseEntity
12015
     *
12016
     * The nesting depth is limited by the maximum number of inputs,
12017
     * see xmlPushInput.
12018
     *
12019
     * It's possible to make this non-recursive (minNsIndex must be
12020
     * stored in the input struct) at the expense of code readability.
12021
     */
12022
12023
863
    ent->flags |= XML_ENT_EXPANDING;
12024
12025
863
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
12026
12027
863
    ent->flags &= ~XML_ENT_EXPANDING;
12028
12029
863
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
12030
863
    ctxt->nodelen = oldNodelen;
12031
863
    ctxt->nodemem = oldNodemem;
12032
12033
    /*
12034
     * Entity size accounting
12035
     */
12036
863
    consumed = input->consumed;
12037
863
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
12038
12039
863
    if ((ent->flags & XML_ENT_CHECKED) == 0)
12040
514
        xmlSaturatedAdd(&ent->expandedSize, consumed);
12041
12042
863
    if ((ent->flags & XML_ENT_PARSED) == 0) {
12043
514
        if (isExternal)
12044
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
12045
12046
514
        ent->children = list;
12047
12048
1.47k
        while (list != NULL) {
12049
956
            list->parent = (xmlNodePtr) ent;
12050
956
            if (list->next == NULL)
12051
110
                ent->last = list;
12052
956
            list = list->next;
12053
956
        }
12054
514
    } else {
12055
349
        xmlFreeNodeList(list);
12056
349
    }
12057
12058
863
    xmlFreeInputStream(input);
12059
12060
884
error:
12061
884
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
12062
884
}
12063
12064
/**
12065
 * xmlParseCtxtExternalEntity:
12066
 * @ctxt:  the existing parsing context
12067
 * @URL:  the URL for the entity to load
12068
 * @ID:  the System ID for the entity to load
12069
 * @listOut:  the return value for the set of parsed nodes
12070
 *
12071
 * Parse an external general entity within an existing parsing context
12072
 * An external general parsed entity is well-formed if it matches the
12073
 * production labeled extParsedEnt.
12074
 *
12075
 * [78] extParsedEnt ::= TextDecl? content
12076
 *
12077
 * Returns 0 if the entity is well formed, XML_ERR_ARGUMENT if @ctxt is NULL and
12078
 *    the parser error code otherwise
12079
 */
12080
12081
int
12082
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt, const xmlChar *URL,
12083
0
                           const xmlChar *ID, xmlNodePtr *listOut) {
12084
0
    xmlParserInputPtr input;
12085
0
    xmlNodePtr list;
12086
12087
0
    if (listOut != NULL)
12088
0
        *listOut = NULL;
12089
12090
0
    if (ctxt == NULL)
12091
0
        return(XML_ERR_ARGUMENT);
12092
12093
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12094
0
                            XML_RESOURCE_GENERAL_ENTITY);
12095
0
    if (input == NULL)
12096
0
        return(ctxt->errNo);
12097
12098
0
    xmlCtxtInitializeLate(ctxt);
12099
12100
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
12101
0
    if (listOut != NULL)
12102
0
        *listOut = list;
12103
0
    else
12104
0
        xmlFreeNodeList(list);
12105
12106
0
    xmlFreeInputStream(input);
12107
0
    return(ctxt->errNo);
12108
0
}
12109
12110
#ifdef LIBXML_SAX1_ENABLED
12111
/**
12112
 * xmlParseExternalEntity:
12113
 * @doc:  the document the chunk pertains to
12114
 * @sax:  the SAX handler block (possibly NULL)
12115
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12116
 * @depth:  Used for loop detection, use 0
12117
 * @URL:  the URL for the entity to load
12118
 * @ID:  the System ID for the entity to load
12119
 * @list:  the return value for the set of parsed nodes
12120
 *
12121
 * DEPRECATED: Use xmlParseCtxtExternalEntity.
12122
 *
12123
 * Parse an external general entity
12124
 * An external general parsed entity is well-formed if it matches the
12125
 * production labeled extParsedEnt.
12126
 *
12127
 * [78] extParsedEnt ::= TextDecl? content
12128
 *
12129
 * Returns 0 if the entity is well formed, XML_ERR_ARGUMENT if @doc is NULL and
12130
 *    the parser error code otherwise
12131
 */
12132
12133
int
12134
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12135
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
12136
0
    xmlParserCtxtPtr ctxt;
12137
0
    int ret;
12138
12139
0
    if (list != NULL)
12140
0
        *list = NULL;
12141
12142
0
    if (doc == NULL)
12143
0
        return(XML_ERR_ARGUMENT);
12144
12145
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12146
0
    if (ctxt == NULL)
12147
0
        return(XML_ERR_NO_MEMORY);
12148
12149
0
    ctxt->depth = depth;
12150
0
    ctxt->myDoc = doc;
12151
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
12152
12153
0
    xmlFreeParserCtxt(ctxt);
12154
0
    return(ret);
12155
0
}
12156
12157
/**
12158
 * xmlParseBalancedChunkMemory:
12159
 * @doc:  the document the chunk pertains to (must not be NULL)
12160
 * @sax:  the SAX handler block (possibly NULL)
12161
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12162
 * @depth:  Used for loop detection, use 0
12163
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12164
 * @lst:  the return value for the set of parsed nodes
12165
 *
12166
 * Parse a well-balanced chunk of an XML document
12167
 * called by the parser
12168
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12169
 * the content production in the XML grammar:
12170
 *
12171
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12172
 *
12173
 * Returns 0 if the chunk is well balanced, XML_ERR_ARGUMENT if @string is NULL and
12174
 *    the parser error code otherwise
12175
 */
12176
12177
int
12178
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12179
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12180
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12181
0
                                                depth, string, lst, 0 );
12182
0
}
12183
#endif /* LIBXML_SAX1_ENABLED */
12184
12185
/**
12186
 * xmlCtxtParseContent:
12187
 * @ctxt:  parser context
12188
 * @input:  parser input
12189
 * @node:  target node or document
12190
 * @hasTextDecl:  whether to parse text declaration
12191
 *
12192
 * Parse a well-balanced chunk of XML matching the 'content' production.
12193
 *
12194
 * Namespaces in scope of @node and entities of @node's document are
12195
 * recognized. When validating, the DTD of @node's document is used.
12196
 *
12197
 * Always consumes @input even in error case.
12198
 *
12199
 * Available since 2.14.0.
12200
 *
12201
 * Returns a node list or NULL in case of error.
12202
 */
12203
xmlNodePtr
12204
xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
12205
0
                    xmlNodePtr node, int hasTextDecl) {
12206
0
    xmlDocPtr doc;
12207
0
    xmlNodePtr cur, list = NULL;
12208
0
    int nsnr = 0;
12209
0
    xmlDictPtr oldDict;
12210
0
    int oldOptions, oldDictNames, oldLoadSubset;
12211
12212
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12213
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12214
0
        goto exit;
12215
0
    }
12216
12217
0
    doc = node->doc;
12218
0
    if (doc == NULL) {
12219
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12220
0
        goto exit;
12221
0
    }
12222
12223
0
    switch (node->type) {
12224
0
        case XML_ELEMENT_NODE:
12225
0
        case XML_DOCUMENT_NODE:
12226
0
        case XML_HTML_DOCUMENT_NODE:
12227
0
            break;
12228
12229
0
        case XML_ATTRIBUTE_NODE:
12230
0
        case XML_TEXT_NODE:
12231
0
        case XML_CDATA_SECTION_NODE:
12232
0
        case XML_ENTITY_REF_NODE:
12233
0
        case XML_PI_NODE:
12234
0
        case XML_COMMENT_NODE:
12235
0
            for (cur = node->parent; cur != NULL; cur = node->parent) {
12236
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12237
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12238
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12239
0
                    node = cur;
12240
0
                    break;
12241
0
                }
12242
0
            }
12243
0
            break;
12244
12245
0
        default:
12246
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12247
0
            goto exit;
12248
0
    }
12249
12250
0
#ifdef LIBXML_HTML_ENABLED
12251
0
    if (ctxt->html)
12252
0
        htmlCtxtReset(ctxt);
12253
0
    else
12254
0
#endif
12255
0
        xmlCtxtReset(ctxt);
12256
12257
0
    oldDict = ctxt->dict;
12258
0
    oldOptions = ctxt->options;
12259
0
    oldDictNames = ctxt->dictNames;
12260
0
    oldLoadSubset = ctxt->loadsubset;
12261
12262
    /*
12263
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12264
     */
12265
0
    if (doc->dict != NULL) {
12266
0
        ctxt->dict = doc->dict;
12267
0
    } else {
12268
0
        ctxt->options |= XML_PARSE_NODICT;
12269
0
        ctxt->dictNames = 0;
12270
0
    }
12271
12272
    /*
12273
     * Disable IDs
12274
     */
12275
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12276
12277
0
    ctxt->myDoc = doc;
12278
12279
0
#ifdef LIBXML_HTML_ENABLED
12280
0
    if (ctxt->html) {
12281
        /*
12282
         * When parsing in context, it makes no sense to add implied
12283
         * elements like html/body/etc...
12284
         */
12285
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12286
12287
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12288
0
    } else
12289
0
#endif
12290
0
    {
12291
0
        xmlCtxtInitializeLate(ctxt);
12292
12293
        /*
12294
         * This hack lowers the error level of undeclared entities
12295
         * from XML_ERR_FATAL (well-formedness error) to XML_ERR_ERROR
12296
         * or XML_ERR_WARNING.
12297
         */
12298
0
        ctxt->hasExternalSubset = 1;
12299
12300
        /*
12301
         * initialize the SAX2 namespaces stack
12302
         */
12303
0
        cur = node;
12304
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12305
0
            xmlNsPtr ns = cur->nsDef;
12306
0
            xmlHashedString hprefix, huri;
12307
12308
0
            while (ns != NULL) {
12309
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12310
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12311
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12312
0
                    nsnr++;
12313
0
                ns = ns->next;
12314
0
            }
12315
0
            cur = cur->parent;
12316
0
        }
12317
12318
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12319
12320
0
        if (nsnr > 0)
12321
0
            xmlParserNsPop(ctxt, nsnr);
12322
0
    }
12323
12324
0
    ctxt->dict = oldDict;
12325
0
    ctxt->options = oldOptions;
12326
0
    ctxt->dictNames = oldDictNames;
12327
0
    ctxt->loadsubset = oldLoadSubset;
12328
0
    ctxt->myDoc = NULL;
12329
0
    ctxt->node = NULL;
12330
12331
0
exit:
12332
0
    xmlFreeInputStream(input);
12333
0
    return(list);
12334
0
}
12335
12336
/**
12337
 * xmlParseInNodeContext:
12338
 * @node:  the context node
12339
 * @data:  the input string
12340
 * @datalen:  the input string length in bytes
12341
 * @options:  a combination of xmlParserOption
12342
 * @listOut:  the return value for the set of parsed nodes
12343
 *
12344
 * Parse a well-balanced chunk of an XML document
12345
 * within the context (DTD, namespaces, etc ...) of the given node.
12346
 *
12347
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12348
 * the content production in the XML grammar:
12349
 *
12350
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12351
 *
12352
 * This function assumes the encoding of @node's document which is
12353
 * typically not what you want. A better alternative is
12354
 * xmlCtxtParseContent.
12355
 *
12356
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12357
 * error code otherwise
12358
 */
12359
xmlParserErrors
12360
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12361
0
                      int options, xmlNodePtr *listOut) {
12362
0
    xmlParserCtxtPtr ctxt;
12363
0
    xmlParserInputPtr input;
12364
0
    xmlDocPtr doc;
12365
0
    xmlNodePtr list;
12366
0
    xmlParserErrors ret;
12367
12368
0
    if (listOut == NULL)
12369
0
        return(XML_ERR_INTERNAL_ERROR);
12370
0
    *listOut = NULL;
12371
12372
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12373
0
        return(XML_ERR_INTERNAL_ERROR);
12374
12375
0
    doc = node->doc;
12376
0
    if (doc == NULL)
12377
0
        return(XML_ERR_INTERNAL_ERROR);
12378
12379
0
#ifdef LIBXML_HTML_ENABLED
12380
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12381
0
        ctxt = htmlNewParserCtxt();
12382
0
    }
12383
0
    else
12384
0
#endif
12385
0
        ctxt = xmlNewParserCtxt();
12386
12387
0
    if (ctxt == NULL)
12388
0
        return(XML_ERR_NO_MEMORY);
12389
12390
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12391
0
                                      (const char *) doc->encoding,
12392
0
                                      XML_INPUT_BUF_STATIC);
12393
0
    if (input == NULL) {
12394
0
        xmlFreeParserCtxt(ctxt);
12395
0
        return(XML_ERR_NO_MEMORY);
12396
0
    }
12397
12398
0
    xmlCtxtUseOptions(ctxt, options);
12399
12400
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12401
12402
0
    if (list == NULL) {
12403
0
        ret = ctxt->errNo;
12404
0
        if (ret == XML_ERR_ARGUMENT)
12405
0
            ret = XML_ERR_INTERNAL_ERROR;
12406
0
    } else {
12407
0
        ret = XML_ERR_OK;
12408
0
        *listOut = list;
12409
0
    }
12410
12411
0
    xmlFreeParserCtxt(ctxt);
12412
12413
0
    return(ret);
12414
0
}
12415
12416
#ifdef LIBXML_SAX1_ENABLED
12417
/**
12418
 * xmlParseBalancedChunkMemoryRecover:
12419
 * @doc:  the document the chunk pertains to (must not be NULL)
12420
 * @sax:  the SAX handler block (possibly NULL)
12421
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12422
 * @depth:  Used for loop detection, use 0
12423
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12424
 * @listOut:  the return value for the set of parsed nodes
12425
 * @recover: return nodes even if the data is broken (use 0)
12426
 *
12427
 * Parse a well-balanced chunk of an XML document
12428
 *
12429
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12430
 * the content production in the XML grammar:
12431
 *
12432
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12433
 *
12434
 * Returns 0 if the chunk is well balanced, or the parser error code
12435
 * otherwise.
12436
 *
12437
 * In case recover is set to 1, the nodelist will not be empty even if
12438
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12439
 * some extent.
12440
 */
12441
int
12442
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12443
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *listOut,
12444
0
     int recover) {
12445
0
    xmlParserCtxtPtr ctxt;
12446
0
    xmlParserInputPtr input;
12447
0
    xmlNodePtr list;
12448
0
    int ret;
12449
12450
0
    if (listOut != NULL)
12451
0
        *listOut = NULL;
12452
12453
0
    if (string == NULL)
12454
0
        return(XML_ERR_ARGUMENT);
12455
12456
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12457
0
    if (ctxt == NULL)
12458
0
        return(XML_ERR_NO_MEMORY);
12459
12460
0
    xmlCtxtInitializeLate(ctxt);
12461
12462
0
    ctxt->depth = depth;
12463
0
    ctxt->myDoc = doc;
12464
0
    if (recover) {
12465
0
        ctxt->options |= XML_PARSE_RECOVER;
12466
0
        ctxt->recovery = 1;
12467
0
    }
12468
12469
0
    input = xmlNewStringInputStream(ctxt, string);
12470
0
    if (input == NULL) {
12471
0
        ret = ctxt->errNo;
12472
0
        goto error;
12473
0
    }
12474
12475
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12476
0
    if (listOut != NULL)
12477
0
        *listOut = list;
12478
0
    else
12479
0
        xmlFreeNodeList(list);
12480
12481
0
    if (!ctxt->wellFormed)
12482
0
        ret = ctxt->errNo;
12483
0
    else
12484
0
        ret = XML_ERR_OK;
12485
12486
0
error:
12487
0
    xmlFreeInputStream(input);
12488
0
    xmlFreeParserCtxt(ctxt);
12489
0
    return(ret);
12490
0
}
12491
12492
/**
12493
 * xmlSAXParseEntity:
12494
 * @sax:  the SAX handler block
12495
 * @filename:  the filename
12496
 *
12497
 * DEPRECATED: Don't use.
12498
 *
12499
 * parse an XML external entity out of context and build a tree.
12500
 * It use the given SAX function block to handle the parsing callback.
12501
 * If sax is NULL, fallback to the default DOM tree building routines.
12502
 *
12503
 * [78] extParsedEnt ::= TextDecl? content
12504
 *
12505
 * This correspond to a "Well Balanced" chunk
12506
 *
12507
 * Returns the resulting document tree
12508
 */
12509
12510
xmlDocPtr
12511
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12512
0
    xmlDocPtr ret;
12513
0
    xmlParserCtxtPtr ctxt;
12514
12515
0
    ctxt = xmlCreateFileParserCtxt(filename);
12516
0
    if (ctxt == NULL) {
12517
0
  return(NULL);
12518
0
    }
12519
0
    if (sax != NULL) {
12520
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12521
0
            *ctxt->sax = *sax;
12522
0
        } else {
12523
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12524
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12525
0
        }
12526
0
        ctxt->userData = NULL;
12527
0
    }
12528
12529
0
    xmlParseExtParsedEnt(ctxt);
12530
12531
0
    if (ctxt->wellFormed) {
12532
0
  ret = ctxt->myDoc;
12533
0
    } else {
12534
0
        ret = NULL;
12535
0
        xmlFreeDoc(ctxt->myDoc);
12536
0
    }
12537
12538
0
    xmlFreeParserCtxt(ctxt);
12539
12540
0
    return(ret);
12541
0
}
12542
12543
/**
12544
 * xmlParseEntity:
12545
 * @filename:  the filename
12546
 *
12547
 * parse an XML external entity out of context and build a tree.
12548
 *
12549
 * [78] extParsedEnt ::= TextDecl? content
12550
 *
12551
 * This correspond to a "Well Balanced" chunk
12552
 *
12553
 * Returns the resulting document tree
12554
 */
12555
12556
xmlDocPtr
12557
0
xmlParseEntity(const char *filename) {
12558
0
    return(xmlSAXParseEntity(NULL, filename));
12559
0
}
12560
#endif /* LIBXML_SAX1_ENABLED */
12561
12562
/**
12563
 * xmlCreateEntityParserCtxt:
12564
 * @URL:  the entity URL
12565
 * @ID:  the entity PUBLIC ID
12566
 * @base:  a possible base for the target URI
12567
 *
12568
 * DEPRECATED: Don't use.
12569
 *
12570
 * Create a parser context for an external entity
12571
 * Automatic support for ZLIB/Compress compressed document is provided
12572
 * by default if found at compile-time.
12573
 *
12574
 * Returns the new parser context or NULL
12575
 */
12576
xmlParserCtxtPtr
12577
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12578
0
                    const xmlChar *base) {
12579
0
    xmlParserCtxtPtr ctxt;
12580
0
    xmlParserInputPtr input;
12581
0
    xmlChar *uri = NULL;
12582
12583
0
    ctxt = xmlNewParserCtxt();
12584
0
    if (ctxt == NULL)
12585
0
  return(NULL);
12586
12587
0
    if (base != NULL) {
12588
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12589
0
            goto error;
12590
0
        if (uri != NULL)
12591
0
            URL = uri;
12592
0
    }
12593
12594
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12595
0
                            XML_RESOURCE_UNKNOWN);
12596
0
    if (input == NULL)
12597
0
        goto error;
12598
12599
0
    if (inputPush(ctxt, input) < 0) {
12600
0
        xmlFreeInputStream(input);
12601
0
        goto error;
12602
0
    }
12603
12604
0
    xmlFree(uri);
12605
0
    return(ctxt);
12606
12607
0
error:
12608
0
    xmlFree(uri);
12609
0
    xmlFreeParserCtxt(ctxt);
12610
0
    return(NULL);
12611
0
}
12612
12613
/************************************************************************
12614
 *                  *
12615
 *    Front ends when parsing from a file     *
12616
 *                  *
12617
 ************************************************************************/
12618
12619
/**
12620
 * xmlCreateURLParserCtxt:
12621
 * @filename:  the filename or URL
12622
 * @options:  a combination of xmlParserOption
12623
 *
12624
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12625
 *
12626
 * Create a parser context for a file or URL content.
12627
 * Automatic support for ZLIB/Compress compressed document is provided
12628
 * by default if found at compile-time and for file accesses
12629
 *
12630
 * Returns the new parser context or NULL
12631
 */
12632
xmlParserCtxtPtr
12633
xmlCreateURLParserCtxt(const char *filename, int options)
12634
0
{
12635
0
    xmlParserCtxtPtr ctxt;
12636
0
    xmlParserInputPtr input;
12637
12638
0
    ctxt = xmlNewParserCtxt();
12639
0
    if (ctxt == NULL)
12640
0
  return(NULL);
12641
12642
0
    xmlCtxtUseOptions(ctxt, options);
12643
0
    ctxt->linenumbers = 1;
12644
12645
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12646
0
    if (input == NULL) {
12647
0
  xmlFreeParserCtxt(ctxt);
12648
0
  return(NULL);
12649
0
    }
12650
0
    if (inputPush(ctxt, input) < 0) {
12651
0
        xmlFreeInputStream(input);
12652
0
        xmlFreeParserCtxt(ctxt);
12653
0
        return(NULL);
12654
0
    }
12655
12656
0
    return(ctxt);
12657
0
}
12658
12659
/**
12660
 * xmlCreateFileParserCtxt:
12661
 * @filename:  the filename
12662
 *
12663
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12664
 *
12665
 * Create a parser context for a file content.
12666
 * Automatic support for ZLIB/Compress compressed document is provided
12667
 * by default if found at compile-time.
12668
 *
12669
 * Returns the new parser context or NULL
12670
 */
12671
xmlParserCtxtPtr
12672
xmlCreateFileParserCtxt(const char *filename)
12673
0
{
12674
0
    return(xmlCreateURLParserCtxt(filename, 0));
12675
0
}
12676
12677
#ifdef LIBXML_SAX1_ENABLED
12678
/**
12679
 * xmlSAXParseFileWithData:
12680
 * @sax:  the SAX handler block
12681
 * @filename:  the filename
12682
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12683
 *             documents
12684
 * @data:  the userdata
12685
 *
12686
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12687
 *
12688
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12689
 * compressed document is provided by default if found at compile-time.
12690
 * It use the given SAX function block to handle the parsing callback.
12691
 * If sax is NULL, fallback to the default DOM tree building routines.
12692
 *
12693
 * User data (void *) is stored within the parser context in the
12694
 * context's _private member, so it is available nearly everywhere in libxml
12695
 *
12696
 * Returns the resulting document tree
12697
 */
12698
12699
xmlDocPtr
12700
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12701
0
                        int recovery, void *data) {
12702
0
    xmlDocPtr ret;
12703
0
    xmlParserCtxtPtr ctxt;
12704
0
    xmlParserInputPtr input;
12705
12706
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12707
0
    if (ctxt == NULL)
12708
0
  return(NULL);
12709
12710
0
    if (data != NULL)
12711
0
  ctxt->_private = data;
12712
12713
0
    if (recovery) {
12714
0
        ctxt->options |= XML_PARSE_RECOVER;
12715
0
        ctxt->recovery = 1;
12716
0
    }
12717
12718
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12719
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12720
0
    else
12721
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12722
12723
0
    ret = xmlCtxtParseDocument(ctxt, input);
12724
12725
0
    xmlFreeParserCtxt(ctxt);
12726
0
    return(ret);
12727
0
}
12728
12729
/**
12730
 * xmlSAXParseFile:
12731
 * @sax:  the SAX handler block
12732
 * @filename:  the filename
12733
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12734
 *             documents
12735
 *
12736
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12737
 *
12738
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12739
 * compressed document is provided by default if found at compile-time.
12740
 * It use the given SAX function block to handle the parsing callback.
12741
 * If sax is NULL, fallback to the default DOM tree building routines.
12742
 *
12743
 * Returns the resulting document tree
12744
 */
12745
12746
xmlDocPtr
12747
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12748
0
                          int recovery) {
12749
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12750
0
}
12751
12752
/**
12753
 * xmlRecoverDoc:
12754
 * @cur:  a pointer to an array of xmlChar
12755
 *
12756
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
12757
 *
12758
 * parse an XML in-memory document and build a tree.
12759
 * In the case the document is not Well Formed, a attempt to build a
12760
 * tree is tried anyway
12761
 *
12762
 * Returns the resulting document tree or NULL in case of failure
12763
 */
12764
12765
xmlDocPtr
12766
0
xmlRecoverDoc(const xmlChar *cur) {
12767
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12768
0
}
12769
12770
/**
12771
 * xmlParseFile:
12772
 * @filename:  the filename
12773
 *
12774
 * DEPRECATED: Use xmlReadFile.
12775
 *
12776
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12777
 * compressed document is provided by default if found at compile-time.
12778
 *
12779
 * Returns the resulting document tree if the file was wellformed,
12780
 * NULL otherwise.
12781
 */
12782
12783
xmlDocPtr
12784
0
xmlParseFile(const char *filename) {
12785
0
    return(xmlSAXParseFile(NULL, filename, 0));
12786
0
}
12787
12788
/**
12789
 * xmlRecoverFile:
12790
 * @filename:  the filename
12791
 *
12792
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
12793
 *
12794
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12795
 * compressed document is provided by default if found at compile-time.
12796
 * In the case the document is not Well Formed, it attempts to build
12797
 * a tree anyway
12798
 *
12799
 * Returns the resulting document tree or NULL in case of failure
12800
 */
12801
12802
xmlDocPtr
12803
0
xmlRecoverFile(const char *filename) {
12804
0
    return(xmlSAXParseFile(NULL, filename, 1));
12805
0
}
12806
12807
12808
/**
12809
 * xmlSetupParserForBuffer:
12810
 * @ctxt:  an XML parser context
12811
 * @buffer:  a xmlChar * buffer
12812
 * @filename:  a file name
12813
 *
12814
 * DEPRECATED: Don't use.
12815
 *
12816
 * Setup the parser context to parse a new buffer; Clears any prior
12817
 * contents from the parser context. The buffer parameter must not be
12818
 * NULL, but the filename parameter can be
12819
 */
12820
void
12821
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12822
                             const char* filename)
12823
0
{
12824
0
    xmlParserInputPtr input;
12825
12826
0
    if ((ctxt == NULL) || (buffer == NULL))
12827
0
        return;
12828
12829
0
    xmlClearParserCtxt(ctxt);
12830
12831
0
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12832
0
                                      NULL, 0);
12833
0
    if (input == NULL)
12834
0
        return;
12835
0
    if (inputPush(ctxt, input) < 0)
12836
0
        xmlFreeInputStream(input);
12837
0
}
12838
12839
/**
12840
 * xmlSAXUserParseFile:
12841
 * @sax:  a SAX handler
12842
 * @user_data:  The user data returned on SAX callbacks
12843
 * @filename:  a file name
12844
 *
12845
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12846
 *
12847
 * parse an XML file and call the given SAX handler routines.
12848
 * Automatic support for ZLIB/Compress compressed document is provided
12849
 *
12850
 * Returns 0 in case of success or a error number otherwise
12851
 */
12852
int
12853
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12854
0
                    const char *filename) {
12855
0
    int ret = 0;
12856
0
    xmlParserCtxtPtr ctxt;
12857
12858
0
    ctxt = xmlCreateFileParserCtxt(filename);
12859
0
    if (ctxt == NULL) return -1;
12860
0
    if (sax != NULL) {
12861
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12862
0
            *ctxt->sax = *sax;
12863
0
        } else {
12864
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12865
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12866
0
        }
12867
0
  ctxt->userData = user_data;
12868
0
    }
12869
12870
0
    xmlParseDocument(ctxt);
12871
12872
0
    if (ctxt->wellFormed)
12873
0
  ret = 0;
12874
0
    else {
12875
0
        if (ctxt->errNo != 0)
12876
0
      ret = ctxt->errNo;
12877
0
  else
12878
0
      ret = -1;
12879
0
    }
12880
0
    if (ctxt->myDoc != NULL) {
12881
0
        xmlFreeDoc(ctxt->myDoc);
12882
0
  ctxt->myDoc = NULL;
12883
0
    }
12884
0
    xmlFreeParserCtxt(ctxt);
12885
12886
0
    return ret;
12887
0
}
12888
#endif /* LIBXML_SAX1_ENABLED */
12889
12890
/************************************************************************
12891
 *                  *
12892
 *    Front ends when parsing from memory     *
12893
 *                  *
12894
 ************************************************************************/
12895
12896
/**
12897
 * xmlCreateMemoryParserCtxt:
12898
 * @buffer:  a pointer to a char array
12899
 * @size:  the size of the array
12900
 *
12901
 * Create a parser context for an XML in-memory document. The input buffer
12902
 * must not contain a terminating null byte.
12903
 *
12904
 * Returns the new parser context or NULL
12905
 */
12906
xmlParserCtxtPtr
12907
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12908
0
    xmlParserCtxtPtr ctxt;
12909
0
    xmlParserInputPtr input;
12910
12911
0
    if (size < 0)
12912
0
  return(NULL);
12913
12914
0
    ctxt = xmlNewParserCtxt();
12915
0
    if (ctxt == NULL)
12916
0
  return(NULL);
12917
12918
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
12919
0
    if (input == NULL) {
12920
0
  xmlFreeParserCtxt(ctxt);
12921
0
  return(NULL);
12922
0
    }
12923
0
    if (inputPush(ctxt, input) < 0) {
12924
0
        xmlFreeInputStream(input);
12925
0
        xmlFreeParserCtxt(ctxt);
12926
0
        return(NULL);
12927
0
    }
12928
12929
0
    return(ctxt);
12930
0
}
12931
12932
#ifdef LIBXML_SAX1_ENABLED
12933
/**
12934
 * xmlSAXParseMemoryWithData:
12935
 * @sax:  the SAX handler block
12936
 * @buffer:  an pointer to a char array
12937
 * @size:  the size of the array
12938
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12939
 *             documents
12940
 * @data:  the userdata
12941
 *
12942
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
12943
 *
12944
 * parse an XML in-memory block and use the given SAX function block
12945
 * to handle the parsing callback. If sax is NULL, fallback to the default
12946
 * DOM tree building routines.
12947
 *
12948
 * User data (void *) is stored within the parser context in the
12949
 * context's _private member, so it is available nearly everywhere in libxml
12950
 *
12951
 * Returns the resulting document tree
12952
 */
12953
12954
xmlDocPtr
12955
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12956
0
                          int size, int recovery, void *data) {
12957
0
    xmlDocPtr ret;
12958
0
    xmlParserCtxtPtr ctxt;
12959
0
    xmlParserInputPtr input;
12960
12961
0
    if (size < 0)
12962
0
        return(NULL);
12963
12964
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12965
0
    if (ctxt == NULL)
12966
0
        return(NULL);
12967
12968
0
    if (data != NULL)
12969
0
  ctxt->_private=data;
12970
12971
0
    if (recovery) {
12972
0
        ctxt->options |= XML_PARSE_RECOVER;
12973
0
        ctxt->recovery = 1;
12974
0
    }
12975
12976
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
12977
0
                                      XML_INPUT_BUF_STATIC);
12978
12979
0
    ret = xmlCtxtParseDocument(ctxt, input);
12980
12981
0
    xmlFreeParserCtxt(ctxt);
12982
0
    return(ret);
12983
0
}
12984
12985
/**
12986
 * xmlSAXParseMemory:
12987
 * @sax:  the SAX handler block
12988
 * @buffer:  an pointer to a char array
12989
 * @size:  the size of the array
12990
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
12991
 *             documents
12992
 *
12993
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
12994
 *
12995
 * parse an XML in-memory block and use the given SAX function block
12996
 * to handle the parsing callback. If sax is NULL, fallback to the default
12997
 * DOM tree building routines.
12998
 *
12999
 * Returns the resulting document tree
13000
 */
13001
xmlDocPtr
13002
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13003
0
            int size, int recovery) {
13004
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13005
0
}
13006
13007
/**
13008
 * xmlParseMemory:
13009
 * @buffer:  an pointer to a char array
13010
 * @size:  the size of the array
13011
 *
13012
 * DEPRECATED: Use xmlReadMemory.
13013
 *
13014
 * parse an XML in-memory block and build a tree.
13015
 *
13016
 * Returns the resulting document tree
13017
 */
13018
13019
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13020
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
13021
0
}
13022
13023
/**
13024
 * xmlRecoverMemory:
13025
 * @buffer:  an pointer to a char array
13026
 * @size:  the size of the array
13027
 *
13028
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13029
 *
13030
 * parse an XML in-memory block and build a tree.
13031
 * In the case the document is not Well Formed, an attempt to
13032
 * build a tree is tried anyway
13033
 *
13034
 * Returns the resulting document tree or NULL in case of error
13035
 */
13036
13037
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13038
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
13039
0
}
13040
13041
/**
13042
 * xmlSAXUserParseMemory:
13043
 * @sax:  a SAX handler
13044
 * @user_data:  The user data returned on SAX callbacks
13045
 * @buffer:  an in-memory XML document input
13046
 * @size:  the length of the XML document in bytes
13047
 *
13048
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13049
 *
13050
 * parse an XML in-memory buffer and call the given SAX handler routines.
13051
 *
13052
 * Returns 0 in case of success or a error number otherwise
13053
 */
13054
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13055
0
        const char *buffer, int size) {
13056
0
    int ret = 0;
13057
0
    xmlParserCtxtPtr ctxt;
13058
13059
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13060
0
    if (ctxt == NULL) return -1;
13061
0
    if (sax != NULL) {
13062
0
        if (sax->initialized == XML_SAX2_MAGIC) {
13063
0
            *ctxt->sax = *sax;
13064
0
        } else {
13065
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
13066
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
13067
0
        }
13068
0
  ctxt->userData = user_data;
13069
0
    }
13070
13071
0
    xmlParseDocument(ctxt);
13072
13073
0
    if (ctxt->wellFormed)
13074
0
  ret = 0;
13075
0
    else {
13076
0
        if (ctxt->errNo != 0)
13077
0
      ret = ctxt->errNo;
13078
0
  else
13079
0
      ret = -1;
13080
0
    }
13081
0
    if (ctxt->myDoc != NULL) {
13082
0
        xmlFreeDoc(ctxt->myDoc);
13083
0
  ctxt->myDoc = NULL;
13084
0
    }
13085
0
    xmlFreeParserCtxt(ctxt);
13086
13087
0
    return ret;
13088
0
}
13089
#endif /* LIBXML_SAX1_ENABLED */
13090
13091
/**
13092
 * xmlCreateDocParserCtxt:
13093
 * @str:  a pointer to an array of xmlChar
13094
 *
13095
 * Creates a parser context for an XML in-memory document.
13096
 *
13097
 * Returns the new parser context or NULL
13098
 */
13099
xmlParserCtxtPtr
13100
0
xmlCreateDocParserCtxt(const xmlChar *str) {
13101
0
    xmlParserCtxtPtr ctxt;
13102
0
    xmlParserInputPtr input;
13103
13104
0
    ctxt = xmlNewParserCtxt();
13105
0
    if (ctxt == NULL)
13106
0
  return(NULL);
13107
13108
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
13109
0
    if (input == NULL) {
13110
0
  xmlFreeParserCtxt(ctxt);
13111
0
  return(NULL);
13112
0
    }
13113
0
    if (inputPush(ctxt, input) < 0) {
13114
0
        xmlFreeInputStream(input);
13115
0
        xmlFreeParserCtxt(ctxt);
13116
0
        return(NULL);
13117
0
    }
13118
13119
0
    return(ctxt);
13120
0
}
13121
13122
#ifdef LIBXML_SAX1_ENABLED
13123
/**
13124
 * xmlSAXParseDoc:
13125
 * @sax:  the SAX handler block
13126
 * @cur:  a pointer to an array of xmlChar
13127
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13128
 *             documents
13129
 *
13130
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13131
 *
13132
 * parse an XML in-memory document and build a tree.
13133
 * It use the given SAX function block to handle the parsing callback.
13134
 * If sax is NULL, fallback to the default DOM tree building routines.
13135
 *
13136
 * Returns the resulting document tree
13137
 */
13138
13139
xmlDocPtr
13140
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13141
0
    xmlDocPtr ret;
13142
0
    xmlParserCtxtPtr ctxt;
13143
0
    xmlSAXHandlerPtr oldsax = NULL;
13144
13145
0
    if (cur == NULL) return(NULL);
13146
13147
13148
0
    ctxt = xmlCreateDocParserCtxt(cur);
13149
0
    if (ctxt == NULL) return(NULL);
13150
0
    if (sax != NULL) {
13151
0
        oldsax = ctxt->sax;
13152
0
        ctxt->sax = sax;
13153
0
        ctxt->userData = NULL;
13154
0
    }
13155
13156
0
    xmlParseDocument(ctxt);
13157
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13158
0
    else {
13159
0
       ret = NULL;
13160
0
       xmlFreeDoc(ctxt->myDoc);
13161
0
       ctxt->myDoc = NULL;
13162
0
    }
13163
0
    if (sax != NULL)
13164
0
  ctxt->sax = oldsax;
13165
0
    xmlFreeParserCtxt(ctxt);
13166
13167
0
    return(ret);
13168
0
}
13169
13170
/**
13171
 * xmlParseDoc:
13172
 * @cur:  a pointer to an array of xmlChar
13173
 *
13174
 * DEPRECATED: Use xmlReadDoc.
13175
 *
13176
 * parse an XML in-memory document and build a tree.
13177
 *
13178
 * Returns the resulting document tree
13179
 */
13180
13181
xmlDocPtr
13182
0
xmlParseDoc(const xmlChar *cur) {
13183
0
    return(xmlSAXParseDoc(NULL, cur, 0));
13184
0
}
13185
#endif /* LIBXML_SAX1_ENABLED */
13186
13187
/************************************************************************
13188
 *                  *
13189
 *  New set (2.6.0) of simpler and more flexible APIs   *
13190
 *                  *
13191
 ************************************************************************/
13192
13193
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a
 * single statement: it is safe inside an unbraced if/else and requires
 * the usual trailing semicolon at the call site.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
13204
13205
/**
13206
 * xmlCtxtReset:
13207
 * @ctxt: an XML parser context
13208
 *
13209
 * Reset a parser context
13210
 */
13211
void
13212
xmlCtxtReset(xmlParserCtxtPtr ctxt)
13213
0
{
13214
0
    xmlParserInputPtr input;
13215
0
    xmlDictPtr dict;
13216
13217
0
    if (ctxt == NULL)
13218
0
        return;
13219
13220
0
    dict = ctxt->dict;
13221
13222
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13223
0
        xmlFreeInputStream(input);
13224
0
    }
13225
0
    ctxt->inputNr = 0;
13226
0
    ctxt->input = NULL;
13227
13228
0
    ctxt->spaceNr = 0;
13229
0
    if (ctxt->spaceTab != NULL) {
13230
0
  ctxt->spaceTab[0] = -1;
13231
0
  ctxt->space = &ctxt->spaceTab[0];
13232
0
    } else {
13233
0
        ctxt->space = NULL;
13234
0
    }
13235
13236
13237
0
    ctxt->nodeNr = 0;
13238
0
    ctxt->node = NULL;
13239
13240
0
    ctxt->nameNr = 0;
13241
0
    ctxt->name = NULL;
13242
13243
0
    ctxt->nsNr = 0;
13244
0
    xmlParserNsReset(ctxt->nsdb);
13245
13246
0
    DICT_FREE(ctxt->version);
13247
0
    ctxt->version = NULL;
13248
0
    DICT_FREE(ctxt->encoding);
13249
0
    ctxt->encoding = NULL;
13250
0
    DICT_FREE(ctxt->extSubURI);
13251
0
    ctxt->extSubURI = NULL;
13252
0
    DICT_FREE(ctxt->extSubSystem);
13253
0
    ctxt->extSubSystem = NULL;
13254
13255
0
    if (ctxt->directory != NULL) {
13256
0
        xmlFree(ctxt->directory);
13257
0
        ctxt->directory = NULL;
13258
0
    }
13259
13260
0
    if (ctxt->myDoc != NULL)
13261
0
        xmlFreeDoc(ctxt->myDoc);
13262
0
    ctxt->myDoc = NULL;
13263
13264
0
    ctxt->standalone = -1;
13265
0
    ctxt->hasExternalSubset = 0;
13266
0
    ctxt->hasPErefs = 0;
13267
0
    ctxt->html = 0;
13268
0
    ctxt->instate = XML_PARSER_START;
13269
13270
0
    ctxt->wellFormed = 1;
13271
0
    ctxt->nsWellFormed = 1;
13272
0
    ctxt->disableSAX = 0;
13273
0
    ctxt->valid = 1;
13274
0
    ctxt->record_info = 0;
13275
0
    ctxt->checkIndex = 0;
13276
0
    ctxt->endCheckState = 0;
13277
0
    ctxt->inSubset = 0;
13278
0
    ctxt->errNo = XML_ERR_OK;
13279
0
    ctxt->depth = 0;
13280
0
    ctxt->catalogs = NULL;
13281
0
    ctxt->sizeentities = 0;
13282
0
    ctxt->sizeentcopy = 0;
13283
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13284
13285
0
    if (ctxt->attsDefault != NULL) {
13286
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13287
0
        ctxt->attsDefault = NULL;
13288
0
    }
13289
0
    if (ctxt->attsSpecial != NULL) {
13290
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13291
0
        ctxt->attsSpecial = NULL;
13292
0
    }
13293
13294
0
#ifdef LIBXML_CATALOG_ENABLED
13295
0
    if (ctxt->catalogs != NULL)
13296
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13297
0
#endif
13298
0
    ctxt->nbErrors = 0;
13299
0
    ctxt->nbWarnings = 0;
13300
0
    if (ctxt->lastError.code != XML_ERR_OK)
13301
0
        xmlResetError(&ctxt->lastError);
13302
0
}
13303
13304
/**
13305
 * xmlCtxtResetPush:
13306
 * @ctxt: an XML parser context
13307
 * @chunk:  a pointer to an array of chars
13308
 * @size:  number of chars in the array
13309
 * @filename:  an optional file name or URI
13310
 * @encoding:  the document encoding, or NULL
13311
 *
13312
 * Reset a push parser context
13313
 *
13314
 * Returns 0 in case of success and 1 in case of error
13315
 */
13316
int
13317
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13318
                 int size, const char *filename, const char *encoding)
13319
0
{
13320
0
    xmlParserInputPtr input;
13321
13322
0
    if (ctxt == NULL)
13323
0
        return(1);
13324
13325
0
    xmlCtxtReset(ctxt);
13326
13327
0
    input = xmlNewPushInput(filename, chunk, size);
13328
0
    if (input == NULL)
13329
0
        return(1);
13330
13331
0
    if (inputPush(ctxt, input) < 0) {
13332
0
        xmlFreeInputStream(input);
13333
0
        return(1);
13334
0
    }
13335
13336
0
    if (encoding != NULL)
13337
0
        xmlSwitchEncodingName(ctxt, encoding);
13338
13339
0
    return(0);
13340
0
}
13341
13342
static int
13343
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13344
20.2k
{
13345
20.2k
    int allMask;
13346
13347
20.2k
    if (ctxt == NULL)
13348
0
        return(-1);
13349
13350
    /*
13351
     * XInclude options aren't handled by the parser.
13352
     *
13353
     * XML_PARSE_XINCLUDE
13354
     * XML_PARSE_NOXINCNODE
13355
     * XML_PARSE_NOBASEFIX
13356
     */
13357
20.2k
    allMask = XML_PARSE_RECOVER |
13358
20.2k
              XML_PARSE_NOENT |
13359
20.2k
              XML_PARSE_DTDLOAD |
13360
20.2k
              XML_PARSE_DTDATTR |
13361
20.2k
              XML_PARSE_DTDVALID |
13362
20.2k
              XML_PARSE_NOERROR |
13363
20.2k
              XML_PARSE_NOWARNING |
13364
20.2k
              XML_PARSE_PEDANTIC |
13365
20.2k
              XML_PARSE_NOBLANKS |
13366
20.2k
#ifdef LIBXML_SAX1_ENABLED
13367
20.2k
              XML_PARSE_SAX1 |
13368
20.2k
#endif
13369
20.2k
              XML_PARSE_NONET |
13370
20.2k
              XML_PARSE_NODICT |
13371
20.2k
              XML_PARSE_NSCLEAN |
13372
20.2k
              XML_PARSE_NOCDATA |
13373
20.2k
              XML_PARSE_COMPACT |
13374
20.2k
              XML_PARSE_OLD10 |
13375
20.2k
              XML_PARSE_HUGE |
13376
20.2k
              XML_PARSE_OLDSAX |
13377
20.2k
              XML_PARSE_IGNORE_ENC |
13378
20.2k
              XML_PARSE_BIG_LINES |
13379
20.2k
              XML_PARSE_NO_XXE;
13380
13381
20.2k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13382
13383
    /*
13384
     * For some options, struct members are historically the source
13385
     * of truth. The values are initalized from global variables and
13386
     * old code could also modify them directly. Several older API
13387
     * functions that don't take an options argument rely on these
13388
     * deprecated mechanisms.
13389
     *
13390
     * Once public access to struct members and the globals are
13391
     * disabled, we can use the options bitmask as source of
13392
     * truth, making all these struct members obsolete.
13393
     *
13394
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13395
     * loading of the external subset.
13396
     */
13397
20.2k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13398
20.2k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13399
20.2k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13400
20.2k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13401
20.2k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13402
20.2k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13403
20.2k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13404
20.2k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13405
13406
    /*
13407
     * Changing SAX callbacks is a bad idea. This should be fixed.
13408
     */
13409
20.2k
    if (options & XML_PARSE_NOBLANKS) {
13410
0
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13411
0
    }
13412
20.2k
    if (options & XML_PARSE_NOCDATA) {
13413
0
        ctxt->sax->cdataBlock = NULL;
13414
0
    }
13415
20.2k
    if (options & XML_PARSE_HUGE) {
13416
0
        if (ctxt->dict != NULL)
13417
0
            xmlDictSetLimit(ctxt->dict, 0);
13418
0
    }
13419
13420
20.2k
    ctxt->linenumbers = 1;
13421
13422
20.2k
    return(options & ~allMask);
13423
20.2k
}
13424
13425
/**
13426
 * xmlCtxtSetOptions:
13427
 * @ctxt: an XML parser context
13428
 * @options:  a bitmask of xmlParserOption values
13429
 *
13430
 * Applies the options to the parser context. Unset options are
13431
 * cleared.
13432
 *
13433
 * Available since 2.13.0. With older versions, you can use
13434
 * xmlCtxtUseOptions.
13435
 *
13436
 * XML_PARSE_RECOVER
13437
 *
13438
 * Enable "recovery" mode which allows non-wellformed documents.
13439
 * How this mode behaves exactly is unspecified and may change
13440
 * without further notice. Use of this feature is DISCOURAGED.
13441
 *
13442
 * XML_PARSE_NOENT
13443
 *
13444
 * Despite the confusing name, this option enables substitution
13445
 * of entities. The resulting tree won't contain any entity
13446
 * reference nodes.
13447
 *
13448
 * This option also enables loading of external entities (both
13449
 * general and parameter entities) which is dangerous. If you
13450
 * process untrusted data, it's recommended to set the
13451
 * XML_PARSE_NO_XXE option to disable loading of external
13452
 * entities.
13453
 *
13454
 * XML_PARSE_DTDLOAD
13455
 *
13456
 * Enables loading of an external DTD and the loading and
13457
 * substitution of external parameter entities. Has no effect
13458
 * if XML_PARSE_NO_XXE is set.
13459
 *
13460
 * XML_PARSE_DTDATTR
13461
 *
13462
 * Adds default attributes from the DTD to the result document.
13463
 *
13464
 * Implies XML_PARSE_DTDLOAD, but loading of external content
13465
 * can be disabled with XML_PARSE_NO_XXE.
13466
 *
13467
 * XML_PARSE_DTDVALID
13468
 *
13469
 * This option enables DTD validation which requires loading
13470
 * external DTDs and external entities (both general and
13471
 * parameter entities) unless XML_PARSE_NO_XXE was set.
13472
 *
13473
 * XML_PARSE_NO_XXE
13474
 *
13475
 * Disables loading of external DTDs or entities.
13476
 *
13477
 * Available since 2.13.0.
13478
 *
13479
 * XML_PARSE_NOERROR
13480
 *
13481
 * Disable error and warning reports to the error handlers.
13482
 * Errors are still accessible with xmlCtxtGetLastError.
13483
 *
13484
 * XML_PARSE_NOWARNING
13485
 *
13486
 * Disable warning reports.
13487
 *
13488
 * XML_PARSE_PEDANTIC
13489
 *
13490
 * Enable some pedantic warnings.
13491
 *
13492
 * XML_PARSE_NOBLANKS
13493
 *
13494
 * Remove some text nodes containing only whitespace from the
13495
 * result document. Which nodes are removed depends on DTD
13496
 * element declarations or a conservative heuristic. The
13497
 * reindenting feature of the serialization code relies on this
13498
 * option to be set when parsing. Use of this option is
13499
 * DISCOURAGED.
13500
 *
13501
 * XML_PARSE_SAX1
13502
 *
13503
 * Always invoke the deprecated SAX1 startElement and endElement
13504
 * handlers. This option is DEPRECATED.
13505
 *
13506
 * XML_PARSE_NONET
13507
 *
13508
 * Disable network access with the builtin HTTP client.
13509
 *
13510
 * XML_PARSE_NODICT
13511
 *
13512
 * Create a document without interned strings, making all
13513
 * strings separate memory allocations.
13514
 *
13515
 * XML_PARSE_NSCLEAN
13516
 *
13517
 * Remove redundant namespace declarations from the result
13518
 * document.
13519
 *
13520
 * XML_PARSE_NOCDATA
13521
 *
13522
 * Output normal text nodes instead of CDATA nodes.
13523
 *
13524
 * XML_PARSE_COMPACT
13525
 *
13526
 * Store small strings directly in the node struct to save
13527
 * memory.
13528
 *
13529
 * XML_PARSE_OLD10
13530
 *
13531
 * Use old Name productions from before XML 1.0 Fifth Edition.
13532
 * This option is DEPRECATED.
13533
 *
13534
 * XML_PARSE_HUGE
13535
 *
13536
 * Relax some internal limits.
13537
 *
13538
 * Maximum size of text nodes, tags, comments, processing instructions,
13539
 * CDATA sections, entity values
13540
 *
13541
 * normal: 10M
13542
 * huge:    1B
13543
 *
13544
 * Maximum size of names, system literals, pubid literals
13545
 *
13546
 * normal: 50K
13547
 * huge:   10M
13548
 *
13549
 * Maximum nesting depth of elements
13550
 *
13551
 * normal:  256
13552
 * huge:   2048
13553
 *
13554
 * Maximum nesting depth of entities
13555
 *
13556
 * normal: 20
13557
 * huge:   40
13558
 *
13559
 * XML_PARSE_OLDSAX
13560
 *
13561
 * Enable an unspecified legacy mode for SAX parsers. This
13562
 * option is DEPRECATED.
13563
 *
13564
 * XML_PARSE_IGNORE_ENC
13565
 *
13566
 * Ignore the encoding in the XML declaration. This option is
13567
 * mostly unneeded these days. The only effect is to enforce
13568
 * UTF-8 decoding of ASCII-like data.
13569
 *
13570
 * XML_PARSE_BIG_LINES
13571
 *
13572
 * Enable reporting of line numbers larger than 65535.
13573
 *
13574
 * XML_PARSE_NO_UNZIP
13575
 *
13576
 * Disables input decompression. Setting this option is recommended
13577
 * to avoid zip bombs.
13578
 *
13579
 * Available since 2.14.0.
13580
 *
13581
 * XML_PARSE_NO_SYS_CATALOG
13582
 *
13583
 * Disables the global system XML catalog.
13584
 *
13585
 * Available since 2.14.0.
13586
 *
13587
 * XML_PARSE_NO_CATALOG_PI
13588
 *
13589
 * Ignore XML catalog processing instructions.
13590
 *
13591
 * Available since 2.14.0.
13592
 *
13593
 * Returns 0 in case of success, the set of unknown or unimplemented options
13594
 *         in case of error.
13595
 */
13596
int
13597
xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
13598
0
{
13599
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13600
0
}
13601
13602
/**
13603
 * xmlCtxtGetOptions:
13604
 * @ctxt: an XML parser context
13605
 *
13606
 * Get the current options of the parser context.
13607
 *
13608
 * Available since 2.14.0.
13609
 *
13610
 * Returns the current options set in the parser context, or -1 if ctxt is NULL.
13611
 */
13612
int
13613
xmlCtxtGetOptions(xmlParserCtxtPtr ctxt)
13614
0
{
13615
0
    if (ctxt == NULL)
13616
0
        return(-1);
13617
13618
0
    return(ctxt->options);
13619
0
}
13620
13621
/**
13622
 * xmlCtxtUseOptions:
13623
 * @ctxt: an XML parser context
13624
 * @options:  a combination of xmlParserOption
13625
 *
13626
 * DEPRECATED: Use xmlCtxtSetOptions.
13627
 *
13628
 * Applies the options to the parser context. The following options
13629
 * are never cleared and can only be enabled:
13630
 *
13631
 * XML_PARSE_NOERROR
13632
 * XML_PARSE_NOWARNING
13633
 * XML_PARSE_NONET
13634
 * XML_PARSE_NSCLEAN
13635
 * XML_PARSE_NOCDATA
13636
 * XML_PARSE_COMPACT
13637
 * XML_PARSE_OLD10
13638
 * XML_PARSE_HUGE
13639
 * XML_PARSE_OLDSAX
13640
 * XML_PARSE_IGNORE_ENC
13641
 * XML_PARSE_BIG_LINES
13642
 *
13643
 * Returns 0 in case of success, the set of unknown or unimplemented options
13644
 *         in case of error.
13645
 */
13646
int
13647
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13648
20.2k
{
13649
20.2k
    int keepMask;
13650
13651
    /*
13652
     * For historic reasons, some options can only be enabled.
13653
     */
13654
20.2k
    keepMask = XML_PARSE_NOERROR |
13655
20.2k
               XML_PARSE_NOWARNING |
13656
20.2k
               XML_PARSE_NONET |
13657
20.2k
               XML_PARSE_NSCLEAN |
13658
20.2k
               XML_PARSE_NOCDATA |
13659
20.2k
               XML_PARSE_COMPACT |
13660
20.2k
               XML_PARSE_OLD10 |
13661
20.2k
               XML_PARSE_HUGE |
13662
20.2k
               XML_PARSE_OLDSAX |
13663
20.2k
               XML_PARSE_IGNORE_ENC |
13664
20.2k
               XML_PARSE_BIG_LINES;
13665
13666
20.2k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13667
20.2k
}
13668
13669
/**
13670
 * xmlCtxtSetMaxAmplification:
13671
 * @ctxt: an XML parser context
13672
 * @maxAmpl:  maximum amplification factor
13673
 *
13674
 * To protect against exponential entity expansion ("billion laughs"), the
13675
 * size of serialized output is (roughly) limited to the input size
13676
 * multiplied by this factor. The default value is 5.
13677
 *
13678
 * When working with documents making heavy use of entity expansion, it can
13679
 * be necessary to increase the value. For security reasons, this should only
13680
 * be considered when processing trusted input.
13681
 */
13682
void
13683
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
13684
0
{
13685
0
    ctxt->maxAmpl = maxAmpl;
13686
0
}
13687
13688
/**
13689
 * xmlCtxtParseDocument:
13690
 * @ctxt:  an XML parser context
13691
 * @input:  parser input
13692
 *
13693
 * Parse an XML document and return the resulting document tree.
13694
 * Takes ownership of the input object.
13695
 *
13696
 * Available since 2.13.0.
13697
 *
13698
 * Returns the resulting document tree or NULL
13699
 */
13700
xmlDocPtr
13701
xmlCtxtParseDocument(xmlParserCtxtPtr ctxt, xmlParserInputPtr input)
13702
20.2k
{
13703
20.2k
    xmlDocPtr ret = NULL;
13704
13705
20.2k
    if ((ctxt == NULL) || (input == NULL))
13706
0
        return(NULL);
13707
13708
    /* assert(ctxt->inputNr == 0); */
13709
20.2k
    while (ctxt->inputNr > 0)
13710
0
        xmlFreeInputStream(inputPop(ctxt));
13711
13712
20.2k
    if (inputPush(ctxt, input) < 0) {
13713
0
        xmlFreeInputStream(input);
13714
0
        return(NULL);
13715
0
    }
13716
13717
20.2k
    xmlParseDocument(ctxt);
13718
13719
20.2k
    if ((ctxt->wellFormed) ||
13720
20.2k
        ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
13721
36
        ret = ctxt->myDoc;
13722
20.1k
    } else {
13723
20.1k
        if (ctxt->errNo == XML_ERR_OK)
13724
0
            xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, "unknown error\n");
13725
13726
20.1k
        ret = NULL;
13727
20.1k
  xmlFreeDoc(ctxt->myDoc);
13728
20.1k
    }
13729
20.2k
    ctxt->myDoc = NULL;
13730
13731
    /* assert(ctxt->inputNr == 1); */
13732
40.4k
    while (ctxt->inputNr > 0)
13733
20.2k
        xmlFreeInputStream(inputPop(ctxt));
13734
13735
20.2k
    return(ret);
13736
20.2k
}
13737
13738
/**
13739
 * xmlReadDoc:
13740
 * @cur:  a pointer to a zero terminated string
13741
 * @URL:  base URL (optional)
13742
 * @encoding:  the document encoding (optional)
13743
 * @options:  a combination of xmlParserOption
13744
 *
13745
 * Convenience function to parse an XML document from a
13746
 * zero-terminated string.
13747
 *
13748
 * See xmlCtxtReadDoc for details.
13749
 *
13750
 * Returns the resulting document tree
13751
 */
13752
xmlDocPtr
13753
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13754
           int options)
13755
0
{
13756
0
    xmlParserCtxtPtr ctxt;
13757
0
    xmlParserInputPtr input;
13758
0
    xmlDocPtr doc;
13759
13760
0
    ctxt = xmlNewParserCtxt();
13761
0
    if (ctxt == NULL)
13762
0
        return(NULL);
13763
13764
0
    xmlCtxtUseOptions(ctxt, options);
13765
13766
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13767
0
                                      XML_INPUT_BUF_STATIC);
13768
13769
0
    doc = xmlCtxtParseDocument(ctxt, input);
13770
13771
0
    xmlFreeParserCtxt(ctxt);
13772
0
    return(doc);
13773
0
}
13774
13775
/**
13776
 * xmlReadFile:
13777
 * @filename:  a file or URL
13778
 * @encoding:  the document encoding (optional)
13779
 * @options:  a combination of xmlParserOption
13780
 *
13781
 * Convenience function to parse an XML file from the filesystem,
13782
 * the network or a global user-define resource loader.
13783
 *
13784
 * See xmlCtxtReadFile for details.
13785
 *
13786
 * Returns the resulting document tree
13787
 */
13788
xmlDocPtr
13789
xmlReadFile(const char *filename, const char *encoding, int options)
13790
0
{
13791
0
    xmlParserCtxtPtr ctxt;
13792
0
    xmlParserInputPtr input;
13793
0
    xmlDocPtr doc;
13794
13795
0
    ctxt = xmlNewParserCtxt();
13796
0
    if (ctxt == NULL)
13797
0
        return(NULL);
13798
13799
0
    xmlCtxtUseOptions(ctxt, options);
13800
13801
    /*
13802
     * Backward compatibility for users of command line utilities like
13803
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13804
     * should be removed at some point.
13805
     */
13806
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13807
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13808
0
                                      encoding, 0);
13809
0
    else
13810
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13811
13812
0
    doc = xmlCtxtParseDocument(ctxt, input);
13813
13814
0
    xmlFreeParserCtxt(ctxt);
13815
0
    return(doc);
13816
0
}
13817
13818
/**
13819
 * xmlReadMemory:
13820
 * @buffer:  a pointer to a char array
13821
 * @size:  the size of the array
13822
 * @url:  base URL (optional)
13823
 * @encoding:  the document encoding (optional)
13824
 * @options:  a combination of xmlParserOption
13825
 *
13826
 * Parse an XML in-memory document and build a tree. The input buffer must
13827
 * not contain a terminating null byte.
13828
 *
13829
 * See xmlCtxtReadMemory for details.
13830
 *
13831
 * Returns the resulting document tree
13832
 */
13833
xmlDocPtr
13834
xmlReadMemory(const char *buffer, int size, const char *url,
13835
              const char *encoding, int options)
13836
20.2k
{
13837
20.2k
    xmlParserCtxtPtr ctxt;
13838
20.2k
    xmlParserInputPtr input;
13839
20.2k
    xmlDocPtr doc;
13840
13841
20.2k
    if (size < 0)
13842
0
  return(NULL);
13843
13844
20.2k
    ctxt = xmlNewParserCtxt();
13845
20.2k
    if (ctxt == NULL)
13846
0
        return(NULL);
13847
13848
20.2k
    xmlCtxtUseOptions(ctxt, options);
13849
13850
20.2k
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13851
20.2k
                                      XML_INPUT_BUF_STATIC);
13852
13853
20.2k
    doc = xmlCtxtParseDocument(ctxt, input);
13854
13855
20.2k
    xmlFreeParserCtxt(ctxt);
13856
20.2k
    return(doc);
13857
20.2k
}
13858
13859
/**
13860
 * xmlReadFd:
13861
 * @fd:  an open file descriptor
13862
 * @URL:  base URL (optional)
13863
 * @encoding:  the document encoding (optional)
13864
 * @options:  a combination of xmlParserOption
13865
 *
13866
 * Parse an XML from a file descriptor and build a tree.
13867
 *
13868
 * See xmlCtxtReadFd for details.
13869
 *
13870
 * NOTE that the file descriptor will not be closed when the
13871
 * context is freed or reset.
13872
 *
13873
 * Returns the resulting document tree
13874
 */
13875
xmlDocPtr
13876
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13877
0
{
13878
0
    xmlParserCtxtPtr ctxt;
13879
0
    xmlParserInputPtr input;
13880
0
    xmlDocPtr doc;
13881
13882
0
    ctxt = xmlNewParserCtxt();
13883
0
    if (ctxt == NULL)
13884
0
        return(NULL);
13885
13886
0
    xmlCtxtUseOptions(ctxt, options);
13887
13888
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13889
13890
0
    doc = xmlCtxtParseDocument(ctxt, input);
13891
13892
0
    xmlFreeParserCtxt(ctxt);
13893
0
    return(doc);
13894
0
}
13895
13896
/**
13897
 * xmlReadIO:
13898
 * @ioread:  an I/O read function
13899
 * @ioclose:  an I/O close function (optional)
13900
 * @ioctx:  an I/O handler
13901
 * @URL:  base URL (optional)
13902
 * @encoding:  the document encoding (optional)
13903
 * @options:  a combination of xmlParserOption
13904
 *
13905
 * Parse an XML document from I/O functions and context and build a tree.
13906
 *
13907
 * See xmlCtxtReadIO for details.
13908
 *
13909
 * Returns the resulting document tree
13910
 */
13911
xmlDocPtr
13912
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13913
          void *ioctx, const char *URL, const char *encoding, int options)
13914
0
{
13915
0
    xmlParserCtxtPtr ctxt;
13916
0
    xmlParserInputPtr input;
13917
0
    xmlDocPtr doc;
13918
13919
0
    ctxt = xmlNewParserCtxt();
13920
0
    if (ctxt == NULL)
13921
0
        return(NULL);
13922
13923
0
    xmlCtxtUseOptions(ctxt, options);
13924
13925
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13926
0
                                  encoding, 0);
13927
13928
0
    doc = xmlCtxtParseDocument(ctxt, input);
13929
13930
0
    xmlFreeParserCtxt(ctxt);
13931
0
    return(doc);
13932
0
}
13933
13934
/**
13935
 * xmlCtxtReadDoc:
13936
 * @ctxt:  an XML parser context
13937
 * @str:  a pointer to a zero terminated string
13938
 * @URL:  base URL (optional)
13939
 * @encoding:  the document encoding (optional)
13940
 * @options:  a combination of xmlParserOption
13941
 *
13942
 * Parse an XML in-memory document and build a tree.
13943
 *
13944
 * @URL is used as base to resolve external entities and for error
13945
 * reporting.
13946
 *
13947
 * See xmlCtxtUseOptions for details.
13948
 *
13949
 * Returns the resulting document tree
13950
 */
13951
xmlDocPtr
13952
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
13953
               const char *URL, const char *encoding, int options)
13954
0
{
13955
0
    xmlParserInputPtr input;
13956
13957
0
    if (ctxt == NULL)
13958
0
        return(NULL);
13959
13960
0
    xmlCtxtReset(ctxt);
13961
0
    xmlCtxtUseOptions(ctxt, options);
13962
13963
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
13964
0
                                      XML_INPUT_BUF_STATIC);
13965
13966
0
    return(xmlCtxtParseDocument(ctxt, input));
13967
0
}
13968
13969
/**
13970
 * xmlCtxtReadFile:
13971
 * @ctxt:  an XML parser context
13972
 * @filename:  a file or URL
13973
 * @encoding:  the document encoding (optional)
13974
 * @options:  a combination of xmlParserOption
13975
 *
13976
 * Parse an XML file from the filesystem, the network or a user-defined
13977
 * resource loader.
13978
 *
13979
 * Returns the resulting document tree
13980
 */
13981
xmlDocPtr
13982
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13983
                const char *encoding, int options)
13984
0
{
13985
0
    xmlParserInputPtr input;
13986
13987
0
    if (ctxt == NULL)
13988
0
        return(NULL);
13989
13990
0
    xmlCtxtReset(ctxt);
13991
0
    xmlCtxtUseOptions(ctxt, options);
13992
13993
0
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13994
13995
0
    return(xmlCtxtParseDocument(ctxt, input));
13996
0
}
13997
13998
/**
13999
 * xmlCtxtReadMemory:
14000
 * @ctxt:  an XML parser context
14001
 * @buffer:  a pointer to a char array
14002
 * @size:  the size of the array
14003
 * @URL:  base URL (optional)
14004
 * @encoding:  the document encoding (optional)
14005
 * @options:  a combination of xmlParserOption
14006
 *
14007
 * Parse an XML in-memory document and build a tree. The input buffer must
14008
 * not contain a terminating null byte.
14009
 *
14010
 * @URL is used as base to resolve external entities and for error
14011
 * reporting.
14012
 *
14013
 * See xmlCtxtUseOptions for details.
14014
 *
14015
 * Returns the resulting document tree
14016
 */
14017
xmlDocPtr
14018
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14019
                  const char *URL, const char *encoding, int options)
14020
0
{
14021
0
    xmlParserInputPtr input;
14022
14023
0
    if ((ctxt == NULL) || (size < 0))
14024
0
        return(NULL);
14025
14026
0
    xmlCtxtReset(ctxt);
14027
0
    xmlCtxtUseOptions(ctxt, options);
14028
14029
0
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
14030
0
                                      XML_INPUT_BUF_STATIC);
14031
14032
0
    return(xmlCtxtParseDocument(ctxt, input));
14033
0
}
14034
14035
/**
14036
 * xmlCtxtReadFd:
14037
 * @ctxt:  an XML parser context
14038
 * @fd:  an open file descriptor
14039
 * @URL:  base URL (optional)
14040
 * @encoding:  the document encoding (optional)
14041
 * @options:  a combination of xmlParserOption
14042
 *
14043
 * Parse an XML document from a file descriptor and build a tree.
14044
 *
14045
 * NOTE that the file descriptor will not be closed when the
14046
 * context is freed or reset.
14047
 *
14048
 * @URL is used as base to resolve external entities and for error
14049
 * reporting.
14050
 *
14051
 * See xmlCtxtUseOptions for details.
14052
 *
14053
 * Returns the resulting document tree
14054
 */
14055
xmlDocPtr
14056
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14057
              const char *URL, const char *encoding, int options)
14058
0
{
14059
0
    xmlParserInputPtr input;
14060
14061
0
    if (ctxt == NULL)
14062
0
        return(NULL);
14063
14064
0
    xmlCtxtReset(ctxt);
14065
0
    xmlCtxtUseOptions(ctxt, options);
14066
14067
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
14068
14069
0
    return(xmlCtxtParseDocument(ctxt, input));
14070
0
}
14071
14072
/**
14073
 * xmlCtxtReadIO:
14074
 * @ctxt:  an XML parser context
14075
 * @ioread:  an I/O read function
14076
 * @ioclose:  an I/O close function
14077
 * @ioctx:  an I/O handler
14078
 * @URL:  the base URL to use for the document
14079
 * @encoding:  the document encoding, or NULL
14080
 * @options:  a combination of xmlParserOption
14081
 *
14082
 * parse an XML document from I/O functions and source and build a tree.
14083
 * This reuses the existing @ctxt parser context
14084
 *
14085
 * @URL is used as base to resolve external entities and for error
14086
 * reporting.
14087
 *
14088
 * See xmlCtxtUseOptions for details.
14089
 *
14090
 * Returns the resulting document tree
14091
 */
14092
xmlDocPtr
14093
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14094
              xmlInputCloseCallback ioclose, void *ioctx,
14095
        const char *URL,
14096
              const char *encoding, int options)
14097
0
{
14098
0
    xmlParserInputPtr input;
14099
14100
0
    if (ctxt == NULL)
14101
0
        return(NULL);
14102
14103
0
    xmlCtxtReset(ctxt);
14104
0
    xmlCtxtUseOptions(ctxt, options);
14105
14106
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
14107
0
                                  encoding, 0);
14108
14109
0
    return(xmlCtxtParseDocument(ctxt, input));
14110
0
}
14111