Coverage Report

Created: 2025-05-14 06:18

/src/libprotobuf-mutator/build/examples/libxml2/external.libxml2/src/external.libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * Author: Daniel Veillard
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#include <libxml/HTMLparser.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
70
#include "private/buf.h"
71
#include "private/dict.h"
72
#include "private/entities.h"
73
#include "private/error.h"
74
#include "private/html.h"
75
#include "private/io.h"
76
#include "private/memory.h"
77
#include "private/parser.h"
78
79
48.6k
#define NS_INDEX_EMPTY  INT_MAX
80
12.0k
#define NS_INDEX_XML    (INT_MAX - 1)
81
21.1k
#define URI_HASH_EMPTY  0xD943A04E
82
2.05k
#define URI_HASH_XML    0xF0451F02
83
84
#ifndef STDIN_FILENO
85
0
  #define STDIN_FILENO 0
86
#endif
87
88
#ifndef SIZE_MAX
89
  #define SIZE_MAX ((size_t) -1)
90
#endif
91
92
25.9k
#define XML_MAX_ATTRS 100000000 /* 100 million */
93
94
/*
 * Record kept for a start tag: its prefix and URI strings plus two
 * integers (a line value and nsNr).
 * NOTE(review): the code that fills and consumes these fields is not
 * visible in this part of the file; nsNr is presumably a namespace
 * count -- confirm against the users of this struct.
 */
struct _xmlStartTag {
95
    const xmlChar *prefix;
96
    const xmlChar *URI;
97
    int line;
98
    int nsNr;
99
};
100
101
/*
 * Extra per-namespace bookkeeping: an opaque SAX data pointer, hash
 * values for prefix and URI, an element id and a saved index.
 * NOTE(review): exact semantics (e.g. what oldIndex restores) are not
 * visible in this part of the file -- confirm against the namespace
 * push/pop code.
 */
typedef struct {
102
    void *saxData;
103
    unsigned prefixHashValue;
104
    unsigned uriHashValue;
105
    unsigned elementId;
106
    int oldIndex;
107
} xmlParserNsExtra;
108
109
/*
 * One slot of the namespace hash table: a hash value and an index.
 * NOTE(review): presumably the index refers to the parser's namespace
 * arrays -- confirm against the lookup code.
 */
typedef struct {
110
    unsigned hashValue;
111
    int index;
112
} xmlParserNsBucket;
113
114
/*
 * Namespace database state: an array of xmlParserNsExtra records, a
 * hash table of xmlParserNsBucket slots with its size and element
 * count, an element id counter and two namespace indices.
 * NOTE(review): the special index values are presumably NS_INDEX_EMPTY
 * and NS_INDEX_XML defined above -- confirm against the users.
 */
struct _xmlParserNsData {
115
    xmlParserNsExtra *extra;
116
117
    unsigned hashSize;
118
    unsigned hashElems;
119
    xmlParserNsBucket *hash;
120
121
    unsigned elementId;
122
    int defaultNsIndex;
123
    int minNsIndex;
124
};
125
126
static int
127
xmlParseElementStart(xmlParserCtxtPtr ctxt);
128
129
static void
130
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
131
132
static xmlEntityPtr
133
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
134
135
static const xmlChar *
136
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
137
138
/************************************************************************
139
 *                  *
140
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
141
 *                  *
142
 ************************************************************************/
143
144
#define XML_PARSER_BIG_ENTITY 1000
145
#define XML_PARSER_LOT_ENTITY 5000
146
147
/*
148
 * Constants for protection against abusive entity expansion
149
 * ("billion laughs").
150
 */
151
152
/*
153
 * A certain amount of entity expansion which is always allowed.
154
 */
155
675k
#define XML_PARSER_ALLOWED_EXPANSION 1000000
156
157
/*
158
 * Fixed cost for each entity reference. This crudely models processing time
159
 * as well to protect, for example, against exponential expansion of empty
160
 * or very short entities.
161
 */
162
678k
#define XML_ENT_FIXED_COST 20
163
164
39.3k
#define XML_PARSER_BIG_BUFFER_SIZE 300
165
58.8k
#define XML_PARSER_BUFFER_SIZE 100
166
26.5k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
167
168
/**
169
 * XML_PARSER_CHUNK_SIZE
170
 *
171
 * When calling GROW that's the minimal amount of data
172
 * the parser expected to have received. It is not a hard
173
 * limit but an optimization when reading strings like Names
174
 * It is not strictly needed as long as inputs available characters
175
 * are followed by 0, which should be provided by the I/O level
176
 */
177
#define XML_PARSER_CHUNK_SIZE 100
178
179
/**
180
 * Constant string describing the internal version of the library
181
 */
182
const char *const
183
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
184
185
/*
186
 * List of XML prefixed PI allowed by W3C specs
187
 */
188
189
static const char* const xmlW3CPIs[] = {
190
    "xml-stylesheet",
191
    "xml-model",
192
    NULL
193
};
194
195
196
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
197
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
198
                                              const xmlChar **str);
199
200
static void
201
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
202
203
static int
204
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
205
206
/************************************************************************
207
 *                  *
208
 *    Some factorized error routines        *
209
 *                  *
210
 ************************************************************************/
211
212
static void
213
0
xmlErrMemory(xmlParserCtxtPtr ctxt) {
214
0
    xmlCtxtErrMemory(ctxt);
215
0
}
216
217
/**
218
 * Handle a redefinition of attribute error
219
 *
220
 * @param ctxt  an XML parser context
221
 * @param prefix  the attribute prefix
222
 * @param localname  the attribute localname
223
 */
224
static void
225
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
226
                   const xmlChar * localname)
227
10.5k
{
228
10.5k
    if (prefix == NULL)
229
8.19k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
230
8.19k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
231
8.19k
                   "Attribute %s redefined\n", localname);
232
2.36k
    else
233
2.36k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
234
2.36k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
235
2.36k
                   "Attribute %s:%s redefined\n", prefix, localname);
236
10.5k
}
237
238
/**
239
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
240
 *
241
 * @param ctxt  an XML parser context
242
 * @param error  the error number
243
 * @param msg  the error message
244
 */
245
static void LIBXML_ATTR_FORMAT(3,0)
246
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
247
               const char *msg)
248
273k
{
249
273k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
250
273k
               NULL, NULL, NULL, 0, "%s", msg);
251
273k
}
252
253
/**
254
 * Handle a warning.
255
 *
256
 * @param ctxt  an XML parser context
257
 * @param error  the error number
258
 * @param msg  the error message
259
 * @param str1  extra data
260
 * @param str2  extra data
261
 */
262
void LIBXML_ATTR_FORMAT(3,0)
263
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
264
              const char *msg, const xmlChar *str1, const xmlChar *str2)
265
16.6k
{
266
16.6k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
267
16.6k
               str1, str2, NULL, 0, msg, str1, str2);
268
16.6k
}
269
270
/**
271
 * Handle a validity error.
272
 *
273
 * @param ctxt  an XML parser context
274
 * @param error  the error number
275
 * @param msg  the error message
276
 * @param str1  extra data
277
 * @param str2  extra data
278
 */
279
static void LIBXML_ATTR_FORMAT(3,0)
280
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
281
              const char *msg, const xmlChar *str1, const xmlChar *str2)
282
1.33k
{
283
1.33k
    ctxt->valid = 0;
284
285
1.33k
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
286
1.33k
               str1, str2, NULL, 0, msg, str1, str2);
287
1.33k
}
288
289
/**
290
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
291
 *
292
 * @param ctxt  an XML parser context
293
 * @param error  the error number
294
 * @param msg  the error message
295
 * @param val  an integer value
296
 */
297
static void LIBXML_ATTR_FORMAT(3,0)
298
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
299
                  const char *msg, int val)
300
12.0k
{
301
12.0k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
302
12.0k
               NULL, NULL, NULL, val, msg, val);
303
12.0k
}
304
305
/**
306
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
307
 *
308
 * @param ctxt  an XML parser context
309
 * @param error  the error number
310
 * @param msg  the error message
311
 * @param str1  an string info
312
 * @param val  an integer value
313
 * @param str2  an string info
314
 */
315
static void LIBXML_ATTR_FORMAT(3,0)
316
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
317
                  const char *msg, const xmlChar *str1, int val,
318
      const xmlChar *str2)
319
94.4k
{
320
94.4k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
321
94.4k
               str1, str2, NULL, val, msg, str1, val, str2);
322
94.4k
}
323
324
/**
325
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
326
 *
327
 * @param ctxt  an XML parser context
328
 * @param error  the error number
329
 * @param msg  the error message
330
 * @param val  a string value
331
 */
332
static void LIBXML_ATTR_FORMAT(3,0)
333
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
334
                  const char *msg, const xmlChar * val)
335
61.8k
{
336
61.8k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
337
61.8k
               val, NULL, NULL, 0, msg, val);
338
61.8k
}
339
340
/**
341
 * Handle a non fatal parser error
342
 *
343
 * @param ctxt  an XML parser context
344
 * @param error  the error number
345
 * @param msg  the error message
346
 * @param val  a string value
347
 */
348
static void LIBXML_ATTR_FORMAT(3,0)
349
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
350
                  const char *msg, const xmlChar * val)
351
0
{
352
0
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
353
0
               val, NULL, NULL, 0, msg, val);
354
0
}
355
356
/**
357
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
358
 *
359
 * @param ctxt  an XML parser context
360
 * @param error  the error number
361
 * @param msg  the message
362
 * @param info1  extra information string
363
 * @param info2  extra information string
364
 * @param info3  extra information string
365
 */
366
static void LIBXML_ATTR_FORMAT(3,0)
367
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
368
         const char *msg,
369
         const xmlChar * info1, const xmlChar * info2,
370
         const xmlChar * info3)
371
35.9k
{
372
35.9k
    ctxt->nsWellFormed = 0;
373
374
35.9k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
375
35.9k
               info1, info2, info3, 0, msg, info1, info2, info3);
376
35.9k
}
377
378
/**
379
 * Handle a namespace warning error
380
 *
381
 * @param ctxt  an XML parser context
382
 * @param error  the error number
383
 * @param msg  the message
384
 * @param info1  extra information string
385
 * @param info2  extra information string
386
 * @param info3  extra information string
387
 */
388
static void LIBXML_ATTR_FORMAT(3,0)
389
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
390
         const char *msg,
391
         const xmlChar * info1, const xmlChar * info2,
392
         const xmlChar * info3)
393
1.14k
{
394
1.14k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
395
1.14k
               info1, info2, info3, 0, msg, info1, info2, info3);
396
1.14k
}
397
398
/**
 * Add val to *dst, clamping the result at ULONG_MAX instead of wrapping.
 *
 * @param dst  accumulator, updated in place
 * @param val  amount to add
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without overflow. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
405
406
/**
 * Add a size_t quantity to *dst, clamping the result at ULONG_MAX.
 *
 * The value is taken as size_t so that on platforms where size_t is
 * wider than unsigned long it is compared against the remaining room
 * at full width, instead of being truncated at the call site before
 * the saturation check runs.
 *
 * @param dst  accumulator, updated in place
 * @param val  amount to add
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        /* val fits in the remaining room, so the narrowing is lossless. */
        *dst += (unsigned long) val;
}
413
414
/**
415
 * Check for non-linear entity expansion behaviour.
416
 *
417
 * In some cases like xmlExpandEntityInAttValue(), this function is called
418
 * for each, possibly nested entity and its unexpanded content length.
419
 *
420
 * In other cases like xmlParseReference(), it's only called for each
421
 * top-level entity with its unexpanded content length plus the sum of
422
 * the unexpanded content lengths (plus fixed cost) of all nested
423
 * entities.
424
 *
425
 * Summing the unexpanded lengths also adds the length of the reference.
426
 * This is by design. Taking the length of the entity name into account
427
 * discourages attacks that try to waste CPU time with abusively long
428
 * entity names. See test/recurse/lol6.xml for example. Each call also
429
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
430
 * short entities.
431
 *
432
 * @param ctxt  parser context
433
 * @param extra  sum of unexpanded entity sizes
434
 * @returns 1 on error, 0 on success.
435
 */
436
static int
437
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
438
702k
{
439
702k
    unsigned long consumed;
440
702k
    unsigned long *expandedSize;
441
702k
    xmlParserInputPtr input = ctxt->input;
442
702k
    xmlEntityPtr entity = input->entity;
443
444
702k
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
445
26.7k
        return(0);
446
447
    /*
448
     * Compute total consumed bytes so far, including input streams of
449
     * external entities.
450
     */
451
675k
    consumed = input->consumed;
452
675k
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
453
675k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
454
455
675k
    if (entity)
456
9.12k
        expandedSize = &entity->expandedSize;
457
666k
    else
458
666k
        expandedSize = &ctxt->sizeentcopy;
459
460
    /*
461
     * Add extra cost and some fixed cost.
462
     */
463
675k
    xmlSaturatedAdd(expandedSize, extra);
464
675k
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
465
466
    /*
467
     * It's important to always use saturation arithmetic when tracking
468
     * entity sizes to make the size checks reliable. If "sizeentcopy"
469
     * overflows, we have to abort.
470
     */
471
675k
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
472
675k
        ((*expandedSize >= ULONG_MAX) ||
473
13
         (*expandedSize / ctxt->maxAmpl > consumed))) {
474
13
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
475
13
                       "Maximum entity amplification factor exceeded, see "
476
13
                       "xmlCtxtSetMaxAmplification.\n");
477
13
        xmlHaltParser(ctxt);
478
13
        return(1);
479
13
    }
480
481
675k
    return(0);
482
675k
}
483
484
/************************************************************************
485
 *                  *
486
 *    Library wide options          *
487
 *                  *
488
 ************************************************************************/
489
490
/**
491
 * Examines if the library has been compiled with a given feature.
492
 *
493
 * @param feature  the feature to be examined
494
 * @returns zero (0) if the feature does not exist or an unknown
495
 * feature is requested, non-zero otherwise.
496
 */
497
int
498
xmlHasFeature(xmlFeature feature)
499
0
{
500
0
    switch (feature) {
501
0
  case XML_WITH_THREAD:
502
0
#ifdef LIBXML_THREAD_ENABLED
503
0
      return(1);
504
#else
505
      return(0);
506
#endif
507
0
        case XML_WITH_TREE:
508
0
            return(1);
509
0
        case XML_WITH_OUTPUT:
510
0
#ifdef LIBXML_OUTPUT_ENABLED
511
0
            return(1);
512
#else
513
            return(0);
514
#endif
515
0
        case XML_WITH_PUSH:
516
0
#ifdef LIBXML_PUSH_ENABLED
517
0
            return(1);
518
#else
519
            return(0);
520
#endif
521
0
        case XML_WITH_READER:
522
0
#ifdef LIBXML_READER_ENABLED
523
0
            return(1);
524
#else
525
            return(0);
526
#endif
527
0
        case XML_WITH_PATTERN:
528
0
#ifdef LIBXML_PATTERN_ENABLED
529
0
            return(1);
530
#else
531
            return(0);
532
#endif
533
0
        case XML_WITH_WRITER:
534
0
#ifdef LIBXML_WRITER_ENABLED
535
0
            return(1);
536
#else
537
            return(0);
538
#endif
539
0
        case XML_WITH_SAX1:
540
0
#ifdef LIBXML_SAX1_ENABLED
541
0
            return(1);
542
#else
543
            return(0);
544
#endif
545
0
        case XML_WITH_HTTP:
546
0
            return(0);
547
0
        case XML_WITH_VALID:
548
0
#ifdef LIBXML_VALID_ENABLED
549
0
            return(1);
550
#else
551
            return(0);
552
#endif
553
0
        case XML_WITH_HTML:
554
0
#ifdef LIBXML_HTML_ENABLED
555
0
            return(1);
556
#else
557
            return(0);
558
#endif
559
0
        case XML_WITH_LEGACY:
560
0
            return(0);
561
0
        case XML_WITH_C14N:
562
0
#ifdef LIBXML_C14N_ENABLED
563
0
            return(1);
564
#else
565
            return(0);
566
#endif
567
0
        case XML_WITH_CATALOG:
568
0
#ifdef LIBXML_CATALOG_ENABLED
569
0
            return(1);
570
#else
571
            return(0);
572
#endif
573
0
        case XML_WITH_XPATH:
574
0
#ifdef LIBXML_XPATH_ENABLED
575
0
            return(1);
576
#else
577
            return(0);
578
#endif
579
0
        case XML_WITH_XPTR:
580
0
#ifdef LIBXML_XPTR_ENABLED
581
0
            return(1);
582
#else
583
            return(0);
584
#endif
585
0
        case XML_WITH_XINCLUDE:
586
0
#ifdef LIBXML_XINCLUDE_ENABLED
587
0
            return(1);
588
#else
589
            return(0);
590
#endif
591
0
        case XML_WITH_ICONV:
592
0
#ifdef LIBXML_ICONV_ENABLED
593
0
            return(1);
594
#else
595
            return(0);
596
#endif
597
0
        case XML_WITH_ISO8859X:
598
0
#ifdef LIBXML_ISO8859X_ENABLED
599
0
            return(1);
600
#else
601
            return(0);
602
#endif
603
0
        case XML_WITH_UNICODE:
604
0
            return(0);
605
0
        case XML_WITH_REGEXP:
606
0
#ifdef LIBXML_REGEXP_ENABLED
607
0
            return(1);
608
#else
609
            return(0);
610
#endif
611
0
        case XML_WITH_AUTOMATA:
612
0
#ifdef LIBXML_REGEXP_ENABLED
613
0
            return(1);
614
#else
615
            return(0);
616
#endif
617
0
        case XML_WITH_EXPR:
618
#ifdef LIBXML_EXPR_ENABLED
619
            return(1);
620
#else
621
0
            return(0);
622
0
#endif
623
0
        case XML_WITH_RELAXNG:
624
0
#ifdef LIBXML_RELAXNG_ENABLED
625
0
            return(1);
626
#else
627
            return(0);
628
#endif
629
0
        case XML_WITH_SCHEMAS:
630
0
#ifdef LIBXML_SCHEMAS_ENABLED
631
0
            return(1);
632
#else
633
            return(0);
634
#endif
635
0
        case XML_WITH_SCHEMATRON:
636
0
#ifdef LIBXML_SCHEMATRON_ENABLED
637
0
            return(1);
638
#else
639
            return(0);
640
#endif
641
0
        case XML_WITH_MODULES:
642
0
#ifdef LIBXML_MODULES_ENABLED
643
0
            return(1);
644
#else
645
            return(0);
646
#endif
647
0
        case XML_WITH_DEBUG:
648
0
#ifdef LIBXML_DEBUG_ENABLED
649
0
            return(1);
650
#else
651
            return(0);
652
#endif
653
0
        case XML_WITH_DEBUG_MEM:
654
0
            return(0);
655
0
        case XML_WITH_ZLIB:
656
#ifdef LIBXML_ZLIB_ENABLED
657
            return(1);
658
#else
659
0
            return(0);
660
0
#endif
661
0
        case XML_WITH_LZMA:
662
#ifdef LIBXML_LZMA_ENABLED
663
            return(1);
664
#else
665
0
            return(0);
666
0
#endif
667
0
        case XML_WITH_ICU:
668
#ifdef LIBXML_ICU_ENABLED
669
            return(1);
670
#else
671
0
            return(0);
672
0
#endif
673
0
        default:
674
0
      break;
675
0
     }
676
0
     return(0);
677
0
}
678
679
/************************************************************************
680
 *                  *
681
 *      Simple string buffer        *
682
 *                  *
683
 ************************************************************************/
684
685
/*
 * Growable byte buffer with a hard size limit and a sticky error code.
 * The xmlSBuf* helpers below record failures in "code" instead of
 * returning them from every append; xmlSBufFinish() and xmlSBufCleanup()
 * report them.
 */
typedef struct {
686
    /* Heap storage, NULL until the first grow (see xmlSBufGrow). */
    xmlChar *mem;
687
    /* Number of bytes currently stored. */
    unsigned size;
688
    unsigned cap; /* size < cap */
689
    unsigned max; /* size <= max */
690
    /* XML_ERR_OK, or XML_ERR_RESOURCE_LIMIT / XML_ERR_NO_MEMORY. */
    xmlParserErrors code;
691
} xmlSBuf;
692
693
static void
694
114k
xmlSBufInit(xmlSBuf *buf, unsigned max) {
695
114k
    buf->mem = NULL;
696
114k
    buf->size = 0;
697
114k
    buf->cap = 0;
698
114k
    buf->max = max;
699
114k
    buf->code = XML_ERR_OK;
700
114k
}
701
702
static int
703
78.7k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
704
78.7k
    xmlChar *mem;
705
78.7k
    unsigned cap;
706
707
78.7k
    if (len >= UINT_MAX / 2 - buf->size) {
708
0
        if (buf->code == XML_ERR_OK)
709
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
710
0
        return(-1);
711
0
    }
712
713
78.7k
    cap = (buf->size + len) * 2;
714
78.7k
    if (cap < 240)
715
67.3k
        cap = 240;
716
717
78.7k
    mem = xmlRealloc(buf->mem, cap);
718
78.7k
    if (mem == NULL) {
719
0
        buf->code = XML_ERR_NO_MEMORY;
720
0
        return(-1);
721
0
    }
722
723
78.7k
    buf->mem = mem;
724
78.7k
    buf->cap = cap;
725
726
78.7k
    return(0);
727
78.7k
}
728
729
static void
730
1.76M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
731
1.76M
    if (buf->max - buf->size < len) {
732
0
        if (buf->code == XML_ERR_OK)
733
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
734
0
        return;
735
0
    }
736
737
1.76M
    if (buf->cap - buf->size <= len) {
738
76.4k
        if (xmlSBufGrow(buf, len) < 0)
739
0
            return;
740
76.4k
    }
741
742
1.76M
    if (len > 0)
743
1.76M
        memcpy(buf->mem + buf->size, str, len);
744
1.76M
    buf->size += len;
745
1.76M
}
746
747
static void
748
532k
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
749
532k
    xmlSBufAddString(buf, (const xmlChar *) str, len);
750
532k
}
751
752
static void
753
24.1k
xmlSBufAddChar(xmlSBuf *buf, int c) {
754
24.1k
    xmlChar *end;
755
756
24.1k
    if (buf->max - buf->size < 4) {
757
0
        if (buf->code == XML_ERR_OK)
758
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
759
0
        return;
760
0
    }
761
762
24.1k
    if (buf->cap - buf->size <= 4) {
763
2.36k
        if (xmlSBufGrow(buf, 4) < 0)
764
0
            return;
765
2.36k
    }
766
767
24.1k
    end = buf->mem + buf->size;
768
769
24.1k
    if (c < 0x80) {
770
13.7k
        *end = (xmlChar) c;
771
13.7k
        buf->size += 1;
772
13.7k
    } else {
773
10.3k
        buf->size += xmlCopyCharMultiByte(end, c);
774
10.3k
    }
775
24.1k
}
776
777
static void
778
18.1k
xmlSBufAddReplChar(xmlSBuf *buf) {
779
18.1k
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
780
18.1k
}
781
782
static void
783
0
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
784
0
    if (buf->code == XML_ERR_NO_MEMORY)
785
0
        xmlCtxtErrMemory(ctxt);
786
0
    else
787
0
        xmlFatalErr(ctxt, buf->code, errMsg);
788
0
}
789
790
static xmlChar *
791
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
792
78.1k
              const char *errMsg) {
793
78.1k
    if (buf->mem == NULL) {
794
15.2k
        buf->mem = xmlMalloc(1);
795
15.2k
        if (buf->mem == NULL) {
796
0
            buf->code = XML_ERR_NO_MEMORY;
797
15.2k
        } else {
798
15.2k
            buf->mem[0] = 0;
799
15.2k
        }
800
62.9k
    } else {
801
62.9k
        buf->mem[buf->size] = 0;
802
62.9k
    }
803
804
78.1k
    if (buf->code == XML_ERR_OK) {
805
78.1k
        if (sizeOut != NULL)
806
2.93k
            *sizeOut = buf->size;
807
78.1k
        return(buf->mem);
808
78.1k
    }
809
810
0
    xmlSBufReportError(buf, ctxt, errMsg);
811
812
0
    xmlFree(buf->mem);
813
814
0
    if (sizeOut != NULL)
815
0
        *sizeOut = 0;
816
0
    return(NULL);
817
78.1k
}
818
819
static void
820
32.0k
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
821
32.0k
    if (buf->code != XML_ERR_OK)
822
0
        xmlSBufReportError(buf, ctxt, errMsg);
823
824
32.0k
    xmlFree(buf->mem);
825
32.0k
}
826
827
static int
828
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
829
95.5k
                    const char *errMsg) {
830
95.5k
    int c = str[0];
831
95.5k
    int c1 = str[1];
832
833
95.5k
    if ((c1 & 0xC0) != 0x80)
834
6.90k
        goto encoding_error;
835
836
88.6k
    if (c < 0xE0) {
837
        /* 2-byte sequence */
838
14.2k
        if (c < 0xC2)
839
4.73k
            goto encoding_error;
840
841
9.52k
        return(2);
842
74.3k
    } else {
843
74.3k
        int c2 = str[2];
844
845
74.3k
        if ((c2 & 0xC0) != 0x80)
846
256
            goto encoding_error;
847
848
74.1k
        if (c < 0xF0) {
849
            /* 3-byte sequence */
850
71.8k
            if (c == 0xE0) {
851
                /* overlong */
852
1.44k
                if (c1 < 0xA0)
853
66
                    goto encoding_error;
854
70.4k
            } else if (c == 0xED) {
855
                /* surrogate */
856
283
                if (c1 >= 0xA0)
857
67
                    goto encoding_error;
858
70.1k
            } else if (c == 0xEF) {
859
                /* U+FFFE and U+FFFF are invalid Chars */
860
25.0k
                if ((c1 == 0xBF) && (c2 >= 0xBE))
861
273
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
862
25.0k
            }
863
864
71.7k
            return(3);
865
71.8k
        } else {
866
            /* 4-byte sequence */
867
2.25k
            if ((str[3] & 0xC0) != 0x80)
868
67
                goto encoding_error;
869
2.19k
            if (c == 0xF0) {
870
                /* overlong */
871
530
                if (c1 < 0x90)
872
66
                    goto encoding_error;
873
1.66k
            } else if (c >= 0xF4) {
874
                /* greater than 0x10FFFF */
875
906
                if ((c > 0xF4) || (c1 >= 0x90))
876
696
                    goto encoding_error;
877
906
            }
878
879
1.42k
            return(4);
880
2.19k
        }
881
74.1k
    }
882
883
12.8k
encoding_error:
884
    /* Only report the first error */
885
12.8k
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
886
665
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
887
665
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
888
665
    }
889
890
12.8k
    return(0);
891
88.6k
}
892
893
/************************************************************************
894
 *                  *
895
 *    SAX2 defaulted attributes handling      *
896
 *                  *
897
 ************************************************************************/
898
899
/**
900
 * Final initialization of the parser context before starting to parse.
901
 *
902
 * This accounts for users modifying struct members of parser context
903
 * directly.
904
 *
905
 * @param ctxt  an XML parser context
906
 */
907
static void
908
21.9k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
909
21.9k
    xmlSAXHandlerPtr sax;
910
911
    /* Avoid unused variable warning if features are disabled. */
912
21.9k
    (void) sax;
913
914
    /*
915
     * Changing the SAX struct directly is still widespread practice
916
     * in internal and external code.
917
     */
918
21.9k
    if (ctxt == NULL) return;
919
21.9k
    sax = ctxt->sax;
920
21.9k
#ifdef LIBXML_SAX1_ENABLED
921
    /*
922
     * Only enable SAX2 if there SAX2 element handlers, except when there
923
     * are no element handlers at all.
924
     */
925
21.9k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
926
21.9k
        (sax) &&
927
21.9k
        (sax->initialized == XML_SAX2_MAGIC) &&
928
21.9k
        ((sax->startElementNs != NULL) ||
929
18.5k
         (sax->endElementNs != NULL) ||
930
18.5k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
931
18.5k
        ctxt->sax2 = 1;
932
#else
933
    ctxt->sax2 = 1;
934
#endif /* LIBXML_SAX1_ENABLED */
935
936
    /*
937
     * Some users replace the dictionary directly in the context struct.
938
     * We really need an API function to do that cleanly.
939
     */
940
21.9k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
941
21.9k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
942
21.9k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
943
21.9k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
944
21.9k
    (ctxt->str_xml_ns == NULL)) {
945
0
        xmlErrMemory(ctxt);
946
0
    }
947
948
21.9k
    xmlDictSetLimit(ctxt->dict,
949
21.9k
                    (ctxt->options & XML_PARSE_HUGE) ?
950
0
                        0 :
951
21.9k
                        XML_MAX_DICTIONARY_LIMIT);
952
21.9k
}
953
954
/*
 * One defaulted attribute: hashed prefix, name and value strings, a
 * pointer marking the end of the value, and two integers.
 * NOTE(review): "external" presumably records whether the default came
 * from the external subset and "expandedSize" the entity-expanded size
 * of the value -- confirm against xmlAddDefAttrs() and its users.
 */
typedef struct {
955
    xmlHashedString prefix;
956
    xmlHashedString name;
957
    xmlHashedString value;
958
    const xmlChar *valueEnd;
959
    int external;
960
    int expandedSize;
961
} xmlDefAttr;
962
963
/*
 * Variable-length list of the defaulted attributes of one element:
 * a header with the used and allocated counts followed by the
 * xmlDefAttr entries themselves.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
964
typedef xmlDefAttrs *xmlDefAttrsPtr;
965
struct _xmlDefAttrs {
966
    int nbAttrs;  /* number of defaulted attributes on that element */
967
    int maxAttrs;       /* the size of the array */
968
#if __STDC_VERSION__ >= 199901L
969
    /* Using a C99 flexible array member avoids UBSan errors. */
970
    xmlDefAttr attrs[] ATTRIBUTE_COUNTED_BY(maxAttrs);
971
#else
972
    /* Pre-C99 fallback: one-element trailing array. */
    xmlDefAttr attrs[1];
973
#endif
974
};
975
976
/**
977
 * Normalize the space in non CDATA attribute values:
978
 * If the attribute type is not CDATA, then the XML processor MUST further
979
 * process the normalized attribute value by discarding any leading and
980
 * trailing space (\#x20) characters, and by replacing sequences of space
981
 * (\#x20) characters by a single space (\#x20) character.
982
 * Note that the size of dst need to be at least src, and if one doesn't need
983
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
984
 * passing src as dst is just fine.
985
 *
986
 * @param src  the source string
987
 * @param dst  the target string
988
 * @returns a pointer to the normalized value (dst) or NULL if no conversion
989
 *         is needed.
990
 */
991
static xmlChar *
992
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
993
22.3k
{
994
22.3k
    if ((src == NULL) || (dst == NULL))
995
0
        return(NULL);
996
997
28.4k
    while (*src == 0x20) src++;
998
119k
    while (*src != 0) {
999
96.8k
  if (*src == 0x20) {
1000
20.7k
      while (*src == 0x20) src++;
1001
10.2k
      if (*src != 0)
1002
9.55k
    *dst++ = 0x20;
1003
86.6k
  } else {
1004
86.6k
      *dst++ = *src++;
1005
86.6k
  }
1006
96.8k
    }
1007
22.3k
    *dst = 0;
1008
22.3k
    if (dst == src)
1009
16.0k
       return(NULL);
1010
6.34k
    return(dst);
1011
22.3k
}
1012
1013
/**
1014
 * Add a defaulted attribute for an element
1015
 *
1016
 * @param ctxt  an XML parser context
1017
 * @param fullname  the element fullname
1018
 * @param fullattr  the attribute fullname
1019
 * @param value  the attribute value
1020
 */
1021
static void
1022
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1023
               const xmlChar *fullname,
1024
               const xmlChar *fullattr,
1025
21.1k
               const xmlChar *value) {
1026
21.1k
    xmlDefAttrsPtr defaults;
1027
21.1k
    xmlDefAttr *attr;
1028
21.1k
    int len, expandedSize;
1029
21.1k
    xmlHashedString name;
1030
21.1k
    xmlHashedString prefix;
1031
21.1k
    xmlHashedString hvalue;
1032
21.1k
    const xmlChar *localname;
1033
1034
    /*
1035
     * Allows to detect attribute redefinitions
1036
     */
1037
21.1k
    if (ctxt->attsSpecial != NULL) {
1038
18.5k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1039
13.9k
      return;
1040
18.5k
    }
1041
1042
7.17k
    if (ctxt->attsDefault == NULL) {
1043
2.60k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1044
2.60k
  if (ctxt->attsDefault == NULL)
1045
0
      goto mem_error;
1046
2.60k
    }
1047
1048
    /*
1049
     * split the element name into prefix:localname , the string found
1050
     * are within the DTD and then not associated to namespace names.
1051
     */
1052
7.17k
    localname = xmlSplitQName3(fullname, &len);
1053
7.17k
    if (localname == NULL) {
1054
6.80k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1055
6.80k
  prefix.name = NULL;
1056
6.80k
    } else {
1057
370
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1058
370
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1059
370
        if (prefix.name == NULL)
1060
0
            goto mem_error;
1061
370
    }
1062
7.17k
    if (name.name == NULL)
1063
0
        goto mem_error;
1064
1065
    /*
1066
     * make sure there is some storage
1067
     */
1068
7.17k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1069
7.17k
    if ((defaults == NULL) ||
1070
7.17k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1071
3.31k
        xmlDefAttrsPtr temp;
1072
3.31k
        int newSize;
1073
1074
3.31k
        if (defaults == NULL) {
1075
2.73k
            newSize = 4;
1076
2.73k
        } else {
1077
575
            if ((defaults->maxAttrs >= XML_MAX_ATTRS) ||
1078
575
                ((size_t) defaults->maxAttrs >
1079
575
                     SIZE_MAX / 2 / sizeof(temp[0]) - sizeof(*defaults)))
1080
0
                goto mem_error;
1081
1082
575
            if (defaults->maxAttrs > XML_MAX_ATTRS / 2)
1083
0
                newSize = XML_MAX_ATTRS;
1084
575
            else
1085
575
                newSize = defaults->maxAttrs * 2;
1086
575
        }
1087
3.31k
        temp = xmlRealloc(defaults,
1088
3.31k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1089
3.31k
  if (temp == NULL)
1090
0
      goto mem_error;
1091
3.31k
        if (defaults == NULL)
1092
2.73k
            temp->nbAttrs = 0;
1093
3.31k
  temp->maxAttrs = newSize;
1094
3.31k
        defaults = temp;
1095
3.31k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1096
3.31k
                          defaults, NULL) < 0) {
1097
0
      xmlFree(defaults);
1098
0
      goto mem_error;
1099
0
  }
1100
3.31k
    }
1101
1102
    /*
1103
     * Split the attribute name into prefix:localname , the string found
1104
     * are within the DTD and hen not associated to namespace names.
1105
     */
1106
7.17k
    localname = xmlSplitQName3(fullattr, &len);
1107
7.17k
    if (localname == NULL) {
1108
4.84k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1109
4.84k
  prefix.name = NULL;
1110
4.84k
    } else {
1111
2.32k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1112
2.32k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1113
2.32k
        if (prefix.name == NULL)
1114
0
            goto mem_error;
1115
2.32k
    }
1116
7.17k
    if (name.name == NULL)
1117
0
        goto mem_error;
1118
1119
    /* intern the string and precompute the end */
1120
7.17k
    len = strlen((const char *) value);
1121
7.17k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1122
7.17k
    if (hvalue.name == NULL)
1123
0
        goto mem_error;
1124
1125
7.17k
    expandedSize = strlen((const char *) name.name);
1126
7.17k
    if (prefix.name != NULL)
1127
2.32k
        expandedSize += strlen((const char *) prefix.name);
1128
7.17k
    expandedSize += len;
1129
1130
7.17k
    attr = &defaults->attrs[defaults->nbAttrs++];
1131
7.17k
    attr->name = name;
1132
7.17k
    attr->prefix = prefix;
1133
7.17k
    attr->value = hvalue;
1134
7.17k
    attr->valueEnd = hvalue.name + len;
1135
7.17k
    attr->external = PARSER_EXTERNAL(ctxt);
1136
7.17k
    attr->expandedSize = expandedSize;
1137
1138
7.17k
    return;
1139
1140
0
mem_error:
1141
0
    xmlErrMemory(ctxt);
1142
0
}
1143
1144
/**
1145
 * Register this attribute type
1146
 *
1147
 * @param ctxt  an XML parser context
1148
 * @param fullname  the element fullname
1149
 * @param fullattr  the attribute fullname
1150
 * @param type  the attribute type
1151
 */
1152
static void
1153
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1154
      const xmlChar *fullname,
1155
      const xmlChar *fullattr,
1156
      int type)
1157
31.8k
{
1158
31.8k
    if (ctxt->attsSpecial == NULL) {
1159
3.14k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1160
3.14k
  if (ctxt->attsSpecial == NULL)
1161
0
      goto mem_error;
1162
3.14k
    }
1163
1164
31.8k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1165
31.8k
                    XML_INT_TO_PTR(type)) < 0)
1166
0
        goto mem_error;
1167
31.8k
    return;
1168
1169
31.8k
mem_error:
1170
0
    xmlErrMemory(ctxt);
1171
0
}
1172
1173
/**
1174
 * Removes CDATA attributes from the special attribute table
1175
 */
1176
static void
1177
xmlCleanSpecialAttrCallback(void *payload, void *data,
1178
                            const xmlChar *fullname, const xmlChar *fullattr,
1179
7.96k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1180
7.96k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1181
1182
7.96k
    if (XML_PTR_TO_INT(payload) == XML_ATTRIBUTE_CDATA) {
1183
992
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1184
992
    }
1185
7.96k
}
1186
1187
/**
1188
 * Trim the list of attributes defined to remove all those of type
1189
 * CDATA as they are not special. This call should be done when finishing
1190
 * to parse the DTD and before starting to parse the document root.
1191
 *
1192
 * @param ctxt  an XML parser context
1193
 */
1194
static void
1195
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1196
10.7k
{
1197
10.7k
    if (ctxt->attsSpecial == NULL)
1198
7.65k
        return;
1199
1200
3.14k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1201
1202
3.14k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1203
308
        xmlHashFree(ctxt->attsSpecial, NULL);
1204
308
        ctxt->attsSpecial = NULL;
1205
308
    }
1206
3.14k
}
1207
1208
/**
1209
 * Checks that the value conforms to the LanguageID production:
1210
 *
1211
 * @deprecated Internal function, do not use.
1212
 *
1213
 * NOTE: this is somewhat deprecated, those productions were removed from
1214
 * the XML Second edition.
1215
 *
1216
 *     [33] LanguageID ::= Langcode ('-' Subcode)*
1217
 *     [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1218
 *     [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1219
 *     [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1220
 *     [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1221
 *     [38] Subcode ::= ([a-z] | [A-Z])+
1222
 *
1223
 * The current REC reference the successors of RFC 1766, currently 5646
1224
 *
1225
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1226
 *
1227
 *     langtag       = language
1228
 *                     ["-" script]
1229
 *                     ["-" region]
1230
 *                     *("-" variant)
1231
 *                     *("-" extension)
1232
 *                     ["-" privateuse]
1233
 *     language      = 2*3ALPHA            ; shortest ISO 639 code
1234
 *                     ["-" extlang]       ; sometimes followed by
1235
 *                                         ; extended language subtags
1236
 *                   / 4ALPHA              ; or reserved for future use
1237
 *                   / 5*8ALPHA            ; or registered language subtag
1238
 *
1239
 *     extlang       = 3ALPHA              ; selected ISO 639 codes
1240
 *                     *2("-" 3ALPHA)      ; permanently reserved
1241
 *
1242
 *     script        = 4ALPHA              ; ISO 15924 code
1243
 *
1244
 *     region        = 2ALPHA              ; ISO 3166-1 code
1245
 *                   / 3DIGIT              ; UN M.49 code
1246
 *
1247
 *     variant       = 5*8alphanum         ; registered variants
1248
 *                   / (DIGIT 3alphanum)
1249
 *
1250
 *     extension     = singleton 1*("-" (2*8alphanum))
1251
 *
1252
 *                                         ; Single alphanumerics
1253
 *                                         ; "x" reserved for private use
1254
 *     singleton     = DIGIT               ; 0 - 9
1255
 *                   / %x41-57             ; A - W
1256
 *                   / %x59-5A             ; Y - Z
1257
 *                   / %x61-77             ; a - w
1258
 *                   / %x79-7A             ; y - z
1259
 *
1260
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1261
 * The parser below doesn't try to cope with extension or privateuse
1262
 * that could be added but that's not interoperable anyway
1263
 *
1264
 * @param lang  pointer to the string value
1265
 * @returns 1 if correct 0 otherwise
1266
 **/
1267
int
1268
xmlCheckLanguageID(const xmlChar * lang)
1269
2.89k
{
1270
2.89k
    const xmlChar *cur = lang, *nxt;
1271
1272
2.89k
    if (cur == NULL)
1273
117
        return (0);
1274
2.77k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1275
2.77k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1276
2.77k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1277
2.77k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1278
        /*
1279
         * Still allow IANA code and user code which were coming
1280
         * from the previous version of the XML-1.0 specification
1281
         * it's deprecated but we should not fail
1282
         */
1283
289
        cur += 2;
1284
967
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1285
967
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1286
678
            cur++;
1287
289
        return(cur[0] == 0);
1288
289
    }
1289
2.48k
    nxt = cur;
1290
8.79k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1291
8.79k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1292
6.30k
           nxt++;
1293
2.48k
    if (nxt - cur >= 4) {
1294
        /*
1295
         * Reserved
1296
         */
1297
218
        if ((nxt - cur > 8) || (nxt[0] != 0))
1298
149
            return(0);
1299
69
        return(1);
1300
218
    }
1301
2.27k
    if (nxt - cur < 2)
1302
133
        return(0);
1303
    /* we got an ISO 639 code */
1304
2.13k
    if (nxt[0] == 0)
1305
123
        return(1);
1306
2.01k
    if (nxt[0] != '-')
1307
80
        return(0);
1308
1309
1.93k
    nxt++;
1310
1.93k
    cur = nxt;
1311
    /* now we can have extlang or script or region or variant */
1312
1.93k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1313
203
        goto region_m49;
1314
1315
8.26k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1316
8.26k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1317
6.53k
           nxt++;
1318
1.73k
    if (nxt - cur == 4)
1319
593
        goto script;
1320
1.13k
    if (nxt - cur == 2)
1321
243
        goto region;
1322
896
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1323
164
        goto variant;
1324
732
    if (nxt - cur != 3)
1325
151
        return(0);
1326
    /* we parsed an extlang */
1327
581
    if (nxt[0] == 0)
1328
68
        return(1);
1329
513
    if (nxt[0] != '-')
1330
67
        return(0);
1331
1332
446
    nxt++;
1333
446
    cur = nxt;
1334
    /* now we can have script or region or variant */
1335
446
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1336
82
        goto region_m49;
1337
1338
2.25k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1339
2.25k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1340
1.89k
           nxt++;
1341
364
    if (nxt - cur == 2)
1342
74
        goto region;
1343
290
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1344
69
        goto variant;
1345
221
    if (nxt - cur != 4)
1346
152
        return(0);
1347
    /* we parsed a script */
1348
662
script:
1349
662
    if (nxt[0] == 0)
1350
159
        return(1);
1351
503
    if (nxt[0] != '-')
1352
70
        return(0);
1353
1354
433
    nxt++;
1355
433
    cur = nxt;
1356
    /* now we can have region or variant */
1357
433
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1358
66
        goto region_m49;
1359
1360
2.00k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1361
2.00k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1362
1.63k
           nxt++;
1363
1364
367
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1365
72
        goto variant;
1366
295
    if (nxt - cur != 2)
1367
151
        return(0);
1368
    /* we parsed a region */
1369
532
region:
1370
532
    if (nxt[0] == 0)
1371
125
        return(1);
1372
407
    if (nxt[0] != '-')
1373
124
        return(0);
1374
1375
283
    nxt++;
1376
283
    cur = nxt;
1377
    /* now we can just have a variant */
1378
1.74k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1379
1.74k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1380
1.45k
           nxt++;
1381
1382
283
    if ((nxt - cur < 5) || (nxt - cur > 8))
1383
215
        return(0);
1384
1385
    /* we parsed a variant */
1386
373
variant:
1387
373
    if (nxt[0] == 0)
1388
253
        return(1);
1389
120
    if (nxt[0] != '-')
1390
47
        return(0);
1391
    /* extensions and private use subtags not checked */
1392
73
    return (1);
1393
1394
351
region_m49:
1395
351
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1396
351
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1397
71
        nxt += 3;
1398
71
        goto region;
1399
71
    }
1400
280
    return(0);
1401
351
}
1402
1403
/************************************************************************
1404
 *                  *
1405
 *    Parser stacks related functions and macros    *
1406
 *                  *
1407
 ************************************************************************/
1408
1409
static xmlChar *
1410
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1411
1412
/**
1413
 * Create a new namespace database.
1414
 *
1415
 * @returns the new obejct.
1416
 */
1417
xmlParserNsData *
1418
21.9k
xmlParserNsCreate(void) {
1419
21.9k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1420
1421
21.9k
    if (nsdb == NULL)
1422
0
        return(NULL);
1423
21.9k
    memset(nsdb, 0, sizeof(*nsdb));
1424
21.9k
    nsdb->defaultNsIndex = INT_MAX;
1425
1426
21.9k
    return(nsdb);
1427
21.9k
}
1428
1429
/**
1430
 * Free a namespace database.
1431
 *
1432
 * @param nsdb  namespace database
1433
 */
1434
void
1435
21.9k
xmlParserNsFree(xmlParserNsData *nsdb) {
1436
21.9k
    if (nsdb == NULL)
1437
0
        return;
1438
1439
21.9k
    xmlFree(nsdb->extra);
1440
21.9k
    xmlFree(nsdb->hash);
1441
21.9k
    xmlFree(nsdb);
1442
21.9k
}
1443
1444
/**
1445
 * Reset a namespace database.
1446
 *
1447
 * @param nsdb  namespace database
1448
 */
1449
static void
1450
0
xmlParserNsReset(xmlParserNsData *nsdb) {
1451
0
    if (nsdb == NULL)
1452
0
        return;
1453
1454
0
    nsdb->hashElems = 0;
1455
0
    nsdb->elementId = 0;
1456
0
    nsdb->defaultNsIndex = INT_MAX;
1457
1458
0
    if (nsdb->hash)
1459
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1460
0
}
1461
1462
/**
1463
 * Signal that a new element has started.
1464
 *
1465
 * @param nsdb  namespace database
1466
 * @returns 0 on success, -1 if the element counter overflowed.
1467
 */
1468
static int
1469
98.7k
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1470
98.7k
    if (nsdb->elementId == UINT_MAX)
1471
0
        return(-1);
1472
98.7k
    nsdb->elementId++;
1473
1474
98.7k
    return(0);
1475
98.7k
}
1476
1477
/**
1478
 * Lookup namespace with given prefix. If `bucketPtr` is non-NULL, it will
1479
 * be set to the matching bucket, or the first empty bucket if no match
1480
 * was found.
1481
 *
1482
 * @param ctxt  parser context
1483
 * @param prefix  namespace prefix
1484
 * @param bucketPtr  optional bucket (return value)
1485
 * @returns the namespace index on success, INT_MAX if no namespace was
1486
 * found.
1487
 */
1488
static int
1489
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1490
273k
                  xmlParserNsBucket **bucketPtr) {
1491
273k
    xmlParserNsBucket *bucket, *tombstone;
1492
273k
    unsigned index, hashValue;
1493
1494
273k
    if (prefix->name == NULL)
1495
82.1k
        return(ctxt->nsdb->defaultNsIndex);
1496
1497
191k
    if (ctxt->nsdb->hashSize == 0)
1498
6.52k
        return(INT_MAX);
1499
1500
185k
    hashValue = prefix->hashValue;
1501
185k
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1502
185k
    bucket = &ctxt->nsdb->hash[index];
1503
185k
    tombstone = NULL;
1504
1505
283k
    while (bucket->hashValue) {
1506
256k
        if (bucket->index == INT_MAX) {
1507
31.6k
            if (tombstone == NULL)
1508
20.1k
                tombstone = bucket;
1509
225k
        } else if (bucket->hashValue == hashValue) {
1510
158k
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1511
158k
                if (bucketPtr != NULL)
1512
129k
                    *bucketPtr = bucket;
1513
158k
                return(bucket->index);
1514
158k
            }
1515
158k
        }
1516
1517
98.5k
        index++;
1518
98.5k
        bucket++;
1519
98.5k
        if (index == ctxt->nsdb->hashSize) {
1520
13.3k
            index = 0;
1521
13.3k
            bucket = ctxt->nsdb->hash;
1522
13.3k
        }
1523
98.5k
    }
1524
1525
26.9k
    if (bucketPtr != NULL)
1526
19.4k
        *bucketPtr = tombstone ? tombstone : bucket;
1527
26.9k
    return(INT_MAX);
1528
185k
}
1529
1530
/**
1531
 * Lookup namespace URI with given prefix.
1532
 *
1533
 * @param ctxt  parser context
1534
 * @param prefix  namespace prefix
1535
 * @returns the namespace URI on success, NULL if no namespace was found.
1536
 */
1537
static const xmlChar *
1538
81.7k
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1539
81.7k
    const xmlChar *ret;
1540
81.7k
    int nsIndex;
1541
1542
81.7k
    if (prefix->name == ctxt->str_xml)
1543
939
        return(ctxt->str_xml_ns);
1544
1545
    /*
1546
     * minNsIndex is used when building an entity tree. We must
1547
     * ignore namespaces declared outside the entity.
1548
     */
1549
80.7k
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1550
80.7k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1551
67.6k
        return(NULL);
1552
1553
13.0k
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1554
13.0k
    if (ret[0] == 0)
1555
4.58k
        ret = NULL;
1556
13.0k
    return(ret);
1557
80.7k
}
1558
1559
/**
1560
 * Lookup extra data for the given prefix. This returns data stored
1561
 * with xmlParserNsUdpateSax().
1562
 *
1563
 * @param ctxt  parser context
1564
 * @param prefix  namespace prefix
1565
 * @returns the data on success, NULL if no namespace was found.
1566
 */
1567
void *
1568
6.58k
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1569
6.58k
    xmlHashedString hprefix;
1570
6.58k
    int nsIndex;
1571
1572
6.58k
    if (prefix == ctxt->str_xml)
1573
3.99k
        return(NULL);
1574
1575
2.58k
    hprefix.name = prefix;
1576
2.58k
    if (prefix != NULL)
1577
1.25k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1578
1.32k
    else
1579
1.32k
        hprefix.hashValue = 0;
1580
2.58k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1581
2.58k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1582
0
        return(NULL);
1583
1584
2.58k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1585
2.58k
}
1586
1587
/**
1588
 * Sets or updates extra data for the given prefix. This value will be
1589
 * returned by xmlParserNsLookupSax() as long as the namespace with the
1590
 * given prefix is in scope.
1591
 *
1592
 * @param ctxt  parser context
1593
 * @param prefix  namespace prefix
1594
 * @param saxData  extra data for SAX handler
1595
 * @returns the data on success, NULL if no namespace was found.
1596
 */
1597
int
1598
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1599
25.0k
                     void *saxData) {
1600
25.0k
    xmlHashedString hprefix;
1601
25.0k
    int nsIndex;
1602
1603
25.0k
    if (prefix == ctxt->str_xml)
1604
0
        return(-1);
1605
1606
25.0k
    hprefix.name = prefix;
1607
25.0k
    if (prefix != NULL)
1608
20.1k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1609
4.87k
    else
1610
4.87k
        hprefix.hashValue = 0;
1611
25.0k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1612
25.0k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1613
0
        return(-1);
1614
1615
25.0k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1616
25.0k
    return(0);
1617
25.0k
}
1618
1619
/**
1620
 * Grows the namespace tables.
1621
 *
1622
 * @param ctxt  parser context
1623
 * @returns 0 on success, -1 if a memory allocation failed.
1624
 */
1625
static int
1626
7.37k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1627
7.37k
    const xmlChar **table;
1628
7.37k
    xmlParserNsExtra *extra;
1629
7.37k
    int newSize;
1630
1631
7.37k
    newSize = xmlGrowCapacity(ctxt->nsMax,
1632
7.37k
                              sizeof(table[0]) + sizeof(extra[0]),
1633
7.37k
                              16, XML_MAX_ITEMS);
1634
7.37k
    if (newSize < 0)
1635
0
        goto error;
1636
1637
7.37k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1638
7.37k
    if (table == NULL)
1639
0
        goto error;
1640
7.37k
    ctxt->nsTab = table;
1641
1642
7.37k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1643
7.37k
    if (extra == NULL)
1644
0
        goto error;
1645
7.37k
    ctxt->nsdb->extra = extra;
1646
1647
7.37k
    ctxt->nsMax = newSize;
1648
7.37k
    return(0);
1649
1650
0
error:
1651
0
    xmlErrMemory(ctxt);
1652
0
    return(-1);
1653
7.37k
}
1654
1655
/**
1656
 * Push a new namespace on the table.
1657
 *
1658
 * @param ctxt  parser context
1659
 * @param prefix  prefix with hash value
1660
 * @param uri  uri with hash value
1661
 * @param saxData  extra data for SAX handler
1662
 * @param defAttr  whether the namespace comes from a default attribute
1663
 * @returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1664
 * -1 if a memory allocation failed.
1665
 */
1666
static int
1667
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1668
87.2k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1669
87.2k
    xmlParserNsBucket *bucket = NULL;
1670
87.2k
    xmlParserNsExtra *extra;
1671
87.2k
    const xmlChar **ns;
1672
87.2k
    unsigned hashValue, nsIndex, oldIndex;
1673
1674
87.2k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1675
194
        return(0);
1676
1677
87.0k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1678
0
        xmlErrMemory(ctxt);
1679
0
        return(-1);
1680
0
    }
1681
1682
    /*
1683
     * Default namespace and 'xml' namespace
1684
     */
1685
87.0k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1686
11.2k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1687
1688
11.2k
        if (oldIndex != INT_MAX) {
1689
8.54k
            extra = &ctxt->nsdb->extra[oldIndex];
1690
1691
8.54k
            if (extra->elementId == ctxt->nsdb->elementId) {
1692
558
                if (defAttr == 0)
1693
491
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1694
558
                return(0);
1695
558
            }
1696
1697
7.98k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1698
7.98k
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1699
440
                return(0);
1700
7.98k
        }
1701
1702
10.2k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1703
10.2k
        goto populate_entry;
1704
11.2k
    }
1705
1706
    /*
1707
     * Hash table lookup
1708
     */
1709
75.8k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1710
75.8k
    if (oldIndex != INT_MAX) {
1711
55.0k
        extra = &ctxt->nsdb->extra[oldIndex];
1712
1713
        /*
1714
         * Check for duplicate definitions on the same element.
1715
         */
1716
55.0k
        if (extra->elementId == ctxt->nsdb->elementId) {
1717
202
            if (defAttr == 0)
1718
132
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1719
202
            return(0);
1720
202
        }
1721
1722
54.8k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1723
54.8k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1724
398
            return(0);
1725
1726
54.4k
        bucket->index = ctxt->nsNr;
1727
54.4k
        goto populate_entry;
1728
54.8k
    }
1729
1730
    /*
1731
     * Insert new bucket
1732
     */
1733
1734
20.7k
    hashValue = prefix->hashValue;
1735
1736
    /*
1737
     * Grow hash table, 50% fill factor
1738
     */
1739
20.7k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1740
1.75k
        xmlParserNsBucket *newHash;
1741
1.75k
        unsigned newSize, i, index;
1742
1743
1.75k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1744
0
            xmlErrMemory(ctxt);
1745
0
            return(-1);
1746
0
        }
1747
1.75k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1748
1.75k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1749
1.75k
        if (newHash == NULL) {
1750
0
            xmlErrMemory(ctxt);
1751
0
            return(-1);
1752
0
        }
1753
1.75k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1754
1755
65.8k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1756
64.1k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1757
64.1k
            unsigned newIndex;
1758
1759
64.1k
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1760
62.1k
                continue;
1761
1.96k
            newIndex = hv & (newSize - 1);
1762
1763
3.77k
            while (newHash[newIndex].hashValue != 0) {
1764
1.80k
                newIndex++;
1765
1.80k
                if (newIndex == newSize)
1766
433
                    newIndex = 0;
1767
1.80k
            }
1768
1769
1.96k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1770
1.96k
        }
1771
1772
1.75k
        xmlFree(ctxt->nsdb->hash);
1773
1.75k
        ctxt->nsdb->hash = newHash;
1774
1.75k
        ctxt->nsdb->hashSize = newSize;
1775
1776
        /*
1777
         * Relookup
1778
         */
1779
1.75k
        index = hashValue & (newSize - 1);
1780
1781
2.21k
        while (newHash[index].hashValue != 0) {
1782
459
            index++;
1783
459
            if (index == newSize)
1784
51
                index = 0;
1785
459
        }
1786
1787
1.75k
        bucket = &newHash[index];
1788
1.75k
    }
1789
1790
20.7k
    bucket->hashValue = hashValue;
1791
20.7k
    bucket->index = ctxt->nsNr;
1792
20.7k
    ctxt->nsdb->hashElems++;
1793
20.7k
    oldIndex = INT_MAX;
1794
1795
85.4k
populate_entry:
1796
85.4k
    nsIndex = ctxt->nsNr;
1797
1798
85.4k
    ns = &ctxt->nsTab[nsIndex * 2];
1799
85.4k
    ns[0] = prefix ? prefix->name : NULL;
1800
85.4k
    ns[1] = uri->name;
1801
1802
85.4k
    extra = &ctxt->nsdb->extra[nsIndex];
1803
85.4k
    extra->saxData = saxData;
1804
85.4k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1805
85.4k
    extra->uriHashValue = uri->hashValue;
1806
85.4k
    extra->elementId = ctxt->nsdb->elementId;
1807
85.4k
    extra->oldIndex = oldIndex;
1808
1809
85.4k
    ctxt->nsNr++;
1810
1811
85.4k
    return(1);
1812
20.7k
}
1813
1814
/**
1815
 * Pops the top `nr` namespaces and restores the hash table.
1816
 *
1817
 * @param ctxt  an XML parser context
1818
 * @param nr  the number to pop
1819
 * @returns the number of namespaces popped.
1820
 */
1821
static int
1822
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1823
22.9k
{
1824
22.9k
    int i;
1825
1826
    /* assert(nr <= ctxt->nsNr); */
1827
1828
107k
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1829
84.4k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1830
84.4k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1831
1832
84.4k
        if (prefix == NULL) {
1833
10.0k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1834
74.4k
        } else {
1835
74.4k
            xmlHashedString hprefix;
1836
74.4k
            xmlParserNsBucket *bucket = NULL;
1837
1838
74.4k
            hprefix.name = prefix;
1839
74.4k
            hprefix.hashValue = extra->prefixHashValue;
1840
74.4k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1841
            /* assert(bucket && bucket->hashValue); */
1842
74.4k
            bucket->index = extra->oldIndex;
1843
74.4k
        }
1844
84.4k
    }
1845
1846
22.9k
    ctxt->nsNr -= nr;
1847
22.9k
    return(nr);
1848
22.9k
}
1849
1850
static int
1851
4.90k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt) {
1852
4.90k
    const xmlChar **atts;
1853
4.90k
    unsigned *attallocs;
1854
4.90k
    int newSize;
1855
1856
4.90k
    newSize = xmlGrowCapacity(ctxt->maxatts / 5,
1857
4.90k
                              sizeof(atts[0]) * 5 + sizeof(attallocs[0]),
1858
4.90k
                              10, XML_MAX_ATTRS);
1859
4.90k
    if (newSize < 0) {
1860
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
1861
0
                    "Maximum number of attributes exceeded");
1862
0
        return(-1);
1863
0
    }
1864
1865
4.90k
    atts = xmlRealloc(ctxt->atts, newSize * sizeof(atts[0]) * 5);
1866
4.90k
    if (atts == NULL)
1867
0
        goto mem_error;
1868
4.90k
    ctxt->atts = atts;
1869
1870
4.90k
    attallocs = xmlRealloc(ctxt->attallocs,
1871
4.90k
                           newSize * sizeof(attallocs[0]));
1872
4.90k
    if (attallocs == NULL)
1873
0
        goto mem_error;
1874
4.90k
    ctxt->attallocs = attallocs;
1875
1876
4.90k
    ctxt->maxatts = newSize * 5;
1877
1878
4.90k
    return(0);
1879
1880
0
mem_error:
1881
0
    xmlErrMemory(ctxt);
1882
0
    return(-1);
1883
4.90k
}
1884
1885
/**
1886
 * Pushes a new parser input on top of the input stack
1887
 *
1888
 * @param ctxt  an XML parser context
1889
 * @param value  the parser input
1890
 * @returns -1 in case of error, the index in the stack otherwise
1891
 */
1892
int
1893
xmlCtxtPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1894
76.4k
{
1895
76.4k
    char *directory = NULL;
1896
76.4k
    int maxDepth;
1897
1898
76.4k
    if ((ctxt == NULL) || (value == NULL))
1899
0
        return(-1);
1900
1901
76.4k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
1902
1903
76.4k
    if (ctxt->inputNr >= ctxt->inputMax) {
1904
2.90k
        xmlParserInputPtr *tmp;
1905
2.90k
        int newSize;
1906
1907
2.90k
        newSize = xmlGrowCapacity(ctxt->inputMax, sizeof(tmp[0]),
1908
2.90k
                                  5, maxDepth);
1909
2.90k
        if (newSize < 0) {
1910
0
            xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
1911
0
                           "Maximum entity nesting depth exceeded");
1912
0
            xmlHaltParser(ctxt);
1913
0
            return(-1);
1914
0
        }
1915
2.90k
        tmp = xmlRealloc(ctxt->inputTab, newSize * sizeof(tmp[0]));
1916
2.90k
        if (tmp == NULL) {
1917
0
            xmlErrMemory(ctxt);
1918
0
            return(-1);
1919
0
        }
1920
2.90k
        ctxt->inputTab = tmp;
1921
2.90k
        ctxt->inputMax = newSize;
1922
2.90k
    }
1923
1924
76.4k
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1925
21.9k
        directory = xmlParserGetDirectory(value->filename);
1926
21.9k
        if (directory == NULL) {
1927
0
            xmlErrMemory(ctxt);
1928
0
            return(-1);
1929
0
        }
1930
21.9k
    }
1931
1932
76.4k
    if (ctxt->input_id >= INT_MAX) {
1933
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1934
0
        return(-1);
1935
0
    }
1936
1937
76.4k
    ctxt->inputTab[ctxt->inputNr] = value;
1938
76.4k
    ctxt->input = value;
1939
1940
76.4k
    if (ctxt->inputNr == 0) {
1941
21.9k
        xmlFree(ctxt->directory);
1942
21.9k
        ctxt->directory = directory;
1943
21.9k
    }
1944
1945
    /*
1946
     * Internally, the input ID is only used to detect parameter entity
1947
     * boundaries. But there are entity loaders in downstream code that
1948
     * detect the main document by checking for "input_id == 1".
1949
     */
1950
76.4k
    value->id = ctxt->input_id++;
1951
1952
76.4k
    return(ctxt->inputNr++);
1953
76.4k
}
1954
1955
/**
1956
 * Pops the top parser input from the input stack
1957
 *
1958
 * @param ctxt  an XML parser context
1959
 * @returns the input just removed
1960
 */
1961
xmlParserInputPtr
1962
xmlCtxtPopInput(xmlParserCtxtPtr ctxt)
1963
120k
{
1964
120k
    xmlParserInputPtr ret;
1965
1966
120k
    if (ctxt == NULL)
1967
0
        return(NULL);
1968
120k
    if (ctxt->inputNr <= 0)
1969
43.9k
        return (NULL);
1970
76.4k
    ctxt->inputNr--;
1971
76.4k
    if (ctxt->inputNr > 0)
1972
54.4k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1973
21.9k
    else
1974
21.9k
        ctxt->input = NULL;
1975
76.4k
    ret = ctxt->inputTab[ctxt->inputNr];
1976
76.4k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1977
76.4k
    return (ret);
1978
120k
}
1979
1980
/**
1981
 * Pushes a new element node on top of the node stack
1982
 *
1983
 * @deprecated Internal function, do not use.
1984
 *
1985
 * @param ctxt  an XML parser context
1986
 * @param value  the element node
1987
 * @returns -1 in case of error, the index in the stack otherwise
1988
 */
1989
int
1990
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1991
85.8k
{
1992
85.8k
    if (ctxt == NULL)
1993
0
        return(0);
1994
1995
85.8k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1996
7.92k
        int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
1997
7.92k
        xmlNodePtr *tmp;
1998
7.92k
        int newSize;
1999
2000
7.92k
        newSize = xmlGrowCapacity(ctxt->nodeMax, sizeof(tmp[0]),
2001
7.92k
                                  10, maxDepth);
2002
7.92k
        if (newSize < 0) {
2003
14
            xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2004
14
                    "Excessive depth in document: %d,"
2005
14
                    " use XML_PARSE_HUGE option\n",
2006
14
                    ctxt->nodeNr);
2007
14
            xmlHaltParser(ctxt);
2008
14
            return(-1);
2009
14
        }
2010
2011
7.91k
  tmp = xmlRealloc(ctxt->nodeTab, newSize * sizeof(tmp[0]));
2012
7.91k
        if (tmp == NULL) {
2013
0
            xmlErrMemory(ctxt);
2014
0
            return (-1);
2015
0
        }
2016
7.91k
        ctxt->nodeTab = tmp;
2017
7.91k
  ctxt->nodeMax = newSize;
2018
7.91k
    }
2019
2020
85.8k
    ctxt->nodeTab[ctxt->nodeNr] = value;
2021
85.8k
    ctxt->node = value;
2022
85.8k
    return (ctxt->nodeNr++);
2023
85.8k
}
2024
2025
/**
2026
 * Pops the top element node from the node stack
2027
 *
2028
 * @deprecated Internal function, do not use.
2029
 *
2030
 * @param ctxt  an XML parser context
2031
 * @returns the node just removed
2032
 */
2033
xmlNodePtr
2034
nodePop(xmlParserCtxtPtr ctxt)
2035
105k
{
2036
105k
    xmlNodePtr ret;
2037
2038
105k
    if (ctxt == NULL) return(NULL);
2039
105k
    if (ctxt->nodeNr <= 0)
2040
23.2k
        return (NULL);
2041
82.6k
    ctxt->nodeNr--;
2042
82.6k
    if (ctxt->nodeNr > 0)
2043
78.3k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2044
4.28k
    else
2045
4.28k
        ctxt->node = NULL;
2046
82.6k
    ret = ctxt->nodeTab[ctxt->nodeNr];
2047
82.6k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2048
82.6k
    return (ret);
2049
105k
}
2050
2051
/**
2052
 * Pushes a new element name/prefix/URL on top of the name stack
2053
 *
2054
 * @param ctxt  an XML parser context
2055
 * @param value  the element name
2056
 * @param prefix  the element prefix
2057
 * @param URI  the element namespace name
2058
 * @param line  the current line number for error messages
2059
 * @param nsNr  the number of namespaces pushed on the namespace table
2060
 * @returns -1 in case of error, the index in the stack otherwise
2061
 */
2062
static int
2063
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2064
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2065
132k
{
2066
132k
    xmlStartTag *tag;
2067
2068
132k
    if (ctxt->nameNr >= ctxt->nameMax) {
2069
12.5k
        const xmlChar **tmp;
2070
12.5k
        xmlStartTag *tmp2;
2071
12.5k
        int newSize;
2072
2073
12.5k
        newSize = xmlGrowCapacity(ctxt->nameMax,
2074
12.5k
                                  sizeof(tmp[0]) + sizeof(tmp2[0]),
2075
12.5k
                                  10, XML_MAX_ITEMS);
2076
12.5k
        if (newSize < 0)
2077
0
            goto mem_error;
2078
2079
12.5k
        tmp = xmlRealloc(ctxt->nameTab, newSize * sizeof(tmp[0]));
2080
12.5k
        if (tmp == NULL)
2081
0
      goto mem_error;
2082
12.5k
  ctxt->nameTab = tmp;
2083
2084
12.5k
        tmp2 = xmlRealloc(ctxt->pushTab, newSize * sizeof(tmp2[0]));
2085
12.5k
        if (tmp2 == NULL)
2086
0
      goto mem_error;
2087
12.5k
  ctxt->pushTab = tmp2;
2088
2089
12.5k
        ctxt->nameMax = newSize;
2090
120k
    } else if (ctxt->pushTab == NULL) {
2091
13.9k
        ctxt->pushTab = xmlMalloc(ctxt->nameMax * sizeof(ctxt->pushTab[0]));
2092
13.9k
        if (ctxt->pushTab == NULL)
2093
0
            goto mem_error;
2094
13.9k
    }
2095
132k
    ctxt->nameTab[ctxt->nameNr] = value;
2096
132k
    ctxt->name = value;
2097
132k
    tag = &ctxt->pushTab[ctxt->nameNr];
2098
132k
    tag->prefix = prefix;
2099
132k
    tag->URI = URI;
2100
132k
    tag->line = line;
2101
132k
    tag->nsNr = nsNr;
2102
132k
    return (ctxt->nameNr++);
2103
0
mem_error:
2104
0
    xmlErrMemory(ctxt);
2105
0
    return (-1);
2106
132k
}
2107
#ifdef LIBXML_PUSH_ENABLED
/**
 * Removes the topmost entry from the name stack and makes the entry
 * below it (if any) the current element name.
 *
 * @param ctxt  an XML parser context
 * @returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    /* Clear the vacated slot so no stale pointer is left behind. */
    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2131
2132
/**
2133
 * Pops the top element name from the name stack
2134
 *
2135
 * @deprecated Internal function, do not use.
2136
 *
2137
 * @param ctxt  an XML parser context
2138
 * @returns the name just removed
2139
 */
2140
static const xmlChar *
2141
namePop(xmlParserCtxtPtr ctxt)
2142
127k
{
2143
127k
    const xmlChar *ret;
2144
2145
127k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2146
0
        return (NULL);
2147
127k
    ctxt->nameNr--;
2148
127k
    if (ctxt->nameNr > 0)
2149
118k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2150
8.97k
    else
2151
8.97k
        ctxt->name = NULL;
2152
127k
    ret = ctxt->nameTab[ctxt->nameNr];
2153
127k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2154
127k
    return (ret);
2155
127k
}
2156
2157
156k
/*
 * Pushes `val` on the ctxt->spaceTab stack, growing the table when it
 * is full, and points ctxt->space at the new top entry.
 *
 * Returns the index of the pushed entry, or -1 on error.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;
        int capacity;

        capacity = xmlGrowCapacity(ctxt->spaceMax, sizeof(grown[0]),
                                   10, XML_MAX_ITEMS);
        if (capacity < 0) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        grown = xmlRealloc(ctxt->spaceTab, capacity * sizeof(grown[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        ctxt->spaceTab = grown;
        ctxt->spaceMax = capacity;
    }

    ctxt->spaceTab[ctxt->spaceNr] = val;
    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    return(ctxt->spaceNr++);
}
2182
2183
151k
/*
 * Pops the top entry of the ctxt->spaceTab stack and repoints
 * ctxt->space at the entry below it, or at slot 0 when the stack
 * becomes empty. The vacated slot is overwritten with -1.
 *
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2195
2196
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8-bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
2228
2229
2.37M
/*
 * All macros below expect a variable named `ctxt` (the parser context)
 * to be in scope at the point of use. Macro parameters are parenthesized
 * in the expansions so that expression arguments bind as intended.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/* CMPn: compare the first n bytes at `s` against n character constants. */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/* Advance by `val` bytes on the same line; refill when hitting a 0 byte. */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/* Like SKIP but tracks line/column across newlines. */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SHRINK \
    if (!PARSER_PROGRESSIVE(ctxt)) \
	xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);					\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2305
2306
static int
2307
315k
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2308
315k
    int c = xmlCurrentChar(ctxt, len);
2309
2310
315k
    if (c == XML_INVALID_CHAR)
2311
17.0k
        c = 0xFFFD; /* replacement character */
2312
2313
315k
    return(c);
2314
315k
}
2315
2316
/**
2317
 * Skip whitespace in the input stream.
2318
 *
2319
 * @deprecated Internal function, do not use.
2320
 *
2321
 * @param ctxt  the XML parser context
2322
 * @returns the number of space chars skipped
2323
 */
2324
int
2325
591k
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2326
591k
    const xmlChar *cur;
2327
591k
    int res = 0;
2328
2329
591k
    cur = ctxt->input->cur;
2330
591k
    while (IS_BLANK_CH(*cur)) {
2331
109k
        if (*cur == '\n') {
2332
633
            ctxt->input->line++; ctxt->input->col = 1;
2333
109k
        } else {
2334
109k
            ctxt->input->col++;
2335
109k
        }
2336
109k
        cur++;
2337
109k
        if (res < INT_MAX)
2338
109k
            res++;
2339
109k
        if (*cur == 0) {
2340
496
            ctxt->input->cur = cur;
2341
496
            xmlParserGrow(ctxt);
2342
496
            cur = ctxt->input->cur;
2343
496
        }
2344
109k
    }
2345
591k
    ctxt->input->cur = cur;
2346
2347
591k
    if (res > 4)
2348
447
        GROW;
2349
2350
591k
    return(res);
2351
591k
}
2352
2353
static void
2354
53.4k
xmlPopPE(xmlParserCtxtPtr ctxt) {
2355
53.4k
    unsigned long consumed;
2356
53.4k
    xmlEntityPtr ent;
2357
2358
53.4k
    ent = ctxt->input->entity;
2359
2360
53.4k
    ent->flags &= ~XML_ENT_EXPANDING;
2361
2362
53.4k
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2363
2.35k
        int result;
2364
2365
        /*
2366
         * Read the rest of the stream in case of errors. We want
2367
         * to account for the whole entity size.
2368
         */
2369
2.35k
        do {
2370
2.35k
            ctxt->input->cur = ctxt->input->end;
2371
2.35k
            xmlParserShrink(ctxt);
2372
2.35k
            result = xmlParserGrow(ctxt);
2373
2.35k
        } while (result > 0);
2374
2375
2.35k
        consumed = ctxt->input->consumed;
2376
2.35k
        xmlSaturatedAddSizeT(&consumed,
2377
2.35k
                             ctxt->input->end - ctxt->input->base);
2378
2379
2.35k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2380
2381
        /*
2382
         * Add to sizeentities when parsing an external entity
2383
         * for the first time.
2384
         */
2385
2.35k
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2386
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2387
0
        }
2388
2389
2.35k
        ent->flags |= XML_ENT_CHECKED;
2390
2.35k
    }
2391
2392
53.4k
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
2393
2394
53.4k
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2395
2396
53.4k
    GROW;
2397
53.4k
}
2398
2399
/**
2400
 * Skip whitespace in the input stream, also handling parameter
2401
 * entities.
2402
 *
2403
 * @param ctxt  the XML parser context
2404
 * @returns the number of space chars skipped
2405
 */
2406
static int
2407
522k
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2408
522k
    int res = 0;
2409
522k
    int inParam;
2410
522k
    int expandParam;
2411
2412
522k
    inParam = PARSER_IN_PE(ctxt);
2413
522k
    expandParam = PARSER_EXTERNAL(ctxt);
2414
2415
522k
    if (!inParam && !expandParam)
2416
132k
        return(xmlSkipBlankChars(ctxt));
2417
2418
    /*
2419
     * It's Okay to use CUR/NEXT here since all the blanks are on
2420
     * the ASCII range.
2421
     */
2422
568k
    while (PARSER_STOPPED(ctxt) == 0) {
2423
568k
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2424
125k
            NEXT;
2425
442k
        } else if (CUR == '%') {
2426
51.2k
            if ((expandParam == 0) ||
2427
51.2k
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2428
51.2k
                break;
2429
2430
            /*
2431
             * Expand parameter entity. We continue to consume
2432
             * whitespace at the start of the entity and possible
2433
             * even consume the whole entity and pop it. We might
2434
             * even pop multiple PEs in this loop.
2435
             */
2436
0
            xmlParsePEReference(ctxt);
2437
2438
0
            inParam = PARSER_IN_PE(ctxt);
2439
0
            expandParam = PARSER_EXTERNAL(ctxt);
2440
391k
        } else if (CUR == 0) {
2441
53.3k
            if (inParam == 0)
2442
0
                break;
2443
2444
53.3k
            xmlPopPE(ctxt);
2445
2446
53.3k
            inParam = PARSER_IN_PE(ctxt);
2447
53.3k
            expandParam = PARSER_EXTERNAL(ctxt);
2448
338k
        } else {
2449
338k
            break;
2450
338k
        }
2451
2452
        /*
2453
         * Also increase the counter when entering or exiting a PERef.
2454
         * The spec says: "When a parameter-entity reference is recognized
2455
         * in the DTD and included, its replacement text MUST be enlarged
2456
         * by the attachment of one leading and one following space (#x20)
2457
         * character."
2458
         */
2459
179k
        if (res < INT_MAX)
2460
179k
            res++;
2461
179k
    }
2462
2463
389k
    return(res);
2464
522k
}
2465
2466
/************************************************************************
2467
 *                  *
2468
 *    Commodity functions to handle entities      *
2469
 *                  *
2470
 ************************************************************************/
2471
2472
/**
2473
 * @deprecated Internal function, don't use.
2474
 *
2475
 * @param ctxt  an XML parser context
2476
 * @returns the current xmlChar in the parser context
2477
 */
2478
xmlChar
2479
0
xmlPopInput(xmlParserCtxtPtr ctxt) {
2480
0
    xmlParserInputPtr input;
2481
2482
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2483
0
    input = xmlCtxtPopInput(ctxt);
2484
0
    xmlFreeInputStream(input);
2485
0
    if (*ctxt->input->cur == 0)
2486
0
        xmlParserGrow(ctxt);
2487
0
    return(CUR);
2488
0
}
2489
2490
/**
2491
 * Push an input stream onto the stack.
2492
 *
2493
 * @deprecated Internal function, don't use.
2494
 *
2495
 * @param ctxt  an XML parser context
2496
 * @param input  an XML parser input fragment (entity, XML fragment ...).
2497
 * @returns -1 in case of error or the index in the input stack
2498
 */
2499
int
2500
0
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2501
0
    int ret;
2502
2503
0
    if ((ctxt == NULL) || (input == NULL))
2504
0
        return(-1);
2505
2506
0
    ret = xmlCtxtPushInput(ctxt, input);
2507
0
    if (ret >= 0)
2508
0
        GROW;
2509
0
    return(ret);
2510
0
}
2511
2512
/**
2513
 * Parse a numeric character reference. Always consumes '&'.
2514
 *
2515
 * @deprecated Internal function, don't use.
2516
 *
2517
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2518
 *                      '&#x' [0-9a-fA-F]+ ';'
2519
 *
2520
 * [ WFC: Legal Character ]
2521
 * Characters referred to using character references must match the
2522
 * production for Char.
2523
 *
2524
 * @param ctxt  an XML parser context
2525
 * @returns the value parsed (as an int), 0 in case of error
2526
 */
2527
int
2528
12.1k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2529
12.1k
    int val = 0;
2530
12.1k
    int count = 0;
2531
2532
    /*
2533
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2534
     */
2535
12.1k
    if ((RAW == '&') && (NXT(1) == '#') &&
2536
12.1k
        (NXT(2) == 'x')) {
2537
4.28k
  SKIP(3);
2538
4.28k
  GROW;
2539
20.0k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2540
17.2k
      if (count++ > 20) {
2541
588
    count = 0;
2542
588
    GROW;
2543
588
      }
2544
17.2k
      if ((RAW >= '0') && (RAW <= '9'))
2545
8.47k
          val = val * 16 + (CUR - '0');
2546
8.73k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2547
3.07k
          val = val * 16 + (CUR - 'a') + 10;
2548
5.66k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2549
4.17k
          val = val * 16 + (CUR - 'A') + 10;
2550
1.48k
      else {
2551
1.48k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2552
1.48k
    val = 0;
2553
1.48k
    break;
2554
1.48k
      }
2555
15.7k
      if (val > 0x110000)
2556
7.09k
          val = 0x110000;
2557
2558
15.7k
      NEXT;
2559
15.7k
      count++;
2560
15.7k
  }
2561
4.28k
  if (RAW == ';') {
2562
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2563
2.79k
      ctxt->input->col++;
2564
2.79k
      ctxt->input->cur++;
2565
2.79k
  }
2566
7.81k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2567
7.81k
  SKIP(2);
2568
7.81k
  GROW;
2569
28.1k
  while (RAW != ';') { /* loop blocked by count */
2570
21.8k
      if (count++ > 20) {
2571
566
    count = 0;
2572
566
    GROW;
2573
566
      }
2574
21.8k
      if ((RAW >= '0') && (RAW <= '9'))
2575
20.2k
          val = val * 10 + (CUR - '0');
2576
1.56k
      else {
2577
1.56k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2578
1.56k
    val = 0;
2579
1.56k
    break;
2580
1.56k
      }
2581
20.2k
      if (val > 0x110000)
2582
5.65k
          val = 0x110000;
2583
2584
20.2k
      NEXT;
2585
20.2k
      count++;
2586
20.2k
  }
2587
7.81k
  if (RAW == ';') {
2588
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2589
6.25k
      ctxt->input->col++;
2590
6.25k
      ctxt->input->cur++;
2591
6.25k
  }
2592
7.81k
    } else {
2593
0
        if (RAW == '&')
2594
0
            SKIP(1);
2595
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2596
0
    }
2597
2598
    /*
2599
     * [ WFC: Legal Character ]
2600
     * Characters referred to using character references must match the
2601
     * production for Char.
2602
     */
2603
12.1k
    if (val >= 0x110000) {
2604
244
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2605
244
                "xmlParseCharRef: character reference out of bounds\n",
2606
244
          val);
2607
11.8k
    } else if (IS_CHAR(val)) {
2608
8.14k
        return(val);
2609
8.14k
    } else {
2610
3.71k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2611
3.71k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2612
3.71k
                    val);
2613
3.71k
    }
2614
3.95k
    return(0);
2615
12.1k
}
2616
2617
/**
2618
 * parse Reference declarations, variant parsing from a string rather
2619
 * than an an input flow.
2620
 *
2621
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2622
 *                      '&#x' [0-9a-fA-F]+ ';'
2623
 *
2624
 * [ WFC: Legal Character ]
2625
 * Characters referred to using character references must match the
2626
 * production for Char.
2627
 *
2628
 * @param ctxt  an XML parser context
2629
 * @param str  a pointer to an index in the string
2630
 * @returns the value parsed (as an int), 0 in case of error, str will be
2631
 *         updated to the current value of the index
2632
 */
2633
static int
2634
113k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2635
113k
    const xmlChar *ptr;
2636
113k
    xmlChar cur;
2637
113k
    int val = 0;
2638
2639
113k
    if ((str == NULL) || (*str == NULL)) return(0);
2640
113k
    ptr = *str;
2641
113k
    cur = *ptr;
2642
113k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2643
17.0k
  ptr += 3;
2644
17.0k
  cur = *ptr;
2645
47.4k
  while (cur != ';') { /* Non input consuming loop */
2646
30.8k
      if ((cur >= '0') && (cur <= '9'))
2647
5.37k
          val = val * 16 + (cur - '0');
2648
25.4k
      else if ((cur >= 'a') && (cur <= 'f'))
2649
3.17k
          val = val * 16 + (cur - 'a') + 10;
2650
22.2k
      else if ((cur >= 'A') && (cur <= 'F'))
2651
21.8k
          val = val * 16 + (cur - 'A') + 10;
2652
440
      else {
2653
440
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2654
440
    val = 0;
2655
440
    break;
2656
440
      }
2657
30.3k
      if (val > 0x110000)
2658
281
          val = 0x110000;
2659
2660
30.3k
      ptr++;
2661
30.3k
      cur = *ptr;
2662
30.3k
  }
2663
17.0k
  if (cur == ';')
2664
16.6k
      ptr++;
2665
96.0k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2666
96.0k
  ptr += 2;
2667
96.0k
  cur = *ptr;
2668
288k
  while (cur != ';') { /* Non input consuming loops */
2669
193k
      if ((cur >= '0') && (cur <= '9'))
2670
192k
          val = val * 10 + (cur - '0');
2671
1.25k
      else {
2672
1.25k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2673
1.25k
    val = 0;
2674
1.25k
    break;
2675
1.25k
      }
2676
192k
      if (val > 0x110000)
2677
517
          val = 0x110000;
2678
2679
192k
      ptr++;
2680
192k
      cur = *ptr;
2681
192k
  }
2682
96.0k
  if (cur == ';')
2683
94.7k
      ptr++;
2684
96.0k
    } else {
2685
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2686
0
  return(0);
2687
0
    }
2688
113k
    *str = ptr;
2689
2690
    /*
2691
     * [ WFC: Legal Character ]
2692
     * Characters referred to using character references must match the
2693
     * production for Char.
2694
     */
2695
113k
    if (val >= 0x110000) {
2696
207
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2697
207
                "xmlParseStringCharRef: character reference out of bounds\n",
2698
207
                val);
2699
112k
    } else if (IS_CHAR(val)) {
2700
110k
        return(val);
2701
110k
    } else {
2702
2.31k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2703
2.31k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2704
2.31k
        val);
2705
2.31k
    }
2706
2.51k
    return(0);
2707
113k
}
2708
2709
/**
2710
 *     [69] PEReference ::= '%' Name ';'
2711
 *
2712
 * @deprecated Internal function, do not use.
2713
 *
2714
 * [ WFC: No Recursion ]
2715
 * A parsed entity must not contain a recursive
2716
 * reference to itself, either directly or indirectly.
2717
 *
2718
 * [ WFC: Entity Declared ]
2719
 * In a document without any DTD, a document with only an internal DTD
2720
 * subset which contains no parameter entity references, or a document
2721
 * with "standalone='yes'", ...  ... The declaration of a parameter
2722
 * entity must precede any reference to it...
2723
 *
2724
 * [ VC: Entity Declared ]
2725
 * In a document with an external subset or external parameter entities
2726
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2727
 * must precede any reference to it...
2728
 *
2729
 * [ WFC: In DTD ]
2730
 * Parameter-entity references may only appear in the DTD.
2731
 * NOTE: misleading but this is handled.
2732
 *
2733
 * A PEReference may have been detected in the current input stream
2734
 * the handling is done accordingly to
2735
 *      http://www.w3.org/TR/REC-xml#entproc
2736
 * i.e.
2737
 *   - Included in literal in entity values
2738
 *   - Included as Parameter Entity reference within DTDs
2739
 * @param ctxt  the parser context
2740
 */
2741
void
2742
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2743
0
    xmlParsePEReference(ctxt);
2744
0
}
2745
2746
/**
2747
 * @deprecated Internal function, don't use.
2748
 *
2749
 * @param ctxt  the parser context
2750
 * @param str  the input string
2751
 * @param len  the string length
2752
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2753
 * @param end  an end marker xmlChar, 0 if none
2754
 * @param end2  an end marker xmlChar, 0 if none
2755
 * @param end3  an end marker xmlChar, 0 if none
2756
 * @returns A newly allocated string with the substitution done. The caller
2757
 *      must deallocate it !
2758
 */
2759
xmlChar *
2760
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2761
                           int what ATTRIBUTE_UNUSED,
2762
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2763
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2764
0
        return(NULL);
2765
2766
0
    if ((str[len] != 0) ||
2767
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2768
0
        return(NULL);
2769
2770
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2771
0
}
2772
2773
/**
2774
 * @deprecated Internal function, don't use.
2775
 *
2776
 * @param ctxt  the parser context
2777
 * @param str  the input string
2778
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2779
 * @param end  an end marker xmlChar, 0 if none
2780
 * @param end2  an end marker xmlChar, 0 if none
2781
 * @param end3  an end marker xmlChar, 0 if none
2782
 * @returns A newly allocated string with the substitution done. The caller
2783
 *      must deallocate it !
2784
 */
2785
xmlChar *
2786
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2787
                        int what ATTRIBUTE_UNUSED,
2788
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2789
0
    if ((ctxt == NULL) || (str == NULL))
2790
0
        return(NULL);
2791
2792
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2793
0
        return(NULL);
2794
2795
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2796
0
}
2797
2798
/************************************************************************
2799
 *                  *
2800
 *    Commodity functions, cleanup needed ?     *
2801
 *                  *
2802
 ************************************************************************/
2803
2804
/**
2805
 * Is this a sequence of blank chars that one can ignore ?
2806
 *
2807
 * @param ctxt  an XML parser context
2808
 * @param str  a xmlChar *
2809
 * @param len  the size of `str`
2810
 * @param blank_chars  we know the chars are blanks
2811
 * @returns 1 if ignorable 0 otherwise.
2812
 */
2813
2814
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2815
10.7k
                     int blank_chars) {
2816
10.7k
    int i;
2817
10.7k
    xmlNodePtr lastChild;
2818
2819
    /*
2820
     * Check for xml:space value.
2821
     */
2822
10.7k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2823
10.7k
        (*(ctxt->space) == -2))
2824
4.10k
  return(0);
2825
2826
    /*
2827
     * Check that the string is made of blanks
2828
     */
2829
6.63k
    if (blank_chars == 0) {
2830
6.36k
  for (i = 0;i < len;i++)
2831
4.88k
      if (!(IS_BLANK_CH(str[i]))) return(0);
2832
4.22k
    }
2833
2834
    /*
2835
     * Look if the element is mixed content in the DTD if available
2836
     */
2837
3.89k
    if (ctxt->node == NULL) return(0);
2838
3.89k
    if (ctxt->myDoc != NULL) {
2839
3.89k
        xmlElementPtr elemDecl = NULL;
2840
3.89k
        xmlDocPtr doc = ctxt->myDoc;
2841
3.89k
        const xmlChar *prefix = NULL;
2842
2843
3.89k
        if (ctxt->node->ns)
2844
265
            prefix = ctxt->node->ns->prefix;
2845
3.89k
        if (doc->intSubset != NULL)
2846
1.40k
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2847
1.40k
                                      prefix);
2848
3.89k
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2849
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2850
0
                                      prefix);
2851
3.89k
        if (elemDecl != NULL) {
2852
985
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2853
204
                return(1);
2854
781
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2855
781
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2856
504
                return(0);
2857
781
        }
2858
3.89k
    }
2859
2860
    /*
2861
     * Otherwise, heuristic :-\
2862
     *
2863
     * When push parsing, we could be at the end of a chunk.
2864
     * This makes the look-ahead and consequently the NOBLANKS
2865
     * option unreliable.
2866
     */
2867
3.18k
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2868
2.96k
    if ((ctxt->node->children == NULL) &&
2869
2.96k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2870
2871
2.51k
    lastChild = xmlGetLastChild(ctxt->node);
2872
2.51k
    if (lastChild == NULL) {
2873
1.66k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2874
1.66k
            (ctxt->node->content != NULL)) return(0);
2875
1.66k
    } else if (xmlNodeIsText(lastChild))
2876
195
        return(0);
2877
654
    else if ((ctxt->node->children != NULL) &&
2878
654
             (xmlNodeIsText(ctxt->node->children)))
2879
67
        return(0);
2880
2.25k
    return(1);
2881
2.51k
}
2882
2883
/************************************************************************
2884
 *                  *
2885
 *    Extra stuff for namespace support     *
2886
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2887
 *                  *
2888
 ************************************************************************/
2889
2890
/**
2891
 * parse an UTF8 encoded XML qualified name string
2892
 *
2893
 * @deprecated Don't use.
2894
 *
2895
 *     [NS 5] QName ::= (Prefix ':')? LocalPart
2896
 *
2897
 *     [NS 6] Prefix ::= NCName
2898
 *
2899
 *     [NS 7] LocalPart ::= NCName
2900
 *
2901
 * @param ctxt  an XML parser context
2902
 * @param name  an XML parser context
2903
 * @param prefixOut  a xmlChar **
2904
 * @returns the local part, and prefix is updated
2905
 *   to get the Prefix if any.
2906
 */
2907
2908
xmlChar *
2909
0
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
2910
0
    xmlChar buf[XML_MAX_NAMELEN + 5];
2911
0
    xmlChar *buffer = NULL;
2912
0
    int len = 0;
2913
0
    int max = XML_MAX_NAMELEN;
2914
0
    xmlChar *ret = NULL;
2915
0
    xmlChar *prefix;
2916
0
    const xmlChar *cur = name;
2917
0
    int c;
2918
2919
0
    if (prefixOut == NULL) return(NULL);
2920
0
    *prefixOut = NULL;
2921
2922
0
    if (cur == NULL) return(NULL);
2923
2924
    /* nasty but well=formed */
2925
0
    if (cur[0] == ':')
2926
0
  return(xmlStrdup(name));
2927
2928
0
    c = *cur++;
2929
0
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2930
0
  buf[len++] = c;
2931
0
  c = *cur++;
2932
0
    }
2933
0
    if (len >= max) {
2934
  /*
2935
   * Okay someone managed to make a huge name, so he's ready to pay
2936
   * for the processing speed.
2937
   */
2938
0
  max = len * 2;
2939
2940
0
  buffer = xmlMalloc(max);
2941
0
  if (buffer == NULL) {
2942
0
      xmlErrMemory(ctxt);
2943
0
      return(NULL);
2944
0
  }
2945
0
  memcpy(buffer, buf, len);
2946
0
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2947
0
      if (len + 10 > max) {
2948
0
          xmlChar *tmp;
2949
0
                int newSize;
2950
2951
0
                newSize = xmlGrowCapacity(max, 1, 1, XML_MAX_ITEMS);
2952
0
                if (newSize < 0) {
2953
0
        xmlErrMemory(ctxt);
2954
0
        xmlFree(buffer);
2955
0
        return(NULL);
2956
0
                }
2957
0
    tmp = xmlRealloc(buffer, newSize);
2958
0
    if (tmp == NULL) {
2959
0
        xmlErrMemory(ctxt);
2960
0
        xmlFree(buffer);
2961
0
        return(NULL);
2962
0
    }
2963
0
    buffer = tmp;
2964
0
    max = newSize;
2965
0
      }
2966
0
      buffer[len++] = c;
2967
0
      c = *cur++;
2968
0
  }
2969
0
  buffer[len] = 0;
2970
0
    }
2971
2972
0
    if ((c == ':') && (*cur == 0)) {
2973
0
        if (buffer != NULL)
2974
0
      xmlFree(buffer);
2975
0
  return(xmlStrdup(name));
2976
0
    }
2977
2978
0
    if (buffer == NULL) {
2979
0
  ret = xmlStrndup(buf, len);
2980
0
        if (ret == NULL) {
2981
0
      xmlErrMemory(ctxt);
2982
0
      return(NULL);
2983
0
        }
2984
0
    } else {
2985
0
  ret = buffer;
2986
0
  buffer = NULL;
2987
0
  max = XML_MAX_NAMELEN;
2988
0
    }
2989
2990
2991
0
    if (c == ':') {
2992
0
  c = *cur;
2993
0
        prefix = ret;
2994
0
  if (c == 0) {
2995
0
      ret = xmlStrndup(BAD_CAST "", 0);
2996
0
            if (ret == NULL) {
2997
0
                xmlFree(prefix);
2998
0
                return(NULL);
2999
0
            }
3000
0
            *prefixOut = prefix;
3001
0
            return(ret);
3002
0
  }
3003
0
  len = 0;
3004
3005
  /*
3006
   * Check that the first character is proper to start
3007
   * a new name
3008
   */
3009
0
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3010
0
        ((c >= 0x41) && (c <= 0x5A)) ||
3011
0
        (c == '_') || (c == ':'))) {
3012
0
      int l;
3013
0
      int first = CUR_SCHAR(cur, l);
3014
3015
0
      if (!IS_LETTER(first) && (first != '_')) {
3016
0
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3017
0
          "Name %s is not XML Namespace compliant\n",
3018
0
          name);
3019
0
      }
3020
0
  }
3021
0
  cur++;
3022
3023
0
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3024
0
      buf[len++] = c;
3025
0
      c = *cur++;
3026
0
  }
3027
0
  if (len >= max) {
3028
      /*
3029
       * Okay someone managed to make a huge name, so he's ready to pay
3030
       * for the processing speed.
3031
       */
3032
0
      max = len * 2;
3033
3034
0
      buffer = xmlMalloc(max);
3035
0
      if (buffer == NULL) {
3036
0
          xmlErrMemory(ctxt);
3037
0
                xmlFree(prefix);
3038
0
    return(NULL);
3039
0
      }
3040
0
      memcpy(buffer, buf, len);
3041
0
      while (c != 0) { /* tested bigname2.xml */
3042
0
    if (len + 10 > max) {
3043
0
        xmlChar *tmp;
3044
0
                    int newSize;
3045
3046
0
                    newSize = xmlGrowCapacity(max, 1, 1, XML_MAX_ITEMS);
3047
0
                    if (newSize < 0) {
3048
0
                        xmlErrMemory(ctxt);
3049
0
                        xmlFree(buffer);
3050
0
                        return(NULL);
3051
0
                    }
3052
0
        tmp = xmlRealloc(buffer, newSize);
3053
0
        if (tmp == NULL) {
3054
0
      xmlErrMemory(ctxt);
3055
0
                        xmlFree(prefix);
3056
0
      xmlFree(buffer);
3057
0
      return(NULL);
3058
0
        }
3059
0
        buffer = tmp;
3060
0
                    max = newSize;
3061
0
    }
3062
0
    buffer[len++] = c;
3063
0
    c = *cur++;
3064
0
      }
3065
0
      buffer[len] = 0;
3066
0
  }
3067
3068
0
  if (buffer == NULL) {
3069
0
      ret = xmlStrndup(buf, len);
3070
0
            if (ret == NULL) {
3071
0
                xmlFree(prefix);
3072
0
                return(NULL);
3073
0
            }
3074
0
  } else {
3075
0
      ret = buffer;
3076
0
  }
3077
3078
0
        *prefixOut = prefix;
3079
0
    }
3080
3081
0
    return(ret);
3082
0
}
3083
3084
/************************************************************************
3085
 *                  *
3086
 *      The parser itself       *
3087
 *  Relates to http://www.w3.org/TR/REC-xml       *
3088
 *                  *
3089
 ************************************************************************/
3090
3091
/************************************************************************
3092
 *                  *
3093
 *  Routines to parse Name, NCName and NmToken      *
3094
 *                  *
3095
 ************************************************************************/
3096
3097
/*
3098
 * The two following functions are related to the change of accepted
3099
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3100
 * They correspond to the modified production [4] and the new production [4a]
3101
 * changes in that revision. Also note that the macros used for the
3102
 * productions Letter, Digit, CombiningChar and Extender are not needed
3103
 * anymore.
3104
 * We still keep compatibility to pre-revision5 parsing semantic if the
3105
 * new XML_PARSE_OLD10 option is given to the parser.
3106
 */
3107
static int
3108
669k
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3109
669k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3110
        /*
3111
   * Use the new checks of production [4] [4a] amd [5] of the
3112
   * Update 5 of XML-1.0
3113
   */
3114
661k
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3115
661k
      (((c >= 'a') && (c <= 'z')) ||
3116
660k
       ((c >= 'A') && (c <= 'Z')) ||
3117
660k
       (c == '_') || (c == ':') ||
3118
660k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3119
660k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3120
660k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3121
660k
       ((c >= 0x370) && (c <= 0x37D)) ||
3122
660k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3123
660k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3124
660k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3125
660k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3126
660k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3127
660k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3128
660k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3129
660k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3130
616k
      return(1);
3131
661k
    } else {
3132
8.01k
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3133
6.17k
      return(1);
3134
8.01k
    }
3135
47.0k
    return(0);
3136
669k
}
3137
3138
static int
3139
920k
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3140
920k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3141
        /*
3142
   * Use the new checks of production [4] [4a] amd [5] of the
3143
   * Update 5 of XML-1.0
3144
   */
3145
907k
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3146
907k
      (((c >= 'a') && (c <= 'z')) ||
3147
896k
       ((c >= 'A') && (c <= 'Z')) ||
3148
896k
       ((c >= '0') && (c <= '9')) || /* !start */
3149
896k
       (c == '_') || (c == ':') ||
3150
896k
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3151
896k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3152
896k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3153
896k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3154
896k
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3155
896k
       ((c >= 0x370) && (c <= 0x37D)) ||
3156
896k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3157
896k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3158
896k
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3159
896k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3160
896k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3161
896k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3162
896k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3163
896k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3164
896k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3165
282k
       return(1);
3166
907k
    } else {
3167
12.8k
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3168
12.8k
            (c == '.') || (c == '-') ||
3169
12.8k
      (c == '_') || (c == ':') ||
3170
12.8k
      (IS_COMBINING(c)) ||
3171
12.8k
      (IS_EXTENDER(c)))
3172
6.24k
      return(1);
3173
12.8k
    }
3174
632k
    return(0);
3175
920k
}
3176
3177
static const xmlChar *
3178
111k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3179
111k
    const xmlChar *ret;
3180
111k
    int len = 0, l;
3181
111k
    int c;
3182
111k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3183
0
                    XML_MAX_TEXT_LENGTH :
3184
111k
                    XML_MAX_NAME_LENGTH;
3185
3186
    /*
3187
     * Handler for more complex cases
3188
     */
3189
111k
    c = xmlCurrentChar(ctxt, &l);
3190
111k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3191
        /*
3192
   * Use the new checks of production [4] [4a] amd [5] of the
3193
   * Update 5 of XML-1.0
3194
   */
3195
101k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3196
101k
      (!(((c >= 'a') && (c <= 'z')) ||
3197
90.4k
         ((c >= 'A') && (c <= 'Z')) ||
3198
90.4k
         (c == '_') || (c == ':') ||
3199
90.4k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3200
90.4k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3201
90.4k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3202
90.4k
         ((c >= 0x370) && (c <= 0x37D)) ||
3203
90.4k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3204
90.4k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3205
90.4k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3206
90.4k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3207
90.4k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3208
90.4k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3209
90.4k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3210
92.7k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3211
92.7k
      return(NULL);
3212
92.7k
  }
3213
8.96k
  len += l;
3214
8.96k
  NEXTL(l);
3215
8.96k
  c = xmlCurrentChar(ctxt, &l);
3216
22.2k
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3217
22.2k
         (((c >= 'a') && (c <= 'z')) ||
3218
20.4k
          ((c >= 'A') && (c <= 'Z')) ||
3219
20.4k
          ((c >= '0') && (c <= '9')) || /* !start */
3220
20.4k
          (c == '_') || (c == ':') ||
3221
20.4k
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3222
20.4k
          ((c >= 0xC0) && (c <= 0xD6)) ||
3223
20.4k
          ((c >= 0xD8) && (c <= 0xF6)) ||
3224
20.4k
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3225
20.4k
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3226
20.4k
          ((c >= 0x370) && (c <= 0x37D)) ||
3227
20.4k
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3228
20.4k
          ((c >= 0x200C) && (c <= 0x200D)) ||
3229
20.4k
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3230
20.4k
          ((c >= 0x2070) && (c <= 0x218F)) ||
3231
20.4k
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3232
20.4k
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3233
20.4k
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3234
20.4k
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3235
20.4k
          ((c >= 0x10000) && (c <= 0xEFFFF))
3236
20.4k
    )) {
3237
13.3k
            if (len <= INT_MAX - l)
3238
13.3k
          len += l;
3239
13.3k
      NEXTL(l);
3240
13.3k
      c = xmlCurrentChar(ctxt, &l);
3241
13.3k
  }
3242
9.29k
    } else {
3243
9.29k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3244
9.29k
      (!IS_LETTER(c) && (c != '_') &&
3245
8.04k
       (c != ':'))) {
3246
5.96k
      return(NULL);
3247
5.96k
  }
3248
3.33k
  len += l;
3249
3.33k
  NEXTL(l);
3250
3.33k
  c = xmlCurrentChar(ctxt, &l);
3251
3252
7.18k
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3253
7.18k
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3254
6.15k
    (c == '.') || (c == '-') ||
3255
6.15k
    (c == '_') || (c == ':') ||
3256
6.15k
    (IS_COMBINING(c)) ||
3257
6.15k
    (IS_EXTENDER(c)))) {
3258
3.84k
            if (len <= INT_MAX - l)
3259
3.84k
          len += l;
3260
3.84k
      NEXTL(l);
3261
3.84k
      c = xmlCurrentChar(ctxt, &l);
3262
3.84k
  }
3263
3.33k
    }
3264
12.2k
    if (len > maxLength) {
3265
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3266
0
        return(NULL);
3267
0
    }
3268
12.2k
    if (ctxt->input->cur - ctxt->input->base < len) {
3269
        /*
3270
         * There were a couple of bugs where PERefs lead to to a change
3271
         * of the buffer. Check the buffer size to avoid passing an invalid
3272
         * pointer to xmlDictLookup.
3273
         */
3274
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3275
0
                    "unexpected change of input buffer");
3276
0
        return (NULL);
3277
0
    }
3278
12.2k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3279
194
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3280
12.1k
    else
3281
12.1k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3282
12.2k
    if (ret == NULL)
3283
0
        xmlErrMemory(ctxt);
3284
12.2k
    return(ret);
3285
12.2k
}
3286
3287
/**
3288
 * parse an XML name.
3289
 *
3290
 * @deprecated Internal function, don't use.
3291
 *
3292
 *     [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3293
 *                      CombiningChar | Extender
3294
 *
3295
 *     [5] Name ::= (Letter | '_' | ':') (NameChar)*
3296
 *
3297
 *     [6] Names ::= Name (#x20 Name)*
3298
 *
3299
 * @param ctxt  an XML parser context
3300
 * @returns the Name parsed or NULL
3301
 */
3302
3303
const xmlChar *
3304
433k
xmlParseName(xmlParserCtxtPtr ctxt) {
3305
433k
    const xmlChar *in;
3306
433k
    const xmlChar *ret;
3307
433k
    size_t count = 0;
3308
433k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3309
0
                       XML_MAX_TEXT_LENGTH :
3310
433k
                       XML_MAX_NAME_LENGTH;
3311
3312
433k
    GROW;
3313
3314
    /*
3315
     * Accelerator for simple ASCII names
3316
     */
3317
433k
    in = ctxt->input->cur;
3318
433k
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3319
433k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3320
433k
  (*in == '_') || (*in == ':')) {
3321
327k
  in++;
3322
629k
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3323
629k
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3324
629k
         ((*in >= 0x30) && (*in <= 0x39)) ||
3325
629k
         (*in == '_') || (*in == '-') ||
3326
629k
         (*in == ':') || (*in == '.'))
3327
302k
      in++;
3328
327k
  if ((*in > 0) && (*in < 0x80)) {
3329
322k
      count = in - ctxt->input->cur;
3330
322k
            if (count > maxLength) {
3331
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
0
                return(NULL);
3333
0
            }
3334
322k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3335
322k
      ctxt->input->cur = in;
3336
322k
      ctxt->input->col += count;
3337
322k
      if (ret == NULL)
3338
0
          xmlErrMemory(ctxt);
3339
322k
      return(ret);
3340
322k
  }
3341
327k
    }
3342
    /* accelerator for special cases */
3343
111k
    return(xmlParseNameComplex(ctxt));
3344
433k
}
3345
3346
static xmlHashedString
3347
70.0k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3348
70.0k
    xmlHashedString ret;
3349
70.0k
    int len = 0, l;
3350
70.0k
    int c;
3351
70.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3352
0
                    XML_MAX_TEXT_LENGTH :
3353
70.0k
                    XML_MAX_NAME_LENGTH;
3354
70.0k
    size_t startPosition = 0;
3355
3356
70.0k
    ret.name = NULL;
3357
70.0k
    ret.hashValue = 0;
3358
3359
    /*
3360
     * Handler for more complex cases
3361
     */
3362
70.0k
    startPosition = CUR_PTR - BASE_PTR;
3363
70.0k
    c = xmlCurrentChar(ctxt, &l);
3364
70.0k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3365
70.0k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3366
63.7k
  return(ret);
3367
63.7k
    }
3368
3369
27.3k
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3370
27.3k
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3371
21.0k
        if (len <= INT_MAX - l)
3372
21.0k
      len += l;
3373
21.0k
  NEXTL(l);
3374
21.0k
  c = xmlCurrentChar(ctxt, &l);
3375
21.0k
    }
3376
6.33k
    if (len > maxLength) {
3377
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3378
0
        return(ret);
3379
0
    }
3380
6.33k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3381
6.33k
    if (ret.name == NULL)
3382
0
        xmlErrMemory(ctxt);
3383
6.33k
    return(ret);
3384
6.33k
}
3385
3386
/**
3387
 * parse an XML name.
3388
 *
3389
 *     [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3390
 *                          CombiningChar | Extender
3391
 *
3392
 *     [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3393
 *
3394
 * @param ctxt  an XML parser context
3395
 * @returns the Name parsed or NULL
3396
 */
3397
3398
static xmlHashedString
3399
180k
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3400
180k
    const xmlChar *in, *e;
3401
180k
    xmlHashedString ret;
3402
180k
    size_t count = 0;
3403
180k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3404
0
                       XML_MAX_TEXT_LENGTH :
3405
180k
                       XML_MAX_NAME_LENGTH;
3406
3407
180k
    ret.name = NULL;
3408
3409
    /*
3410
     * Accelerator for simple ASCII names
3411
     */
3412
180k
    in = ctxt->input->cur;
3413
180k
    e = ctxt->input->end;
3414
180k
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3415
180k
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3416
180k
   (*in == '_')) && (in < e)) {
3417
112k
  in++;
3418
201k
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3419
201k
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3420
201k
          ((*in >= 0x30) && (*in <= 0x39)) ||
3421
201k
          (*in == '_') || (*in == '-') ||
3422
201k
          (*in == '.')) && (in < e))
3423
89.2k
      in++;
3424
112k
  if (in >= e)
3425
19
      goto complex;
3426
112k
  if ((*in > 0) && (*in < 0x80)) {
3427
110k
      count = in - ctxt->input->cur;
3428
110k
            if (count > maxLength) {
3429
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3430
0
                return(ret);
3431
0
            }
3432
110k
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3433
110k
      ctxt->input->cur = in;
3434
110k
      ctxt->input->col += count;
3435
110k
      if (ret.name == NULL) {
3436
0
          xmlErrMemory(ctxt);
3437
0
      }
3438
110k
      return(ret);
3439
110k
  }
3440
112k
    }
3441
70.0k
complex:
3442
70.0k
    return(xmlParseNCNameComplex(ctxt));
3443
180k
}
3444
3445
/**
3446
 * parse an XML name and compares for match
3447
 * (specialized for endtag parsing)
3448
 *
3449
 * @param ctxt  an XML parser context
3450
 * @param other  the name to compare with
3451
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
3452
 * and the name for mismatch
3453
 */
3454
3455
static const xmlChar *
3456
13.8k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3457
13.8k
    register const xmlChar *cmp = other;
3458
13.8k
    register const xmlChar *in;
3459
13.8k
    const xmlChar *ret;
3460
3461
13.8k
    GROW;
3462
3463
13.8k
    in = ctxt->input->cur;
3464
19.2k
    while (*in != 0 && *in == *cmp) {
3465
5.46k
  ++in;
3466
5.46k
  ++cmp;
3467
5.46k
    }
3468
13.8k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3469
  /* success */
3470
2.54k
  ctxt->input->col += in - ctxt->input->cur;
3471
2.54k
  ctxt->input->cur = in;
3472
2.54k
  return (const xmlChar*) 1;
3473
2.54k
    }
3474
    /* failure (or end of input buffer), check with full function */
3475
11.2k
    ret = xmlParseName (ctxt);
3476
    /* strings coming from the dictionary direct compare possible */
3477
11.2k
    if (ret == other) {
3478
915
  return (const xmlChar*) 1;
3479
915
    }
3480
10.3k
    return ret;
3481
11.2k
}
3482
3483
/**
3484
 * parse an XML name.
3485
 *
3486
 *     [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3487
 *                      CombiningChar | Extender
3488
 *
3489
 *     [5] Name ::= (Letter | '_' | ':') (NameChar)*
3490
 *
3491
 *     [6] Names ::= Name (#x20 Name)*
3492
 *
3493
 * @param ctxt  an XML parser context
3494
 * @param str  a pointer to the string pointer (IN/OUT)
3495
 * @returns the Name parsed or NULL. The `str` pointer
3496
 * is updated to the current location in the string.
3497
 */
3498
3499
static xmlChar *
3500
606k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3501
606k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3502
606k
    xmlChar *ret;
3503
606k
    const xmlChar *cur = *str;
3504
606k
    int len = 0, l;
3505
606k
    int c;
3506
606k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3507
0
                    XML_MAX_TEXT_LENGTH :
3508
606k
                    XML_MAX_NAME_LENGTH;
3509
3510
606k
    c = CUR_SCHAR(cur, l);
3511
606k
    if (!xmlIsNameStartChar(ctxt, c)) {
3512
1.52k
  return(NULL);
3513
1.52k
    }
3514
3515
605k
    COPY_BUF(buf, len, c);
3516
605k
    cur += l;
3517
605k
    c = CUR_SCHAR(cur, l);
3518
748k
    while (xmlIsNameChar(ctxt, c)) {
3519
144k
  COPY_BUF(buf, len, c);
3520
144k
  cur += l;
3521
144k
  c = CUR_SCHAR(cur, l);
3522
144k
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3523
      /*
3524
       * Okay someone managed to make a huge name, so he's ready to pay
3525
       * for the processing speed.
3526
       */
3527
654
      xmlChar *buffer;
3528
654
      int max = len * 2;
3529
3530
654
      buffer = xmlMalloc(max);
3531
654
      if (buffer == NULL) {
3532
0
          xmlErrMemory(ctxt);
3533
0
    return(NULL);
3534
0
      }
3535
654
      memcpy(buffer, buf, len);
3536
20.7k
      while (xmlIsNameChar(ctxt, c)) {
3537
20.1k
    if (len + 10 > max) {
3538
206
        xmlChar *tmp;
3539
206
                    int newSize;
3540
3541
206
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3542
206
                    if (newSize < 0) {
3543
0
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3544
0
                        xmlFree(buffer);
3545
0
                        return(NULL);
3546
0
                    }
3547
206
        tmp = xmlRealloc(buffer, newSize);
3548
206
        if (tmp == NULL) {
3549
0
      xmlErrMemory(ctxt);
3550
0
      xmlFree(buffer);
3551
0
      return(NULL);
3552
0
        }
3553
206
        buffer = tmp;
3554
206
                    max = newSize;
3555
206
    }
3556
20.1k
    COPY_BUF(buffer, len, c);
3557
20.1k
    cur += l;
3558
20.1k
    c = CUR_SCHAR(cur, l);
3559
20.1k
      }
3560
654
      buffer[len] = 0;
3561
654
      *str = cur;
3562
654
      return(buffer);
3563
654
  }
3564
144k
    }
3565
604k
    if (len > maxLength) {
3566
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3567
0
        return(NULL);
3568
0
    }
3569
604k
    *str = cur;
3570
604k
    ret = xmlStrndup(buf, len);
3571
604k
    if (ret == NULL)
3572
0
        xmlErrMemory(ctxt);
3573
604k
    return(ret);
3574
604k
}
3575
3576
/**
3577
 * parse an XML Nmtoken.
3578
 *
3579
 * @deprecated Internal function, don't use.
3580
 *
3581
 *     [7] Nmtoken ::= (NameChar)+
3582
 *
3583
 *     [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3584
 *
3585
 * @param ctxt  an XML parser context
3586
 * @returns the Nmtoken parsed or NULL
3587
 */
3588
3589
xmlChar *
3590
22.1k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3591
22.1k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3592
22.1k
    xmlChar *ret;
3593
22.1k
    int len = 0, l;
3594
22.1k
    int c;
3595
22.1k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3596
0
                    XML_MAX_TEXT_LENGTH :
3597
22.1k
                    XML_MAX_NAME_LENGTH;
3598
3599
22.1k
    c = xmlCurrentChar(ctxt, &l);
3600
3601
105k
    while (xmlIsNameChar(ctxt, c)) {
3602
84.3k
  COPY_BUF(buf, len, c);
3603
84.3k
  NEXTL(l);
3604
84.3k
  c = xmlCurrentChar(ctxt, &l);
3605
84.3k
  if (len >= XML_MAX_NAMELEN) {
3606
      /*
3607
       * Okay someone managed to make a huge token, so he's ready to pay
3608
       * for the processing speed.
3609
       */
3610
610
      xmlChar *buffer;
3611
610
      int max = len * 2;
3612
3613
610
      buffer = xmlMalloc(max);
3614
610
      if (buffer == NULL) {
3615
0
          xmlErrMemory(ctxt);
3616
0
    return(NULL);
3617
0
      }
3618
610
      memcpy(buffer, buf, len);
3619
18.8k
      while (xmlIsNameChar(ctxt, c)) {
3620
18.2k
    if (len + 10 > max) {
3621
196
        xmlChar *tmp;
3622
196
                    int newSize;
3623
3624
196
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3625
196
                    if (newSize < 0) {
3626
0
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3627
0
                        xmlFree(buffer);
3628
0
                        return(NULL);
3629
0
                    }
3630
196
        tmp = xmlRealloc(buffer, newSize);
3631
196
        if (tmp == NULL) {
3632
0
      xmlErrMemory(ctxt);
3633
0
      xmlFree(buffer);
3634
0
      return(NULL);
3635
0
        }
3636
196
        buffer = tmp;
3637
196
                    max = newSize;
3638
196
    }
3639
18.2k
    COPY_BUF(buffer, len, c);
3640
18.2k
    NEXTL(l);
3641
18.2k
    c = xmlCurrentChar(ctxt, &l);
3642
18.2k
      }
3643
610
      buffer[len] = 0;
3644
610
      return(buffer);
3645
610
  }
3646
84.3k
    }
3647
21.5k
    if (len == 0)
3648
1.67k
        return(NULL);
3649
19.8k
    if (len > maxLength) {
3650
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3651
0
        return(NULL);
3652
0
    }
3653
19.8k
    ret = xmlStrndup(buf, len);
3654
19.8k
    if (ret == NULL)
3655
0
        xmlErrMemory(ctxt);
3656
19.8k
    return(ret);
3657
19.8k
}
3658
3659
/**
3660
 * Validate an entity value and expand parameter entities.
3661
 *
3662
 * @param ctxt  parser context
3663
 * @param buf  string buffer
3664
 * @param str  entity value
3665
 * @param length  size of entity value
3666
 * @param depth  nesting depth
3667
 */
3668
static void
3669
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3670
13.8k
                          const xmlChar *str, int length, int depth) {
3671
13.8k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3672
13.8k
    const xmlChar *end, *chunk;
3673
13.8k
    int c, l;
3674
3675
13.8k
    if (str == NULL)
3676
0
        return;
3677
3678
13.8k
    depth += 1;
3679
13.8k
    if (depth > maxDepth) {
3680
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3681
0
                       "Maximum entity nesting depth exceeded");
3682
0
  return;
3683
0
    }
3684
3685
13.8k
    end = str + length;
3686
13.8k
    chunk = str;
3687
3688
363k
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3689
355k
        c = *str;
3690
3691
355k
        if (c >= 0x80) {
3692
24.3k
            l = xmlUTF8MultibyteLen(ctxt, str,
3693
24.3k
                    "invalid character in entity value\n");
3694
24.3k
            if (l == 0) {
3695
5.68k
                if (chunk < str)
3696
1.34k
                    xmlSBufAddString(buf, chunk, str - chunk);
3697
5.68k
                xmlSBufAddReplChar(buf);
3698
5.68k
                str += 1;
3699
5.68k
                chunk = str;
3700
18.6k
            } else {
3701
18.6k
                str += l;
3702
18.6k
            }
3703
330k
        } else if (c == '&') {
3704
27.6k
            if (str[1] == '#') {
3705
5.93k
                if (chunk < str)
3706
3.16k
                    xmlSBufAddString(buf, chunk, str - chunk);
3707
3708
5.93k
                c = xmlParseStringCharRef(ctxt, &str);
3709
5.93k
                if (c == 0)
3710
2.51k
                    return;
3711
3712
3.42k
                xmlSBufAddChar(buf, c);
3713
3714
3.42k
                chunk = str;
3715
21.6k
            } else {
3716
21.6k
                xmlChar *name;
3717
3718
                /*
3719
                 * General entity references are checked for
3720
                 * syntactic validity.
3721
                 */
3722
21.6k
                str++;
3723
21.6k
                name = xmlParseStringName(ctxt, &str);
3724
3725
21.6k
                if ((name == NULL) || (*str++ != ';')) {
3726
636
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3727
636
                            "EntityValue: '&' forbidden except for entities "
3728
636
                            "references\n");
3729
636
                    xmlFree(name);
3730
636
                    return;
3731
636
                }
3732
3733
21.0k
                xmlFree(name);
3734
21.0k
            }
3735
303k
        } else if (c == '%') {
3736
2.13k
            xmlEntityPtr ent;
3737
3738
2.13k
            if (chunk < str)
3739
1.24k
                xmlSBufAddString(buf, chunk, str - chunk);
3740
3741
2.13k
            ent = xmlParseStringPEReference(ctxt, &str);
3742
2.13k
            if (ent == NULL)
3743
1.85k
                return;
3744
3745
277
            if (!PARSER_EXTERNAL(ctxt)) {
3746
277
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3747
277
                return;
3748
277
            }
3749
3750
0
            if (ent->content == NULL) {
3751
                /*
3752
                 * Note: external parsed entities will not be loaded,
3753
                 * it is not required for a non-validating parser to
3754
                 * complete external PEReferences coming from the
3755
                 * internal subset
3756
                 */
3757
0
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3758
0
                    ((ctxt->replaceEntities) ||
3759
0
                     (ctxt->validate))) {
3760
0
                    xmlLoadEntityContent(ctxt, ent);
3761
0
                } else {
3762
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3763
0
                                  "not validating will not read content for "
3764
0
                                  "PE entity %s\n", ent->name, NULL);
3765
0
                }
3766
0
            }
3767
3768
            /*
3769
             * TODO: Skip if ent->content is still NULL.
3770
             */
3771
3772
0
            if (xmlParserEntityCheck(ctxt, ent->length))
3773
0
                return;
3774
3775
0
            if (ent->flags & XML_ENT_EXPANDING) {
3776
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3777
0
                xmlHaltParser(ctxt);
3778
0
                return;
3779
0
            }
3780
3781
0
            ent->flags |= XML_ENT_EXPANDING;
3782
0
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3783
0
                                      depth);
3784
0
            ent->flags &= ~XML_ENT_EXPANDING;
3785
3786
0
            chunk = str;
3787
300k
        } else {
3788
            /* Normal ASCII char */
3789
300k
            if (!IS_BYTE_CHAR(c)) {
3790
2.82k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3791
2.82k
                        "invalid character in entity value\n");
3792
2.82k
                if (chunk < str)
3793
890
                    xmlSBufAddString(buf, chunk, str - chunk);
3794
2.82k
                xmlSBufAddReplChar(buf);
3795
2.82k
                str += 1;
3796
2.82k
                chunk = str;
3797
298k
            } else {
3798
298k
                str += 1;
3799
298k
            }
3800
300k
        }
3801
355k
    }
3802
3803
8.55k
    if (chunk < str)
3804
7.33k
        xmlSBufAddString(buf, chunk, str - chunk);
3805
8.55k
}
3806
3807
/**
3808
 * parse a value for ENTITY declarations
3809
 *
3810
 * @deprecated Internal function, don't use.
3811
 *
3812
 *     [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3813
 *                         "'" ([^%&'] | PEReference | Reference)* "'"
3814
 *
3815
 * @param ctxt  an XML parser context
3816
 * @param orig  if non-NULL store a copy of the original entity value
3817
 * @returns the EntityValue parsed with reference substituted or NULL
3818
 */
3819
xmlChar *
3820
14.7k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3821
14.7k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3822
0
                         XML_MAX_HUGE_LENGTH :
3823
14.7k
                         XML_MAX_TEXT_LENGTH;
3824
14.7k
    xmlSBuf buf;
3825
14.7k
    const xmlChar *start;
3826
14.7k
    int quote, length;
3827
3828
14.7k
    xmlSBufInit(&buf, maxLength);
3829
3830
14.7k
    GROW;
3831
3832
14.7k
    quote = CUR;
3833
14.7k
    if ((quote != '"') && (quote != '\'')) {
3834
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3835
0
  return(NULL);
3836
0
    }
3837
14.7k
    CUR_PTR++;
3838
3839
14.7k
    length = 0;
3840
3841
    /*
3842
     * Copy raw content of the entity into a buffer
3843
     */
3844
492k
    while (1) {
3845
492k
        int c;
3846
3847
492k
        if (PARSER_STOPPED(ctxt))
3848
0
            goto error;
3849
3850
492k
        if (CUR_PTR >= ctxt->input->end) {
3851
916
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3852
916
            goto error;
3853
916
        }
3854
3855
491k
        c = CUR;
3856
3857
491k
        if (c == 0) {
3858
2
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3859
2
                    "invalid character in entity value\n");
3860
2
            goto error;
3861
2
        }
3862
491k
        if (c == quote)
3863
13.8k
            break;
3864
477k
        NEXTL(1);
3865
477k
        length += 1;
3866
3867
        /*
3868
         * TODO: Check growth threshold
3869
         */
3870
477k
        if (ctxt->input->end - CUR_PTR < 10)
3871
27.0k
            GROW;
3872
477k
    }
3873
3874
13.8k
    start = CUR_PTR - length;
3875
3876
13.8k
    if (orig != NULL) {
3877
13.8k
        *orig = xmlStrndup(start, length);
3878
13.8k
        if (*orig == NULL)
3879
0
            xmlErrMemory(ctxt);
3880
13.8k
    }
3881
3882
13.8k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3883
3884
13.8k
    NEXTL(1);
3885
3886
13.8k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3887
3888
918
error:
3889
918
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3890
918
    return(NULL);
3891
14.7k
}
3892
3893
/**
3894
 * Check an entity reference in an attribute value for validity
3895
 * without expanding it.
3896
 *
3897
 * @param ctxt  parser context
3898
 * @param pent  entity
3899
 * @param depth  nesting depth
3900
 */
3901
static void
3902
449
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3903
449
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3904
449
    const xmlChar *str;
3905
449
    unsigned long expandedSize = pent->length;
3906
449
    int c, flags;
3907
3908
449
    depth += 1;
3909
449
    if (depth > maxDepth) {
3910
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3911
0
                       "Maximum entity nesting depth exceeded");
3912
0
  return;
3913
0
    }
3914
3915
449
    if (pent->flags & XML_ENT_EXPANDING) {
3916
12
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3917
12
        xmlHaltParser(ctxt);
3918
12
        return;
3919
12
    }
3920
3921
    /*
3922
     * If we're parsing a default attribute value in DTD content,
3923
     * the entity might reference other entities which weren't
3924
     * defined yet, so the check isn't reliable.
3925
     */
3926
437
    if (ctxt->inSubset == 0)
3927
396
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3928
41
    else
3929
41
        flags = XML_ENT_VALIDATED;
3930
3931
437
    str = pent->content;
3932
437
    if (str == NULL)
3933
8
        goto done;
3934
3935
    /*
3936
     * Note that entity values are already validated. We only check
3937
     * for illegal less-than signs and compute the expanded size
3938
     * of the entity. No special handling for multi-byte characters
3939
     * is needed.
3940
     */
3941
15.3k
    while (!PARSER_STOPPED(ctxt)) {
3942
15.3k
        c = *str;
3943
3944
15.3k
  if (c != '&') {
3945
11.2k
            if (c == 0)
3946
403
                break;
3947
3948
10.8k
            if (c == '<')
3949
370
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3950
370
                        "'<' in entity '%s' is not allowed in attributes "
3951
370
                        "values\n", pent->name);
3952
3953
10.8k
            str += 1;
3954
10.8k
        } else if (str[1] == '#') {
3955
386
            int val;
3956
3957
386
      val = xmlParseStringCharRef(ctxt, &str);
3958
386
      if (val == 0) {
3959
4
                pent->content[0] = 0;
3960
4
                break;
3961
4
            }
3962
3.74k
  } else {
3963
3.74k
            xmlChar *name;
3964
3.74k
            xmlEntityPtr ent;
3965
3966
3.74k
      name = xmlParseStringEntityRef(ctxt, &str);
3967
3.74k
      if (name == NULL) {
3968
5
                pent->content[0] = 0;
3969
5
                break;
3970
5
            }
3971
3972
3.73k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3973
3.73k
            xmlFree(name);
3974
3975
3.73k
            if ((ent != NULL) &&
3976
3.73k
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
3977
2.27k
                if ((ent->flags & flags) != flags) {
3978
92
                    pent->flags |= XML_ENT_EXPANDING;
3979
92
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
3980
92
                    pent->flags &= ~XML_ENT_EXPANDING;
3981
92
                }
3982
3983
2.27k
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
3984
2.27k
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
3985
2.27k
            }
3986
3.73k
        }
3987
15.3k
    }
3988
3989
437
done:
3990
437
    if (ctxt->inSubset == 0)
3991
396
        pent->expandedSize = expandedSize;
3992
3993
437
    pent->flags |= flags;
3994
437
}
3995
3996
/**
3997
 * Expand general entity references in an entity or attribute value.
3998
 * Perform attribute value normalization.
3999
 *
4000
 * @param ctxt  parser context
4001
 * @param buf  string buffer
4002
 * @param str  entity or attribute value
4003
 * @param pent  entity for entity value, NULL for attribute values
4004
 * @param normalize  whether to collapse whitespace
4005
 * @param inSpace  whitespace state
4006
 * @param depth  nesting depth
4007
 * @param check  whether to check for amplification
4008
 */
4009
static void
4010
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4011
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
4012
565k
                          int *inSpace, int depth, int check) {
4013
565k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4014
565k
    int c, chunkSize;
4015
4016
565k
    if (str == NULL)
4017
66
        return;
4018
4019
565k
    depth += 1;
4020
565k
    if (depth > maxDepth) {
4021
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4022
0
                       "Maximum entity nesting depth exceeded");
4023
0
  return;
4024
0
    }
4025
4026
565k
    if (pent != NULL) {
4027
535k
        if (pent->flags & XML_ENT_EXPANDING) {
4028
4
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4029
4
            xmlHaltParser(ctxt);
4030
4
            return;
4031
4
        }
4032
4033
535k
        if (check) {
4034
515k
            if (xmlParserEntityCheck(ctxt, pent->length))
4035
5
                return;
4036
515k
        }
4037
535k
    }
4038
4039
565k
    chunkSize = 0;
4040
4041
    /*
4042
     * Note that entity values are already validated. No special
4043
     * handling for multi-byte characters is needed.
4044
     */
4045
75.6M
    while (!PARSER_STOPPED(ctxt)) {
4046
75.6M
        c = *str;
4047
4048
75.6M
  if (c != '&') {
4049
74.9M
            if (c == 0)
4050
562k
                break;
4051
4052
            /*
4053
             * If this function is called without an entity, it is used to
4054
             * expand entities in an attribute content where less-than was
4055
             * already unscaped and is allowed.
4056
             */
4057
74.3M
            if ((pent != NULL) && (c == '<')) {
4058
2.62k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4059
2.62k
                        "'<' in entity '%s' is not allowed in attributes "
4060
2.62k
                        "values\n", pent->name);
4061
2.62k
                break;
4062
2.62k
            }
4063
4064
74.3M
            if (c <= 0x20) {
4065
764k
                if ((normalize) && (*inSpace)) {
4066
                    /* Skip char */
4067
6.66k
                    if (chunkSize > 0) {
4068
1.91k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4069
1.91k
                        chunkSize = 0;
4070
1.91k
                    }
4071
757k
                } else if (c < 0x20) {
4072
353k
                    if (chunkSize > 0) {
4073
201k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4074
201k
                        chunkSize = 0;
4075
201k
                    }
4076
4077
353k
                    xmlSBufAddCString(buf, " ", 1);
4078
404k
                } else {
4079
404k
                    chunkSize += 1;
4080
404k
                }
4081
4082
764k
                *inSpace = 1;
4083
73.6M
            } else {
4084
73.6M
                chunkSize += 1;
4085
73.6M
                *inSpace = 0;
4086
73.6M
            }
4087
4088
74.3M
            str += 1;
4089
74.3M
        } else if (str[1] == '#') {
4090
106k
            int val;
4091
4092
106k
            if (chunkSize > 0) {
4093
105k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4094
105k
                chunkSize = 0;
4095
105k
            }
4096
4097
106k
      val = xmlParseStringCharRef(ctxt, &str);
4098
106k
      if (val == 0) {
4099
4
                if (pent != NULL)
4100
4
                    pent->content[0] = 0;
4101
4
                break;
4102
4
            }
4103
4104
106k
            if (val == ' ') {
4105
89.6k
                if ((!normalize) || (!*inSpace))
4106
89.4k
                    xmlSBufAddCString(buf, " ", 1);
4107
89.6k
                *inSpace = 1;
4108
89.6k
            } else {
4109
17.0k
                xmlSBufAddChar(buf, val);
4110
17.0k
                *inSpace = 0;
4111
17.0k
            }
4112
579k
  } else {
4113
579k
            xmlChar *name;
4114
579k
            xmlEntityPtr ent;
4115
4116
579k
            if (chunkSize > 0) {
4117
232k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4118
232k
                chunkSize = 0;
4119
232k
            }
4120
4121
579k
      name = xmlParseStringEntityRef(ctxt, &str);
4122
579k
            if (name == NULL) {
4123
4
                if (pent != NULL)
4124
4
                    pent->content[0] = 0;
4125
4
                break;
4126
4
            }
4127
4128
579k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4129
579k
            xmlFree(name);
4130
4131
579k
      if ((ent != NULL) &&
4132
579k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4133
35.4k
    if (ent->content == NULL) {
4134
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4135
0
          "predefined entity has no content\n");
4136
0
                    break;
4137
0
                }
4138
4139
35.4k
                xmlSBufAddString(buf, ent->content, ent->length);
4140
4141
35.4k
                *inSpace = 0;
4142
543k
      } else if ((ent != NULL) && (ent->content != NULL)) {
4143
513k
                if (pent != NULL)
4144
512k
                    pent->flags |= XML_ENT_EXPANDING;
4145
513k
    xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4146
513k
                                          normalize, inSpace, depth, check);
4147
513k
                if (pent != NULL)
4148
512k
                    pent->flags &= ~XML_ENT_EXPANDING;
4149
513k
      }
4150
579k
        }
4151
75.6M
    }
4152
4153
565k
    if (chunkSize > 0)
4154
551k
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4155
565k
}
4156
4157
/**
4158
 * Expand general entity references in an entity or attribute value.
4159
 * Perform attribute value normalization.
4160
 *
4161
 * @param ctxt  parser context
4162
 * @param str  entity or attribute value
4163
 * @param normalize  whether to collapse whitespace
4164
 * @returns the expanded attribtue value.
4165
 */
4166
xmlChar *
4167
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4168
29.2k
                            int normalize) {
4169
29.2k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4170
0
                         XML_MAX_HUGE_LENGTH :
4171
29.2k
                         XML_MAX_TEXT_LENGTH;
4172
29.2k
    xmlSBuf buf;
4173
29.2k
    int inSpace = 1;
4174
4175
29.2k
    xmlSBufInit(&buf, maxLength);
4176
4177
29.2k
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4178
29.2k
                              ctxt->inputNr, /* check */ 0);
4179
4180
29.2k
    if ((normalize) && (inSpace) && (buf.size > 0))
4181
0
        buf.size--;
4182
4183
29.2k
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4184
29.2k
}
4185
4186
/**
4187
 * parse a value for an attribute.
4188
 *
4189
 * NOTE: if no normalization is needed, the routine will return pointers
4190
 * directly from the data buffer.
4191
 *
4192
 * 3.3.3 Attribute-Value Normalization:
4193
 *
4194
 * Before the value of an attribute is passed to the application or
4195
 * checked for validity, the XML processor must normalize it as follows:
4196
 *
4197
 * - a character reference is processed by appending the referenced
4198
 *   character to the attribute value
4199
 * - an entity reference is processed by recursively processing the
4200
 *   replacement text of the entity
4201
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4202
 *   appending \#x20 to the normalized value, except that only a single
4203
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4204
 *   parsed entity or the literal entity value of an internal parsed entity
4205
 * - other characters are processed by appending them to the normalized value
4206
 *
4207
 * If the declared value is not CDATA, then the XML processor must further
4208
 * process the normalized attribute value by discarding any leading and
4209
 * trailing space (\#x20) characters, and by replacing sequences of space
4210
 * (\#x20) characters by a single space (\#x20) character.
4211
 * All attributes for which no declaration has been read should be treated
4212
 * by a non-validating parser as if declared CDATA.
4213
 *
4214
 * @param ctxt  an XML parser context
4215
 * @param attlen  attribute len result
4216
 * @param alloc  whether the attribute was reallocated as a new string
4217
 * @param normalize  if 1 then further non-CDATA normalization must be done
4218
 * @param isNamespace  whether this is a namespace declaration
4219
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4220
 *     caller if it was copied, this can be detected by val[*len] == 0.
4221
 */
4222
static xmlChar *
4223
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4224
70.7k
                         int normalize, int isNamespace) {
4225
70.7k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4226
0
                         XML_MAX_HUGE_LENGTH :
4227
70.7k
                         XML_MAX_TEXT_LENGTH;
4228
70.7k
    xmlSBuf buf;
4229
70.7k
    xmlChar *ret;
4230
70.7k
    int c, l, quote, flags, chunkSize;
4231
70.7k
    int inSpace = 1;
4232
70.7k
    int replaceEntities;
4233
4234
    /* Always expand namespace URIs */
4235
70.7k
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4236
4237
70.7k
    xmlSBufInit(&buf, maxLength);
4238
4239
70.7k
    GROW;
4240
4241
70.7k
    quote = CUR;
4242
70.7k
    if ((quote != '"') && (quote != '\'')) {
4243
4.49k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4244
4.49k
  return(NULL);
4245
4.49k
    }
4246
66.2k
    NEXTL(1);
4247
4248
66.2k
    if (ctxt->inSubset == 0)
4249
32.7k
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4250
33.4k
    else
4251
33.4k
        flags = XML_ENT_VALIDATED;
4252
4253
66.2k
    inSpace = 1;
4254
66.2k
    chunkSize = 0;
4255
4256
871k
    while (1) {
4257
871k
        if (PARSER_STOPPED(ctxt))
4258
21
            goto error;
4259
4260
871k
        if (CUR_PTR >= ctxt->input->end) {
4261
9.17k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4262
9.17k
                           "AttValue: ' expected\n");
4263
9.17k
            goto error;
4264
9.17k
        }
4265
4266
        /*
4267
         * TODO: Check growth threshold
4268
         */
4269
862k
        if (ctxt->input->end - CUR_PTR < 10)
4270
98.5k
            GROW;
4271
4272
862k
        c = CUR;
4273
4274
862k
        if (c >= 0x80) {
4275
71.2k
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4276
71.2k
                    "invalid character in attribute value\n");
4277
71.2k
            if (l == 0) {
4278
7.17k
                if (chunkSize > 0) {
4279
991
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4280
991
                    chunkSize = 0;
4281
991
                }
4282
7.17k
                xmlSBufAddReplChar(&buf);
4283
7.17k
                NEXTL(1);
4284
64.0k
            } else {
4285
64.0k
                chunkSize += l;
4286
64.0k
                NEXTL(l);
4287
64.0k
            }
4288
4289
71.2k
            inSpace = 0;
4290
791k
        } else if (c != '&') {
4291
710k
            if (c > 0x20) {
4292
509k
                if (c == quote)
4293
55.6k
                    break;
4294
4295
453k
                if (c == '<')
4296
35.4k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4297
4298
453k
                chunkSize += 1;
4299
453k
                inSpace = 0;
4300
453k
            } else if (!IS_BYTE_CHAR(c)) {
4301
2.43k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4302
2.43k
                        "invalid character in attribute value\n");
4303
2.43k
                if (chunkSize > 0) {
4304
761
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4305
761
                    chunkSize = 0;
4306
761
                }
4307
2.43k
                xmlSBufAddReplChar(&buf);
4308
2.43k
                inSpace = 0;
4309
198k
            } else {
4310
                /* Whitespace */
4311
198k
                if ((normalize) && (inSpace)) {
4312
                    /* Skip char */
4313
704
                    if (chunkSize > 0) {
4314
362
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4315
362
                        chunkSize = 0;
4316
362
                    }
4317
197k
                } else if (c < 0x20) {
4318
                    /* Convert to space */
4319
4.19k
                    if (chunkSize > 0) {
4320
2.37k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4321
2.37k
                        chunkSize = 0;
4322
2.37k
                    }
4323
4324
4.19k
                    xmlSBufAddCString(&buf, " ", 1);
4325
193k
                } else {
4326
193k
                    chunkSize += 1;
4327
193k
                }
4328
4329
198k
                inSpace = 1;
4330
4331
198k
                if ((c == 0xD) && (NXT(1) == 0xA))
4332
194
                    CUR_PTR++;
4333
198k
            }
4334
4335
654k
            NEXTL(1);
4336
654k
        } else if (NXT(1) == '#') {
4337
7.93k
            int val;
4338
4339
7.93k
            if (chunkSize > 0) {
4340
3.68k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4341
3.68k
                chunkSize = 0;
4342
3.68k
            }
4343
4344
7.93k
            val = xmlParseCharRef(ctxt);
4345
7.93k
            if (val == 0)
4346
1.36k
                goto error;
4347
4348
6.56k
            if ((val == '&') && (!replaceEntities)) {
4349
                /*
4350
                 * The reparsing will be done in xmlNodeParseContent()
4351
                 * called from SAX2.c
4352
                 */
4353
1.69k
                xmlSBufAddCString(&buf, "&#38;", 5);
4354
1.69k
                inSpace = 0;
4355
4.86k
            } else if (val == ' ') {
4356
1.20k
                if ((!normalize) || (!inSpace))
4357
1.01k
                    xmlSBufAddCString(&buf, " ", 1);
4358
1.20k
                inSpace = 1;
4359
3.65k
            } else {
4360
3.65k
                xmlSBufAddChar(&buf, val);
4361
3.65k
                inSpace = 0;
4362
3.65k
            }
4363
72.7k
        } else {
4364
72.7k
            const xmlChar *name;
4365
72.7k
            xmlEntityPtr ent;
4366
4367
72.7k
            if (chunkSize > 0) {
4368
22.0k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4369
22.0k
                chunkSize = 0;
4370
22.0k
            }
4371
4372
72.7k
            name = xmlParseEntityRefInternal(ctxt);
4373
72.7k
            if (name == NULL) {
4374
                /*
4375
                 * Probably a literal '&' which wasn't escaped.
4376
                 * TODO: Handle gracefully in recovery mode.
4377
                 */
4378
4.86k
                continue;
4379
4.86k
            }
4380
4381
67.9k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4382
67.9k
            if (ent == NULL)
4383
10.8k
                continue;
4384
4385
57.1k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4386
3.03k
                if ((ent->content[0] == '&') && (!replaceEntities))
4387
1.56k
                    xmlSBufAddCString(&buf, "&#38;", 5);
4388
1.46k
                else
4389
1.46k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4390
3.03k
                inSpace = 0;
4391
54.0k
            } else if (replaceEntities) {
4392
22.3k
                xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4393
22.3k
                                          normalize, &inSpace, ctxt->inputNr,
4394
22.3k
                                          /* check */ 1);
4395
31.7k
            } else {
4396
31.7k
                if ((ent->flags & flags) != flags)
4397
357
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4398
4399
31.7k
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4400
8
                    ent->content[0] = 0;
4401
8
                    goto error;
4402
8
                }
4403
4404
                /*
4405
                 * Just output the reference
4406
                 */
4407
31.7k
                xmlSBufAddCString(&buf, "&", 1);
4408
31.7k
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4409
31.7k
                xmlSBufAddCString(&buf, ";", 1);
4410
4411
31.7k
                inSpace = 0;
4412
31.7k
            }
4413
57.1k
  }
4414
862k
    }
4415
4416
55.6k
    if ((buf.mem == NULL) && (alloc != NULL)) {
4417
20.6k
        ret = (xmlChar *) CUR_PTR - chunkSize;
4418
4419
20.6k
        if (attlen != NULL)
4420
20.6k
            *attlen = chunkSize;
4421
20.6k
        if ((normalize) && (inSpace) && (chunkSize > 0))
4422
81
            *attlen -= 1;
4423
20.6k
        *alloc = 0;
4424
4425
        /* Report potential error */
4426
20.6k
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4427
35.0k
    } else {
4428
35.0k
        if (chunkSize > 0)
4429
21.9k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4430
4431
35.0k
        if ((normalize) && (inSpace) && (buf.size > 0))
4432
108
            buf.size--;
4433
4434
35.0k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4435
4436
35.0k
        if (ret != NULL) {
4437
35.0k
            if (attlen != NULL)
4438
2.93k
                *attlen = buf.size;
4439
35.0k
            if (alloc != NULL)
4440
2.93k
                *alloc = 1;
4441
35.0k
        }
4442
35.0k
    }
4443
4444
55.6k
    NEXTL(1);
4445
4446
55.6k
    return(ret);
4447
4448
10.5k
error:
4449
10.5k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4450
10.5k
    return(NULL);
4451
66.2k
}
4452
4453
/**
4454
 * parse a value for an attribute
4455
 * Note: the parser won't do substitution of entities here, this
4456
 * will be handled later in xmlStringGetNodeList()
4457
 *
4458
 * @deprecated Internal function, don't use.
4459
 *
4460
 *     [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4461
 *                       "'" ([^<&'] | Reference)* "'"
4462
 *
4463
 * 3.3.3 Attribute-Value Normalization:
4464
 *
4465
 * Before the value of an attribute is passed to the application or
4466
 * checked for validity, the XML processor must normalize it as follows:
4467
 *
4468
 * - a character reference is processed by appending the referenced
4469
 *   character to the attribute value
4470
 * - an entity reference is processed by recursively processing the
4471
 *   replacement text of the entity
4472
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4473
 *   appending \#x20 to the normalized value, except that only a single
4474
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4475
 *   parsed entity or the literal entity value of an internal parsed entity
4476
 * - other characters are processed by appending them to the normalized value
4477
 *
4478
 * If the declared value is not CDATA, then the XML processor must further
4479
 * process the normalized attribute value by discarding any leading and
4480
 * trailing space (\#x20) characters, and by replacing sequences of space
4481
 * (\#x20) characters by a single space (\#x20) character.
4482
 * All attributes for which no declaration has been read should be treated
4483
 * by a non-validating parser as if declared CDATA.
4484
 *
4485
 * @param ctxt  an XML parser context
4486
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4487
 * caller.
4488
 */
4489
xmlChar *
4490
45.3k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4491
45.3k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4492
45.3k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4493
45.3k
}
4494
4495
/**
4496
 * parse an XML Literal
4497
 *
4498
 * @deprecated Internal function, don't use.
4499
 *
4500
 *     [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4501
 *
4502
 * @param ctxt  an XML parser context
4503
 * @returns the SystemLiteral parsed or NULL
4504
 */
4505
4506
xmlChar *
4507
4.58k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4508
4.58k
    xmlChar *buf = NULL;
4509
4.58k
    int len = 0;
4510
4.58k
    int size = XML_PARSER_BUFFER_SIZE;
4511
4.58k
    int cur, l;
4512
4.58k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4513
0
                    XML_MAX_TEXT_LENGTH :
4514
4.58k
                    XML_MAX_NAME_LENGTH;
4515
4.58k
    xmlChar stop;
4516
4517
4.58k
    if (RAW == '"') {
4518
2.28k
        NEXT;
4519
2.28k
  stop = '"';
4520
2.29k
    } else if (RAW == '\'') {
4521
465
        NEXT;
4522
465
  stop = '\'';
4523
1.83k
    } else {
4524
1.83k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4525
1.83k
  return(NULL);
4526
1.83k
    }
4527
4528
2.75k
    buf = xmlMalloc(size);
4529
2.75k
    if (buf == NULL) {
4530
0
        xmlErrMemory(ctxt);
4531
0
  return(NULL);
4532
0
    }
4533
2.75k
    cur = xmlCurrentCharRecover(ctxt, &l);
4534
26.6k
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4535
23.9k
  if (len + 5 >= size) {
4536
227
      xmlChar *tmp;
4537
227
            int newSize;
4538
4539
227
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4540
227
            if (newSize < 0) {
4541
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4542
0
                xmlFree(buf);
4543
0
                return(NULL);
4544
0
            }
4545
227
      tmp = xmlRealloc(buf, newSize);
4546
227
      if (tmp == NULL) {
4547
0
          xmlFree(buf);
4548
0
    xmlErrMemory(ctxt);
4549
0
    return(NULL);
4550
0
      }
4551
227
      buf = tmp;
4552
227
            size = newSize;
4553
227
  }
4554
23.9k
  COPY_BUF(buf, len, cur);
4555
23.9k
  NEXTL(l);
4556
23.9k
  cur = xmlCurrentCharRecover(ctxt, &l);
4557
23.9k
    }
4558
2.75k
    buf[len] = 0;
4559
2.75k
    if (!IS_CHAR(cur)) {
4560
1.13k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4561
1.62k
    } else {
4562
1.62k
  NEXT;
4563
1.62k
    }
4564
2.75k
    return(buf);
4565
2.75k
}
4566
4567
/**
4568
 * parse an XML public literal
4569
 *
4570
 * @deprecated Internal function, don't use.
4571
 *
4572
 *     [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4573
 *
4574
 * @param ctxt  an XML parser context
4575
 * @returns the PubidLiteral parsed or NULL.
4576
 */
4577
4578
xmlChar *
4579
3.96k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4580
3.96k
    xmlChar *buf = NULL;
4581
3.96k
    int len = 0;
4582
3.96k
    int size = XML_PARSER_BUFFER_SIZE;
4583
3.96k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4584
0
                    XML_MAX_TEXT_LENGTH :
4585
3.96k
                    XML_MAX_NAME_LENGTH;
4586
3.96k
    xmlChar cur;
4587
3.96k
    xmlChar stop;
4588
4589
3.96k
    if (RAW == '"') {
4590
2.27k
        NEXT;
4591
2.27k
  stop = '"';
4592
2.27k
    } else if (RAW == '\'') {
4593
1.12k
        NEXT;
4594
1.12k
  stop = '\'';
4595
1.12k
    } else {
4596
568
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4597
568
  return(NULL);
4598
568
    }
4599
3.40k
    buf = xmlMalloc(size);
4600
3.40k
    if (buf == NULL) {
4601
0
  xmlErrMemory(ctxt);
4602
0
  return(NULL);
4603
0
    }
4604
3.40k
    cur = CUR;
4605
46.8k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4606
46.8k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4607
43.6k
  if (len + 1 >= size) {
4608
196
      xmlChar *tmp;
4609
196
            int newSize;
4610
4611
196
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4612
196
            if (newSize) {
4613
196
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4614
196
                xmlFree(buf);
4615
196
                return(NULL);
4616
196
            }
4617
0
      tmp = xmlRealloc(buf, size);
4618
0
      if (tmp == NULL) {
4619
0
    xmlErrMemory(ctxt);
4620
0
    xmlFree(buf);
4621
0
    return(NULL);
4622
0
      }
4623
0
      buf = tmp;
4624
0
            size = newSize;
4625
0
  }
4626
43.4k
  buf[len++] = cur;
4627
43.4k
  NEXT;
4628
43.4k
  cur = CUR;
4629
43.4k
    }
4630
3.20k
    buf[len] = 0;
4631
3.20k
    if (cur != stop) {
4632
2.52k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4633
2.52k
    } else {
4634
680
  NEXTL(1);
4635
680
    }
4636
3.20k
    return(buf);
4637
3.40k
}
4638
4639
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4640
4641
/*
 * Lookup table for the inner loop of the character data fast path.
 * An entry is non-zero (equal to its own index) for the bytes that loop
 * may step over, and zero for everything it must stop on: control
 * characters other than TAB, '&', '<', ']' and all bytes >= 0x80.
 * The entries for 0x80..0xFF are not spelled out; they are implicitly
 * zero-initialized.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 .. 0xFF: non-ascii, all zero */
};
4678
4679
static void
4680
xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size,
4681
45.7k
              int isBlank) {
4682
45.7k
    int checkBlanks;
4683
4684
45.7k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
4685
19.9k
        return;
4686
4687
25.7k
    checkBlanks = (!ctxt->keepBlanks) ||
4688
25.7k
                  (ctxt->sax->ignorableWhitespace != ctxt->sax->characters);
4689
4690
    /*
4691
     * Calling areBlanks with only parts of a text node
4692
     * is fundamentally broken, making the NOBLANKS option
4693
     * essentially unusable.
4694
     */
4695
25.7k
    if ((checkBlanks) &&
4696
25.7k
        (areBlanks(ctxt, buf, size, isBlank))) {
4697
2.45k
        if ((ctxt->sax->ignorableWhitespace != NULL) &&
4698
2.45k
            (ctxt->keepBlanks))
4699
0
            ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
4700
23.3k
    } else {
4701
23.3k
        if (ctxt->sax->characters != NULL)
4702
23.3k
            ctxt->sax->characters(ctxt->userData, buf, size);
4703
4704
        /*
4705
         * The old code used to update this value for "complex" data
4706
         * even if checkBlanks was false. This was probably a bug.
4707
         */
4708
23.3k
        if ((checkBlanks) && (*ctxt->space == -1))
4709
3.57k
            *ctxt->space = -2;
4710
23.3k
    }
4711
25.7k
}
4712
4713
/**
4714
 * Parse character data. Always makes progress if the first char isn't
4715
 * '<' or '&'.
4716
 *
4717
 * The right angle bracket (>) may be represented using the string "&gt;",
4718
 * and must, for compatibility, be escaped using "&gt;" or a character
4719
 * reference when it appears in the string "]]>" in content, when that
4720
 * string is not marking the end of a CDATA section.
4721
 *
4722
 *     [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4723
 * @param ctxt  an XML parser context
4724
 * @param partial  buffer may contain partial UTF-8 sequences
4725
 */
4726
static void
4727
49.1k
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4728
49.1k
    const xmlChar *in;
4729
49.1k
    int nbchar = 0;
4730
49.1k
    int line = ctxt->input->line;
4731
49.1k
    int col = ctxt->input->col;
4732
49.1k
    int ccol;
4733
4734
49.1k
    GROW;
4735
    /*
4736
     * Accelerated common case where input don't need to be
4737
     * modified before passing it to the handler.
4738
     */
4739
49.1k
    in = ctxt->input->cur;
4740
49.5k
    do {
4741
49.9k
get_more_space:
4742
59.8k
        while (*in == 0x20) { in++; ctxt->input->col++; }
4743
49.9k
        if (*in == 0xA) {
4744
716
            do {
4745
716
                ctxt->input->line++; ctxt->input->col = 1;
4746
716
                in++;
4747
716
            } while (*in == 0xA);
4748
325
            goto get_more_space;
4749
325
        }
4750
49.5k
        if (*in == '<') {
4751
5.45k
            nbchar = in - ctxt->input->cur;
4752
5.45k
            if (nbchar > 0) {
4753
5.45k
                const xmlChar *tmp = ctxt->input->cur;
4754
5.45k
                ctxt->input->cur = in;
4755
4756
5.45k
                xmlCharacters(ctxt, tmp, nbchar, 1);
4757
5.45k
            }
4758
5.45k
            return;
4759
5.45k
        }
4760
4761
46.9k
get_more:
4762
46.9k
        ccol = ctxt->input->col;
4763
130k
        while (test_char_data[*in]) {
4764
83.5k
            in++;
4765
83.5k
            ccol++;
4766
83.5k
        }
4767
46.9k
        ctxt->input->col = ccol;
4768
46.9k
        if (*in == 0xA) {
4769
504
            do {
4770
504
                ctxt->input->line++; ctxt->input->col = 1;
4771
504
                in++;
4772
504
            } while (*in == 0xA);
4773
310
            goto get_more;
4774
310
        }
4775
46.6k
        if (*in == ']') {
4776
2.77k
            if ((in[1] == ']') && (in[2] == '>')) {
4777
270
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4778
270
                ctxt->input->cur = in + 1;
4779
270
                return;
4780
270
            }
4781
2.50k
            if ((!partial) || (ctxt->input->end - in >= 2)) {
4782
2.50k
                in++;
4783
2.50k
                ctxt->input->col++;
4784
2.50k
                goto get_more;
4785
2.50k
            }
4786
2.50k
        }
4787
43.8k
        nbchar = in - ctxt->input->cur;
4788
43.8k
        if (nbchar > 0) {
4789
33.3k
            const xmlChar *tmp = ctxt->input->cur;
4790
33.3k
            ctxt->input->cur = in;
4791
4792
33.3k
            xmlCharacters(ctxt, tmp, nbchar, 0);
4793
4794
33.3k
            line = ctxt->input->line;
4795
33.3k
            col = ctxt->input->col;
4796
33.3k
        }
4797
43.8k
        ctxt->input->cur = in;
4798
43.8k
        if (*in == 0xD) {
4799
1.57k
            in++;
4800
1.57k
            if (*in == 0xA) {
4801
388
                ctxt->input->cur = in;
4802
388
                in++;
4803
388
                ctxt->input->line++; ctxt->input->col = 1;
4804
388
                continue; /* while */
4805
388
            }
4806
1.18k
            in--;
4807
1.18k
        }
4808
43.4k
        if (*in == '<') {
4809
26.7k
            return;
4810
26.7k
        }
4811
16.7k
        if (*in == '&') {
4812
3.80k
            return;
4813
3.80k
        }
4814
12.9k
        if ((partial) && (*in == ']') && (ctxt->input->end - in < 2)) {
4815
0
            return;
4816
0
        }
4817
12.9k
        SHRINK;
4818
12.9k
        GROW;
4819
12.9k
        in = ctxt->input->cur;
4820
13.2k
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4821
13.2k
             (*in == 0x09) || (*in == 0x0a));
4822
12.9k
    ctxt->input->line = line;
4823
12.9k
    ctxt->input->col = col;
4824
12.9k
    xmlParseCharDataComplex(ctxt, partial);
4825
12.9k
}
4826
4827
/**
4828
 * Always makes progress if the first char isn't '<' or '&'.
4829
 *
4830
 * parse a CharData section.this is the fallback function
4831
 * of xmlParseCharData() when the parsing requires handling
4832
 * of non-ASCII characters.
4833
 *
4834
 * @param ctxt  an XML parser context
4835
 * @param partial  whether the input can end with truncated UTF-8
4836
 */
4837
static void
4838
12.9k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4839
12.9k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4840
12.9k
    int nbchar = 0;
4841
12.9k
    int cur, l;
4842
4843
12.9k
    cur = xmlCurrentCharRecover(ctxt, &l);
4844
52.2k
    while ((cur != '<') && /* checked */
4845
52.2k
           (cur != '&') &&
4846
52.2k
           ((!partial) || (cur != ']') ||
4847
45.7k
            (ctxt->input->end - ctxt->input->cur >= 2)) &&
4848
52.2k
     (IS_CHAR(cur))) {
4849
39.3k
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4850
313
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4851
313
  }
4852
39.3k
  COPY_BUF(buf, nbchar, cur);
4853
  /* move current position before possible calling of ctxt->sax->characters */
4854
39.3k
  NEXTL(l);
4855
39.3k
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4856
181
      buf[nbchar] = 0;
4857
4858
181
            xmlCharacters(ctxt, buf, nbchar, 0);
4859
181
      nbchar = 0;
4860
181
            SHRINK;
4861
181
  }
4862
39.3k
  cur = xmlCurrentCharRecover(ctxt, &l);
4863
39.3k
    }
4864
12.9k
    if (nbchar != 0) {
4865
6.79k
        buf[nbchar] = 0;
4866
4867
6.79k
        xmlCharacters(ctxt, buf, nbchar, 0);
4868
6.79k
    }
4869
    /*
4870
     * cur == 0 can mean
4871
     *
4872
     * - End of buffer.
4873
     * - An actual 0 character.
4874
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4875
     */
4876
12.9k
    if (ctxt->input->cur < ctxt->input->end) {
4877
11.5k
        if ((cur == 0) && (CUR != 0)) {
4878
21
            if (partial == 0) {
4879
21
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4880
21
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4881
21
                NEXTL(1);
4882
21
            }
4883
11.5k
        } else if ((cur != '<') && (cur != '&') && (cur != ']')) {
4884
            /* Generate the error and skip the offending character */
4885
4.98k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4886
4.98k
                              "PCDATA invalid Char value %d\n", cur);
4887
4.98k
            NEXTL(l);
4888
4.98k
        }
4889
11.5k
    }
4890
12.9k
}
4891
4892
/**
4893
 * @deprecated Internal function, don't use.
4894
 * @param ctxt  an XML parser context
4895
 * @param cdata  unused
4896
 */
4897
void
4898
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4899
0
    xmlParseCharDataInternal(ctxt, 0);
4900
0
}
4901
4902
/**
4903
 * Parse an External ID or a Public ID
4904
 *
4905
 * @deprecated Internal function, don't use.
4906
 *
4907
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4908
 * `'PUBLIC' S PubidLiteral S SystemLiteral`
4909
 *
4910
 *     [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4911
 *                       | 'PUBLIC' S PubidLiteral S SystemLiteral
4912
 *
4913
 *     [83] PublicID ::= 'PUBLIC' S PubidLiteral
4914
 *
4915
 * @param ctxt  an XML parser context
4916
 * @param publicID  a xmlChar** receiving PubidLiteral
4917
 * @param strict  indicate whether we should restrict parsing to only
4918
 *          production [75], see NOTE below
4919
 * @returns the function returns SystemLiteral and in the second
4920
 *                case publicID receives PubidLiteral, is strict is off
4921
 *                it is possible to return NULL and have publicID set.
4922
 */
4923
4924
xmlChar *
4925
18.7k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4926
18.7k
    xmlChar *URI = NULL;
4927
4928
18.7k
    *publicID = NULL;
4929
18.7k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4930
2.19k
        SKIP(6);
4931
2.19k
  if (SKIP_BLANKS == 0) {
4932
1.89k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4933
1.89k
                     "Space required after 'SYSTEM'\n");
4934
1.89k
  }
4935
2.19k
  URI = xmlParseSystemLiteral(ctxt);
4936
2.19k
  if (URI == NULL) {
4937
397
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4938
397
        }
4939
16.5k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4940
3.96k
        SKIP(6);
4941
3.96k
  if (SKIP_BLANKS == 0) {
4942
3.39k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4943
3.39k
        "Space required after 'PUBLIC'\n");
4944
3.39k
  }
4945
3.96k
  *publicID = xmlParsePubidLiteral(ctxt);
4946
3.96k
  if (*publicID == NULL) {
4947
764
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4948
764
  }
4949
3.96k
  if (strict) {
4950
      /*
4951
       * We don't handle [83] so "S SystemLiteral" is required.
4952
       */
4953
1.79k
      if (SKIP_BLANKS == 0) {
4954
1.41k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4955
1.41k
      "Space required after the Public Identifier\n");
4956
1.41k
      }
4957
2.17k
  } else {
4958
      /*
4959
       * We handle [83] so we return immediately, if
4960
       * "S SystemLiteral" is not detected. We skip blanks if no
4961
             * system literal was found, but this is harmless since we must
4962
             * be at the end of a NotationDecl.
4963
       */
4964
2.17k
      if (SKIP_BLANKS == 0) return(NULL);
4965
992
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4966
992
  }
4967
2.39k
  URI = xmlParseSystemLiteral(ctxt);
4968
2.39k
  if (URI == NULL) {
4969
1.43k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4970
1.43k
        }
4971
2.39k
    }
4972
17.1k
    return(URI);
4973
18.7k
}
4974
4975
/**
4976
 * Skip an XML (SGML) comment <!-- .... -->
4977
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4978
 *  must not occur within comments. "
4979
 * This is the slow routine in case the accelerator for ascii didn't work
4980
 *
4981
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4982
 * @param ctxt  an XML parser context
4983
 * @param buf  the already parsed part of the buffer
4984
 * @param len  number of bytes in the buffer
4985
 * @param size  allocated size of the buffer
4986
 */
4987
static void
4988
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4989
8.71k
                       size_t len, size_t size) {
4990
8.71k
    int q, ql;
4991
8.71k
    int r, rl;
4992
8.71k
    int cur, l;
4993
8.71k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4994
0
                    XML_MAX_HUGE_LENGTH :
4995
8.71k
                    XML_MAX_TEXT_LENGTH;
4996
4997
8.71k
    if (buf == NULL) {
4998
4.66k
        len = 0;
4999
4.66k
  size = XML_PARSER_BUFFER_SIZE;
5000
4.66k
  buf = xmlMalloc(size);
5001
4.66k
  if (buf == NULL) {
5002
0
      xmlErrMemory(ctxt);
5003
0
      return;
5004
0
  }
5005
4.66k
    }
5006
8.71k
    q = xmlCurrentCharRecover(ctxt, &ql);
5007
8.71k
    if (q == 0)
5008
3.66k
        goto not_terminated;
5009
5.04k
    if (!IS_CHAR(q)) {
5010
270
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5011
270
                          "xmlParseComment: invalid xmlChar value %d\n",
5012
270
                    q);
5013
270
  xmlFree (buf);
5014
270
  return;
5015
270
    }
5016
4.77k
    NEXTL(ql);
5017
4.77k
    r = xmlCurrentCharRecover(ctxt, &rl);
5018
4.77k
    if (r == 0)
5019
418
        goto not_terminated;
5020
4.36k
    if (!IS_CHAR(r)) {
5021
135
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5022
135
                          "xmlParseComment: invalid xmlChar value %d\n",
5023
135
                    r);
5024
135
  xmlFree (buf);
5025
135
  return;
5026
135
    }
5027
4.22k
    NEXTL(rl);
5028
4.22k
    cur = xmlCurrentCharRecover(ctxt, &l);
5029
4.22k
    if (cur == 0)
5030
230
        goto not_terminated;
5031
96.6k
    while (IS_CHAR(cur) && /* checked */
5032
96.6k
           ((cur != '>') ||
5033
95.5k
      (r != '-') || (q != '-'))) {
5034
92.6k
  if ((r == '-') && (q == '-')) {
5035
441
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5036
441
  }
5037
92.6k
  if (len + 5 >= size) {
5038
1.35k
      xmlChar *tmp;
5039
1.35k
            int newSize;
5040
5041
1.35k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5042
1.35k
            if (newSize < 0) {
5043
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5044
0
                             "Comment too big found", NULL);
5045
0
                xmlFree (buf);
5046
0
                return;
5047
0
            }
5048
1.35k
      tmp = xmlRealloc(buf, newSize);
5049
1.35k
      if (tmp == NULL) {
5050
0
    xmlErrMemory(ctxt);
5051
0
    xmlFree(buf);
5052
0
    return;
5053
0
      }
5054
1.35k
      buf = tmp;
5055
1.35k
            size = newSize;
5056
1.35k
  }
5057
92.6k
  COPY_BUF(buf, len, q);
5058
5059
92.6k
  q = r;
5060
92.6k
  ql = rl;
5061
92.6k
  r = cur;
5062
92.6k
  rl = l;
5063
5064
92.6k
  NEXTL(l);
5065
92.6k
  cur = xmlCurrentCharRecover(ctxt, &l);
5066
5067
92.6k
    }
5068
3.99k
    buf[len] = 0;
5069
3.99k
    if (cur == 0) {
5070
930
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5071
930
                       "Comment not terminated \n<!--%.50s\n", buf);
5072
3.06k
    } else if (!IS_CHAR(cur)) {
5073
138
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5074
138
                          "xmlParseComment: invalid xmlChar value %d\n",
5075
138
                    cur);
5076
2.92k
    } else {
5077
2.92k
        NEXT;
5078
2.92k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5079
2.92k
      (!ctxt->disableSAX))
5080
2.72k
      ctxt->sax->comment(ctxt->userData, buf);
5081
2.92k
    }
5082
3.99k
    xmlFree(buf);
5083
3.99k
    return;
5084
4.31k
not_terminated:
5085
4.31k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5086
4.31k
       "Comment not terminated\n", NULL);
5087
4.31k
    xmlFree(buf);
5088
4.31k
}
5089
5090
/**
5091
 * Parse an XML (SGML) comment. Always consumes '<!'.
5092
 *
5093
 * @deprecated Internal function, don't use.
5094
 *
5095
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5096
 *  must not occur within comments. "
5097
 *
5098
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5099
 * @param ctxt  an XML parser context
5100
 */
5101
void
5102
12.1k
xmlParseComment(xmlParserCtxtPtr ctxt) {
5103
12.1k
    xmlChar *buf = NULL;
5104
12.1k
    size_t size = XML_PARSER_BUFFER_SIZE;
5105
12.1k
    size_t len = 0;
5106
12.1k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5107
0
                       XML_MAX_HUGE_LENGTH :
5108
12.1k
                       XML_MAX_TEXT_LENGTH;
5109
12.1k
    const xmlChar *in;
5110
12.1k
    size_t nbchar = 0;
5111
12.1k
    int ccol;
5112
5113
    /*
5114
     * Check that there is a comment right here.
5115
     */
5116
12.1k
    if ((RAW != '<') || (NXT(1) != '!'))
5117
0
        return;
5118
12.1k
    SKIP(2);
5119
12.1k
    if ((RAW != '-') || (NXT(1) != '-'))
5120
3
        return;
5121
12.1k
    SKIP(2);
5122
12.1k
    GROW;
5123
5124
    /*
5125
     * Accelerated common case where input don't need to be
5126
     * modified before passing it to the handler.
5127
     */
5128
12.1k
    in = ctxt->input->cur;
5129
12.1k
    do {
5130
12.1k
  if (*in == 0xA) {
5131
411
      do {
5132
411
    ctxt->input->line++; ctxt->input->col = 1;
5133
411
    in++;
5134
411
      } while (*in == 0xA);
5135
212
  }
5136
18.5k
get_more:
5137
18.5k
        ccol = ctxt->input->col;
5138
83.8k
  while (((*in > '-') && (*in <= 0x7F)) ||
5139
83.8k
         ((*in >= 0x20) && (*in < '-')) ||
5140
83.8k
         (*in == 0x09)) {
5141
65.2k
        in++;
5142
65.2k
        ccol++;
5143
65.2k
  }
5144
18.5k
  ctxt->input->col = ccol;
5145
18.5k
  if (*in == 0xA) {
5146
432
      do {
5147
432
    ctxt->input->line++; ctxt->input->col = 1;
5148
432
    in++;
5149
432
      } while (*in == 0xA);
5150
238
      goto get_more;
5151
238
  }
5152
18.3k
  nbchar = in - ctxt->input->cur;
5153
  /*
5154
   * save current set of data
5155
   */
5156
18.3k
  if (nbchar > 0) {
5157
9.58k
            if (nbchar > maxLength - len) {
5158
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5159
0
                                  "Comment too big found", NULL);
5160
0
                xmlFree(buf);
5161
0
                return;
5162
0
            }
5163
9.58k
            if (buf == NULL) {
5164
5.55k
                if ((*in == '-') && (in[1] == '-'))
5165
1.41k
                    size = nbchar + 1;
5166
4.13k
                else
5167
4.13k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5168
5.55k
                buf = xmlMalloc(size);
5169
5.55k
                if (buf == NULL) {
5170
0
                    xmlErrMemory(ctxt);
5171
0
                    return;
5172
0
                }
5173
5.55k
                len = 0;
5174
5.55k
            } else if (len + nbchar + 1 >= size) {
5175
719
                xmlChar *new_buf;
5176
719
                size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5177
719
                new_buf = xmlRealloc(buf, size);
5178
719
                if (new_buf == NULL) {
5179
0
                    xmlErrMemory(ctxt);
5180
0
                    xmlFree(buf);
5181
0
                    return;
5182
0
                }
5183
719
                buf = new_buf;
5184
719
            }
5185
9.58k
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5186
9.58k
            len += nbchar;
5187
9.58k
            buf[len] = 0;
5188
9.58k
  }
5189
18.3k
  ctxt->input->cur = in;
5190
18.3k
  if (*in == 0xA) {
5191
0
      in++;
5192
0
      ctxt->input->line++; ctxt->input->col = 1;
5193
0
  }
5194
18.3k
  if (*in == 0xD) {
5195
1.61k
      in++;
5196
1.61k
      if (*in == 0xA) {
5197
227
    ctxt->input->cur = in;
5198
227
    in++;
5199
227
    ctxt->input->line++; ctxt->input->col = 1;
5200
227
    goto get_more;
5201
227
      }
5202
1.38k
      in--;
5203
1.38k
  }
5204
18.0k
  SHRINK;
5205
18.0k
  GROW;
5206
18.0k
  in = ctxt->input->cur;
5207
18.0k
  if (*in == '-') {
5208
9.36k
      if (in[1] == '-') {
5209
6.63k
          if (in[2] == '>') {
5210
3.46k
        SKIP(3);
5211
3.46k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5212
3.46k
            (!ctxt->disableSAX)) {
5213
2.91k
      if (buf != NULL)
5214
1.34k
          ctxt->sax->comment(ctxt->userData, buf);
5215
1.56k
      else
5216
1.56k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5217
2.91k
        }
5218
3.46k
        if (buf != NULL)
5219
1.50k
            xmlFree(buf);
5220
3.46k
        return;
5221
3.46k
    }
5222
3.17k
    if (buf != NULL) {
5223
1.50k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5224
1.50k
                          "Double hyphen within comment: "
5225
1.50k
                                      "<!--%.50s\n",
5226
1.50k
              buf);
5227
1.50k
    } else
5228
1.67k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5229
1.67k
                          "Double hyphen within comment\n", NULL);
5230
3.17k
    in++;
5231
3.17k
    ctxt->input->col++;
5232
3.17k
      }
5233
5.89k
      in++;
5234
5.89k
      ctxt->input->col++;
5235
5.89k
      goto get_more;
5236
9.36k
  }
5237
18.0k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5238
8.71k
    xmlParseCommentComplex(ctxt, buf, len, size);
5239
8.71k
}
5240
5241
5242
/**
5243
 * parse the name of a PI
5244
 *
5245
 * @deprecated Internal function, don't use.
5246
 *
5247
 *     [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5248
 *
5249
 * @param ctxt  an XML parser context
5250
 * @returns the PITarget name or NULL
5251
 */
5252
5253
const xmlChar *
5254
26.8k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5255
26.8k
    const xmlChar *name;
5256
5257
26.8k
    name = xmlParseName(ctxt);
5258
26.8k
    if ((name != NULL) &&
5259
26.8k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5260
26.8k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5261
26.8k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5262
1.48k
  int i;
5263
1.48k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5264
1.48k
      (name[2] == 'l') && (name[3] == 0)) {
5265
292
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5266
292
     "XML declaration allowed only at the start of the document\n");
5267
292
      return(name);
5268
1.19k
  } else if (name[3] == 0) {
5269
545
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5270
545
      return(name);
5271
545
  }
5272
1.75k
  for (i = 0;;i++) {
5273
1.75k
      if (xmlW3CPIs[i] == NULL) break;
5274
1.29k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5275
194
          return(name);
5276
1.29k
  }
5277
455
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5278
455
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5279
455
          NULL, NULL);
5280
455
    }
5281
25.7k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5282
835
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5283
835
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5284
835
    }
5285
25.7k
    return(name);
5286
26.8k
}
5287
5288
#ifdef LIBXML_CATALOG_ENABLED
/**
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must have the form `catalog = "URL"` (single or double
 * quotes, optional blanks around each token). A value whose "catalog"
 * keyword is not followed by '=' is ignored silently; other syntax
 * errors raise an XML_WAR_CATALOG_PI warning.
 *
 * @param ctxt  an XML parser context
 * @param catalog  the PI value string
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* Keyword. */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* Equals sign. */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* Opening quote. */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* Quoted URL, up to the matching quote. */
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the closing quote. */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        /*
         * Unfortunately, the catalog API doesn't report OOM errors.
         * xmlGetLastError isn't very helpful since we don't know
         * where the last error came from. We'd have to reset it
         * before this call and restore it afterwards.
         */
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5354
5355
/**
5356
 * parse an XML Processing Instruction.
5357
 *
5358
 * @deprecated Internal function, don't use.
5359
 *
5360
 *     [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5361
 *
5362
 * The processing is transferred to SAX once parsed.
5363
 *
5364
 * @param ctxt  an XML parser context
5365
 */
5366
5367
void
5368
26.8k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5369
26.8k
    xmlChar *buf = NULL;
5370
26.8k
    size_t len = 0;
5371
26.8k
    size_t size = XML_PARSER_BUFFER_SIZE;
5372
26.8k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5373
0
                       XML_MAX_HUGE_LENGTH :
5374
26.8k
                       XML_MAX_TEXT_LENGTH;
5375
26.8k
    int cur, l;
5376
26.8k
    const xmlChar *target;
5377
5378
26.8k
    if ((RAW == '<') && (NXT(1) == '?')) {
5379
  /*
5380
   * this is a Processing Instruction.
5381
   */
5382
26.8k
  SKIP(2);
5383
5384
  /*
5385
   * Parse the target name and check for special support like
5386
   * namespace.
5387
   */
5388
26.8k
        target = xmlParsePITarget(ctxt);
5389
26.8k
  if (target != NULL) {
5390
15.3k
      if ((RAW == '?') && (NXT(1) == '>')) {
5391
5.01k
    SKIP(2);
5392
5393
    /*
5394
     * SAX: PI detected.
5395
     */
5396
5.01k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5397
5.01k
        (ctxt->sax->processingInstruction != NULL))
5398
4.11k
        ctxt->sax->processingInstruction(ctxt->userData,
5399
4.11k
                                         target, NULL);
5400
5.01k
    return;
5401
5.01k
      }
5402
10.2k
      buf = xmlMalloc(size);
5403
10.2k
      if (buf == NULL) {
5404
0
    xmlErrMemory(ctxt);
5405
0
    return;
5406
0
      }
5407
10.2k
      if (SKIP_BLANKS == 0) {
5408
6.18k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5409
6.18k
        "ParsePI: PI %s space expected\n", target);
5410
6.18k
      }
5411
10.2k
      cur = xmlCurrentCharRecover(ctxt, &l);
5412
113k
      while (IS_CHAR(cur) && /* checked */
5413
113k
       ((cur != '?') || (NXT(1) != '>'))) {
5414
102k
    if (len + 5 >= size) {
5415
1.00k
        xmlChar *tmp;
5416
1.00k
                    int newSize;
5417
5418
1.00k
                    newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5419
1.00k
                    if (newSize < 0) {
5420
0
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5421
0
                                          "PI %s too big found", target);
5422
0
                        xmlFree(buf);
5423
0
                        return;
5424
0
                    }
5425
1.00k
        tmp = xmlRealloc(buf, newSize);
5426
1.00k
        if (tmp == NULL) {
5427
0
      xmlErrMemory(ctxt);
5428
0
      xmlFree(buf);
5429
0
      return;
5430
0
        }
5431
1.00k
        buf = tmp;
5432
1.00k
                    size = newSize;
5433
1.00k
    }
5434
102k
    COPY_BUF(buf, len, cur);
5435
102k
    NEXTL(l);
5436
102k
    cur = xmlCurrentCharRecover(ctxt, &l);
5437
102k
      }
5438
10.2k
      buf[len] = 0;
5439
10.2k
      if (cur != '?') {
5440
4.75k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5441
4.75k
          "ParsePI: PI %s never end ...\n", target);
5442
5.52k
      } else {
5443
5.52k
    SKIP(2);
5444
5445
5.52k
#ifdef LIBXML_CATALOG_ENABLED
5446
5.52k
    if ((ctxt->inSubset == 0) &&
5447
5.52k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5448
679
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5449
5450
679
        if ((ctxt->options & XML_PARSE_CATALOG_PI) &&
5451
679
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5452
608
       (allow == XML_CATA_ALLOW_ALL)))
5453
608
      xmlParseCatalogPI(ctxt, buf);
5454
679
    }
5455
5.52k
#endif
5456
5457
    /*
5458
     * SAX: PI detected.
5459
     */
5460
5.52k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5461
5.52k
        (ctxt->sax->processingInstruction != NULL))
5462
4.16k
        ctxt->sax->processingInstruction(ctxt->userData,
5463
4.16k
                                         target, buf);
5464
5.52k
      }
5465
10.2k
      xmlFree(buf);
5466
11.5k
  } else {
5467
11.5k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5468
11.5k
  }
5469
26.8k
    }
5470
26.8k
}
5471
5472
/**
5473
 * Parse a notation declaration. Always consumes '<!'.
5474
 *
5475
 * @deprecated Internal function, don't use.
5476
 *
5477
 *     [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID)
5478
 *                           S? '>'
5479
 *
5480
 * Hence there is actually 3 choices:
5481
 *
5482
 *     'PUBLIC' S PubidLiteral
5483
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5484
 *     'SYSTEM' S SystemLiteral
5485
 *
5486
 * See the NOTE on xmlParseExternalID().
5487
 *
5488
 * @param ctxt  an XML parser context
5489
 */
5490
5491
void
5492
4.74k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5493
4.74k
    const xmlChar *name;
5494
4.74k
    xmlChar *Pubid;
5495
4.74k
    xmlChar *Systemid;
5496
5497
4.74k
    if ((CUR != '<') || (NXT(1) != '!'))
5498
0
        return;
5499
4.74k
    SKIP(2);
5500
5501
4.74k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5502
4.72k
  int inputid = ctxt->input->id;
5503
4.72k
  SKIP(8);
5504
4.72k
  if (SKIP_BLANKS_PE == 0) {
5505
214
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5506
214
         "Space required after '<!NOTATION'\n");
5507
214
      return;
5508
214
  }
5509
5510
4.51k
        name = xmlParseName(ctxt);
5511
4.51k
  if (name == NULL) {
5512
408
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5513
408
      return;
5514
408
  }
5515
4.10k
  if (xmlStrchr(name, ':') != NULL) {
5516
472
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5517
472
         "colons are forbidden from notation names '%s'\n",
5518
472
         name, NULL, NULL);
5519
472
  }
5520
4.10k
  if (SKIP_BLANKS_PE == 0) {
5521
208
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5522
208
         "Space required after the NOTATION name'\n");
5523
208
      return;
5524
208
  }
5525
5526
  /*
5527
   * Parse the IDs.
5528
   */
5529
3.89k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5530
3.89k
  SKIP_BLANKS_PE;
5531
5532
3.89k
  if (RAW == '>') {
5533
1.46k
      if (inputid != ctxt->input->id) {
5534
125
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5535
125
                         "Notation declaration doesn't start and stop"
5536
125
                               " in the same entity\n");
5537
125
      }
5538
1.46k
      NEXT;
5539
1.46k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5540
1.46k
    (ctxt->sax->notationDecl != NULL))
5541
1.36k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5542
2.43k
  } else {
5543
2.43k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5544
2.43k
  }
5545
3.89k
  if (Systemid != NULL) xmlFree(Systemid);
5546
3.89k
  if (Pubid != NULL) xmlFree(Pubid);
5547
3.89k
    }
5548
4.74k
}
5549
5550
/**
5551
 * Parse an entity declaration. Always consumes '<!'.
5552
 *
5553
 * @deprecated Internal function, don't use.
5554
 *
5555
 *     [70] EntityDecl ::= GEDecl | PEDecl
5556
 *
5557
 *     [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5558
 *
5559
 *     [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5560
 *
5561
 *     [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5562
 *
5563
 *     [74] PEDef ::= EntityValue | ExternalID
5564
 *
5565
 *     [76] NDataDecl ::= S 'NDATA' S Name
5566
 *
5567
 * [ VC: Notation Declared ]
5568
 * The Name must match the declared name of a notation.
5569
 *
5570
 * @param ctxt  an XML parser context
5571
 */
5572
5573
void
5574
19.3k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5575
19.3k
    const xmlChar *name = NULL;
5576
19.3k
    xmlChar *value = NULL;
5577
19.3k
    xmlChar *URI = NULL, *literal = NULL;
5578
19.3k
    const xmlChar *ndata = NULL;
5579
19.3k
    int isParameter = 0;
5580
19.3k
    xmlChar *orig = NULL;
5581
5582
19.3k
    if ((CUR != '<') || (NXT(1) != '!'))
5583
0
        return;
5584
19.3k
    SKIP(2);
5585
5586
    /* GROW; done in the caller */
5587
19.3k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5588
19.3k
  int inputid = ctxt->input->id;
5589
19.3k
  SKIP(6);
5590
19.3k
  if (SKIP_BLANKS_PE == 0) {
5591
16.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5592
16.9k
         "Space required after '<!ENTITY'\n");
5593
16.9k
  }
5594
5595
19.3k
  if (RAW == '%') {
5596
3.97k
      NEXT;
5597
3.97k
      if (SKIP_BLANKS_PE == 0) {
5598
3.76k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5599
3.76k
             "Space required after '%%'\n");
5600
3.76k
      }
5601
3.97k
      isParameter = 1;
5602
3.97k
  }
5603
5604
19.3k
        name = xmlParseName(ctxt);
5605
19.3k
  if (name == NULL) {
5606
506
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5607
506
                     "xmlParseEntityDecl: no name\n");
5608
506
            return;
5609
506
  }
5610
18.7k
  if (xmlStrchr(name, ':') != NULL) {
5611
4.03k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5612
4.03k
         "colons are forbidden from entities names '%s'\n",
5613
4.03k
         name, NULL, NULL);
5614
4.03k
  }
5615
18.7k
  if (SKIP_BLANKS_PE == 0) {
5616
13.7k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5617
13.7k
         "Space required after the entity name\n");
5618
13.7k
  }
5619
5620
  /*
5621
   * handle the various case of definitions...
5622
   */
5623
18.7k
  if (isParameter) {
5624
3.76k
      if ((RAW == '"') || (RAW == '\'')) {
5625
2.80k
          value = xmlParseEntityValue(ctxt, &orig);
5626
2.80k
    if (value) {
5627
2.73k
        if ((ctxt->sax != NULL) &&
5628
2.73k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5629
2.59k
      ctxt->sax->entityDecl(ctxt->userData, name,
5630
2.59k
                        XML_INTERNAL_PARAMETER_ENTITY,
5631
2.59k
            NULL, NULL, value);
5632
2.73k
    }
5633
2.80k
      } else {
5634
953
          URI = xmlParseExternalID(ctxt, &literal, 1);
5635
953
    if ((URI == NULL) && (literal == NULL)) {
5636
200
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5637
200
    }
5638
953
    if (URI) {
5639
558
                    if (xmlStrchr(URI, '#')) {
5640
194
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5641
364
                    } else {
5642
364
                        if ((ctxt->sax != NULL) &&
5643
364
                            (!ctxt->disableSAX) &&
5644
364
                            (ctxt->sax->entityDecl != NULL))
5645
292
                            ctxt->sax->entityDecl(ctxt->userData, name,
5646
292
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5647
292
                                        literal, URI, NULL);
5648
364
                    }
5649
558
    }
5650
953
      }
5651
15.0k
  } else {
5652
15.0k
      if ((RAW == '"') || (RAW == '\'')) {
5653
11.9k
          value = xmlParseEntityValue(ctxt, &orig);
5654
11.9k
    if ((ctxt->sax != NULL) &&
5655
11.9k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5656
10.8k
        ctxt->sax->entityDecl(ctxt->userData, name,
5657
10.8k
        XML_INTERNAL_GENERAL_ENTITY,
5658
10.8k
        NULL, NULL, value);
5659
    /*
5660
     * For expat compatibility in SAX mode.
5661
     */
5662
11.9k
    if ((ctxt->myDoc == NULL) ||
5663
11.9k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5664
267
        if (ctxt->myDoc == NULL) {
5665
135
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5666
135
      if (ctxt->myDoc == NULL) {
5667
0
          xmlErrMemory(ctxt);
5668
0
          goto done;
5669
0
      }
5670
135
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5671
135
        }
5672
267
        if (ctxt->myDoc->intSubset == NULL) {
5673
135
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5674
135
              BAD_CAST "fake", NULL, NULL);
5675
135
                        if (ctxt->myDoc->intSubset == NULL) {
5676
0
                            xmlErrMemory(ctxt);
5677
0
                            goto done;
5678
0
                        }
5679
135
                    }
5680
5681
267
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5682
267
                    NULL, NULL, value);
5683
267
    }
5684
11.9k
      } else {
5685
3.09k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5686
3.09k
    if ((URI == NULL) && (literal == NULL)) {
5687
2.19k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5688
2.19k
    }
5689
3.09k
    if (URI) {
5690
540
                    if (xmlStrchr(URI, '#')) {
5691
235
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5692
235
                    }
5693
540
    }
5694
3.09k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5695
679
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5696
679
           "Space required before 'NDATA'\n");
5697
679
    }
5698
3.09k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5699
566
        SKIP(5);
5700
566
        if (SKIP_BLANKS_PE == 0) {
5701
307
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5702
307
               "Space required after 'NDATA'\n");
5703
307
        }
5704
566
        ndata = xmlParseName(ctxt);
5705
566
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5706
566
            (ctxt->sax->unparsedEntityDecl != NULL))
5707
495
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5708
495
            literal, URI, ndata);
5709
2.52k
    } else {
5710
2.52k
        if ((ctxt->sax != NULL) &&
5711
2.52k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5712
2.36k
      ctxt->sax->entityDecl(ctxt->userData, name,
5713
2.36k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5714
2.36k
            literal, URI, NULL);
5715
        /*
5716
         * For expat compatibility in SAX mode.
5717
         * assuming the entity replacement was asked for
5718
         */
5719
2.52k
        if ((ctxt->replaceEntities != 0) &&
5720
2.52k
      ((ctxt->myDoc == NULL) ||
5721
0
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5722
0
      if (ctxt->myDoc == NULL) {
5723
0
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5724
0
          if (ctxt->myDoc == NULL) {
5725
0
              xmlErrMemory(ctxt);
5726
0
        goto done;
5727
0
          }
5728
0
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5729
0
      }
5730
5731
0
      if (ctxt->myDoc->intSubset == NULL) {
5732
0
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5733
0
            BAD_CAST "fake", NULL, NULL);
5734
0
                            if (ctxt->myDoc->intSubset == NULL) {
5735
0
                                xmlErrMemory(ctxt);
5736
0
                                goto done;
5737
0
                            }
5738
0
                        }
5739
0
      xmlSAX2EntityDecl(ctxt, name,
5740
0
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5741
0
                  literal, URI, NULL);
5742
0
        }
5743
2.52k
    }
5744
3.09k
      }
5745
15.0k
  }
5746
18.7k
  SKIP_BLANKS_PE;
5747
18.7k
  if (RAW != '>') {
5748
1.13k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5749
1.13k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5750
1.13k
      xmlHaltParser(ctxt);
5751
17.6k
  } else {
5752
17.6k
      if (inputid != ctxt->input->id) {
5753
314
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5754
314
                         "Entity declaration doesn't start and stop in"
5755
314
                               " the same entity\n");
5756
314
      }
5757
17.6k
      NEXT;
5758
17.6k
  }
5759
18.7k
  if (orig != NULL) {
5760
      /*
5761
       * Ugly mechanism to save the raw entity value.
5762
       */
5763
13.8k
      xmlEntityPtr cur = NULL;
5764
5765
13.8k
      if (isParameter) {
5766
2.73k
          if ((ctxt->sax != NULL) &&
5767
2.73k
        (ctxt->sax->getParameterEntity != NULL))
5768
2.73k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5769
11.0k
      } else {
5770
11.0k
          if ((ctxt->sax != NULL) &&
5771
11.0k
        (ctxt->sax->getEntity != NULL))
5772
11.0k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5773
11.0k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5774
712
        cur = xmlSAX2GetEntity(ctxt, name);
5775
712
    }
5776
11.0k
      }
5777
13.8k
            if ((cur != NULL) && (cur->orig == NULL)) {
5778
6.06k
    cur->orig = orig;
5779
6.06k
                orig = NULL;
5780
6.06k
      }
5781
13.8k
  }
5782
5783
18.7k
done:
5784
18.7k
  if (value != NULL) xmlFree(value);
5785
18.7k
  if (URI != NULL) xmlFree(URI);
5786
18.7k
  if (literal != NULL) xmlFree(literal);
5787
18.7k
        if (orig != NULL) xmlFree(orig);
5788
18.7k
    }
5789
19.3k
}
5790
5791
/**
5792
 * Parse an attribute default declaration
5793
 *
5794
 * @deprecated Internal function, don't use.
5795
 *
5796
 *     [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5797
 *
5798
 * [ VC: Required Attribute ]
5799
 * if the default declaration is the keyword \#REQUIRED, then the
5800
 * attribute must be specified for all elements of the type in the
5801
 * attribute-list declaration.
5802
 *
5803
 * [ VC: Attribute Default Legal ]
5804
 * The declared default value must meet the lexical constraints of
5805
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5806
 *
5807
 * [ VC: Fixed Attribute Default ]
5808
 * if an attribute has a default value declared with the \#FIXED
5809
 * keyword, instances of that attribute must match the default value.
5810
 *
5811
 * [ WFC: No < in Attribute Values ]
5812
 * handled in xmlParseAttValue()
5813
 *
5814
 * @param ctxt  an XML parser context
5815
 * @param value  Receive a possible fixed default value for the attribute
5816
 * @returns XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5817
 *          or XML_ATTRIBUTE_FIXED.
5818
 */
5819
5820
int
5821
37.9k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5822
37.9k
    int val;
5823
37.9k
    xmlChar *ret;
5824
5825
37.9k
    *value = NULL;
5826
37.9k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5827
488
  SKIP(9);
5828
488
  return(XML_ATTRIBUTE_REQUIRED);
5829
488
    }
5830
37.4k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5831
535
  SKIP(8);
5832
535
  return(XML_ATTRIBUTE_IMPLIED);
5833
535
    }
5834
36.8k
    val = XML_ATTRIBUTE_NONE;
5835
36.8k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5836
436
  SKIP(6);
5837
436
  val = XML_ATTRIBUTE_FIXED;
5838
436
  if (SKIP_BLANKS_PE == 0) {
5839
201
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5840
201
         "Space required after '#FIXED'\n");
5841
201
  }
5842
436
    }
5843
36.8k
    ret = xmlParseAttValue(ctxt);
5844
36.8k
    if (ret == NULL) {
5845
12.4k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5846
12.4k
           "Attribute default value declaration error\n");
5847
12.4k
    } else
5848
24.3k
        *value = ret;
5849
36.8k
    return(val);
5850
37.4k
}
5851
5852
/**
5853
 * parse an Notation attribute type.
5854
 *
5855
 * @deprecated Internal function, don't use.
5856
 *
5857
 * Note: the leading 'NOTATION' S part has already being parsed...
5858
 *
5859
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5860
 *
5861
 * [ VC: Notation Attributes ]
5862
 * Values of this type must match one of the notation names included
5863
 * in the declaration; all notation names in the declaration must be declared.
5864
 *
5865
 * @param ctxt  an XML parser context
5866
 * @returns the notation attribute tree built while parsing
5867
 */
5868
5869
xmlEnumerationPtr
5870
1.07k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5871
1.07k
    const xmlChar *name;
5872
1.07k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5873
5874
1.07k
    if (RAW != '(') {
5875
207
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5876
207
  return(NULL);
5877
207
    }
5878
1.91k
    do {
5879
1.91k
        NEXT;
5880
1.91k
  SKIP_BLANKS_PE;
5881
1.91k
        name = xmlParseName(ctxt);
5882
1.91k
  if (name == NULL) {
5883
200
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5884
200
         "Name expected in NOTATION declaration\n");
5885
200
            xmlFreeEnumeration(ret);
5886
200
      return(NULL);
5887
200
  }
5888
1.71k
  tmp = ret;
5889
3.23k
  while (tmp != NULL) {
5890
2.21k
      if (xmlStrEqual(name, tmp->name)) {
5891
701
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5892
701
    "standalone: attribute notation value token %s duplicated\n",
5893
701
         name, NULL);
5894
701
    if (!xmlDictOwns(ctxt->dict, name))
5895
0
        xmlFree((xmlChar *) name);
5896
701
    break;
5897
701
      }
5898
1.51k
      tmp = tmp->next;
5899
1.51k
  }
5900
1.71k
  if (tmp == NULL) {
5901
1.01k
      cur = xmlCreateEnumeration(name);
5902
1.01k
      if (cur == NULL) {
5903
0
                xmlErrMemory(ctxt);
5904
0
                xmlFreeEnumeration(ret);
5905
0
                return(NULL);
5906
0
            }
5907
1.01k
      if (last == NULL) ret = last = cur;
5908
346
      else {
5909
346
    last->next = cur;
5910
346
    last = cur;
5911
346
      }
5912
1.01k
  }
5913
1.71k
  SKIP_BLANKS_PE;
5914
1.71k
    } while (RAW == '|');
5915
663
    if (RAW != ')') {
5916
382
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5917
382
        xmlFreeEnumeration(ret);
5918
382
  return(NULL);
5919
382
    }
5920
281
    NEXT;
5921
281
    return(ret);
5922
663
}
5923
5924
/**
5925
 * parse an Enumeration attribute type.
5926
 *
5927
 * @deprecated Internal function, don't use.
5928
 *
5929
 *     [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5930
 *
5931
 * [ VC: Enumeration ]
5932
 * Values of this type must match one of the Nmtoken tokens in
5933
 * the declaration
5934
 *
5935
 * @param ctxt  an XML parser context
5936
 * @returns the enumeration attribute tree built while parsing
5937
 */
5938
5939
xmlEnumerationPtr
5940
8.93k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5941
8.93k
    xmlChar *name;
5942
8.93k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5943
5944
8.93k
    if (RAW != '(') {
5945
817
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5946
817
  return(NULL);
5947
817
    }
5948
9.31k
    do {
5949
9.31k
        NEXT;
5950
9.31k
  SKIP_BLANKS_PE;
5951
9.31k
        name = xmlParseNmtoken(ctxt);
5952
9.31k
  if (name == NULL) {
5953
306
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5954
306
      return(ret);
5955
306
  }
5956
9.00k
  tmp = ret;
5957
11.0k
  while (tmp != NULL) {
5958
2.64k
      if (xmlStrEqual(name, tmp->name)) {
5959
635
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5960
635
    "standalone: attribute enumeration value token %s duplicated\n",
5961
635
         name, NULL);
5962
635
    if (!xmlDictOwns(ctxt->dict, name))
5963
635
        xmlFree(name);
5964
635
    break;
5965
635
      }
5966
2.00k
      tmp = tmp->next;
5967
2.00k
  }
5968
9.00k
  if (tmp == NULL) {
5969
8.37k
      cur = xmlCreateEnumeration(name);
5970
8.37k
      if (!xmlDictOwns(ctxt->dict, name))
5971
8.37k
    xmlFree(name);
5972
8.37k
      if (cur == NULL) {
5973
0
                xmlErrMemory(ctxt);
5974
0
                xmlFreeEnumeration(ret);
5975
0
                return(NULL);
5976
0
            }
5977
8.37k
      if (last == NULL) ret = last = cur;
5978
528
      else {
5979
528
    last->next = cur;
5980
528
    last = cur;
5981
528
      }
5982
8.37k
  }
5983
9.00k
  SKIP_BLANKS_PE;
5984
9.00k
    } while (RAW == '|');
5985
7.81k
    if (RAW != ')') {
5986
1.01k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5987
1.01k
  return(ret);
5988
1.01k
    }
5989
6.79k
    NEXT;
5990
6.79k
    return(ret);
5991
7.81k
}
5992
5993
/**
5994
 * parse an Enumerated attribute type.
5995
 *
5996
 * @deprecated Internal function, don't use.
5997
 *
5998
 *     [57] EnumeratedType ::= NotationType | Enumeration
5999
 *
6000
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6001
 *
6002
 * @param ctxt  an XML parser context
6003
 * @param tree  the enumeration tree built while parsing
6004
 * @returns XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6005
 */
6006
6007
int
6008
10.2k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6009
10.2k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6010
1.26k
  SKIP(8);
6011
1.26k
  if (SKIP_BLANKS_PE == 0) {
6012
195
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6013
195
         "Space required after 'NOTATION'\n");
6014
195
      return(0);
6015
195
  }
6016
1.07k
  *tree = xmlParseNotationType(ctxt);
6017
1.07k
  if (*tree == NULL) return(0);
6018
281
  return(XML_ATTRIBUTE_NOTATION);
6019
1.07k
    }
6020
8.93k
    *tree = xmlParseEnumerationType(ctxt);
6021
8.93k
    if (*tree == NULL) return(0);
6022
7.84k
    return(XML_ATTRIBUTE_ENUMERATION);
6023
8.93k
}
6024
6025
/**
6026
 * parse the Attribute list def for an element
6027
 *
6028
 * @deprecated Internal function, don't use.
6029
 *
6030
 *     [54] AttType ::= StringType | TokenizedType | EnumeratedType
6031
 *
6032
 *     [55] StringType ::= 'CDATA'
6033
 *
6034
 *     [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6035
 *                            'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6036
 *
6037
 * Validity constraints for attribute values syntax are checked in
6038
 * xmlValidateAttributeValue()
6039
 *
6040
 * [ VC: ID ]
6041
 * Values of type ID must match the Name production. A name must not
6042
 * appear more than once in an XML document as a value of this type;
6043
 * i.e., ID values must uniquely identify the elements which bear them.
6044
 *
6045
 * [ VC: One ID per Element Type ]
6046
 * No element type may have more than one ID attribute specified.
6047
 *
6048
 * [ VC: ID Attribute Default ]
6049
 * An ID attribute must have a declared default of \#IMPLIED or \#REQUIRED.
6050
 *
6051
 * [ VC: IDREF ]
6052
 * Values of type IDREF must match the Name production, and values
6053
 * of type IDREFS must match Names; each IDREF Name must match the value
6054
 * of an ID attribute on some element in the XML document; i.e. IDREF
6055
 * values must match the value of some ID attribute.
6056
 *
6057
 * [ VC: Entity Name ]
6058
 * Values of type ENTITY must match the Name production, values
6059
 * of type ENTITIES must match Names; each Entity Name must match the
6060
 * name of an unparsed entity declared in the DTD.
6061
 *
6062
 * [ VC: Name Token ]
6063
 * Values of type NMTOKEN must match the Nmtoken production; values
6064
 * of type NMTOKENS must match Nmtokens.
6065
 *
6066
 * @param ctxt  an XML parser context
6067
 * @param tree  the enumeration tree built while parsing
6068
 * @returns the attribute type
6069
 */
6070
int
6071
41.6k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6072
41.6k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6073
2.44k
  SKIP(5);
6074
2.44k
  return(XML_ATTRIBUTE_CDATA);
6075
39.2k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6076
2.38k
  SKIP(6);
6077
2.38k
  return(XML_ATTRIBUTE_IDREFS);
6078
36.8k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6079
707
  SKIP(5);
6080
707
  return(XML_ATTRIBUTE_IDREF);
6081
36.1k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6082
21.8k
        SKIP(2);
6083
21.8k
  return(XML_ATTRIBUTE_ID);
6084
21.8k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6085
718
  SKIP(6);
6086
718
  return(XML_ATTRIBUTE_ENTITY);
6087
13.5k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6088
1.56k
  SKIP(8);
6089
1.56k
  return(XML_ATTRIBUTE_ENTITIES);
6090
12.0k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6091
587
  SKIP(8);
6092
587
  return(XML_ATTRIBUTE_NMTOKENS);
6093
11.4k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6094
1.21k
  SKIP(7);
6095
1.21k
  return(XML_ATTRIBUTE_NMTOKEN);
6096
1.21k
     }
6097
10.2k
     return(xmlParseEnumeratedType(ctxt, tree));
6098
41.6k
}
6099
6100
/**
6101
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6102
 *
6103
 * @deprecated Internal function, don't use.
6104
 *
6105
 *     [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6106
 *
6107
 *     [53] AttDef ::= S Name S AttType S DefaultDecl
6108
 * @param ctxt  an XML parser context
6109
 */
6110
void
6111
30.0k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6112
30.0k
    const xmlChar *elemName;
6113
30.0k
    const xmlChar *attrName;
6114
30.0k
    xmlEnumerationPtr tree;
6115
6116
30.0k
    if ((CUR != '<') || (NXT(1) != '!'))
6117
0
        return;
6118
30.0k
    SKIP(2);
6119
6120
30.0k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6121
29.9k
  int inputid = ctxt->input->id;
6122
6123
29.9k
  SKIP(7);
6124
29.9k
  if (SKIP_BLANKS_PE == 0) {
6125
28.4k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6126
28.4k
                     "Space required after '<!ATTLIST'\n");
6127
28.4k
  }
6128
29.9k
        elemName = xmlParseName(ctxt);
6129
29.9k
  if (elemName == NULL) {
6130
535
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6131
535
         "ATTLIST: no name for Element\n");
6132
535
      return;
6133
535
  }
6134
29.4k
  SKIP_BLANKS_PE;
6135
29.4k
  GROW;
6136
64.4k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6137
57.9k
      int type;
6138
57.9k
      int def;
6139
57.9k
      xmlChar *defaultValue = NULL;
6140
6141
57.9k
      GROW;
6142
57.9k
            tree = NULL;
6143
57.9k
      attrName = xmlParseName(ctxt);
6144
57.9k
      if (attrName == NULL) {
6145
15.6k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6146
15.6k
             "ATTLIST: no name for Attribute\n");
6147
15.6k
    break;
6148
15.6k
      }
6149
42.3k
      GROW;
6150
42.3k
      if (SKIP_BLANKS_PE == 0) {
6151
622
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6152
622
            "Space required after the attribute name\n");
6153
622
    break;
6154
622
      }
6155
6156
41.6k
      type = xmlParseAttributeType(ctxt, &tree);
6157
41.6k
      if (type <= 0) {
6158
2.07k
          break;
6159
2.07k
      }
6160
6161
39.6k
      GROW;
6162
39.6k
      if (SKIP_BLANKS_PE == 0) {
6163
1.71k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6164
1.71k
             "Space required after the attribute type\n");
6165
1.71k
          if (tree != NULL)
6166
1.05k
        xmlFreeEnumeration(tree);
6167
1.71k
    break;
6168
1.71k
      }
6169
6170
37.9k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6171
37.9k
      if (def <= 0) {
6172
0
                if (defaultValue != NULL)
6173
0
        xmlFree(defaultValue);
6174
0
          if (tree != NULL)
6175
0
        xmlFreeEnumeration(tree);
6176
0
          break;
6177
0
      }
6178
37.9k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6179
22.3k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6180
6181
37.9k
      GROW;
6182
37.9k
            if (RAW != '>') {
6183
31.9k
    if (SKIP_BLANKS_PE == 0) {
6184
2.86k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6185
2.86k
      "Space required after the attribute default value\n");
6186
2.86k
        if (defaultValue != NULL)
6187
230
      xmlFree(defaultValue);
6188
2.86k
        if (tree != NULL)
6189
460
      xmlFreeEnumeration(tree);
6190
2.86k
        break;
6191
2.86k
    }
6192
31.9k
      }
6193
35.0k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6194
35.0k
    (ctxt->sax->attributeDecl != NULL))
6195
32.8k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6196
32.8k
                          type, def, defaultValue, tree);
6197
2.17k
      else if (tree != NULL)
6198
128
    xmlFreeEnumeration(tree);
6199
6200
35.0k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6201
35.0k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6202
35.0k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6203
21.1k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6204
21.1k
      }
6205
35.0k
      if (ctxt->sax2) {
6206
31.8k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6207
31.8k
      }
6208
35.0k
      if (defaultValue != NULL)
6209
24.1k
          xmlFree(defaultValue);
6210
35.0k
      GROW;
6211
35.0k
  }
6212
29.4k
  if (RAW == '>') {
6213
9.04k
      if (inputid != ctxt->input->id) {
6214
197
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6215
197
                               "Attribute list declaration doesn't start and"
6216
197
                               " stop in the same entity\n");
6217
197
      }
6218
9.04k
      NEXT;
6219
9.04k
  }
6220
29.4k
    }
6221
30.0k
}
6222
6223
/**
6224
 * parse the declaration for a Mixed Element content
6225
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl()
6226
 *
6227
 * @deprecated Internal function, don't use.
6228
 *
6229
 *     [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6230
 *                    '(' S? '#PCDATA' S? ')'
6231
 *
6232
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6233
 *
6234
 * [ VC: No Duplicate Types ]
6235
 * The same name must not appear more than once in a single
6236
 * mixed-content declaration.
6237
 *
6238
 * @param ctxt  an XML parser context
6239
 * @param inputchk  the input used for the current entity, needed for boundary checks
6240
 * @returns the list of the xmlElementContentPtr describing the element choices
6241
 */
6242
xmlElementContentPtr
6243
2.00k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6244
2.00k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6245
2.00k
    const xmlChar *elem = NULL;
6246
6247
2.00k
    GROW;
6248
2.00k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6249
2.00k
  SKIP(7);
6250
2.00k
  SKIP_BLANKS_PE;
6251
2.00k
  if (RAW == ')') {
6252
515
      if (ctxt->input->id != inputchk) {
6253
66
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6254
66
                               "Element content declaration doesn't start and"
6255
66
                               " stop in the same entity\n");
6256
66
      }
6257
515
      NEXT;
6258
515
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6259
515
      if (ret == NULL)
6260
0
                goto mem_error;
6261
515
      if (RAW == '*') {
6262
219
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6263
219
    NEXT;
6264
219
      }
6265
515
      return(ret);
6266
515
  }
6267
1.49k
  if ((RAW == '(') || (RAW == '|')) {
6268
1.12k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6269
1.12k
      if (ret == NULL)
6270
0
                goto mem_error;
6271
1.12k
  }
6272
2.57k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6273
1.41k
      NEXT;
6274
1.41k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6275
1.41k
            if (n == NULL)
6276
0
                goto mem_error;
6277
1.41k
      if (elem == NULL) {
6278
1.11k
    n->c1 = cur;
6279
1.11k
    if (cur != NULL)
6280
1.11k
        cur->parent = n;
6281
1.11k
    ret = cur = n;
6282
1.11k
      } else {
6283
295
          cur->c2 = n;
6284
295
    n->parent = cur;
6285
295
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6286
295
                if (n->c1 == NULL)
6287
0
                    goto mem_error;
6288
295
    n->c1->parent = n;
6289
295
    cur = n;
6290
295
      }
6291
1.41k
      SKIP_BLANKS_PE;
6292
1.41k
      elem = xmlParseName(ctxt);
6293
1.41k
      if (elem == NULL) {
6294
329
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6295
329
      "xmlParseElementMixedContentDecl : Name expected\n");
6296
329
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6297
329
    return(NULL);
6298
329
      }
6299
1.08k
      SKIP_BLANKS_PE;
6300
1.08k
      GROW;
6301
1.08k
  }
6302
1.16k
  if ((RAW == ')') && (NXT(1) == '*')) {
6303
570
      if (elem != NULL) {
6304
570
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6305
570
                                   XML_ELEMENT_CONTENT_ELEMENT);
6306
570
    if (cur->c2 == NULL)
6307
0
                    goto mem_error;
6308
570
    cur->c2->parent = cur;
6309
570
            }
6310
570
            if (ret != NULL)
6311
570
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6312
570
      if (ctxt->input->id != inputchk) {
6313
10
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6314
10
                               "Element content declaration doesn't start and"
6315
10
                               " stop in the same entity\n");
6316
10
      }
6317
570
      SKIP(2);
6318
593
  } else {
6319
593
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6320
593
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6321
593
      return(NULL);
6322
593
  }
6323
6324
1.16k
    } else {
6325
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6326
0
    }
6327
570
    return(ret);
6328
6329
0
mem_error:
6330
0
    xmlErrMemory(ctxt);
6331
0
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6332
0
    return(NULL);
6333
2.00k
}
6334
6335
/**
6336
 * parse the declaration for a Mixed Element content
6337
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl()
6338
 *
6339
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6340
 *
6341
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6342
 *
6343
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6344
 *
6345
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6346
 *
6347
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6348
 * TODO Parameter-entity replacement text must be properly nested
6349
 *  with parenthesized groups. That is to say, if either of the
6350
 *  opening or closing parentheses in a choice, seq, or Mixed
6351
 *  construct is contained in the replacement text for a parameter
6352
 *  entity, both must be contained in the same replacement text. For
6353
 *  interoperability, if a parameter-entity reference appears in a
6354
 *  choice, seq, or Mixed construct, its replacement text should not
6355
 *  be empty, and neither the first nor last non-blank character of
6356
 *  the replacement text should be a connector (| or ,).
6357
 *
6358
 * @param ctxt  an XML parser context
6359
 * @param inputchk  the input used for the current entity, needed for boundary checks
6360
 * @param depth  the level of recursion
6361
 * @returns the tree of xmlElementContentPtr describing the element
6362
 *          hierarchy.
6363
 */
6364
static xmlElementContentPtr
6365
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6366
12.8k
                                       int depth) {
6367
12.8k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6368
12.8k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6369
12.8k
    const xmlChar *elem;
6370
12.8k
    xmlChar type = 0;
6371
6372
12.8k
    if (depth > maxDepth) {
6373
1
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6374
1
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6375
1
                "use XML_PARSE_HUGE\n", depth);
6376
1
  return(NULL);
6377
1
    }
6378
12.8k
    SKIP_BLANKS_PE;
6379
12.8k
    GROW;
6380
12.8k
    if (RAW == '(') {
6381
4.22k
  int inputid = ctxt->input->id;
6382
6383
        /* Recurse on first child */
6384
4.22k
  NEXT;
6385
4.22k
  SKIP_BLANKS_PE;
6386
4.22k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6387
4.22k
                                                           depth + 1);
6388
4.22k
        if (cur == NULL)
6389
3.16k
            return(NULL);
6390
1.06k
  SKIP_BLANKS_PE;
6391
1.06k
  GROW;
6392
8.59k
    } else {
6393
8.59k
  elem = xmlParseName(ctxt);
6394
8.59k
  if (elem == NULL) {
6395
727
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6396
727
      return(NULL);
6397
727
  }
6398
7.87k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6399
7.87k
  if (cur == NULL) {
6400
0
      xmlErrMemory(ctxt);
6401
0
      return(NULL);
6402
0
  }
6403
7.87k
  GROW;
6404
7.87k
  if (RAW == '?') {
6405
1.21k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6406
1.21k
      NEXT;
6407
6.65k
  } else if (RAW == '*') {
6408
1.31k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6409
1.31k
      NEXT;
6410
5.34k
  } else if (RAW == '+') {
6411
504
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6412
504
      NEXT;
6413
4.83k
  } else {
6414
4.83k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6415
4.83k
  }
6416
7.87k
  GROW;
6417
7.87k
    }
6418
8.93k
    SKIP_BLANKS_PE;
6419
16.5k
    while ((RAW != ')') && (PARSER_STOPPED(ctxt) == 0)) {
6420
        /*
6421
   * Each loop we parse one separator and one element.
6422
   */
6423
10.9k
        if (RAW == ',') {
6424
2.22k
      if (type == 0) type = CUR;
6425
6426
      /*
6427
       * Detect "Name | Name , Name" error
6428
       */
6429
1.56k
      else if (type != CUR) {
6430
1
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6431
1
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6432
1
                      type);
6433
1
    if ((last != NULL) && (last != ret))
6434
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6435
1
    if (ret != NULL)
6436
1
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6437
1
    return(NULL);
6438
1
      }
6439
2.22k
      NEXT;
6440
6441
2.22k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6442
2.22k
      if (op == NULL) {
6443
0
                xmlErrMemory(ctxt);
6444
0
    if ((last != NULL) && (last != ret))
6445
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6446
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6447
0
    return(NULL);
6448
0
      }
6449
2.22k
      if (last == NULL) {
6450
656
    op->c1 = ret;
6451
656
    if (ret != NULL)
6452
656
        ret->parent = op;
6453
656
    ret = cur = op;
6454
1.56k
      } else {
6455
1.56k
          cur->c2 = op;
6456
1.56k
    if (op != NULL)
6457
1.56k
        op->parent = cur;
6458
1.56k
    op->c1 = last;
6459
1.56k
    if (last != NULL)
6460
1.56k
        last->parent = op;
6461
1.56k
    cur =op;
6462
1.56k
    last = NULL;
6463
1.56k
      }
6464
8.75k
  } else if (RAW == '|') {
6465
6.77k
      if (type == 0) type = CUR;
6466
6467
      /*
6468
       * Detect "Name , Name | Name" error
6469
       */
6470
2.64k
      else if (type != CUR) {
6471
1
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6472
1
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6473
1
          type);
6474
1
    if ((last != NULL) && (last != ret))
6475
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6476
1
    if (ret != NULL)
6477
1
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6478
1
    return(NULL);
6479
1
      }
6480
6.76k
      NEXT;
6481
6482
6.76k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6483
6.76k
      if (op == NULL) {
6484
0
                xmlErrMemory(ctxt);
6485
0
    if ((last != NULL) && (last != ret))
6486
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6487
0
    if (ret != NULL)
6488
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
0
    return(NULL);
6490
0
      }
6491
6.76k
      if (last == NULL) {
6492
4.12k
    op->c1 = ret;
6493
4.12k
    if (ret != NULL)
6494
4.12k
        ret->parent = op;
6495
4.12k
    ret = cur = op;
6496
4.12k
      } else {
6497
2.64k
          cur->c2 = op;
6498
2.64k
    if (op != NULL)
6499
2.64k
        op->parent = cur;
6500
2.64k
    op->c1 = last;
6501
2.64k
    if (last != NULL)
6502
2.64k
        last->parent = op;
6503
2.64k
    cur =op;
6504
2.64k
    last = NULL;
6505
2.64k
      }
6506
6.76k
  } else {
6507
1.98k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6508
1.98k
      if ((last != NULL) && (last != ret))
6509
1.14k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6510
1.98k
      if (ret != NULL)
6511
1.98k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6512
1.98k
      return(NULL);
6513
1.98k
  }
6514
8.99k
  GROW;
6515
8.99k
  SKIP_BLANKS_PE;
6516
8.99k
  GROW;
6517
8.99k
  if (RAW == '(') {
6518
2.19k
      int inputid = ctxt->input->id;
6519
      /* Recurse on second child */
6520
2.19k
      NEXT;
6521
2.19k
      SKIP_BLANKS_PE;
6522
2.19k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6523
2.19k
                                                          depth + 1);
6524
2.19k
            if (last == NULL) {
6525
1.07k
    if (ret != NULL)
6526
1.07k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6527
1.07k
    return(NULL);
6528
1.07k
            }
6529
1.11k
      SKIP_BLANKS_PE;
6530
6.79k
  } else {
6531
6.79k
      elem = xmlParseName(ctxt);
6532
6.79k
      if (elem == NULL) {
6533
335
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6534
335
    if (ret != NULL)
6535
335
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6536
335
    return(NULL);
6537
335
      }
6538
6.46k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6539
6.46k
      if (last == NULL) {
6540
0
                xmlErrMemory(ctxt);
6541
0
    if (ret != NULL)
6542
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6543
0
    return(NULL);
6544
0
      }
6545
6.46k
      if (RAW == '?') {
6546
995
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6547
995
    NEXT;
6548
5.46k
      } else if (RAW == '*') {
6549
1.56k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6550
1.56k
    NEXT;
6551
3.90k
      } else if (RAW == '+') {
6552
201
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6553
201
    NEXT;
6554
3.70k
      } else {
6555
3.70k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6556
3.70k
      }
6557
6.46k
  }
6558
7.57k
  SKIP_BLANKS_PE;
6559
7.57k
  GROW;
6560
7.57k
    }
6561
5.52k
    if ((cur != NULL) && (last != NULL)) {
6562
2.21k
        cur->c2 = last;
6563
2.21k
  if (last != NULL)
6564
2.21k
      last->parent = cur;
6565
2.21k
    }
6566
5.52k
    if (ctxt->input->id != inputchk) {
6567
66
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6568
66
                       "Element content declaration doesn't start and stop in"
6569
66
                       " the same entity\n");
6570
66
    }
6571
5.52k
    NEXT;
6572
5.52k
    if (RAW == '?') {
6573
822
  if (ret != NULL) {
6574
822
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6575
822
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6576
525
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6577
297
      else
6578
297
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6579
822
  }
6580
822
  NEXT;
6581
4.70k
    } else if (RAW == '*') {
6582
1.12k
  if (ret != NULL) {
6583
1.12k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6584
1.12k
      cur = ret;
6585
      /*
6586
       * Some normalization:
6587
       * (a | b* | c?)* == (a | b | c)*
6588
       */
6589
2.79k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6590
1.67k
    if ((cur->c1 != NULL) &&
6591
1.67k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6592
1.67k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6593
578
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6594
1.67k
    if ((cur->c2 != NULL) &&
6595
1.67k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6596
1.67k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6597
811
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6598
1.67k
    cur = cur->c2;
6599
1.67k
      }
6600
1.12k
  }
6601
1.12k
  NEXT;
6602
3.58k
    } else if (RAW == '+') {
6603
2.11k
  if (ret != NULL) {
6604
2.11k
      int found = 0;
6605
6606
2.11k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6607
2.11k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6608
637
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6609
1.47k
      else
6610
1.47k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6611
      /*
6612
       * Some normalization:
6613
       * (a | b*)+ == (a | b)*
6614
       * (a | b?)+ == (a | b)*
6615
       */
6616
3.71k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6617
1.60k
    if ((cur->c1 != NULL) &&
6618
1.60k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6619
1.60k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6620
701
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6621
701
        found = 1;
6622
701
    }
6623
1.60k
    if ((cur->c2 != NULL) &&
6624
1.60k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6625
1.60k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6626
755
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6627
755
        found = 1;
6628
755
    }
6629
1.60k
    cur = cur->c2;
6630
1.60k
      }
6631
2.11k
      if (found)
6632
885
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6633
2.11k
  }
6634
2.11k
  NEXT;
6635
2.11k
    }
6636
5.52k
    return(ret);
6637
8.93k
}
6638
6639
/**
6640
 * parse the declaration for a Mixed Element content
6641
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl()
6642
 *
6643
 * @deprecated Internal function, don't use.
6644
 *
6645
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6646
 *
6647
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6648
 *
6649
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6650
 *
6651
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6652
 *
6653
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6654
 * TODO Parameter-entity replacement text must be properly nested
6655
 *  with parenthesized groups. That is to say, if either of the
6656
 *  opening or closing parentheses in a choice, seq, or Mixed
6657
 *  construct is contained in the replacement text for a parameter
6658
 *  entity, both must be contained in the same replacement text. For
6659
 *  interoperability, if a parameter-entity reference appears in a
6660
 *  choice, seq, or Mixed construct, its replacement text should not
6661
 *  be empty, and neither the first nor last non-blank character of
6662
 *  the replacement text should be a connector (| or ,).
6663
 *
6664
 * @param ctxt  an XML parser context
6665
 * @param inputchk  the input used for the current entity, needed for boundary checks
6666
 * @returns the tree of xmlElementContentPtr describing the element
6667
 *          hierarchy.
6668
 */
6669
xmlElementContentPtr
6670
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6671
    /* stub left for API/ABI compat */
6672
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6673
0
}
6674
6675
/**
6676
 * parse the declaration for an Element content either Mixed or Children,
6677
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl()
6678
 *
6679
 * @deprecated Internal function, don't use.
6680
 *
6681
 *     [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6682
 *
6683
 * @param ctxt  an XML parser context
6684
 * @param name  the name of the element being defined.
6685
 * @param result  the Element Content pointer will be stored here if any
6686
 * @returns the type of element content XML_ELEMENT_TYPE_xxx
6687
 */
6688
6689
int
6690
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6691
8.41k
                           xmlElementContentPtr *result) {
6692
6693
8.41k
    xmlElementContentPtr tree = NULL;
6694
8.41k
    int inputid = ctxt->input->id;
6695
8.41k
    int res;
6696
6697
8.41k
    *result = NULL;
6698
6699
8.41k
    if (RAW != '(') {
6700
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6701
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6702
0
  return(-1);
6703
0
    }
6704
8.41k
    NEXT;
6705
8.41k
    GROW;
6706
8.41k
    SKIP_BLANKS_PE;
6707
8.41k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6708
2.00k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6709
2.00k
  res = XML_ELEMENT_TYPE_MIXED;
6710
6.40k
    } else {
6711
6.40k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6712
6.40k
  res = XML_ELEMENT_TYPE_ELEMENT;
6713
6.40k
    }
6714
8.41k
    SKIP_BLANKS_PE;
6715
8.41k
    *result = tree;
6716
8.41k
    return(res);
6717
8.41k
}
6718
6719
/**
6720
 * Parse an element declaration. Always consumes '<!'.
6721
 *
6722
 * @deprecated Internal function, don't use.
6723
 *
6724
 *     [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6725
 *
6726
 * [ VC: Unique Element Type Declaration ]
6727
 * No element type may be declared more than once
6728
 *
6729
 * @param ctxt  an XML parser context
6730
 * @returns the type of the element, or -1 in case of error
6731
 */
6732
int
6733
10.0k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6734
10.0k
    const xmlChar *name;
6735
10.0k
    int ret = -1;
6736
10.0k
    xmlElementContentPtr content  = NULL;
6737
6738
10.0k
    if ((CUR != '<') || (NXT(1) != '!'))
6739
0
        return(ret);
6740
10.0k
    SKIP(2);
6741
6742
    /* GROW; done in the caller */
6743
10.0k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6744
9.99k
  int inputid = ctxt->input->id;
6745
6746
9.99k
  SKIP(7);
6747
9.99k
  if (SKIP_BLANKS_PE == 0) {
6748
279
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6749
279
               "Space required after 'ELEMENT'\n");
6750
279
      return(-1);
6751
279
  }
6752
9.71k
        name = xmlParseName(ctxt);
6753
9.71k
  if (name == NULL) {
6754
243
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6755
243
         "xmlParseElementDecl: no name for Element\n");
6756
243
      return(-1);
6757
243
  }
6758
9.47k
  if (SKIP_BLANKS_PE == 0) {
6759
8.53k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6760
8.53k
         "Space required after the element name\n");
6761
8.53k
  }
6762
9.47k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6763
401
      SKIP(5);
6764
      /*
6765
       * Element must always be empty.
6766
       */
6767
401
      ret = XML_ELEMENT_TYPE_EMPTY;
6768
9.07k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6769
9.07k
             (NXT(2) == 'Y')) {
6770
430
      SKIP(3);
6771
      /*
6772
       * Element is a generic container.
6773
       */
6774
430
      ret = XML_ELEMENT_TYPE_ANY;
6775
8.64k
  } else if (RAW == '(') {
6776
8.41k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6777
8.41k
  } else {
6778
      /*
6779
       * [ WFC: PEs in Internal Subset ] error handling.
6780
       */
6781
233
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6782
233
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6783
233
      return(-1);
6784
233
  }
6785
6786
9.24k
  SKIP_BLANKS_PE;
6787
6788
9.24k
  if (RAW != '>') {
6789
4.21k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6790
4.21k
      if (content != NULL) {
6791
1.64k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6792
1.64k
      }
6793
5.02k
  } else {
6794
5.02k
      if (inputid != ctxt->input->id) {
6795
192
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6796
192
                               "Element declaration doesn't start and stop in"
6797
192
                               " the same entity\n");
6798
192
      }
6799
6800
5.02k
      NEXT;
6801
5.02k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6802
5.02k
    (ctxt->sax->elementDecl != NULL)) {
6803
4.60k
    if (content != NULL)
6804
2.61k
        content->parent = NULL;
6805
4.60k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6806
4.60k
                           content);
6807
4.60k
    if ((content != NULL) && (content->parent == NULL)) {
6808
        /*
6809
         * this is a trick: if xmlAddElementDecl is called,
6810
         * instead of copying the full tree it is plugged directly
6811
         * if called from the parser. Avoid duplicating the
6812
         * interfaces or change the API/ABI
6813
         */
6814
2.31k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6815
2.31k
    }
6816
4.60k
      } else if (content != NULL) {
6817
174
    xmlFreeDocElementContent(ctxt->myDoc, content);
6818
174
      }
6819
5.02k
  }
6820
9.24k
    }
6821
9.25k
    return(ret);
6822
10.0k
}
6823
6824
/**
6825
 * Parse a conditional section. Always consumes '<!['.
6826
 *
6827
 *     [61] conditionalSect ::= includeSect | ignoreSect
6828
 *     [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6829
 *     [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6830
 *     [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>'
6831
 *                                 Ignore)*
6832
 *     [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6833
 * @param ctxt  an XML parser context
6834
 */
6835
6836
static void
6837
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6838
0
    int *inputIds = NULL;
6839
0
    size_t inputIdsSize = 0;
6840
0
    size_t depth = 0;
6841
6842
0
    while (PARSER_STOPPED(ctxt) == 0) {
6843
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6844
0
            int id = ctxt->input->id;
6845
6846
0
            SKIP(3);
6847
0
            SKIP_BLANKS_PE;
6848
6849
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6850
0
                SKIP(7);
6851
0
                SKIP_BLANKS_PE;
6852
0
                if (RAW != '[') {
6853
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6854
0
                    xmlHaltParser(ctxt);
6855
0
                    goto error;
6856
0
                }
6857
0
                if (ctxt->input->id != id) {
6858
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6859
0
                                   "All markup of the conditional section is"
6860
0
                                   " not in the same entity\n");
6861
0
                }
6862
0
                NEXT;
6863
6864
0
                if (inputIdsSize <= depth) {
6865
0
                    int *tmp;
6866
0
                    int newSize;
6867
6868
0
                    newSize = xmlGrowCapacity(inputIdsSize, sizeof(tmp[0]),
6869
0
                                              4, 1000);
6870
0
                    if (newSize < 0) {
6871
0
                        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
6872
0
                                       "Maximum conditional section nesting"
6873
0
                                       " depth exceeded\n");
6874
0
                        goto error;
6875
0
                    }
6876
0
                    tmp = xmlRealloc(inputIds, newSize * sizeof(tmp[0]));
6877
0
                    if (tmp == NULL) {
6878
0
                        xmlErrMemory(ctxt);
6879
0
                        goto error;
6880
0
                    }
6881
0
                    inputIds = tmp;
6882
0
                    inputIdsSize = newSize;
6883
0
                }
6884
0
                inputIds[depth] = id;
6885
0
                depth++;
6886
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6887
0
                size_t ignoreDepth = 0;
6888
6889
0
                SKIP(6);
6890
0
                SKIP_BLANKS_PE;
6891
0
                if (RAW != '[') {
6892
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6893
0
                    xmlHaltParser(ctxt);
6894
0
                    goto error;
6895
0
                }
6896
0
                if (ctxt->input->id != id) {
6897
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6898
0
                                   "All markup of the conditional section is"
6899
0
                                   " not in the same entity\n");
6900
0
                }
6901
0
                NEXT;
6902
6903
0
                while (PARSER_STOPPED(ctxt) == 0) {
6904
0
                    if (RAW == 0) {
6905
0
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6906
0
                        goto error;
6907
0
                    }
6908
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6909
0
                        SKIP(3);
6910
0
                        ignoreDepth++;
6911
                        /* Check for integer overflow */
6912
0
                        if (ignoreDepth == 0) {
6913
0
                            xmlErrMemory(ctxt);
6914
0
                            goto error;
6915
0
                        }
6916
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6917
0
                               (NXT(2) == '>')) {
6918
0
                        SKIP(3);
6919
0
                        if (ignoreDepth == 0)
6920
0
                            break;
6921
0
                        ignoreDepth--;
6922
0
                    } else {
6923
0
                        NEXT;
6924
0
                    }
6925
0
                }
6926
6927
0
                if (ctxt->input->id != id) {
6928
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6929
0
                                   "All markup of the conditional section is"
6930
0
                                   " not in the same entity\n");
6931
0
                }
6932
0
            } else {
6933
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6934
0
                xmlHaltParser(ctxt);
6935
0
                goto error;
6936
0
            }
6937
0
        } else if ((depth > 0) &&
6938
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6939
0
            depth--;
6940
0
            if (ctxt->input->id != inputIds[depth]) {
6941
0
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6942
0
                               "All markup of the conditional section is not"
6943
0
                               " in the same entity\n");
6944
0
            }
6945
0
            SKIP(3);
6946
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6947
0
            xmlParseMarkupDecl(ctxt);
6948
0
        } else {
6949
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6950
0
            xmlHaltParser(ctxt);
6951
0
            goto error;
6952
0
        }
6953
6954
0
        if (depth == 0)
6955
0
            break;
6956
6957
0
        SKIP_BLANKS_PE;
6958
0
        SHRINK;
6959
0
        GROW;
6960
0
    }
6961
6962
0
error:
6963
0
    xmlFree(inputIds);
6964
0
}
6965
6966
/**
6967
 * Parse markup declarations. Always consumes '<!' or '<?'.
6968
 *
6969
 * @deprecated Internal function, don't use.
6970
 *
6971
 *     [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6972
 *                         NotationDecl | PI | Comment
6973
 *
6974
 * [ VC: Proper Declaration/PE Nesting ]
6975
 * Parameter-entity replacement text must be properly nested with
6976
 * markup declarations. That is to say, if either the first character
6977
 * or the last character of a markup declaration (markupdecl above) is
6978
 * contained in the replacement text for a parameter-entity reference,
6979
 * both must be contained in the same replacement text.
6980
 *
6981
 * [ WFC: PEs in Internal Subset ]
6982
 * In the internal DTD subset, parameter-entity references can occur
6983
 * only where markup declarations can occur, not within markup declarations.
6984
 * (This does not apply to references that occur in external parameter
6985
 * entities or to the external subset.)
6986
 *
6987
 * @param ctxt  an XML parser context
6988
 */
6989
void
6990
96.9k
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6991
96.9k
    GROW;
6992
96.9k
    if (CUR == '<') {
6993
96.9k
        if (NXT(1) == '!') {
6994
74.9k
      switch (NXT(2)) {
6995
29.3k
          case 'E':
6996
29.3k
        if (NXT(3) == 'L')
6997
10.0k
      xmlParseElementDecl(ctxt);
6998
19.3k
        else if (NXT(3) == 'N')
6999
19.3k
      xmlParseEntityDecl(ctxt);
7000
11
                    else
7001
11
                        SKIP(2);
7002
29.3k
        break;
7003
30.0k
          case 'A':
7004
30.0k
        xmlParseAttributeListDecl(ctxt);
7005
30.0k
        break;
7006
4.74k
          case 'N':
7007
4.74k
        xmlParseNotationDecl(ctxt);
7008
4.74k
        break;
7009
9.69k
          case '-':
7010
9.69k
        xmlParseComment(ctxt);
7011
9.69k
        break;
7012
1.21k
    default:
7013
1.21k
                    xmlFatalErr(ctxt,
7014
1.21k
                                ctxt->inSubset == 2 ?
7015
0
                                    XML_ERR_EXT_SUBSET_NOT_FINISHED :
7016
1.21k
                                    XML_ERR_INT_SUBSET_NOT_FINISHED,
7017
1.21k
                                NULL);
7018
1.21k
                    SKIP(2);
7019
1.21k
        break;
7020
74.9k
      }
7021
74.9k
  } else if (NXT(1) == '?') {
7022
21.9k
      xmlParsePI(ctxt);
7023
21.9k
  }
7024
96.9k
    }
7025
96.9k
}
7026
7027
/**
7028
 * parse an XML declaration header for external entities
7029
 *
7030
 * @deprecated Internal function, don't use.
7031
 *
7032
 *     [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7033
 * @param ctxt  an XML parser context
7034
 */
7035
7036
void
7037
0
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7038
0
    xmlChar *version;
7039
7040
    /*
7041
     * We know that '<?xml' is here.
7042
     */
7043
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7044
0
  SKIP(5);
7045
0
    } else {
7046
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7047
0
  return;
7048
0
    }
7049
7050
0
    if (SKIP_BLANKS == 0) {
7051
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7052
0
           "Space needed after '<?xml'\n");
7053
0
    }
7054
7055
    /*
7056
     * We may have the VersionInfo here.
7057
     */
7058
0
    version = xmlParseVersionInfo(ctxt);
7059
0
    if (version == NULL) {
7060
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7061
0
        if (version == NULL) {
7062
0
            xmlErrMemory(ctxt);
7063
0
            return;
7064
0
        }
7065
0
    } else {
7066
0
  if (SKIP_BLANKS == 0) {
7067
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7068
0
               "Space needed here\n");
7069
0
  }
7070
0
    }
7071
0
    ctxt->input->version = version;
7072
7073
    /*
7074
     * We must have the encoding declaration
7075
     */
7076
0
    xmlParseEncodingDecl(ctxt);
7077
7078
0
    SKIP_BLANKS;
7079
0
    if ((RAW == '?') && (NXT(1) == '>')) {
7080
0
        SKIP(2);
7081
0
    } else if (RAW == '>') {
7082
        /* Deprecated old WD ... */
7083
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7084
0
  NEXT;
7085
0
    } else {
7086
0
        int c;
7087
7088
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7089
0
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7090
0
            NEXT;
7091
0
            if (c == '>')
7092
0
                break;
7093
0
        }
7094
0
    }
7095
0
}
7096
7097
/**
7098
 * parse Markup declarations from an external subset
7099
 *
7100
 * @deprecated Internal function, don't use.
7101
 *
7102
 *     [30] extSubset ::= textDecl? extSubsetDecl
7103
 *
7104
 *     [31] extSubsetDecl ::= (markupdecl | conditionalSect |
7105
 *                             PEReference | S) *
7106
 * @param ctxt  an XML parser context
7107
 * @param ExternalID  the external identifier
7108
 * @param SystemID  the system identifier (or URL)
7109
 */
7110
void
7111
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7112
0
                       const xmlChar *SystemID) {
7113
0
    int oldInputNr;
7114
7115
0
    xmlCtxtInitializeLate(ctxt);
7116
7117
0
    xmlDetectEncoding(ctxt);
7118
7119
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7120
0
  xmlParseTextDecl(ctxt);
7121
0
    }
7122
0
    if (ctxt->myDoc == NULL) {
7123
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7124
0
  if (ctxt->myDoc == NULL) {
7125
0
      xmlErrMemory(ctxt);
7126
0
      return;
7127
0
  }
7128
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7129
0
    }
7130
0
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) &&
7131
0
        (xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID) == NULL)) {
7132
0
        xmlErrMemory(ctxt);
7133
0
    }
7134
7135
0
    ctxt->inSubset = 2;
7136
0
    oldInputNr = ctxt->inputNr;
7137
7138
0
    SKIP_BLANKS_PE;
7139
0
    while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) &&
7140
0
           (!PARSER_STOPPED(ctxt))) {
7141
0
  GROW;
7142
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7143
0
            xmlParseConditionalSections(ctxt);
7144
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7145
0
            xmlParseMarkupDecl(ctxt);
7146
0
        } else {
7147
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7148
0
            xmlHaltParser(ctxt);
7149
0
            return;
7150
0
        }
7151
0
        SKIP_BLANKS_PE;
7152
0
        SHRINK;
7153
0
    }
7154
7155
0
    while (ctxt->inputNr > oldInputNr)
7156
0
        xmlPopPE(ctxt);
7157
7158
0
    xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
7159
0
}
7160
7161
/**
7162
 * parse and handle entity references in content, depending on the SAX
7163
 * interface, this may end-up in a call to character() if this is a
7164
 * CharRef, a predefined entity, if there is no reference() callback.
7165
 * or if the parser was asked to switch to that mode.
7166
 *
7167
 * @deprecated Internal function, don't use.
7168
 *
7169
 * Always consumes '&'.
7170
 *
7171
 *     [67] Reference ::= EntityRef | CharRef
7172
 * @param ctxt  an XML parser context
7173
 */
7174
void
7175
12.8k
xmlParseReference(xmlParserCtxtPtr ctxt) {
7176
12.8k
    xmlEntityPtr ent = NULL;
7177
12.8k
    const xmlChar *name;
7178
12.8k
    xmlChar *val;
7179
7180
12.8k
    if (RAW != '&')
7181
0
        return;
7182
7183
    /*
7184
     * Simple case of a CharRef
7185
     */
7186
12.8k
    if (NXT(1) == '#') {
7187
4.17k
  int i = 0;
7188
4.17k
  xmlChar out[16];
7189
4.17k
  int value = xmlParseCharRef(ctxt);
7190
7191
4.17k
  if (value == 0)
7192
2.59k
      return;
7193
7194
        /*
7195
         * Just encode the value in UTF-8
7196
         */
7197
1.58k
        COPY_BUF(out, i, value);
7198
1.58k
        out[i] = 0;
7199
1.58k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7200
1.58k
            (!ctxt->disableSAX))
7201
760
            ctxt->sax->characters(ctxt->userData, out, i);
7202
1.58k
  return;
7203
4.17k
    }
7204
7205
    /*
7206
     * We are seeing an entity reference
7207
     */
7208
8.67k
    name = xmlParseEntityRefInternal(ctxt);
7209
8.67k
    if (name == NULL)
7210
2.71k
        return;
7211
5.95k
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7212
5.95k
    if (ent == NULL) {
7213
        /*
7214
         * Create a reference for undeclared entities.
7215
         */
7216
2.45k
        if ((ctxt->replaceEntities == 0) &&
7217
2.45k
            (ctxt->sax != NULL) &&
7218
2.45k
            (ctxt->disableSAX == 0) &&
7219
2.45k
            (ctxt->sax->reference != NULL)) {
7220
1.65k
            ctxt->sax->reference(ctxt->userData, name);
7221
1.65k
        }
7222
2.45k
        return;
7223
2.45k
    }
7224
3.50k
    if (!ctxt->wellFormed)
7225
917
  return;
7226
7227
    /* special case of predefined entities */
7228
2.58k
    if ((ent->name == NULL) ||
7229
2.58k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7230
252
  val = ent->content;
7231
252
  if (val == NULL) return;
7232
  /*
7233
   * inline the entity.
7234
   */
7235
252
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7236
252
      (!ctxt->disableSAX))
7237
252
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7238
252
  return;
7239
252
    }
7240
7241
    /*
7242
     * Some users try to parse entities on their own and used to set
7243
     * the renamed "checked" member. Fix the flags to cover this
7244
     * case.
7245
     */
7246
2.33k
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7247
0
        ent->flags |= XML_ENT_PARSED;
7248
7249
    /*
7250
     * The first reference to the entity trigger a parsing phase
7251
     * where the ent->children is filled with the result from
7252
     * the parsing.
7253
     * Note: external parsed entities will not be loaded, it is not
7254
     * required for a non-validating parser, unless the parsing option
7255
     * of validating, or substituting entities were given. Doing so is
7256
     * far more secure as the parser will only process data coming from
7257
     * the document entity by default.
7258
     *
7259
     * FIXME: This doesn't work correctly since entities can be
7260
     * expanded with different namespace declarations in scope.
7261
     * For example:
7262
     *
7263
     * <!DOCTYPE doc [
7264
     *   <!ENTITY ent "<ns:elem/>">
7265
     * ]>
7266
     * <doc>
7267
     *   <decl1 xmlns:ns="urn:ns1">
7268
     *     &ent;
7269
     *   </decl1>
7270
     *   <decl2 xmlns:ns="urn:ns2">
7271
     *     &ent;
7272
     *   </decl2>
7273
     * </doc>
7274
     *
7275
     * Proposed fix:
7276
     *
7277
     * - Ignore current namespace declarations when parsing the
7278
     *   entity. If a prefix can't be resolved, don't report an error
7279
     *   but mark it as unresolved.
7280
     * - Try to resolve these prefixes when expanding the entity.
7281
     *   This will require a specialized version of xmlStaticCopyNode
7282
     *   which can also make use of the namespace hash table to avoid
7283
     *   quadratic behavior.
7284
     *
7285
     * Alternatively, we could simply reparse the entity on each
7286
     * expansion like we already do with custom SAX callbacks.
7287
     * External entity content should be cached in this case.
7288
     */
7289
2.33k
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7290
2.33k
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7291
388
         ((ctxt->replaceEntities) ||
7292
1.94k
          (ctxt->validate)))) {
7293
1.94k
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7294
570
            xmlCtxtParseEntity(ctxt, ent);
7295
1.37k
        } else if (ent->children == NULL) {
7296
            /*
7297
             * Probably running in SAX mode and the callbacks don't
7298
             * build the entity content. Parse the entity again.
7299
             *
7300
             * This will also be triggered in normal tree builder mode
7301
             * if an entity happens to be empty, causing unnecessary
7302
             * reloads. It's hard to come up with a reliable check in
7303
             * which mode we're running.
7304
             */
7305
472
            xmlCtxtParseEntity(ctxt, ent);
7306
472
        }
7307
1.94k
    }
7308
7309
    /*
7310
     * We also check for amplification if entities aren't substituted.
7311
     * They might be expanded later.
7312
     */
7313
2.33k
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7314
0
        return;
7315
7316
2.33k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7317
190
        return;
7318
7319
2.14k
    if (ctxt->replaceEntities == 0) {
7320
  /*
7321
   * Create a reference
7322
   */
7323
2.14k
        if (ctxt->sax->reference != NULL)
7324
2.14k
      ctxt->sax->reference(ctxt->userData, ent->name);
7325
2.14k
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7326
0
        xmlNodePtr copy, cur;
7327
7328
        /*
7329
         * Seems we are generating the DOM content, copy the tree
7330
   */
7331
0
        cur = ent->children;
7332
7333
        /*
7334
         * Handle first text node with SAX to coalesce text efficiently
7335
         */
7336
0
        if ((cur->type == XML_TEXT_NODE) ||
7337
0
            (cur->type == XML_CDATA_SECTION_NODE)) {
7338
0
            int len = xmlStrlen(cur->content);
7339
7340
0
            if ((cur->type == XML_TEXT_NODE) ||
7341
0
                (ctxt->options & XML_PARSE_NOCDATA)) {
7342
0
                if (ctxt->sax->characters != NULL)
7343
0
                    ctxt->sax->characters(ctxt, cur->content, len);
7344
0
            } else {
7345
0
                if (ctxt->sax->cdataBlock != NULL)
7346
0
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7347
0
            }
7348
7349
0
            cur = cur->next;
7350
0
        }
7351
7352
0
        while (cur != NULL) {
7353
0
            xmlNodePtr last;
7354
7355
            /*
7356
             * Handle last text node with SAX to coalesce text efficiently
7357
             */
7358
0
            if ((cur->next == NULL) &&
7359
0
                ((cur->type == XML_TEXT_NODE) ||
7360
0
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7361
0
                int len = xmlStrlen(cur->content);
7362
7363
0
                if ((cur->type == XML_TEXT_NODE) ||
7364
0
                    (ctxt->options & XML_PARSE_NOCDATA)) {
7365
0
                    if (ctxt->sax->characters != NULL)
7366
0
                        ctxt->sax->characters(ctxt, cur->content, len);
7367
0
                } else {
7368
0
                    if (ctxt->sax->cdataBlock != NULL)
7369
0
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7370
0
                }
7371
7372
0
                break;
7373
0
            }
7374
7375
            /*
7376
             * Reset coalesce buffer stats only for non-text nodes.
7377
             */
7378
0
            ctxt->nodemem = 0;
7379
0
            ctxt->nodelen = 0;
7380
7381
0
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7382
7383
0
            if (copy == NULL) {
7384
0
                xmlErrMemory(ctxt);
7385
0
                break;
7386
0
            }
7387
7388
0
            if (ctxt->parseMode == XML_PARSE_READER) {
7389
                /* Needed for reader */
7390
0
                copy->extra = cur->extra;
7391
                /* Maybe needed for reader */
7392
0
                copy->_private = cur->_private;
7393
0
            }
7394
7395
0
            copy->parent = ctxt->node;
7396
0
            last = ctxt->node->last;
7397
0
            if (last == NULL) {
7398
0
                ctxt->node->children = copy;
7399
0
            } else {
7400
0
                last->next = copy;
7401
0
                copy->prev = last;
7402
0
            }
7403
0
            ctxt->node->last = copy;
7404
7405
0
            cur = cur->next;
7406
0
        }
7407
0
    }
7408
2.14k
}
7409
7410
static void
7411
45.1k
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7412
    /*
7413
     * [ WFC: Entity Declared ]
7414
     * In a document without any DTD, a document with only an
7415
     * internal DTD subset which contains no parameter entity
7416
     * references, or a document with "standalone='yes'", the
7417
     * Name given in the entity reference must match that in an
7418
     * entity declaration, except that well-formed documents
7419
     * need not declare any of the following entities: amp, lt,
7420
     * gt, apos, quot.
7421
     * The declaration of a parameter entity must precede any
7422
     * reference to it.
7423
     * Similarly, the declaration of a general entity must
7424
     * precede any reference to it which appears in a default
7425
     * value in an attribute-list declaration. Note that if
7426
     * entities are declared in the external subset or in
7427
     * external parameter entities, a non-validating processor
7428
     * is not obligated to read and process their declarations;
7429
     * for such documents, the rule that an entity must be
7430
     * declared is a well-formedness constraint only if
7431
     * standalone='yes'.
7432
     */
7433
45.1k
    if ((ctxt->standalone == 1) ||
7434
45.1k
        ((ctxt->hasExternalSubset == 0) &&
7435
44.7k
         (ctxt->hasPErefs == 0))) {
7436
31.4k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7437
31.4k
                          "Entity '%s' not defined\n", name);
7438
31.4k
    } else if (ctxt->validate) {
7439
        /*
7440
         * [ VC: Entity Declared ]
7441
         * In a document with an external subset or external
7442
         * parameter entities with "standalone='no'", ...
7443
         * ... The declaration of a parameter entity must
7444
         * precede any reference to it...
7445
         */
7446
0
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7447
0
                         "Entity '%s' not defined\n", name, NULL);
7448
13.7k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7449
13.7k
               ((ctxt->replaceEntities) &&
7450
13.7k
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7451
        /*
7452
         * Also raise a non-fatal error
7453
         *
7454
         * - if the external subset is loaded and all entity declarations
7455
         *   should be available, or
7456
         * - entity substition was requested without restricting
7457
         *   external entity access.
7458
         */
7459
0
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7460
0
                     "Entity '%s' not defined\n", name);
7461
13.7k
    } else {
7462
13.7k
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7463
13.7k
                      "Entity '%s' not defined\n", name, NULL);
7464
13.7k
    }
7465
7466
45.1k
    ctxt->valid = 0;
7467
45.1k
}
7468
7469
static xmlEntityPtr
7470
657k
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7471
657k
    xmlEntityPtr ent = NULL;
7472
7473
    /*
7474
     * Predefined entities override any extra definition
7475
     */
7476
657k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7477
656k
        ent = xmlGetPredefinedEntity(name);
7478
656k
        if (ent != NULL)
7479
39.3k
            return(ent);
7480
656k
    }
7481
7482
    /*
7483
     * Ask first SAX for entity resolution, otherwise try the
7484
     * entities which may have stored in the parser context.
7485
     */
7486
617k
    if (ctxt->sax != NULL) {
7487
617k
  if (ctxt->sax->getEntity != NULL)
7488
617k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7489
617k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7490
617k
      (ctxt->options & XML_PARSE_OLDSAX))
7491
194
      ent = xmlGetPredefinedEntity(name);
7492
617k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7493
617k
      (ctxt->userData==ctxt)) {
7494
1.75k
      ent = xmlSAX2GetEntity(ctxt, name);
7495
1.75k
  }
7496
617k
    }
7497
7498
617k
    if (ent == NULL) {
7499
43.5k
        xmlHandleUndeclaredEntity(ctxt, name);
7500
43.5k
    }
7501
7502
    /*
7503
     * [ WFC: Parsed Entity ]
7504
     * An entity reference must not contain the name of an
7505
     * unparsed entity
7506
     */
7507
574k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7508
196
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7509
196
     "Entity reference to unparsed entity %s\n", name);
7510
196
        ent = NULL;
7511
196
    }
7512
7513
    /*
7514
     * [ WFC: No External Entity References ]
7515
     * Attribute values cannot contain direct or indirect
7516
     * entity references to external entities.
7517
     */
7518
573k
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7519
1.18k
        if (inAttr) {
7520
782
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7521
782
                 "Attribute references external entity '%s'\n", name);
7522
782
            ent = NULL;
7523
782
        }
7524
1.18k
    }
7525
7526
617k
    return(ent);
7527
657k
}
7528
7529
/**
7530
 * Parse an entity reference. Always consumes '&'.
7531
 *
7532
 *     [68] EntityRef ::= '&' Name ';'
7533
 *
7534
 * @param ctxt  an XML parser context
7535
 * @returns the name, or NULL in case of error.
7536
 */
7537
static const xmlChar *
7538
81.4k
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7539
81.4k
    const xmlChar *name;
7540
7541
81.4k
    GROW;
7542
7543
81.4k
    if (RAW != '&')
7544
0
        return(NULL);
7545
81.4k
    NEXT;
7546
81.4k
    name = xmlParseName(ctxt);
7547
81.4k
    if (name == NULL) {
7548
5.40k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7549
5.40k
           "xmlParseEntityRef: no name\n");
7550
5.40k
        return(NULL);
7551
5.40k
    }
7552
76.0k
    if (RAW != ';') {
7553
2.16k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7554
2.16k
  return(NULL);
7555
2.16k
    }
7556
73.8k
    NEXT;
7557
7558
73.8k
    return(name);
7559
76.0k
}
7560
7561
/**
7562
 * @deprecated Internal function, don't use.
7563
 *
7564
 * @param ctxt  an XML parser context
7565
 * @returns the xmlEntityPtr if found, or NULL otherwise.
7566
 */
7567
xmlEntityPtr
7568
0
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7569
0
    const xmlChar *name;
7570
7571
0
    if (ctxt == NULL)
7572
0
        return(NULL);
7573
7574
0
    name = xmlParseEntityRefInternal(ctxt);
7575
0
    if (name == NULL)
7576
0
        return(NULL);
7577
7578
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7579
0
}
7580
7581
/**
7582
 * parse ENTITY references declarations, but this version parses it from
7583
 * a string value.
7584
 *
7585
 *     [68] EntityRef ::= '&' Name ';'
7586
 *
7587
 * [ WFC: Entity Declared ]
7588
 * In a document without any DTD, a document with only an internal DTD
7589
 * subset which contains no parameter entity references, or a document
7590
 * with "standalone='yes'", the Name given in the entity reference
7591
 * must match that in an entity declaration, except that well-formed
7592
 * documents need not declare any of the following entities: amp, lt,
7593
 * gt, apos, quot.  The declaration of a parameter entity must precede
7594
 * any reference to it.  Similarly, the declaration of a general entity
7595
 * must precede any reference to it which appears in a default value in an
7596
 * attribute-list declaration. Note that if entities are declared in the
7597
 * external subset or in external parameter entities, a non-validating
7598
 * processor is not obligated to read and process their declarations;
7599
 * for such documents, the rule that an entity must be declared is a
7600
 * well-formedness constraint only if standalone='yes'.
7601
 *
7602
 * [ WFC: Parsed Entity ]
7603
 * An entity reference must not contain the name of an unparsed entity
7604
 *
7605
 * @param ctxt  an XML parser context
7606
 * @param str  a pointer to an index in the string
7607
 * @returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7608
 * is updated to the current location in the string.
7609
 */
7610
static xmlChar *
7611
583k
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7612
583k
    xmlChar *name;
7613
583k
    const xmlChar *ptr;
7614
583k
    xmlChar cur;
7615
7616
583k
    if ((str == NULL) || (*str == NULL))
7617
0
        return(NULL);
7618
583k
    ptr = *str;
7619
583k
    cur = *ptr;
7620
583k
    if (cur != '&')
7621
0
  return(NULL);
7622
7623
583k
    ptr++;
7624
583k
    name = xmlParseStringName(ctxt, &ptr);
7625
583k
    if (name == NULL) {
7626
5
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7627
5
           "xmlParseStringEntityRef: no name\n");
7628
5
  *str = ptr;
7629
5
  return(NULL);
7630
5
    }
7631
583k
    if (*ptr != ';') {
7632
4
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7633
4
        xmlFree(name);
7634
4
  *str = ptr;
7635
4
  return(NULL);
7636
4
    }
7637
583k
    ptr++;
7638
7639
583k
    *str = ptr;
7640
583k
    return(name);
7641
583k
}
7642
7643
/**
7644
 * Parse a parameter entity reference. Always consumes '%'.
7645
 *
7646
 * @deprecated Internal function, don't use.
7647
 *
7648
 * The entity content is handled directly by pushing it's content as
7649
 * a new input stream.
7650
 *
7651
 *     [69] PEReference ::= '%' Name ';'
7652
 *
7653
 * [ WFC: No Recursion ]
7654
 * A parsed entity must not contain a recursive
7655
 * reference to itself, either directly or indirectly.
7656
 *
7657
 * [ WFC: Entity Declared ]
7658
 * In a document without any DTD, a document with only an internal DTD
7659
 * subset which contains no parameter entity references, or a document
7660
 * with "standalone='yes'", ...  ... The declaration of a parameter
7661
 * entity must precede any reference to it...
7662
 *
7663
 * [ VC: Entity Declared ]
7664
 * In a document with an external subset or external parameter entities
7665
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7666
 * must precede any reference to it...
7667
 *
7668
 * [ WFC: In DTD ]
7669
 * Parameter-entity references may only appear in the DTD.
7670
 * NOTE: misleading but this is handled.
7671
 *
7672
 * @param ctxt  an XML parser context
7673
 */
7674
void
7675
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7676
56.4k
{
7677
56.4k
    const xmlChar *name;
7678
56.4k
    xmlEntityPtr entity = NULL;
7679
56.4k
    xmlParserInputPtr input;
7680
7681
56.4k
    if (RAW != '%')
7682
0
        return;
7683
56.4k
    NEXT;
7684
56.4k
    name = xmlParseName(ctxt);
7685
56.4k
    if (name == NULL) {
7686
920
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7687
920
  return;
7688
920
    }
7689
55.5k
    if (RAW != ';') {
7690
466
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7691
466
        return;
7692
466
    }
7693
7694
55.1k
    NEXT;
7695
7696
    /* Must be set before xmlHandleUndeclaredEntity */
7697
55.1k
    ctxt->hasPErefs = 1;
7698
7699
    /*
7700
     * Request the entity from SAX
7701
     */
7702
55.1k
    if ((ctxt->sax != NULL) &&
7703
55.1k
  (ctxt->sax->getParameterEntity != NULL))
7704
55.1k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7705
7706
55.1k
    if (entity == NULL) {
7707
1.28k
        xmlHandleUndeclaredEntity(ctxt, name);
7708
53.8k
    } else {
7709
  /*
7710
   * Internal checking in case the entity quest barfed
7711
   */
7712
53.8k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7713
53.8k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7714
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7715
0
      "Internal: %%%s; is not a parameter entity\n",
7716
0
        name, NULL);
7717
53.8k
  } else {
7718
53.8k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7719
53.8k
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7720
388
     ((ctxt->loadsubset == 0) &&
7721
194
      (ctxt->replaceEntities == 0) &&
7722
194
      (ctxt->validate == 0))))
7723
388
    return;
7724
7725
53.4k
            if (entity->flags & XML_ENT_EXPANDING) {
7726
2
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7727
2
                xmlHaltParser(ctxt);
7728
2
                return;
7729
2
            }
7730
7731
53.4k
      input = xmlNewEntityInputStream(ctxt, entity);
7732
53.4k
      if (xmlCtxtPushInput(ctxt, input) < 0) {
7733
0
                xmlFreeInputStream(input);
7734
0
    return;
7735
0
            }
7736
7737
53.4k
            entity->flags |= XML_ENT_EXPANDING;
7738
7739
53.4k
            GROW;
7740
7741
53.4k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7742
0
                xmlDetectEncoding(ctxt);
7743
7744
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7745
0
                    (IS_BLANK_CH(NXT(5)))) {
7746
0
                    xmlParseTextDecl(ctxt);
7747
0
                }
7748
0
            }
7749
53.4k
  }
7750
53.8k
    }
7751
55.1k
}
7752
7753
/**
7754
 * Load the content of an entity.
7755
 *
7756
 * @param ctxt  an XML parser context
7757
 * @param entity  an unloaded system entity
7758
 * @returns 0 in case of success and -1 in case of failure
7759
 */
7760
static int
7761
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7762
0
    xmlParserInputPtr oldinput, input = NULL;
7763
0
    xmlParserInputPtr *oldinputTab;
7764
0
    const xmlChar *oldencoding;
7765
0
    xmlChar *content = NULL;
7766
0
    xmlResourceType rtype;
7767
0
    size_t length, i;
7768
0
    int oldinputNr, oldinputMax;
7769
0
    int ret = -1;
7770
0
    int res;
7771
7772
0
    if ((ctxt == NULL) || (entity == NULL) ||
7773
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7774
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7775
0
  (entity->content != NULL)) {
7776
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7777
0
              "xmlLoadEntityContent parameter error");
7778
0
        return(-1);
7779
0
    }
7780
7781
0
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7782
0
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7783
0
    else
7784
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7785
7786
0
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7787
0
                            (char *) entity->ExternalID, rtype);
7788
0
    if (input == NULL)
7789
0
        return(-1);
7790
7791
0
    oldinput = ctxt->input;
7792
0
    oldinputNr = ctxt->inputNr;
7793
0
    oldinputMax = ctxt->inputMax;
7794
0
    oldinputTab = ctxt->inputTab;
7795
0
    oldencoding = ctxt->encoding;
7796
7797
0
    ctxt->input = NULL;
7798
0
    ctxt->inputNr = 0;
7799
0
    ctxt->inputMax = 1;
7800
0
    ctxt->encoding = NULL;
7801
0
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7802
0
    if (ctxt->inputTab == NULL) {
7803
0
        xmlErrMemory(ctxt);
7804
0
        xmlFreeInputStream(input);
7805
0
        goto error;
7806
0
    }
7807
7808
0
    xmlBufResetInput(input->buf->buffer, input);
7809
7810
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
7811
0
        xmlFreeInputStream(input);
7812
0
        goto error;
7813
0
    }
7814
7815
0
    xmlDetectEncoding(ctxt);
7816
7817
    /*
7818
     * Parse a possible text declaration first
7819
     */
7820
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7821
0
  xmlParseTextDecl(ctxt);
7822
        /*
7823
         * An XML-1.0 document can't reference an entity not XML-1.0
7824
         */
7825
0
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7826
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7827
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7828
0
                           "Version mismatch between document and entity\n");
7829
0
        }
7830
0
    }
7831
7832
0
    length = input->cur - input->base;
7833
0
    xmlBufShrink(input->buf->buffer, length);
7834
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7835
7836
0
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7837
0
        ;
7838
7839
0
    xmlBufResetInput(input->buf->buffer, input);
7840
7841
0
    if (res < 0) {
7842
0
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7843
0
        goto error;
7844
0
    }
7845
7846
0
    length = xmlBufUse(input->buf->buffer);
7847
0
    if (length > INT_MAX) {
7848
0
        xmlErrMemory(ctxt);
7849
0
        goto error;
7850
0
    }
7851
7852
0
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
7853
0
    if (content == NULL) {
7854
0
        xmlErrMemory(ctxt);
7855
0
        goto error;
7856
0
    }
7857
7858
0
    for (i = 0; i < length; ) {
7859
0
        int clen = length - i;
7860
0
        int c = xmlGetUTF8Char(content + i, &clen);
7861
7862
0
        if ((c < 0) || (!IS_CHAR(c))) {
7863
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7864
0
                              "xmlLoadEntityContent: invalid char value %d\n",
7865
0
                              content[i]);
7866
0
            goto error;
7867
0
        }
7868
0
        i += clen;
7869
0
    }
7870
7871
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7872
0
    entity->content = content;
7873
0
    entity->length = length;
7874
0
    content = NULL;
7875
0
    ret = 0;
7876
7877
0
error:
7878
0
    while (ctxt->inputNr > 0)
7879
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
7880
0
    xmlFree(ctxt->inputTab);
7881
0
    xmlFree((xmlChar *) ctxt->encoding);
7882
7883
0
    ctxt->input = oldinput;
7884
0
    ctxt->inputNr = oldinputNr;
7885
0
    ctxt->inputMax = oldinputMax;
7886
0
    ctxt->inputTab = oldinputTab;
7887
0
    ctxt->encoding = oldencoding;
7888
7889
0
    xmlFree(content);
7890
7891
0
    return(ret);
7892
0
}
7893
7894
/**
7895
 * parse PEReference declarations
7896
 *
7897
 *     [69] PEReference ::= '%' Name ';'
7898
 *
7899
 * [ WFC: No Recursion ]
7900
 * A parsed entity must not contain a recursive
7901
 * reference to itself, either directly or indirectly.
7902
 *
7903
 * [ WFC: Entity Declared ]
7904
 * In a document without any DTD, a document with only an internal DTD
7905
 * subset which contains no parameter entity references, or a document
7906
 * with "standalone='yes'", ...  ... The declaration of a parameter
7907
 * entity must precede any reference to it...
7908
 *
7909
 * [ VC: Entity Declared ]
7910
 * In a document with an external subset or external parameter entities
7911
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7912
 * must precede any reference to it...
7913
 *
7914
 * [ WFC: In DTD ]
7915
 * Parameter-entity references may only appear in the DTD.
7916
 * NOTE: misleading but this is handled.
7917
 *
7918
 * @param ctxt  an XML parser context
7919
 * @param str  a pointer to an index in the string
7920
 * @returns the string of the entity content.
7921
 *         str is updated to the current value of the index
7922
 */
7923
static xmlEntityPtr
7924
2.13k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7925
2.13k
    const xmlChar *ptr;
7926
2.13k
    xmlChar cur;
7927
2.13k
    xmlChar *name;
7928
2.13k
    xmlEntityPtr entity = NULL;
7929
7930
2.13k
    if ((str == NULL) || (*str == NULL)) return(NULL);
7931
2.13k
    ptr = *str;
7932
2.13k
    cur = *ptr;
7933
2.13k
    if (cur != '%')
7934
0
        return(NULL);
7935
2.13k
    ptr++;
7936
2.13k
    name = xmlParseStringName(ctxt, &ptr);
7937
2.13k
    if (name == NULL) {
7938
1.11k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7939
1.11k
           "xmlParseStringPEReference: no name\n");
7940
1.11k
  *str = ptr;
7941
1.11k
  return(NULL);
7942
1.11k
    }
7943
1.01k
    cur = *ptr;
7944
1.01k
    if (cur != ';') {
7945
415
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7946
415
  xmlFree(name);
7947
415
  *str = ptr;
7948
415
  return(NULL);
7949
415
    }
7950
604
    ptr++;
7951
7952
    /* Must be set before xmlHandleUndeclaredEntity */
7953
604
    ctxt->hasPErefs = 1;
7954
7955
    /*
7956
     * Request the entity from SAX
7957
     */
7958
604
    if ((ctxt->sax != NULL) &&
7959
604
  (ctxt->sax->getParameterEntity != NULL))
7960
604
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7961
7962
604
    if (entity == NULL) {
7963
327
        xmlHandleUndeclaredEntity(ctxt, name);
7964
327
    } else {
7965
  /*
7966
   * Internal checking in case the entity quest barfed
7967
   */
7968
277
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7969
277
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7970
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7971
0
        "%%%s; is not a parameter entity\n",
7972
0
        name, NULL);
7973
0
  }
7974
277
    }
7975
7976
604
    xmlFree(name);
7977
604
    *str = ptr;
7978
604
    return(entity);
7979
1.01k
}
7980
7981
/**
7982
 * parse a DOCTYPE declaration
7983
 *
7984
 * @deprecated Internal function, don't use.
7985
 *
7986
 *     [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7987
 *                          ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7988
 *
7989
 * [ VC: Root Element Type ]
7990
 * The Name in the document type declaration must match the element
7991
 * type of the root element.
7992
 *
7993
 * @param ctxt  an XML parser context
7994
 */
7995
7996
void
7997
10.7k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
7998
10.7k
    const xmlChar *name = NULL;
7999
10.7k
    xmlChar *ExternalID = NULL;
8000
10.7k
    xmlChar *URI = NULL;
8001
8002
    /*
8003
     * We know that '<!DOCTYPE' has been detected.
8004
     */
8005
10.7k
    SKIP(9);
8006
8007
10.7k
    if (SKIP_BLANKS == 0) {
8008
18
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8009
18
                       "Space required after 'DOCTYPE'\n");
8010
18
    }
8011
8012
    /*
8013
     * Parse the DOCTYPE name.
8014
     */
8015
10.7k
    name = xmlParseName(ctxt);
8016
10.7k
    if (name == NULL) {
8017
7.59k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8018
7.59k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8019
7.59k
    }
8020
10.7k
    ctxt->intSubName = name;
8021
8022
10.7k
    SKIP_BLANKS;
8023
8024
    /*
8025
     * Check for SystemID and ExternalID
8026
     */
8027
10.7k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8028
8029
10.7k
    if ((URI != NULL) || (ExternalID != NULL)) {
8030
835
        ctxt->hasExternalSubset = 1;
8031
835
    }
8032
10.7k
    ctxt->extSubURI = URI;
8033
10.7k
    ctxt->extSubSystem = ExternalID;
8034
8035
10.7k
    SKIP_BLANKS;
8036
8037
    /*
8038
     * Create and update the internal subset.
8039
     */
8040
10.7k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8041
10.7k
  (!ctxt->disableSAX))
8042
8.18k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8043
8044
10.7k
    if ((RAW != '[') && (RAW != '>')) {
8045
1.33k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8046
1.33k
    }
8047
10.7k
}
8048
8049
/**
8050
 * parse the internal subset declaration
8051
 *
8052
 *     [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8053
 * @param ctxt  an XML parser context
8054
 */
8055
8056
static void
8057
9.02k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8058
    /*
8059
     * Is there any DTD definition ?
8060
     */
8061
9.02k
    if (RAW == '[') {
8062
9.02k
        int oldInputNr = ctxt->inputNr;
8063
8064
9.02k
        NEXT;
8065
  /*
8066
   * Parse the succession of Markup declarations and
8067
   * PEReferences.
8068
   * Subsequence (markupdecl | PEReference | S)*
8069
   */
8070
9.02k
  SKIP_BLANKS;
8071
162k
  while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) &&
8072
162k
               (PARSER_STOPPED(ctxt) == 0)) {
8073
8074
            /*
8075
             * Conditional sections are allowed from external entities included
8076
             * by PE References in the internal subset.
8077
             */
8078
159k
            if ((PARSER_EXTERNAL(ctxt)) &&
8079
159k
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8080
0
                xmlParseConditionalSections(ctxt);
8081
159k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8082
96.9k
          xmlParseMarkupDecl(ctxt);
8083
96.9k
            } else if (RAW == '%') {
8084
56.4k
          xmlParsePEReference(ctxt);
8085
56.4k
            } else {
8086
5.88k
    xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8087
5.88k
                break;
8088
5.88k
            }
8089
153k
      SKIP_BLANKS_PE;
8090
153k
            SHRINK;
8091
153k
            GROW;
8092
153k
  }
8093
8094
9.06k
        while (ctxt->inputNr > oldInputNr)
8095
48
            xmlPopPE(ctxt);
8096
8097
9.02k
  if (RAW == ']') {
8098
2.12k
      NEXT;
8099
2.12k
      SKIP_BLANKS;
8100
2.12k
  }
8101
9.02k
    }
8102
8103
    /*
8104
     * We should be at the end of the DOCTYPE declaration.
8105
     */
8106
9.02k
    if ((ctxt->wellFormed) && (RAW != '>')) {
8107
44
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8108
44
  return;
8109
44
    }
8110
8.97k
    NEXT;
8111
8.97k
}
8112
8113
#ifdef LIBXML_SAX1_ENABLED
8114
/**
8115
 * parse an attribute
8116
 *
8117
 * @deprecated Internal function, don't use.
8118
 *
8119
 *     [41] Attribute ::= Name Eq AttValue
8120
 *
8121
 * [ WFC: No External Entity References ]
8122
 * Attribute values cannot contain direct or indirect entity references
8123
 * to external entities.
8124
 *
8125
 * [ WFC: No < in Attribute Values ]
8126
 * The replacement text of any entity referred to directly or indirectly in
8127
 * an attribute value (other than "&lt;") must not contain a <.
8128
 *
8129
 * [ VC: Attribute Value Type ]
8130
 * The attribute must have been declared; the value must be of the type
8131
 * declared for it.
8132
 *
8133
 *     [25] Eq ::= S? '=' S?
8134
 *
8135
 * With namespace:
8136
 *
8137
 *     [NS 11] Attribute ::= QName Eq AttValue
8138
 *
8139
 * Also the case QName == xmlns:??? is handled independently as a namespace
8140
 * definition.
8141
 *
8142
 * @param ctxt  an XML parser context
8143
 * @param value  a xmlChar ** used to store the value of the attribute
8144
 * @returns the attribute name, and the value in *value.
8145
 */
8146
8147
const xmlChar *
8148
49.5k
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8149
49.5k
    const xmlChar *name;
8150
49.5k
    xmlChar *val;
8151
8152
49.5k
    *value = NULL;
8153
49.5k
    GROW;
8154
49.5k
    name = xmlParseName(ctxt);
8155
49.5k
    if (name == NULL) {
8156
38.1k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8157
38.1k
                 "error parsing attribute name\n");
8158
38.1k
        return(NULL);
8159
38.1k
    }
8160
8161
    /*
8162
     * read the value
8163
     */
8164
11.4k
    SKIP_BLANKS;
8165
11.4k
    if (RAW == '=') {
8166
8.42k
        NEXT;
8167
8.42k
  SKIP_BLANKS;
8168
8.42k
  val = xmlParseAttValue(ctxt);
8169
8.42k
    } else {
8170
3.03k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8171
3.03k
         "Specification mandates value for attribute %s\n", name);
8172
3.03k
  return(name);
8173
3.03k
    }
8174
8175
    /*
8176
     * Check that xml:lang conforms to the specification
8177
     * No more registered as an error, just generate a warning now
8178
     * since this was deprecated in XML second edition
8179
     */
8180
8.42k
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8181
2.39k
  if (!xmlCheckLanguageID(val)) {
8182
1.60k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8183
1.60k
              "Malformed value for xml:lang : %s\n",
8184
1.60k
        val, NULL);
8185
1.60k
  }
8186
2.39k
    }
8187
8188
    /*
8189
     * Check that xml:space conforms to the specification
8190
     */
8191
8.42k
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8192
218
  if (xmlStrEqual(val, BAD_CAST "default"))
8193
78
      *(ctxt->space) = 0;
8194
140
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8195
74
      *(ctxt->space) = 1;
8196
66
  else {
8197
66
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8198
66
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8199
66
                                 val, NULL);
8200
66
  }
8201
218
    }
8202
8203
8.42k
    *value = val;
8204
8.42k
    return(name);
8205
11.4k
}
8206
8207
/**
8208
 * Parse a start tag. Always consumes '<'.
8209
 *
8210
 * @deprecated Internal function, don't use.
8211
 *
8212
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8213
 *
8214
 * [ WFC: Unique Att Spec ]
8215
 * No attribute name may appear more than once in the same start-tag or
8216
 * empty-element tag.
8217
 *
8218
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8219
 *
8220
 * [ WFC: Unique Att Spec ]
8221
 * No attribute name may appear more than once in the same start-tag or
8222
 * empty-element tag.
8223
 *
8224
 * With namespace:
8225
 *
8226
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8227
 *
8228
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8229
 *
8230
 * @param ctxt  an XML parser context
8231
 * @returns the element name parsed
8232
 */
8233
8234
const xmlChar *
8235
56.4k
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8236
56.4k
    const xmlChar *name;
8237
56.4k
    const xmlChar *attname;
8238
56.4k
    xmlChar *attvalue;
8239
56.4k
    const xmlChar **atts = ctxt->atts;
8240
56.4k
    int nbatts = 0;
8241
56.4k
    int maxatts = ctxt->maxatts;
8242
56.4k
    int i;
8243
8244
56.4k
    if (RAW != '<') return(NULL);
8245
56.4k
    NEXT1;
8246
8247
56.4k
    name = xmlParseName(ctxt);
8248
56.4k
    if (name == NULL) {
8249
6.70k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8250
6.70k
       "xmlParseStartTag: invalid element name\n");
8251
6.70k
        return(NULL);
8252
6.70k
    }
8253
8254
    /*
8255
     * Now parse the attributes, it ends up with the ending
8256
     *
8257
     * (S Attribute)* S?
8258
     */
8259
49.7k
    SKIP_BLANKS;
8260
49.7k
    GROW;
8261
8262
59.2k
    while (((RAW != '>') &&
8263
59.2k
     ((RAW != '/') || (NXT(1) != '>')) &&
8264
59.2k
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8265
49.5k
  attname = xmlParseAttribute(ctxt, &attvalue);
8266
49.5k
        if (attname == NULL)
8267
38.1k
      break;
8268
11.4k
        if (attvalue != NULL) {
8269
      /*
8270
       * [ WFC: Unique Att Spec ]
8271
       * No attribute name may appear more than once in the same
8272
       * start-tag or empty-element tag.
8273
       */
8274
13.1k
      for (i = 0; i < nbatts;i += 2) {
8275
7.22k
          if (xmlStrEqual(atts[i], attname)) {
8276
1.83k
        xmlErrAttributeDup(ctxt, NULL, attname);
8277
1.83k
        goto failed;
8278
1.83k
    }
8279
7.22k
      }
8280
      /*
8281
       * Add the pair to atts
8282
       */
8283
5.88k
      if (nbatts + 4 > maxatts) {
8284
1.20k
          const xmlChar **n;
8285
1.20k
                int newSize;
8286
8287
1.20k
                newSize = xmlGrowCapacity(maxatts, sizeof(n[0]) * 2,
8288
1.20k
                                          11, XML_MAX_ATTRS);
8289
1.20k
                if (newSize < 0) {
8290
0
        xmlErrMemory(ctxt);
8291
0
        goto failed;
8292
0
    }
8293
1.20k
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
8294
1.20k
                if (newSize < 2)
8295
937
                    newSize = 2;
8296
1.20k
#endif
8297
1.20k
          n = xmlRealloc(atts, newSize * sizeof(n[0]) * 2);
8298
1.20k
    if (n == NULL) {
8299
0
        xmlErrMemory(ctxt);
8300
0
        goto failed;
8301
0
    }
8302
1.20k
    atts = n;
8303
1.20k
                maxatts = newSize * 2;
8304
1.20k
    ctxt->atts = atts;
8305
1.20k
    ctxt->maxatts = maxatts;
8306
1.20k
      }
8307
8308
5.88k
      atts[nbatts++] = attname;
8309
5.88k
      atts[nbatts++] = attvalue;
8310
5.88k
      atts[nbatts] = NULL;
8311
5.88k
      atts[nbatts + 1] = NULL;
8312
8313
5.88k
            attvalue = NULL;
8314
5.88k
  }
8315
8316
11.4k
failed:
8317
8318
11.4k
        if (attvalue != NULL)
8319
1.83k
            xmlFree(attvalue);
8320
8321
11.4k
  GROW
8322
11.4k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8323
2.03k
      break;
8324
9.42k
  if (SKIP_BLANKS == 0) {
8325
7.48k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8326
7.48k
         "attributes construct error\n");
8327
7.48k
  }
8328
9.42k
  SHRINK;
8329
9.42k
        GROW;
8330
9.42k
    }
8331
8332
    /*
8333
     * SAX: Start of Element !
8334
     */
8335
49.7k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8336
49.7k
  (!ctxt->disableSAX)) {
8337
47.6k
  if (nbatts > 0)
8338
3.93k
      ctxt->sax->startElement(ctxt->userData, name, atts);
8339
43.7k
  else
8340
43.7k
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8341
47.6k
    }
8342
8343
49.7k
    if (atts != NULL) {
8344
        /* Free only the content strings */
8345
11.6k
        for (i = 1;i < nbatts;i+=2)
8346
5.88k
      if (atts[i] != NULL)
8347
5.88k
         xmlFree((xmlChar *) atts[i]);
8348
5.75k
    }
8349
49.7k
    return(name);
8350
49.7k
}
8351
8352
/**
8353
 * Parse an end tag. Always consumes '</'.
8354
 *
8355
 *     [42] ETag ::= '</' Name S? '>'
8356
 *
8357
 * With namespace
8358
 *
8359
 *     [NS 9] ETag ::= '</' QName S? '>'
8360
 * @param ctxt  an XML parser context
8361
 * @param line  line of the start tag
8362
 */
8363
8364
static void
8365
4.90k
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8366
4.90k
    const xmlChar *name;
8367
8368
4.90k
    GROW;
8369
4.90k
    if ((RAW != '<') || (NXT(1) != '/')) {
8370
8
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8371
8
           "xmlParseEndTag: '</' not found\n");
8372
8
  return;
8373
8
    }
8374
4.89k
    SKIP(2);
8375
8376
4.89k
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8377
8378
    /*
8379
     * We should definitely be at the ending "S? '>'" part
8380
     */
8381
4.89k
    GROW;
8382
4.89k
    SKIP_BLANKS;
8383
4.89k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8384
2.11k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8385
2.11k
    } else
8386
2.78k
  NEXT1;
8387
8388
    /*
8389
     * [ WFC: Element Type Match ]
8390
     * The Name in an element's end-tag must match the element type in the
8391
     * start-tag.
8392
     *
8393
     */
8394
4.89k
    if (name != (xmlChar*)1) {
8395
3.41k
        if (name == NULL) name = BAD_CAST "unparsable";
8396
3.41k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8397
3.41k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8398
3.41k
                    ctxt->name, line, name);
8399
3.41k
    }
8400
8401
    /*
8402
     * SAX: End of Tag
8403
     */
8404
4.89k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8405
4.89k
  (!ctxt->disableSAX))
8406
4.02k
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8407
8408
4.89k
    namePop(ctxt);
8409
4.89k
    spacePop(ctxt);
8410
4.89k
}
8411
8412
/**
8413
 * parse an end of tag
8414
 *
8415
 * @deprecated Internal function, don't use.
8416
 *
8417
 *     [42] ETag ::= '</' Name S? '>'
8418
 *
8419
 * With namespace
8420
 *
8421
 *     [NS 9] ETag ::= '</' QName S? '>'
8422
 * @param ctxt  an XML parser context
8423
 */
8424
8425
void
8426
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8427
0
    xmlParseEndTag1(ctxt, 0);
8428
0
}
8429
#endif /* LIBXML_SAX1_ENABLED */
8430
8431
/************************************************************************
8432
 *                  *
8433
 *          SAX 2 specific operations       *
8434
 *                  *
8435
 ************************************************************************/
8436
8437
/**
8438
 * parse an XML Namespace QName
8439
 *
8440
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8441
 *     [7]  Prefix  ::= NCName
8442
 *     [8]  LocalPart  ::= NCName
8443
 *
8444
 * @param ctxt  an XML parser context
8445
 * @param prefix  pointer to store the prefix part
8446
 * @returns the Name parsed or NULL
8447
 */
8448
8449
static xmlHashedString
8450
161k
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8451
161k
    xmlHashedString l, p;
8452
161k
    int start, isNCName = 0;
8453
8454
161k
    l.name = NULL;
8455
161k
    p.name = NULL;
8456
8457
161k
    GROW;
8458
161k
    start = CUR_PTR - BASE_PTR;
8459
8460
161k
    l = xmlParseNCName(ctxt);
8461
161k
    if (l.name != NULL) {
8462
99.7k
        isNCName = 1;
8463
99.7k
        if (CUR == ':') {
8464
19.0k
            NEXT;
8465
19.0k
            p = l;
8466
19.0k
            l = xmlParseNCName(ctxt);
8467
19.0k
        }
8468
99.7k
    }
8469
161k
    if ((l.name == NULL) || (CUR == ':')) {
8470
64.1k
        xmlChar *tmp;
8471
8472
64.1k
        l.name = NULL;
8473
64.1k
        p.name = NULL;
8474
64.1k
        if ((isNCName == 0) && (CUR != ':'))
8475
51.3k
            return(l);
8476
12.8k
        tmp = xmlParseNmtoken(ctxt);
8477
12.8k
        if (tmp != NULL)
8478
11.4k
            xmlFree(tmp);
8479
12.8k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8480
12.8k
                                CUR_PTR - (BASE_PTR + start));
8481
12.8k
        if (l.name == NULL) {
8482
0
            xmlErrMemory(ctxt);
8483
0
            return(l);
8484
0
        }
8485
12.8k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8486
12.8k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8487
12.8k
    }
8488
8489
110k
    *prefix = p;
8490
110k
    return(l);
8491
161k
}
8492
8493
/**
8494
 * parse an XML Namespace QName
8495
 *
8496
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8497
 *     [7]  Prefix  ::= NCName
8498
 *     [8]  LocalPart  ::= NCName
8499
 *
8500
 * @param ctxt  an XML parser context
8501
 * @param prefix  pointer to store the prefix part
8502
 * @returns the Name parsed or NULL
8503
 */
8504
8505
static const xmlChar *
8506
1.57k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8507
1.57k
    xmlHashedString n, p;
8508
8509
1.57k
    n = xmlParseQNameHashed(ctxt, &p);
8510
1.57k
    if (n.name == NULL)
8511
464
        return(NULL);
8512
1.11k
    *prefix = p.name;
8513
1.11k
    return(n.name);
8514
1.57k
}
8515
8516
/**
8517
 * parse an XML name and compares for match
8518
 * (specialized for endtag parsing)
8519
 *
8520
 * @param ctxt  an XML parser context
8521
 * @param name  the localname
8522
 * @param prefix  the prefix, if any.
8523
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
8524
 * and the name for mismatch
8525
 */
8526
8527
static const xmlChar *
8528
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8529
1.86k
                        xmlChar const *prefix) {
8530
1.86k
    const xmlChar *cmp;
8531
1.86k
    const xmlChar *in;
8532
1.86k
    const xmlChar *ret;
8533
1.86k
    const xmlChar *prefix2;
8534
8535
1.86k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8536
8537
1.86k
    GROW;
8538
1.86k
    in = ctxt->input->cur;
8539
8540
1.86k
    cmp = prefix;
8541
3.01k
    while (*in != 0 && *in == *cmp) {
8542
1.14k
  ++in;
8543
1.14k
  ++cmp;
8544
1.14k
    }
8545
1.86k
    if ((*cmp == 0) && (*in == ':')) {
8546
546
        in++;
8547
546
  cmp = name;
8548
1.34k
  while (*in != 0 && *in == *cmp) {
8549
803
      ++in;
8550
803
      ++cmp;
8551
803
  }
8552
546
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8553
      /* success */
8554
291
            ctxt->input->col += in - ctxt->input->cur;
8555
291
      ctxt->input->cur = in;
8556
291
      return((const xmlChar*) 1);
8557
291
  }
8558
546
    }
8559
    /*
8560
     * all strings coms from the dictionary, equality can be done directly
8561
     */
8562
1.57k
    ret = xmlParseQName (ctxt, &prefix2);
8563
1.57k
    if (ret == NULL)
8564
464
        return(NULL);
8565
1.11k
    if ((ret == name) && (prefix == prefix2))
8566
126
  return((const xmlChar*) 1);
8567
987
    return ret;
8568
1.11k
}
8569
8570
/**
8571
 * parse an attribute in the new SAX2 framework.
8572
 *
8573
 * @param ctxt  an XML parser context
8574
 * @param pref  the element prefix
8575
 * @param elem  the element name
8576
 * @param hprefix  resulting attribute prefix
8577
 * @param value  resulting value of the attribute
8578
 * @param len  resulting length of the attribute
8579
 * @param alloc  resulting indicator if the attribute was allocated
8580
 * @returns the attribute name, and the value in *value, .
8581
 */
8582
8583
static xmlHashedString
8584
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8585
                   const xmlChar * pref, const xmlChar * elem,
8586
                   xmlHashedString * hprefix, xmlChar ** value,
8587
                   int *len, int *alloc)
8588
61.5k
{
8589
61.5k
    xmlHashedString hname;
8590
61.5k
    const xmlChar *prefix, *name;
8591
61.5k
    xmlChar *val = NULL, *internal_val = NULL;
8592
61.5k
    int normalize = 0;
8593
61.5k
    int isNamespace;
8594
8595
61.5k
    *value = NULL;
8596
61.5k
    GROW;
8597
61.5k
    hname = xmlParseQNameHashed(ctxt, hprefix);
8598
61.5k
    if (hname.name == NULL) {
8599
33.7k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8600
33.7k
                       "error parsing attribute name\n");
8601
33.7k
        return(hname);
8602
33.7k
    }
8603
27.7k
    name = hname.name;
8604
27.7k
    prefix = hprefix->name;
8605
8606
    /*
8607
     * get the type if needed
8608
     */
8609
27.7k
    if (ctxt->attsSpecial != NULL) {
8610
4.11k
        int type;
8611
8612
4.11k
        type = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
8613
4.11k
                                              prefix, name));
8614
4.11k
        if (type != 0)
8615
2.30k
            normalize = 1;
8616
4.11k
    }
8617
8618
    /*
8619
     * read the value
8620
     */
8621
27.7k
    SKIP_BLANKS;
8622
27.7k
    if (RAW == '=') {
8623
25.4k
        NEXT;
8624
25.4k
        SKIP_BLANKS;
8625
25.4k
        isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8626
25.4k
                       (prefix == ctxt->str_xmlns));
8627
25.4k
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize,
8628
25.4k
                                       isNamespace);
8629
25.4k
        if (val == NULL)
8630
1.86k
            goto error;
8631
25.4k
    } else {
8632
2.38k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8633
2.38k
                          "Specification mandates value for attribute %s\n",
8634
2.38k
                          name);
8635
2.38k
        goto error;
8636
2.38k
    }
8637
8638
23.5k
    if (prefix == ctxt->str_xml) {
8639
        /*
8640
         * Check that xml:lang conforms to the specification
8641
         * No longer reported as an error; just generate a warning now
8642
         * since this was deprecated in XML second edition
8643
         */
8644
3.90k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8645
502
            internal_val = xmlStrndup(val, *len);
8646
502
            if (internal_val == NULL)
8647
0
                goto mem_error;
8648
502
            if (!xmlCheckLanguageID(internal_val)) {
8649
275
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8650
275
                              "Malformed value for xml:lang : %s\n",
8651
275
                              internal_val, NULL);
8652
275
            }
8653
502
        }
8654
8655
        /*
8656
         * Check that xml:space conforms to the specification
8657
         */
8658
3.90k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8659
199
            internal_val = xmlStrndup(val, *len);
8660
199
            if (internal_val == NULL)
8661
0
                goto mem_error;
8662
199
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8663
67
                *(ctxt->space) = 0;
8664
132
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8665
66
                *(ctxt->space) = 1;
8666
66
            else {
8667
66
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8668
66
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8669
66
                              internal_val, NULL);
8670
66
            }
8671
199
        }
8672
3.90k
        if (internal_val) {
8673
701
            xmlFree(internal_val);
8674
701
        }
8675
3.90k
    }
8676
8677
23.5k
    *value = val;
8678
23.5k
    return (hname);
8679
8680
0
mem_error:
8681
0
    xmlErrMemory(ctxt);
8682
4.24k
error:
8683
4.24k
    if ((val != NULL) && (*alloc != 0))
8684
0
        xmlFree(val);
8685
4.24k
    return(hname);
8686
0
}
8687
8688
/**
8689
 * Inserts a new attribute into the hash table.
8690
 *
8691
 * @param ctxt  parser context
8692
 * @param size  size of the hash table
8693
 * @param name  attribute name
8694
 * @param uri  namespace uri
8695
 * @param hashValue  combined hash value of name and uri
8696
 * @param aindex  attribute index (this is a multiple of 5)
8697
 * @returns INT_MAX if no existing attribute was found, the attribute
8698
 * index if an attribute was found, -1 if a memory allocation failed.
8699
 */
8700
static int
8701
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8702
23.8k
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8703
23.8k
    xmlAttrHashBucket *table = ctxt->attrHash;
8704
23.8k
    xmlAttrHashBucket *bucket;
8705
23.8k
    unsigned hindex;
8706
8707
23.8k
    hindex = hashValue & (size - 1);
8708
23.8k
    bucket = &table[hindex];
8709
8710
28.0k
    while (bucket->index >= 0) {
8711
14.2k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8712
8713
14.2k
        if (name == atts[0]) {
8714
11.3k
            int nsIndex = XML_PTR_TO_INT(atts[2]);
8715
8716
11.3k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8717
11.3k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8718
4.31k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8719
10.0k
                return(bucket->index);
8720
11.3k
        }
8721
8722
4.20k
        hindex++;
8723
4.20k
        bucket++;
8724
4.20k
        if (hindex >= size) {
8725
1.31k
            hindex = 0;
8726
1.31k
            bucket = table;
8727
1.31k
        }
8728
4.20k
    }
8729
8730
13.7k
    bucket->index = aindex;
8731
8732
13.7k
    return(INT_MAX);
8733
23.8k
}
8734
8735
static int
8736
xmlAttrHashInsertQName(xmlParserCtxtPtr ctxt, unsigned size,
8737
                       const xmlChar *name, const xmlChar *prefix,
8738
783
                       unsigned hashValue, int aindex) {
8739
783
    xmlAttrHashBucket *table = ctxt->attrHash;
8740
783
    xmlAttrHashBucket *bucket;
8741
783
    unsigned hindex;
8742
8743
783
    hindex = hashValue & (size - 1);
8744
783
    bucket = &table[hindex];
8745
8746
1.42k
    while (bucket->index >= 0) {
8747
940
        const xmlChar **atts = &ctxt->atts[bucket->index];
8748
8749
940
        if ((name == atts[0]) && (prefix == atts[1]))
8750
303
            return(bucket->index);
8751
8752
637
        hindex++;
8753
637
        bucket++;
8754
637
        if (hindex >= size) {
8755
169
            hindex = 0;
8756
169
            bucket = table;
8757
169
        }
8758
637
    }
8759
8760
480
    bucket->index = aindex;
8761
8762
480
    return(INT_MAX);
8763
783
}
8764
/**
8765
 * Parse a start tag. Always consumes '<'.
8766
 *
8767
 * This routine is called when running SAX2 parsing
8768
 *
8769
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8770
 *
8771
 * [ WFC: Unique Att Spec ]
8772
 * No attribute name may appear more than once in the same start-tag or
8773
 * empty-element tag.
8774
 *
8775
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8776
 *
8777
 * [ WFC: Unique Att Spec ]
8778
 * No attribute name may appear more than once in the same start-tag or
8779
 * empty-element tag.
8780
 *
8781
 * With namespace:
8782
 *
8783
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8784
 *
8785
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8786
 *
8787
 * @param ctxt  an XML parser context
8788
 * @param pref  resulting namespace prefix
8789
 * @param URI  resulting namespace URI
8790
 * @param nbNsPtr  resulting number of namespace declarations
8791
 * @returns the element name parsed
8792
 */
8793
8794
static const xmlChar *
8795
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8796
98.7k
                  const xmlChar **URI, int *nbNsPtr) {
8797
98.7k
    xmlHashedString hlocalname;
8798
98.7k
    xmlHashedString hprefix;
8799
98.7k
    xmlHashedString hattname;
8800
98.7k
    xmlHashedString haprefix;
8801
98.7k
    const xmlChar *localname;
8802
98.7k
    const xmlChar *prefix;
8803
98.7k
    const xmlChar *attname;
8804
98.7k
    const xmlChar *aprefix;
8805
98.7k
    const xmlChar *uri;
8806
98.7k
    xmlChar *attvalue = NULL;
8807
98.7k
    const xmlChar **atts = ctxt->atts;
8808
98.7k
    unsigned attrHashSize = 0;
8809
98.7k
    int maxatts = ctxt->maxatts;
8810
98.7k
    int nratts, nbatts, nbdef;
8811
98.7k
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8812
98.7k
    int alloc = 0;
8813
98.7k
    int numNsErr = 0;
8814
98.7k
    int numDupErr = 0;
8815
8816
98.7k
    if (RAW != '<') return(NULL);
8817
98.7k
    NEXT1;
8818
8819
98.7k
    nbatts = 0;
8820
98.7k
    nratts = 0;
8821
98.7k
    nbdef = 0;
8822
98.7k
    nbNs = 0;
8823
98.7k
    nbTotalDef = 0;
8824
98.7k
    attval = 0;
8825
8826
98.7k
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8827
0
        xmlErrMemory(ctxt);
8828
0
        return(NULL);
8829
0
    }
8830
8831
98.7k
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8832
98.7k
    if (hlocalname.name == NULL) {
8833
17.0k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8834
17.0k
           "StartTag: invalid element name\n");
8835
17.0k
        return(NULL);
8836
17.0k
    }
8837
81.7k
    localname = hlocalname.name;
8838
81.7k
    prefix = hprefix.name;
8839
8840
    /*
8841
     * Now parse the attributes, it ends up with the ending
8842
     *
8843
     * (S Attribute)* S?
8844
     */
8845
81.7k
    SKIP_BLANKS;
8846
81.7k
    GROW;
8847
8848
    /*
8849
     * The ctxt->atts array will be ultimately passed to the SAX callback
8850
     * containing five xmlChar pointers for each attribute:
8851
     *
8852
     * [0] attribute name
8853
     * [1] attribute prefix
8854
     * [2] namespace URI
8855
     * [3] attribute value
8856
     * [4] end of attribute value
8857
     *
8858
     * To save memory, we reuse this array temporarily and store integers
8859
     * in these pointer variables.
8860
     *
8861
     * [0] attribute name
8862
     * [1] attribute prefix
8863
     * [2] hash value of attribute prefix, and later namespace index
8864
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
8865
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
8866
     *
8867
     * The ctxt->attallocs array contains an additional unsigned int for
8868
     * each attribute, containing the hash value of the attribute name
8869
     * and the alloc flag in bit 31.
8870
     */
8871
8872
95.8k
    while (((RAW != '>') &&
8873
95.8k
     ((RAW != '/') || (NXT(1) != '>')) &&
8874
95.8k
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8875
61.5k
  int len = -1;
8876
8877
61.5k
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
8878
61.5k
                                          &haprefix, &attvalue, &len,
8879
61.5k
                                          &alloc);
8880
61.5k
        if (hattname.name == NULL)
8881
33.7k
      break;
8882
27.7k
        if (attvalue == NULL)
8883
4.24k
            goto next_attr;
8884
23.5k
        attname = hattname.name;
8885
23.5k
        aprefix = haprefix.name;
8886
23.5k
  if (len < 0) len = xmlStrlen(attvalue);
8887
8888
23.5k
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8889
3.25k
            xmlHashedString huri;
8890
3.25k
            xmlURIPtr parsedUri;
8891
8892
3.25k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8893
3.25k
            uri = huri.name;
8894
3.25k
            if (uri == NULL) {
8895
0
                xmlErrMemory(ctxt);
8896
0
                goto next_attr;
8897
0
            }
8898
3.25k
            if (*uri != 0) {
8899
3.01k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8900
0
                    xmlErrMemory(ctxt);
8901
0
                    goto next_attr;
8902
0
                }
8903
3.01k
                if (parsedUri == NULL) {
8904
1.67k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8905
1.67k
                             "xmlns: '%s' is not a valid URI\n",
8906
1.67k
                                       uri, NULL, NULL);
8907
1.67k
                } else {
8908
1.34k
                    if (parsedUri->scheme == NULL) {
8909
1.07k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8910
1.07k
                                  "xmlns: URI %s is not absolute\n",
8911
1.07k
                                  uri, NULL, NULL);
8912
1.07k
                    }
8913
1.34k
                    xmlFreeURI(parsedUri);
8914
1.34k
                }
8915
3.01k
                if (uri == ctxt->str_xml_ns) {
8916
34
                    if (attname != ctxt->str_xml) {
8917
34
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8918
34
                     "xml namespace URI cannot be the default namespace\n",
8919
34
                                 NULL, NULL, NULL);
8920
34
                    }
8921
34
                    goto next_attr;
8922
34
                }
8923
2.98k
                if ((len == 29) &&
8924
2.98k
                    (xmlStrEqual(uri,
8925
102
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8926
34
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8927
34
                         "reuse of the xmlns namespace name is forbidden\n",
8928
34
                             NULL, NULL, NULL);
8929
34
                    goto next_attr;
8930
34
                }
8931
2.98k
            }
8932
8933
3.18k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
8934
2.63k
                nbNs++;
8935
20.2k
        } else if (aprefix == ctxt->str_xmlns) {
8936
3.41k
            xmlHashedString huri;
8937
3.41k
            xmlURIPtr parsedUri;
8938
8939
3.41k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8940
3.41k
            uri = huri.name;
8941
3.41k
            if (uri == NULL) {
8942
0
                xmlErrMemory(ctxt);
8943
0
                goto next_attr;
8944
0
            }
8945
8946
3.41k
            if (attname == ctxt->str_xml) {
8947
84
                if (uri != ctxt->str_xml_ns) {
8948
66
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8949
66
                             "xml namespace prefix mapped to wrong URI\n",
8950
66
                             NULL, NULL, NULL);
8951
66
                }
8952
                /*
8953
                 * Do not keep a namespace definition node
8954
                 */
8955
84
                goto next_attr;
8956
84
            }
8957
3.33k
            if (uri == ctxt->str_xml_ns) {
8958
34
                if (attname != ctxt->str_xml) {
8959
34
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8960
34
                             "xml namespace URI mapped to wrong prefix\n",
8961
34
                             NULL, NULL, NULL);
8962
34
                }
8963
34
                goto next_attr;
8964
34
            }
8965
3.30k
            if (attname == ctxt->str_xmlns) {
8966
68
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8967
68
                         "redefinition of the xmlns prefix is forbidden\n",
8968
68
                         NULL, NULL, NULL);
8969
68
                goto next_attr;
8970
68
            }
8971
3.23k
            if ((len == 29) &&
8972
3.23k
                (xmlStrEqual(uri,
8973
68
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8974
34
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8975
34
                         "reuse of the xmlns namespace name is forbidden\n",
8976
34
                         NULL, NULL, NULL);
8977
34
                goto next_attr;
8978
34
            }
8979
3.19k
            if ((uri == NULL) || (uri[0] == 0)) {
8980
71
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8981
71
                         "xmlns:%s: Empty XML namespace is not allowed\n",
8982
71
                              attname, NULL, NULL);
8983
71
                goto next_attr;
8984
3.12k
            } else {
8985
3.12k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8986
0
                    xmlErrMemory(ctxt);
8987
0
                    goto next_attr;
8988
0
                }
8989
3.12k
                if (parsedUri == NULL) {
8990
878
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8991
878
                         "xmlns:%s: '%s' is not a valid URI\n",
8992
878
                                       attname, uri, NULL);
8993
2.25k
                } else {
8994
2.25k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
8995
71
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8996
71
                                  "xmlns:%s: URI %s is not absolute\n",
8997
71
                                  attname, uri, NULL);
8998
71
                    }
8999
2.25k
                    xmlFreeURI(parsedUri);
9000
2.25k
                }
9001
3.12k
            }
9002
9003
3.12k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9004
2.98k
                nbNs++;
9005
16.8k
        } else {
9006
            /*
9007
             * Populate attributes array, see above for repurposing
9008
             * of xmlChar pointers.
9009
             */
9010
16.8k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9011
4.24k
                int res = xmlCtxtGrowAttrs(ctxt);
9012
9013
4.24k
                maxatts = ctxt->maxatts;
9014
4.24k
                atts = ctxt->atts;
9015
9016
4.24k
                if (res < 0)
9017
0
                    goto next_attr;
9018
4.24k
            }
9019
16.8k
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9020
16.8k
                                        ((unsigned) alloc << 31);
9021
16.8k
            atts[nbatts++] = attname;
9022
16.8k
            atts[nbatts++] = aprefix;
9023
16.8k
            atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9024
16.8k
            if (alloc) {
9025
1.61k
                atts[nbatts++] = attvalue;
9026
1.61k
                attvalue += len;
9027
1.61k
                atts[nbatts++] = attvalue;
9028
15.2k
            } else {
9029
                /*
9030
                 * attvalue points into the input buffer which can be
9031
                 * reallocated. Store differences to input->base instead.
9032
                 * The pointers will be reconstructed later.
9033
                 */
9034
15.2k
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9035
15.2k
                attvalue += len;
9036
15.2k
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9037
15.2k
            }
9038
            /*
9039
             * tag if some deallocation is needed
9040
             */
9041
16.8k
            if (alloc != 0) attval = 1;
9042
16.8k
            attvalue = NULL; /* moved into atts */
9043
16.8k
        }
9044
9045
27.7k
next_attr:
9046
27.7k
        if ((attvalue != NULL) && (alloc != 0)) {
9047
1.32k
            xmlFree(attvalue);
9048
1.32k
            attvalue = NULL;
9049
1.32k
        }
9050
9051
27.7k
  GROW
9052
27.7k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9053
3.86k
      break;
9054
23.9k
  if (SKIP_BLANKS == 0) {
9055
9.78k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9056
9.78k
         "attributes construct error\n");
9057
9.78k
      break;
9058
9.78k
  }
9059
14.1k
        GROW;
9060
14.1k
    }
9061
9062
    /*
9063
     * Namespaces from default attributes
9064
     */
9065
81.7k
    if (ctxt->attsDefault != NULL) {
9066
25.4k
        xmlDefAttrsPtr defaults;
9067
9068
25.4k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9069
25.4k
  if (defaults != NULL) {
9070
123k
      for (i = 0; i < defaults->nbAttrs; i++) {
9071
99.6k
                xmlDefAttr *attr = &defaults->attrs[i];
9072
9073
99.6k
          attname = attr->name.name;
9074
99.6k
    aprefix = attr->prefix.name;
9075
9076
99.6k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9077
8.09k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9078
9079
8.09k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9080
7.64k
                        nbNs++;
9081
91.5k
    } else if (aprefix == ctxt->str_xmlns) {
9082
72.8k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9083
9084
72.8k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9085
72.8k
                                      NULL, 1) > 0)
9086
72.2k
                        nbNs++;
9087
72.8k
    } else {
9088
18.6k
                    if (nratts + nbTotalDef >= XML_MAX_ATTRS) {
9089
0
                        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
9090
0
                                    "Maximum number of attributes exceeded");
9091
0
                        break;
9092
0
                    }
9093
18.6k
                    nbTotalDef += 1;
9094
18.6k
                }
9095
99.6k
      }
9096
23.7k
  }
9097
25.4k
    }
9098
9099
    /*
9100
     * Resolve attribute namespaces
9101
     */
9102
98.6k
    for (i = 0; i < nbatts; i += 5) {
9103
16.8k
        attname = atts[i];
9104
16.8k
        aprefix = atts[i+1];
9105
9106
        /*
9107
  * The default namespace does not apply to attribute names.
9108
  */
9109
16.8k
  if (aprefix == NULL) {
9110
9.33k
            nsIndex = NS_INDEX_EMPTY;
9111
9.33k
        } else if (aprefix == ctxt->str_xml) {
9112
3.90k
            nsIndex = NS_INDEX_XML;
9113
3.90k
        } else {
9114
3.63k
            haprefix.name = aprefix;
9115
3.63k
            haprefix.hashValue = (size_t) atts[i+2];
9116
3.63k
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9117
9118
3.63k
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9119
1.78k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9120
1.78k
        "Namespace prefix %s for %s on %s is not defined\n",
9121
1.78k
        aprefix, attname, localname);
9122
1.78k
                nsIndex = NS_INDEX_EMPTY;
9123
1.78k
            }
9124
3.63k
        }
9125
9126
16.8k
        atts[i+2] = XML_INT_TO_PTR(nsIndex);
9127
16.8k
    }
9128
9129
    /*
9130
     * Maximum number of attributes including default attributes.
9131
     */
9132
81.7k
    maxAtts = nratts + nbTotalDef;
9133
9134
    /*
9135
     * Verify that attribute names are unique.
9136
     */
9137
81.7k
    if (maxAtts > 1) {
9138
6.53k
        attrHashSize = 4;
9139
11.0k
        while (attrHashSize / 2 < (unsigned) maxAtts)
9140
4.50k
            attrHashSize *= 2;
9141
9142
6.53k
        if (attrHashSize > ctxt->attrHashMax) {
9143
1.01k
            xmlAttrHashBucket *tmp;
9144
9145
1.01k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9146
1.01k
            if (tmp == NULL) {
9147
0
                xmlErrMemory(ctxt);
9148
0
                goto done;
9149
0
            }
9150
9151
1.01k
            ctxt->attrHash = tmp;
9152
1.01k
            ctxt->attrHashMax = attrHashSize;
9153
1.01k
        }
9154
9155
6.53k
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9156
9157
18.8k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9158
12.3k
            const xmlChar *nsuri;
9159
12.3k
            unsigned hashValue, nameHashValue, uriHashValue;
9160
12.3k
            int res;
9161
9162
12.3k
            attname = atts[i];
9163
12.3k
            aprefix = atts[i+1];
9164
12.3k
            nsIndex = XML_PTR_TO_INT(atts[i+2]);
9165
            /* Hash values always have bit 31 set, see dict.c */
9166
12.3k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9167
9168
12.3k
            if (nsIndex == NS_INDEX_EMPTY) {
9169
                /*
9170
                 * Prefix with empty namespace means an undeclared
9171
                 * prefix which was already reported above.
9172
                 */
9173
9.01k
                if (aprefix != NULL)
9174
1.67k
                    continue;
9175
7.33k
                nsuri = NULL;
9176
7.33k
                uriHashValue = URI_HASH_EMPTY;
9177
7.33k
            } else if (nsIndex == NS_INDEX_XML) {
9178
1.59k
                nsuri = ctxt->str_xml_ns;
9179
1.59k
                uriHashValue = URI_HASH_XML;
9180
1.75k
            } else {
9181
1.75k
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9182
1.75k
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9183
1.75k
            }
9184
9185
10.6k
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9186
10.6k
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9187
10.6k
                                    hashValue, i);
9188
10.6k
            if (res < 0)
9189
0
                continue;
9190
9191
            /*
9192
             * [ WFC: Unique Att Spec ]
9193
             * No attribute name may appear more than once in the same
9194
             * start-tag or empty-element tag.
9195
             * As extended by the Namespaces in XML Recommendation.
9196
             */
9197
10.6k
            if (res < INT_MAX) {
9198
8.22k
                if (aprefix == atts[res+1]) {
9199
7.79k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9200
7.79k
                    numDupErr += 1;
9201
7.79k
                } else {
9202
430
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9203
430
                             "Namespaced Attribute %s in '%s' redefined\n",
9204
430
                             attname, nsuri, NULL);
9205
430
                    numNsErr += 1;
9206
430
                }
9207
8.22k
            }
9208
10.6k
        }
9209
6.53k
    }
9210
9211
    /*
9212
     * Default attributes
9213
     */
9214
81.7k
    if (ctxt->attsDefault != NULL) {
9215
25.4k
        xmlDefAttrsPtr defaults;
9216
9217
25.4k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9218
25.4k
  if (defaults != NULL) {
9219
123k
      for (i = 0; i < defaults->nbAttrs; i++) {
9220
99.6k
                xmlDefAttr *attr = &defaults->attrs[i];
9221
99.6k
                const xmlChar *nsuri = NULL;
9222
99.6k
                unsigned hashValue, uriHashValue = 0;
9223
99.6k
                int res;
9224
9225
99.6k
          attname = attr->name.name;
9226
99.6k
    aprefix = attr->prefix.name;
9227
9228
99.6k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9229
8.09k
                    continue;
9230
91.5k
    if (aprefix == ctxt->str_xmlns)
9231
72.8k
                    continue;
9232
9233
18.6k
                if (aprefix == NULL) {
9234
6.67k
                    nsIndex = NS_INDEX_EMPTY;
9235
6.67k
                    nsuri = NULL;
9236
6.67k
                    uriHashValue = URI_HASH_EMPTY;
9237
11.9k
                } else if (aprefix == ctxt->str_xml) {
9238
467
                    nsIndex = NS_INDEX_XML;
9239
467
                    nsuri = ctxt->str_xml_ns;
9240
467
                    uriHashValue = URI_HASH_XML;
9241
11.5k
                } else {
9242
11.5k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9243
11.5k
                    if ((nsIndex == INT_MAX) ||
9244
11.5k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9245
7.16k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9246
7.16k
                                 "Namespace prefix %s for %s on %s is not "
9247
7.16k
                                 "defined\n",
9248
7.16k
                                 aprefix, attname, localname);
9249
7.16k
                        nsIndex = NS_INDEX_EMPTY;
9250
7.16k
                        nsuri = NULL;
9251
7.16k
                        uriHashValue = URI_HASH_EMPTY;
9252
7.16k
                    } else {
9253
4.35k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9254
4.35k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9255
4.35k
                    }
9256
11.5k
                }
9257
9258
                /*
9259
                 * Check whether the attribute exists
9260
                 */
9261
18.6k
                if (maxAtts > 1) {
9262
13.1k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9263
13.1k
                                                   uriHashValue);
9264
13.1k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9265
13.1k
                                            hashValue, nbatts);
9266
13.1k
                    if (res < 0)
9267
0
                        continue;
9268
13.1k
                    if (res < INT_MAX) {
9269
1.83k
                        if (aprefix == atts[res+1])
9270
393
                            continue;
9271
1.44k
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9272
1.44k
                                 "Namespaced Attribute %s in '%s' redefined\n",
9273
1.44k
                                 attname, nsuri, NULL);
9274
1.44k
                    }
9275
13.1k
                }
9276
9277
18.2k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9278
9279
18.2k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9280
653
                    res = xmlCtxtGrowAttrs(ctxt);
9281
9282
653
                    maxatts = ctxt->maxatts;
9283
653
                    atts = ctxt->atts;
9284
9285
653
                    if (res < 0) {
9286
0
                        localname = NULL;
9287
0
                        goto done;
9288
0
                    }
9289
653
                }
9290
9291
18.2k
                atts[nbatts++] = attname;
9292
18.2k
                atts[nbatts++] = aprefix;
9293
18.2k
                atts[nbatts++] = XML_INT_TO_PTR(nsIndex);
9294
18.2k
                atts[nbatts++] = attr->value.name;
9295
18.2k
                atts[nbatts++] = attr->valueEnd;
9296
18.2k
                if ((ctxt->standalone == 1) && (attr->external != 0)) {
9297
0
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9298
0
                            "standalone: attribute %s on %s defaulted "
9299
0
                            "from external subset\n",
9300
0
                            attname, localname);
9301
0
                }
9302
18.2k
                nbdef++;
9303
18.2k
      }
9304
23.7k
  }
9305
25.4k
    }
9306
9307
    /*
9308
     * Using a single hash table for nsUri/localName pairs cannot
9309
     * detect duplicate QNames reliably. The following example will
9310
     * only result in two namespace errors.
9311
     *
9312
     * <doc xmlns:a="a" xmlns:b="a">
9313
     *   <elem a:a="" b:a="" b:a=""/>
9314
     * </doc>
9315
     *
9316
     * If we saw more than one namespace error but no duplicate QNames
9317
     * were found, we have to scan for duplicate QNames.
9318
     */
9319
81.7k
    if ((numDupErr == 0) && (numNsErr > 1)) {
9320
138
        memset(ctxt->attrHash, -1,
9321
138
               attrHashSize * sizeof(ctxt->attrHash[0]));
9322
9323
1.02k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9324
882
            unsigned hashValue, nameHashValue, prefixHashValue;
9325
882
            int res;
9326
9327
882
            aprefix = atts[i+1];
9328
882
            if (aprefix == NULL)
9329
99
                continue;
9330
9331
783
            attname = atts[i];
9332
            /* Hash values always have bit 31 set, see dict.c */
9333
783
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9334
783
            prefixHashValue = xmlDictComputeHash(ctxt->dict, aprefix);
9335
9336
783
            hashValue = xmlDictCombineHash(nameHashValue, prefixHashValue);
9337
783
            res = xmlAttrHashInsertQName(ctxt, attrHashSize, attname,
9338
783
                                         aprefix, hashValue, i);
9339
783
            if (res < INT_MAX)
9340
303
                xmlErrAttributeDup(ctxt, aprefix, attname);
9341
783
        }
9342
138
    }
9343
9344
    /*
9345
     * Reconstruct attribute pointers
9346
     */
9347
116k
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9348
        /* namespace URI */
9349
35.1k
        nsIndex = XML_PTR_TO_INT(atts[i+2]);
9350
35.1k
        if (nsIndex == INT_MAX)
9351
24.6k
            atts[i+2] = NULL;
9352
10.5k
        else if (nsIndex == INT_MAX - 1)
9353
4.29k
            atts[i+2] = ctxt->str_xml_ns;
9354
6.20k
        else
9355
6.20k
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9356
9357
35.1k
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9358
15.2k
            atts[i+3] = BASE_PTR + XML_PTR_TO_INT(atts[i+3]);  /* value */
9359
15.2k
            atts[i+4] = BASE_PTR + XML_PTR_TO_INT(atts[i+4]);  /* valuend */
9360
15.2k
        }
9361
35.1k
    }
9362
9363
81.7k
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9364
81.7k
    if ((prefix != NULL) && (uri == NULL)) {
9365
4.04k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9366
4.04k
           "Namespace prefix %s on %s is not defined\n",
9367
4.04k
     prefix, localname, NULL);
9368
4.04k
    }
9369
81.7k
    *pref = prefix;
9370
81.7k
    *URI = uri;
9371
9372
    /*
9373
     * SAX callback
9374
     */
9375
81.7k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9376
81.7k
  (!ctxt->disableSAX)) {
9377
37.1k
  if (nbNs > 0)
9378
8.03k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9379
8.03k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9380
8.03k
        nbatts / 5, nbdef, atts);
9381
29.1k
  else
9382
29.1k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9383
29.1k
                          0, NULL, nbatts / 5, nbdef, atts);
9384
37.1k
    }
9385
9386
81.7k
done:
9387
    /*
9388
     * Free allocated attribute values
9389
     */
9390
81.7k
    if (attval != 0) {
9391
3.06k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9392
1.86k
      if (ctxt->attallocs[j] & 0x80000000)
9393
1.61k
          xmlFree((xmlChar *) atts[i+3]);
9394
1.19k
    }
9395
9396
81.7k
    *nbNsPtr = nbNs;
9397
81.7k
    return(localname);
9398
81.7k
}
9399
9400
/**
9401
 * Parse an end tag. Always consumes '</'.
9402
 *
9403
 *     [42] ETag ::= '</' Name S? '>'
9404
 *
9405
 * With namespace
9406
 *
9407
 *     [NS 9] ETag ::= '</' QName S? '>'
9408
 * @param ctxt  an XML parser context
9409
 * @param tag  the corresponding start tag
9410
 */
9411
9412
static void
9413
10.8k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9414
10.8k
    const xmlChar *name;
9415
9416
10.8k
    GROW;
9417
10.8k
    if ((RAW != '<') || (NXT(1) != '/')) {
9418
39
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9419
39
  return;
9420
39
    }
9421
10.7k
    SKIP(2);
9422
9423
10.7k
    if (tag->prefix == NULL)
9424
8.90k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9425
1.86k
    else
9426
1.86k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9427
9428
    /*
9429
     * We should definitely be at the ending "S? '>'" part
9430
     */
9431
10.7k
    GROW;
9432
10.7k
    SKIP_BLANKS;
9433
10.7k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9434
3.90k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9435
3.90k
    } else
9436
6.86k
  NEXT1;
9437
9438
    /*
9439
     * [ WFC: Element Type Match ]
9440
     * The Name in an element's end-tag must match the element type in the
9441
     * start-tag.
9442
     *
9443
     */
9444
10.7k
    if (name != (xmlChar*)1) {
9445
8.37k
        if (name == NULL) name = BAD_CAST "unparsable";
9446
8.37k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9447
8.37k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9448
8.37k
                    ctxt->name, tag->line, name);
9449
8.37k
    }
9450
9451
    /*
9452
     * SAX: End of Tag
9453
     */
9454
10.7k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9455
10.7k
  (!ctxt->disableSAX))
9456
2.61k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9457
2.61k
                                tag->URI);
9458
9459
10.7k
    spacePop(ctxt);
9460
10.7k
    if (tag->nsNr != 0)
9461
1.44k
  xmlParserNsPop(ctxt, tag->nsNr);
9462
10.7k
}
9463
9464
/**
9465
 * Parse escaped pure raw content. Always consumes '<!['.
9466
 *
9467
 * @deprecated Internal function, don't use.
9468
 *
9469
 *     [18] CDSect ::= CDStart CData CDEnd
9470
 *
9471
 *     [19] CDStart ::= '<![CDATA['
9472
 *
9473
 *     [20] Data ::= (Char* - (Char* ']]>' Char*))
9474
 *
9475
 *     [21] CDEnd ::= ']]>'
9476
 * @param ctxt  an XML parser context
9477
 */
9478
void
9479
1.80k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9480
1.80k
    xmlChar *buf = NULL;
9481
1.80k
    int len = 0;
9482
1.80k
    int size = XML_PARSER_BUFFER_SIZE;
9483
1.80k
    int r, rl;
9484
1.80k
    int s, sl;
9485
1.80k
    int cur, l;
9486
1.80k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9487
0
                    XML_MAX_HUGE_LENGTH :
9488
1.80k
                    XML_MAX_TEXT_LENGTH;
9489
9490
1.80k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9491
0
        return;
9492
1.80k
    SKIP(3);
9493
9494
1.80k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9495
0
        return;
9496
1.80k
    SKIP(6);
9497
9498
1.80k
    r = xmlCurrentCharRecover(ctxt, &rl);
9499
1.80k
    if (!IS_CHAR(r)) {
9500
140
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9501
140
        goto out;
9502
140
    }
9503
1.66k
    NEXTL(rl);
9504
1.66k
    s = xmlCurrentCharRecover(ctxt, &sl);
9505
1.66k
    if (!IS_CHAR(s)) {
9506
249
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9507
249
        goto out;
9508
249
    }
9509
1.41k
    NEXTL(sl);
9510
1.41k
    cur = xmlCurrentCharRecover(ctxt, &l);
9511
1.41k
    buf = xmlMalloc(size);
9512
1.41k
    if (buf == NULL) {
9513
0
  xmlErrMemory(ctxt);
9514
0
        goto out;
9515
0
    }
9516
9.63k
    while (IS_CHAR(cur) &&
9517
9.63k
           ((r != ']') || (s != ']') || (cur != '>'))) {
9518
8.21k
  if (len + 5 >= size) {
9519
53
      xmlChar *tmp;
9520
53
            int newSize;
9521
9522
53
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9523
53
            if (newSize < 0) {
9524
0
                xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9525
0
                               "CData section too big found\n");
9526
0
                goto out;
9527
0
            }
9528
53
      tmp = xmlRealloc(buf, newSize);
9529
53
      if (tmp == NULL) {
9530
0
    xmlErrMemory(ctxt);
9531
0
                goto out;
9532
0
      }
9533
53
      buf = tmp;
9534
53
      size = newSize;
9535
53
  }
9536
8.21k
  COPY_BUF(buf, len, r);
9537
8.21k
  r = s;
9538
8.21k
  rl = sl;
9539
8.21k
  s = cur;
9540
8.21k
  sl = l;
9541
8.21k
  NEXTL(l);
9542
8.21k
  cur = xmlCurrentCharRecover(ctxt, &l);
9543
8.21k
    }
9544
1.41k
    buf[len] = 0;
9545
1.41k
    if (cur != '>') {
9546
573
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9547
573
                       "CData section not finished\n%.50s\n", buf);
9548
573
        goto out;
9549
573
    }
9550
845
    NEXTL(l);
9551
9552
    /*
9553
     * OK the buffer is to be consumed as cdata.
9554
     */
9555
845
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9556
549
        if (ctxt->options & XML_PARSE_NOCDATA) {
9557
130
            if (ctxt->sax->characters != NULL)
9558
130
                ctxt->sax->characters(ctxt->userData, buf, len);
9559
419
        } else {
9560
419
            if (ctxt->sax->cdataBlock != NULL)
9561
419
                ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9562
419
        }
9563
549
    }
9564
9565
1.80k
out:
9566
1.80k
    xmlFree(buf);
9567
1.80k
}
9568
9569
/**
9570
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9571
 * unexpected EOF to the caller.
9572
 *
9573
 * @param ctxt  an XML parser context
9574
 */
9575
9576
static void
9577
10.4k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9578
10.4k
    int oldNameNr = ctxt->nameNr;
9579
10.4k
    int oldSpaceNr = ctxt->spaceNr;
9580
10.4k
    int oldNodeNr = ctxt->nodeNr;
9581
9582
10.4k
    GROW;
9583
226k
    while ((ctxt->input->cur < ctxt->input->end) &&
9584
226k
     (PARSER_STOPPED(ctxt) == 0)) {
9585
220k
  const xmlChar *cur = ctxt->input->cur;
9586
9587
  /*
9588
   * First case : a Processing Instruction.
9589
   */
9590
220k
  if ((*cur == '<') && (cur[1] == '?')) {
9591
2.20k
      xmlParsePI(ctxt);
9592
2.20k
  }
9593
9594
  /*
9595
   * Second case : a CDSection
9596
   */
9597
  /* 2.6.0 test was *cur not RAW */
9598
218k
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9599
1.80k
      xmlParseCDSect(ctxt);
9600
1.80k
  }
9601
9602
  /*
9603
   * Third case :  a comment
9604
   */
9605
216k
  else if ((*cur == '<') && (NXT(1) == '!') &&
9606
216k
     (NXT(2) == '-') && (NXT(3) == '-')) {
9607
1.02k
      xmlParseComment(ctxt);
9608
1.02k
  }
9609
9610
  /*
9611
   * Fourth case :  a sub-element.
9612
   */
9613
215k
  else if (*cur == '<') {
9614
153k
            if (NXT(1) == '/') {
9615
15.6k
                if (ctxt->nameNr <= oldNameNr)
9616
4.40k
                    break;
9617
11.2k
          xmlParseElementEnd(ctxt);
9618
137k
            } else {
9619
137k
          xmlParseElementStart(ctxt);
9620
137k
            }
9621
153k
  }
9622
9623
  /*
9624
   * Fifth case : a reference. If if has not been resolved,
9625
   *    parsing returns it's Name, create the node
9626
   */
9627
9628
62.0k
  else if (*cur == '&') {
9629
12.8k
      xmlParseReference(ctxt);
9630
12.8k
  }
9631
9632
  /*
9633
   * Last case, text. Note that References are handled directly.
9634
   */
9635
49.1k
  else {
9636
49.1k
      xmlParseCharDataInternal(ctxt, 0);
9637
49.1k
  }
9638
9639
216k
  SHRINK;
9640
216k
  GROW;
9641
216k
    }
9642
9643
10.4k
    if ((ctxt->nameNr > oldNameNr) &&
9644
10.4k
        (ctxt->input->cur >= ctxt->input->end) &&
9645
10.4k
        (ctxt->wellFormed)) {
9646
29
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9647
29
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9648
29
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9649
29
                "Premature end of data in tag %s line %d\n",
9650
29
                name, line, NULL);
9651
29
    }
9652
9653
    /*
9654
     * Clean up in error case
9655
     */
9656
9657
25.1k
    while (ctxt->nodeNr > oldNodeNr)
9658
14.7k
        nodePop(ctxt);
9659
9660
36.1k
    while (ctxt->nameNr > oldNameNr) {
9661
25.7k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9662
9663
25.7k
        if (tag->nsNr != 0)
9664
9.82k
            xmlParserNsPop(ctxt, tag->nsNr);
9665
9666
25.7k
        namePop(ctxt);
9667
25.7k
    }
9668
9669
36.1k
    while (ctxt->spaceNr > oldSpaceNr)
9670
25.7k
        spacePop(ctxt);
9671
10.4k
}
9672
9673
/**
9674
 * Parse XML element content. This is useful if you're only interested
9675
 * in custom SAX callbacks. If you want a node list, use
9676
 * xmlCtxtParseContent().
9677
 *
9678
 * @param ctxt  an XML parser context
9679
 */
9680
void
9681
0
xmlParseContent(xmlParserCtxtPtr ctxt) {
9682
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9683
0
        return;
9684
9685
0
    xmlCtxtInitializeLate(ctxt);
9686
9687
0
    xmlParseContentInternal(ctxt);
9688
9689
0
    xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
9690
0
}
9691
9692
/**
9693
 * parse an XML element
9694
 *
9695
 * @deprecated Internal function, don't use.
9696
 *
9697
 *     [39] element ::= EmptyElemTag | STag content ETag
9698
 *
9699
 * [ WFC: Element Type Match ]
9700
 * The Name in an element's end-tag must match the element type in the
9701
 * start-tag.
9702
 *
9703
 * @param ctxt  an XML parser context
9704
 */
9705
9706
void
9707
17.4k
xmlParseElement(xmlParserCtxtPtr ctxt) {
9708
17.4k
    if (xmlParseElementStart(ctxt) != 0)
9709
8.07k
        return;
9710
9711
9.40k
    xmlParseContentInternal(ctxt);
9712
9713
9.40k
    if (ctxt->input->cur >= ctxt->input->end) {
9714
4.97k
        if (ctxt->wellFormed) {
9715
26
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9716
26
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9717
26
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9718
26
                    "Premature end of data in tag %s line %d\n",
9719
26
                    name, line, NULL);
9720
26
        }
9721
4.97k
        return;
9722
4.97k
    }
9723
9724
4.42k
    xmlParseElementEnd(ctxt);
9725
4.42k
}
9726
9727
/**
9728
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9729
 * opening tag was parsed, 1 if an empty element was parsed.
9730
 *
9731
 * Always consumes '<'.
9732
 *
9733
 * @param ctxt  an XML parser context
9734
 */
9735
static int
9736
155k
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9737
155k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9738
155k
    const xmlChar *name;
9739
155k
    const xmlChar *prefix = NULL;
9740
155k
    const xmlChar *URI = NULL;
9741
155k
    xmlParserNodeInfo node_info;
9742
155k
    int line;
9743
155k
    xmlNodePtr cur;
9744
155k
    int nbNs = 0;
9745
9746
155k
    if (ctxt->nameNr > maxDepth) {
9747
1
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9748
1
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9749
1
                ctxt->nameNr);
9750
1
  xmlHaltParser(ctxt);
9751
1
  return(-1);
9752
1
    }
9753
9754
    /* Capture start position */
9755
155k
    if (ctxt->record_info) {
9756
0
        node_info.begin_pos = ctxt->input->consumed +
9757
0
                          (CUR_PTR - ctxt->input->base);
9758
0
  node_info.begin_line = ctxt->input->line;
9759
0
    }
9760
9761
155k
    if (ctxt->spaceNr == 0)
9762
0
  spacePush(ctxt, -1);
9763
155k
    else if (*ctxt->space == -2)
9764
18.3k
  spacePush(ctxt, -1);
9765
136k
    else
9766
136k
  spacePush(ctxt, *ctxt->space);
9767
9768
155k
    line = ctxt->input->line;
9769
155k
#ifdef LIBXML_SAX1_ENABLED
9770
155k
    if (ctxt->sax2)
9771
98.7k
#endif /* LIBXML_SAX1_ENABLED */
9772
98.7k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9773
56.4k
#ifdef LIBXML_SAX1_ENABLED
9774
56.4k
    else
9775
56.4k
  name = xmlParseStartTag(ctxt);
9776
155k
#endif /* LIBXML_SAX1_ENABLED */
9777
155k
    if (name == NULL) {
9778
23.7k
  spacePop(ctxt);
9779
23.7k
        return(-1);
9780
23.7k
    }
9781
131k
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9782
131k
    cur = ctxt->node;
9783
9784
131k
#ifdef LIBXML_VALID_ENABLED
9785
    /*
9786
     * [ VC: Root Element Type ]
9787
     * The Name in the document type declaration must match the element
9788
     * type of the root element.
9789
     */
9790
131k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9791
131k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9792
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9793
131k
#endif /* LIBXML_VALID_ENABLED */
9794
9795
    /*
9796
     * Check for an Empty Element.
9797
     */
9798
131k
    if ((RAW == '/') && (NXT(1) == '>')) {
9799
2.50k
        SKIP(2);
9800
2.50k
  if (ctxt->sax2) {
9801
1.81k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9802
1.81k
    (!ctxt->disableSAX))
9803
503
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9804
1.81k
#ifdef LIBXML_SAX1_ENABLED
9805
1.81k
  } else {
9806
693
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9807
693
    (!ctxt->disableSAX))
9808
365
    ctxt->sax->endElement(ctxt->userData, name);
9809
693
#endif /* LIBXML_SAX1_ENABLED */
9810
693
  }
9811
2.50k
  namePop(ctxt);
9812
2.50k
  spacePop(ctxt);
9813
2.50k
  if (nbNs > 0)
9814
596
      xmlParserNsPop(ctxt, nbNs);
9815
2.50k
  if (cur != NULL && ctxt->record_info) {
9816
0
            node_info.node = cur;
9817
0
            node_info.end_pos = ctxt->input->consumed +
9818
0
                                (CUR_PTR - ctxt->input->base);
9819
0
            node_info.end_line = ctxt->input->line;
9820
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9821
0
  }
9822
2.50k
  return(1);
9823
2.50k
    }
9824
129k
    if (RAW == '>') {
9825
46.4k
        NEXT1;
9826
46.4k
        if (cur != NULL && ctxt->record_info) {
9827
0
            node_info.node = cur;
9828
0
            node_info.end_pos = 0;
9829
0
            node_info.end_line = 0;
9830
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9831
0
        }
9832
82.6k
    } else {
9833
82.6k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9834
82.6k
         "Couldn't find end of Start Tag %s line %d\n",
9835
82.6k
                    name, line, NULL);
9836
9837
  /*
9838
   * end of parsing of this node.
9839
   */
9840
82.6k
  nodePop(ctxt);
9841
82.6k
  namePop(ctxt);
9842
82.6k
  spacePop(ctxt);
9843
82.6k
  if (nbNs > 0)
9844
11.0k
      xmlParserNsPop(ctxt, nbNs);
9845
82.6k
  return(-1);
9846
82.6k
    }
9847
9848
46.4k
    return(0);
9849
129k
}
9850
9851
/**
9852
 * Parse the end of an XML element. Always consumes '</'.
9853
 *
9854
 * @param ctxt  an XML parser context
9855
 */
9856
static void
9857
15.7k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9858
15.7k
    xmlNodePtr cur = ctxt->node;
9859
9860
15.7k
    if (ctxt->nameNr <= 0) {
9861
0
        if ((RAW == '<') && (NXT(1) == '/'))
9862
0
            SKIP(2);
9863
0
        return;
9864
0
    }
9865
9866
    /*
9867
     * parse the end of tag: '</' should be here.
9868
     */
9869
15.7k
    if (ctxt->sax2) {
9870
10.8k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9871
10.8k
  namePop(ctxt);
9872
10.8k
    }
9873
4.90k
#ifdef LIBXML_SAX1_ENABLED
9874
4.90k
    else
9875
4.90k
  xmlParseEndTag1(ctxt, 0);
9876
15.7k
#endif /* LIBXML_SAX1_ENABLED */
9877
9878
    /*
9879
     * Capture end position
9880
     */
9881
15.7k
    if (cur != NULL && ctxt->record_info) {
9882
0
        xmlParserNodeInfoPtr node_info;
9883
9884
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9885
0
        if (node_info != NULL) {
9886
0
            node_info->end_pos = ctxt->input->consumed +
9887
0
                                 (CUR_PTR - ctxt->input->base);
9888
0
            node_info->end_line = ctxt->input->line;
9889
0
        }
9890
0
    }
9891
15.7k
}
9892
9893
/**
9894
 * parse the XML version value.
9895
 *
9896
 * @deprecated Internal function, don't use.
9897
 *
9898
 *     [26] VersionNum ::= '1.' [0-9]+
9899
 *
9900
 * In practice allow [0-9].[0-9]+ at that level
9901
 *
9902
 * @param ctxt  an XML parser context
9903
 * @returns the string giving the XML version number, or NULL
9904
 */
9905
xmlChar *
9906
7.96k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9907
7.96k
    xmlChar *buf = NULL;
9908
7.96k
    int len = 0;
9909
7.96k
    int size = 10;
9910
7.96k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9911
0
                    XML_MAX_TEXT_LENGTH :
9912
7.96k
                    XML_MAX_NAME_LENGTH;
9913
7.96k
    xmlChar cur;
9914
9915
7.96k
    buf = xmlMalloc(size);
9916
7.96k
    if (buf == NULL) {
9917
0
  xmlErrMemory(ctxt);
9918
0
  return(NULL);
9919
0
    }
9920
7.96k
    cur = CUR;
9921
7.96k
    if (!((cur >= '0') && (cur <= '9'))) {
9922
7.89k
  xmlFree(buf);
9923
7.89k
  return(NULL);
9924
7.89k
    }
9925
63
    buf[len++] = cur;
9926
63
    NEXT;
9927
63
    cur=CUR;
9928
63
    if (cur != '.') {
9929
24
  xmlFree(buf);
9930
24
  return(NULL);
9931
24
    }
9932
39
    buf[len++] = cur;
9933
39
    NEXT;
9934
39
    cur=CUR;
9935
84
    while ((cur >= '0') && (cur <= '9')) {
9936
46
  if (len + 1 >= size) {
9937
1
      xmlChar *tmp;
9938
1
            int newSize;
9939
9940
1
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9941
1
            if (newSize) {
9942
1
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "VersionNum");
9943
1
                xmlFree(buf);
9944
1
                return(NULL);
9945
1
            }
9946
0
      tmp = xmlRealloc(buf, newSize);
9947
0
      if (tmp == NULL) {
9948
0
    xmlErrMemory(ctxt);
9949
0
          xmlFree(buf);
9950
0
    return(NULL);
9951
0
      }
9952
0
      buf = tmp;
9953
0
            size = newSize;
9954
0
  }
9955
45
  buf[len++] = cur;
9956
45
  NEXT;
9957
45
  cur=CUR;
9958
45
    }
9959
38
    buf[len] = 0;
9960
38
    return(buf);
9961
39
}
9962
9963
/**
9964
 * parse the XML version.
9965
 *
9966
 * @deprecated Internal function, don't use.
9967
 *
9968
 *     [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9969
 *
9970
 *     [25] Eq ::= S? '=' S?
9971
 *
9972
 * @param ctxt  an XML parser context
9973
 * @returns the version string, e.g. "1.0"
9974
 */
9975
9976
xmlChar *
9977
11.5k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9978
11.5k
    xmlChar *version = NULL;
9979
9980
11.5k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9981
7.96k
  SKIP(7);
9982
7.96k
  SKIP_BLANKS;
9983
7.96k
  if (RAW != '=') {
9984
2
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9985
2
      return(NULL);
9986
2
        }
9987
7.96k
  NEXT;
9988
7.96k
  SKIP_BLANKS;
9989
7.96k
  if (RAW == '"') {
9990
7.96k
      NEXT;
9991
7.96k
      version = xmlParseVersionNum(ctxt);
9992
7.96k
      if (RAW != '"') {
9993
7.83k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9994
7.83k
      } else
9995
122
          NEXT;
9996
7.96k
  } else if (RAW == '\''){
9997
2
      NEXT;
9998
2
      version = xmlParseVersionNum(ctxt);
9999
2
      if (RAW != '\'') {
10000
1
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10001
1
      } else
10002
1
          NEXT;
10003
2
  } else {
10004
1
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10005
1
  }
10006
7.96k
    }
10007
11.5k
    return(version);
10008
11.5k
}
10009
10010
/**
10011
 * parse the XML encoding name
10012
 *
10013
 * @deprecated Internal function, don't use.
10014
 *
10015
 *     [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10016
 *
10017
 * @param ctxt  an XML parser context
10018
 * @returns the encoding name value or NULL
10019
 */
10020
xmlChar *
10021
3.59k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10022
3.59k
    xmlChar *buf = NULL;
10023
3.59k
    int len = 0;
10024
3.59k
    int size = 10;
10025
3.59k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10026
0
                    XML_MAX_TEXT_LENGTH :
10027
3.59k
                    XML_MAX_NAME_LENGTH;
10028
3.59k
    xmlChar cur;
10029
10030
3.59k
    cur = CUR;
10031
3.59k
    if (((cur >= 'a') && (cur <= 'z')) ||
10032
3.59k
        ((cur >= 'A') && (cur <= 'Z'))) {
10033
2.38k
  buf = xmlMalloc(size);
10034
2.38k
  if (buf == NULL) {
10035
0
      xmlErrMemory(ctxt);
10036
0
      return(NULL);
10037
0
  }
10038
10039
2.38k
  buf[len++] = cur;
10040
2.38k
  NEXT;
10041
2.38k
  cur = CUR;
10042
12.5k
  while (((cur >= 'a') && (cur <= 'z')) ||
10043
12.5k
         ((cur >= 'A') && (cur <= 'Z')) ||
10044
12.5k
         ((cur >= '0') && (cur <= '9')) ||
10045
12.5k
         (cur == '.') || (cur == '_') ||
10046
12.5k
         (cur == '-')) {
10047
10.1k
      if (len + 1 >= size) {
10048
272
          xmlChar *tmp;
10049
272
                int newSize;
10050
10051
272
                newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10052
272
                if (newSize < 0) {
10053
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10054
0
                    xmlFree(buf);
10055
0
                    return(NULL);
10056
0
                }
10057
272
    tmp = xmlRealloc(buf, newSize);
10058
272
    if (tmp == NULL) {
10059
0
        xmlErrMemory(ctxt);
10060
0
        xmlFree(buf);
10061
0
        return(NULL);
10062
0
    }
10063
272
    buf = tmp;
10064
272
                size = newSize;
10065
272
      }
10066
10.1k
      buf[len++] = cur;
10067
10.1k
      NEXT;
10068
10.1k
      cur = CUR;
10069
10.1k
        }
10070
2.38k
  buf[len] = 0;
10071
2.38k
    } else {
10072
1.21k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10073
1.21k
    }
10074
3.59k
    return(buf);
10075
3.59k
}
10076
10077
/**
10078
 * parse the XML encoding declaration
10079
 *
10080
 * @deprecated Internal function, don't use.
10081
 *
10082
 *     [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | 
10083
 *                           "'" EncName "'")
10084
 *
10085
 * this setups the conversion filters.
10086
 *
10087
 * @param ctxt  an XML parser context
10088
 * @returns the encoding value or NULL
10089
 */
10090
10091
const xmlChar *
10092
11.4k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10093
11.4k
    xmlChar *encoding = NULL;
10094
10095
11.4k
    SKIP_BLANKS;
10096
11.4k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10097
7.80k
        return(NULL);
10098
10099
3.60k
    SKIP(8);
10100
3.60k
    SKIP_BLANKS;
10101
3.60k
    if (RAW != '=') {
10102
3
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10103
3
        return(NULL);
10104
3
    }
10105
3.60k
    NEXT;
10106
3.60k
    SKIP_BLANKS;
10107
3.60k
    if (RAW == '"') {
10108
2
        NEXT;
10109
2
        encoding = xmlParseEncName(ctxt);
10110
2
        if (RAW != '"') {
10111
1
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10112
1
            xmlFree((xmlChar *) encoding);
10113
1
            return(NULL);
10114
1
        } else
10115
1
            NEXT;
10116
3.59k
    } else if (RAW == '\''){
10117
3.59k
        NEXT;
10118
3.59k
        encoding = xmlParseEncName(ctxt);
10119
3.59k
        if (RAW != '\'') {
10120
1.25k
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10121
1.25k
            xmlFree((xmlChar *) encoding);
10122
1.25k
            return(NULL);
10123
1.25k
        } else
10124
2.34k
            NEXT;
10125
3.59k
    } else {
10126
3
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10127
3
    }
10128
10129
2.34k
    if (encoding == NULL)
10130
11
        return(NULL);
10131
10132
2.33k
    xmlSetDeclaredEncoding(ctxt, encoding);
10133
10134
2.33k
    return(ctxt->encoding);
10135
2.34k
}
10136
10137
/**
10138
 * parse the XML standalone declaration
10139
 *
10140
 * @deprecated Internal function, don't use.
10141
 *
10142
 *     [32] SDDecl ::= S 'standalone' Eq
10143
 *                     (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10144
 *
10145
 * [ VC: Standalone Document Declaration ]
10146
 * TODO The standalone document declaration must have the value "no"
10147
 * if any external markup declarations contain declarations of:
10148
 *  - attributes with default values, if elements to which these
10149
 *    attributes apply appear in the document without specifications
10150
 *    of values for these attributes, or
10151
 *  - entities (other than amp, lt, gt, apos, quot), if references
10152
 *    to those entities appear in the document, or
10153
 *  - attributes with values subject to normalization, where the
10154
 *    attribute appears in the document with a value which will change
10155
 *    as a result of normalization, or
10156
 *  - element types with element content, if white space occurs directly
10157
 *    within any instance of those types.
10158
 *
10159
 * @param ctxt  an XML parser context
10160
 * @returns
10161
 *   1 if standalone="yes"
10162
 *   0 if standalone="no"
10163
 *  -2 if standalone attribute is missing or invalid
10164
 *    (A standalone value of -2 means that the XML declaration was found,
10165
 *     but no value was specified for the standalone attribute).
10166
 */
10167
10168
int
10169
9.97k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10170
9.97k
    int standalone = -2;
10171
10172
9.97k
    SKIP_BLANKS;
10173
9.97k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10174
75
  SKIP(10);
10175
75
        SKIP_BLANKS;
10176
75
  if (RAW != '=') {
10177
6
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10178
6
      return(standalone);
10179
6
        }
10180
69
  NEXT;
10181
69
  SKIP_BLANKS;
10182
69
        if (RAW == '\''){
10183
54
      NEXT;
10184
54
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10185
2
          standalone = 0;
10186
2
                SKIP(2);
10187
52
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10188
52
                 (NXT(2) == 's')) {
10189
44
          standalone = 1;
10190
44
    SKIP(3);
10191
44
            } else {
10192
8
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10193
8
      }
10194
54
      if (RAW != '\'') {
10195
53
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10196
53
      } else
10197
1
          NEXT;
10198
54
  } else if (RAW == '"'){
10199
12
      NEXT;
10200
12
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10201
3
          standalone = 0;
10202
3
    SKIP(2);
10203
9
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10204
9
                 (NXT(2) == 's')) {
10205
4
          standalone = 1;
10206
4
                SKIP(3);
10207
5
            } else {
10208
5
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10209
5
      }
10210
12
      if (RAW != '"') {
10211
6
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10212
6
      } else
10213
6
          NEXT;
10214
12
  } else {
10215
3
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10216
3
        }
10217
69
    }
10218
9.96k
    return(standalone);
10219
9.97k
}
10220
10221
/**
10222
 * parse an XML declaration header
10223
 *
10224
 * @deprecated Internal function, don't use.
10225
 *
10226
 *     [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10227
 * @param ctxt  an XML parser context
10228
 */
10229
10230
void
10231
11.5k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10232
11.5k
    xmlChar *version;
10233
10234
    /*
10235
     * This value for standalone indicates that the document has an
10236
     * XML declaration but it does not have a standalone attribute.
10237
     * It will be overwritten later if a standalone attribute is found.
10238
     */
10239
10240
11.5k
    ctxt->standalone = -2;
10241
10242
    /*
10243
     * We know that '<?xml' is here.
10244
     */
10245
11.5k
    SKIP(5);
10246
10247
11.5k
    if (!IS_BLANK_CH(RAW)) {
10248
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10249
0
                 "Blank needed after '<?xml'\n");
10250
0
    }
10251
11.5k
    SKIP_BLANKS;
10252
10253
    /*
10254
     * We must have the VersionInfo here.
10255
     */
10256
11.5k
    version = xmlParseVersionInfo(ctxt);
10257
11.5k
    if (version == NULL) {
10258
11.5k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10259
11.5k
    } else {
10260
38
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10261
      /*
10262
       * Changed here for XML-1.0 5th edition
10263
       */
10264
31
      if (ctxt->options & XML_PARSE_OLD10) {
10265
1
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10266
1
                "Unsupported version '%s'\n",
10267
1
                version);
10268
30
      } else {
10269
30
          if ((version[0] == '1') && ((version[1] == '.'))) {
10270
24
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10271
24
                      "Unsupported version '%s'\n",
10272
24
          version, NULL);
10273
24
    } else {
10274
6
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10275
6
              "Unsupported version '%s'\n",
10276
6
              version);
10277
6
    }
10278
30
      }
10279
31
  }
10280
38
  if (ctxt->version != NULL)
10281
0
      xmlFree((void *) ctxt->version);
10282
38
  ctxt->version = version;
10283
38
    }
10284
10285
    /*
10286
     * We may have the encoding declaration
10287
     */
10288
11.5k
    if (!IS_BLANK_CH(RAW)) {
10289
11.4k
        if ((RAW == '?') && (NXT(1) == '>')) {
10290
150
      SKIP(2);
10291
150
      return;
10292
150
  }
10293
11.3k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10294
11.3k
    }
10295
11.4k
    xmlParseEncodingDecl(ctxt);
10296
10297
    /*
10298
     * We may have the standalone status.
10299
     */
10300
11.4k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10301
2.06k
        if ((RAW == '?') && (NXT(1) == '>')) {
10302
1.43k
      SKIP(2);
10303
1.43k
      return;
10304
1.43k
  }
10305
626
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10306
626
    }
10307
10308
    /*
10309
     * We can grow the input buffer freely at that point
10310
     */
10311
9.97k
    GROW;
10312
10313
9.97k
    SKIP_BLANKS;
10314
9.97k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10315
10316
9.97k
    SKIP_BLANKS;
10317
9.97k
    if ((RAW == '?') && (NXT(1) == '>')) {
10318
159
        SKIP(2);
10319
9.81k
    } else if (RAW == '>') {
10320
        /* Deprecated old WD ... */
10321
8.30k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10322
8.30k
  NEXT;
10323
8.30k
    } else {
10324
1.51k
        int c;
10325
10326
1.51k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10327
21.1k
        while ((PARSER_STOPPED(ctxt) == 0) &&
10328
21.1k
               ((c = CUR) != 0)) {
10329
20.9k
            NEXT;
10330
20.9k
            if (c == '>')
10331
1.30k
                break;
10332
20.9k
        }
10333
1.51k
    }
10334
9.97k
}
10335
10336
/**
10337
 * @since 2.14.0
10338
 *
10339
 * @param ctxt  parser context
10340
 * @returns the version from the XML declaration.
10341
 */
10342
const xmlChar *
10343
0
xmlCtxtGetVersion(xmlParserCtxtPtr ctxt) {
10344
0
    if (ctxt == NULL)
10345
0
        return(NULL);
10346
10347
0
    return(ctxt->version);
10348
0
}
10349
10350
/**
10351
 * @since 2.14.0
10352
 *
10353
 * @param ctxt  parser context
10354
 * @returns the value from the standalone document declaration.
10355
 */
10356
int
10357
0
xmlCtxtGetStandalone(xmlParserCtxtPtr ctxt) {
10358
0
    if (ctxt == NULL)
10359
0
        return(0);
10360
10361
0
    return(ctxt->standalone);
10362
0
}
10363
10364
/**
10365
 * parse an XML Misc* optional field.
10366
 *
10367
 * @deprecated Internal function, don't use.
10368
 *
10369
 *     [27] Misc ::= Comment | PI |  S
10370
 * @param ctxt  an XML parser context
10371
 */
10372
10373
void
10374
50.2k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10375
54.3k
    while (PARSER_STOPPED(ctxt) == 0) {
10376
53.0k
        SKIP_BLANKS;
10377
53.0k
        GROW;
10378
53.0k
        if ((RAW == '<') && (NXT(1) == '?')) {
10379
2.61k
      xmlParsePI(ctxt);
10380
50.4k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10381
1.47k
      xmlParseComment(ctxt);
10382
48.9k
        } else {
10383
48.9k
            break;
10384
48.9k
        }
10385
53.0k
    }
10386
50.2k
}
10387
10388
static void
10389
21.9k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10390
21.9k
    xmlDocPtr doc;
10391
10392
    /*
10393
     * SAX: end of the document processing.
10394
     */
10395
21.9k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10396
21.9k
        ctxt->sax->endDocument(ctxt->userData);
10397
10398
    /*
10399
     * Remove locally kept entity definitions if the tree was not built
10400
     */
10401
21.9k
    doc = ctxt->myDoc;
10402
21.9k
    if ((doc != NULL) &&
10403
21.9k
        (xmlStrEqual(doc->version, SAX_COMPAT_MODE))) {
10404
135
        xmlFreeDoc(doc);
10405
135
        ctxt->myDoc = NULL;
10406
135
    }
10407
21.9k
}
10408
10409
/**
10410
 * Parse an XML document and invoke the SAX handlers. This is useful
10411
 * if you're only interested in custom SAX callbacks. If you want a
10412
 * document tree, use xmlCtxtParseDocument().
10413
 *
10414
 * @param ctxt  an XML parser context
10415
 * @returns 0, -1 in case of error.
10416
 */
10417
10418
int
10419
21.9k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10420
21.9k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10421
0
        return(-1);
10422
10423
21.9k
    GROW;
10424
10425
    /*
10426
     * SAX: detecting the level.
10427
     */
10428
21.9k
    xmlCtxtInitializeLate(ctxt);
10429
10430
21.9k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10431
21.9k
        ctxt->sax->setDocumentLocator(ctxt->userData,
10432
21.9k
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10433
21.9k
    }
10434
10435
21.9k
    xmlDetectEncoding(ctxt);
10436
10437
21.9k
    if (CUR == 0) {
10438
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10439
0
  return(-1);
10440
0
    }
10441
10442
21.9k
    GROW;
10443
21.9k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10444
10445
  /*
10446
   * Note that we will switch encoding on the fly.
10447
   */
10448
11.5k
  xmlParseXMLDecl(ctxt);
10449
11.5k
  SKIP_BLANKS;
10450
11.5k
    } else {
10451
10.4k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10452
10.4k
        if (ctxt->version == NULL) {
10453
0
            xmlErrMemory(ctxt);
10454
0
            return(-1);
10455
0
        }
10456
10.4k
    }
10457
21.9k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10458
14.5k
        ctxt->sax->startDocument(ctxt->userData);
10459
21.9k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10460
21.9k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10461
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10462
0
    }
10463
10464
    /*
10465
     * The Misc part of the Prolog
10466
     */
10467
21.9k
    xmlParseMisc(ctxt);
10468
10469
    /*
10470
     * Then possibly doc type declaration(s) and more Misc
10471
     * (doctypedecl Misc*)?
10472
     */
10473
21.9k
    GROW;
10474
21.9k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10475
10476
10.7k
  ctxt->inSubset = 1;
10477
10.7k
  xmlParseDocTypeDecl(ctxt);
10478
10.7k
  if (RAW == '[') {
10479
9.02k
      xmlParseInternalSubset(ctxt);
10480
9.02k
  } else if (RAW == '>') {
10481
439
            NEXT;
10482
439
        }
10483
10484
  /*
10485
   * Create and update the external subset.
10486
   */
10487
10.7k
  ctxt->inSubset = 2;
10488
10.7k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10489
10.7k
      (!ctxt->disableSAX))
10490
6.51k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10491
6.51k
                                ctxt->extSubSystem, ctxt->extSubURI);
10492
10.7k
  ctxt->inSubset = 0;
10493
10494
10.7k
        xmlCleanSpecialAttr(ctxt);
10495
10496
10.7k
  xmlParseMisc(ctxt);
10497
10.7k
    }
10498
10499
    /*
10500
     * Time to start parsing the tree itself
10501
     */
10502
21.9k
    GROW;
10503
21.9k
    if (RAW != '<') {
10504
4.49k
        if (ctxt->wellFormed)
10505
2
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10506
2
                           "Start tag expected, '<' not found\n");
10507
17.4k
    } else {
10508
17.4k
  xmlParseElement(ctxt);
10509
10510
  /*
10511
   * The Misc part at the end
10512
   */
10513
17.4k
  xmlParseMisc(ctxt);
10514
10515
17.4k
        xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
10516
17.4k
    }
10517
10518
21.9k
    ctxt->instate = XML_PARSER_EOF;
10519
21.9k
    xmlFinishDocument(ctxt);
10520
10521
21.9k
    if (! ctxt->wellFormed) {
10522
21.9k
  ctxt->valid = 0;
10523
21.9k
  return(-1);
10524
21.9k
    }
10525
10526
50
    return(0);
10527
21.9k
}
10528
10529
/**
10530
 * parse a general parsed entity
10531
 * An external general parsed entity is well-formed if it matches the
10532
 * production labeled extParsedEnt.
10533
 *
10534
 * @deprecated Internal function, don't use.
10535
 *
10536
 *     [78] extParsedEnt ::= TextDecl? content
10537
 *
10538
 * @param ctxt  an XML parser context
10539
 * @returns 0, -1 in case of error. the parser context is augmented
10540
 *                as a result of the parsing.
10541
 */
10542
10543
int
10544
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10545
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10546
0
        return(-1);
10547
10548
0
    xmlCtxtInitializeLate(ctxt);
10549
10550
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10551
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10552
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10553
0
    }
10554
10555
0
    xmlDetectEncoding(ctxt);
10556
10557
0
    if (CUR == 0) {
10558
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10559
0
    }
10560
10561
    /*
10562
     * Check for the XMLDecl in the Prolog.
10563
     */
10564
0
    GROW;
10565
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10566
10567
  /*
10568
   * Note that we will switch encoding on the fly.
10569
   */
10570
0
  xmlParseXMLDecl(ctxt);
10571
0
  SKIP_BLANKS;
10572
0
    } else {
10573
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10574
0
    }
10575
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10576
0
        ctxt->sax->startDocument(ctxt->userData);
10577
10578
    /*
10579
     * Doing validity checking on chunk doesn't make sense
10580
     */
10581
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10582
0
    ctxt->validate = 0;
10583
0
    ctxt->depth = 0;
10584
10585
0
    xmlParseContentInternal(ctxt);
10586
10587
0
    if (ctxt->input->cur < ctxt->input->end)
10588
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10589
10590
    /*
10591
     * SAX: end of the document processing.
10592
     */
10593
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10594
0
        ctxt->sax->endDocument(ctxt->userData);
10595
10596
0
    if (! ctxt->wellFormed) return(-1);
10597
0
    return(0);
10598
0
}
10599
10600
#ifdef LIBXML_PUSH_ENABLED
10601
/************************************************************************
10602
 *                  *
10603
 *    Progressive parsing interfaces        *
10604
 *                  *
10605
 ************************************************************************/
10606
10607
/**
10608
 * Check whether the input buffer contains a character.
10609
 *
10610
 * @param ctxt  an XML parser context
10611
 * @param c  character
10612
 */
10613
static int
10614
0
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10615
0
    const xmlChar *cur;
10616
10617
0
    if (ctxt->checkIndex == 0) {
10618
0
        cur = ctxt->input->cur + 1;
10619
0
    } else {
10620
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10621
0
    }
10622
10623
0
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10624
0
        size_t index = ctxt->input->end - ctxt->input->cur;
10625
10626
0
        if (index > LONG_MAX) {
10627
0
            ctxt->checkIndex = 0;
10628
0
            return(1);
10629
0
        }
10630
0
        ctxt->checkIndex = index;
10631
0
        return(0);
10632
0
    } else {
10633
0
        ctxt->checkIndex = 0;
10634
0
        return(1);
10635
0
    }
10636
0
}
10637
10638
/**
10639
 * Check whether the input buffer contains a string.
10640
 *
10641
 * @param ctxt  an XML parser context
10642
 * @param startDelta  delta to apply at the start
10643
 * @param str  string
10644
 * @param strLen  length of string
10645
 */
10646
static const xmlChar *
10647
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10648
0
                     const char *str, size_t strLen) {
10649
0
    const xmlChar *cur, *term;
10650
10651
0
    if (ctxt->checkIndex == 0) {
10652
0
        cur = ctxt->input->cur + startDelta;
10653
0
    } else {
10654
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10655
0
    }
10656
10657
0
    term = BAD_CAST strstr((const char *) cur, str);
10658
0
    if (term == NULL) {
10659
0
        const xmlChar *end = ctxt->input->end;
10660
0
        size_t index;
10661
10662
        /* Rescan (strLen - 1) characters. */
10663
0
        if ((size_t) (end - cur) < strLen)
10664
0
            end = cur;
10665
0
        else
10666
0
            end -= strLen - 1;
10667
0
        index = end - ctxt->input->cur;
10668
0
        if (index > LONG_MAX) {
10669
0
            ctxt->checkIndex = 0;
10670
0
            return(ctxt->input->end - strLen);
10671
0
        }
10672
0
        ctxt->checkIndex = index;
10673
0
    } else {
10674
0
        ctxt->checkIndex = 0;
10675
0
    }
10676
10677
0
    return(term);
10678
0
}
10679
10680
/**
10681
 * Check whether the input buffer contains terminated char data.
10682
 *
10683
 * @param ctxt  an XML parser context
10684
 */
10685
static int
10686
0
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10687
0
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10688
0
    const xmlChar *end = ctxt->input->end;
10689
0
    size_t index;
10690
10691
0
    while (cur < end) {
10692
0
        if ((*cur == '<') || (*cur == '&')) {
10693
0
            ctxt->checkIndex = 0;
10694
0
            return(1);
10695
0
        }
10696
0
        cur++;
10697
0
    }
10698
10699
0
    index = cur - ctxt->input->cur;
10700
0
    if (index > LONG_MAX) {
10701
0
        ctxt->checkIndex = 0;
10702
0
        return(1);
10703
0
    }
10704
0
    ctxt->checkIndex = index;
10705
0
    return(0);
10706
0
}
10707
10708
/**
10709
 * Check whether there's enough data in the input buffer to finish parsing
10710
 * a start tag. This has to take quotes into account.
10711
 *
10712
 * @param ctxt  an XML parser context
10713
 */
10714
static int
10715
0
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10716
0
    const xmlChar *cur;
10717
0
    const xmlChar *end = ctxt->input->end;
10718
0
    int state = ctxt->endCheckState;
10719
0
    size_t index;
10720
10721
0
    if (ctxt->checkIndex == 0)
10722
0
        cur = ctxt->input->cur + 1;
10723
0
    else
10724
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10725
10726
0
    while (cur < end) {
10727
0
        if (state) {
10728
0
            if (*cur == state)
10729
0
                state = 0;
10730
0
        } else if (*cur == '\'' || *cur == '"') {
10731
0
            state = *cur;
10732
0
        } else if (*cur == '>') {
10733
0
            ctxt->checkIndex = 0;
10734
0
            ctxt->endCheckState = 0;
10735
0
            return(1);
10736
0
        }
10737
0
        cur++;
10738
0
    }
10739
10740
0
    index = cur - ctxt->input->cur;
10741
0
    if (index > LONG_MAX) {
10742
0
        ctxt->checkIndex = 0;
10743
0
        ctxt->endCheckState = 0;
10744
0
        return(1);
10745
0
    }
10746
0
    ctxt->checkIndex = index;
10747
0
    ctxt->endCheckState = state;
10748
0
    return(0);
10749
0
}
10750
10751
/**
10752
 * Check whether there's enough data in the input buffer to finish parsing
10753
 * the internal subset.
10754
 *
10755
 * @param ctxt  an XML parser context
10756
 */
10757
static int
10758
0
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10759
    /*
10760
     * Sorry, but progressive parsing of the internal subset is not
10761
     * supported. We first check that the full content of the internal
10762
     * subset is available and parsing is launched only at that point.
10763
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10764
     * not in a ']]>' sequence which are conditional sections.
10765
     */
10766
0
    const xmlChar *cur, *start;
10767
0
    const xmlChar *end = ctxt->input->end;
10768
0
    int state = ctxt->endCheckState;
10769
0
    size_t index;
10770
10771
0
    if (ctxt->checkIndex == 0) {
10772
0
        cur = ctxt->input->cur + 1;
10773
0
    } else {
10774
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10775
0
    }
10776
0
    start = cur;
10777
10778
0
    while (cur < end) {
10779
0
        if (state == '-') {
10780
0
            if ((*cur == '-') &&
10781
0
                (cur[1] == '-') &&
10782
0
                (cur[2] == '>')) {
10783
0
                state = 0;
10784
0
                cur += 3;
10785
0
                start = cur;
10786
0
                continue;
10787
0
            }
10788
0
        }
10789
0
        else if (state == ']') {
10790
0
            if (*cur == '>') {
10791
0
                ctxt->checkIndex = 0;
10792
0
                ctxt->endCheckState = 0;
10793
0
                return(1);
10794
0
            }
10795
0
            if (IS_BLANK_CH(*cur)) {
10796
0
                state = ' ';
10797
0
            } else if (*cur != ']') {
10798
0
                state = 0;
10799
0
                start = cur;
10800
0
                continue;
10801
0
            }
10802
0
        }
10803
0
        else if (state == ' ') {
10804
0
            if (*cur == '>') {
10805
0
                ctxt->checkIndex = 0;
10806
0
                ctxt->endCheckState = 0;
10807
0
                return(1);
10808
0
            }
10809
0
            if (!IS_BLANK_CH(*cur)) {
10810
0
                state = 0;
10811
0
                start = cur;
10812
0
                continue;
10813
0
            }
10814
0
        }
10815
0
        else if (state != 0) {
10816
0
            if (*cur == state) {
10817
0
                state = 0;
10818
0
                start = cur + 1;
10819
0
            }
10820
0
        }
10821
0
        else if (*cur == '<') {
10822
0
            if ((cur[1] == '!') &&
10823
0
                (cur[2] == '-') &&
10824
0
                (cur[3] == '-')) {
10825
0
                state = '-';
10826
0
                cur += 4;
10827
                /* Don't treat <!--> as comment */
10828
0
                start = cur;
10829
0
                continue;
10830
0
            }
10831
0
        }
10832
0
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10833
0
            state = *cur;
10834
0
        }
10835
10836
0
        cur++;
10837
0
    }
10838
10839
    /*
10840
     * Rescan the three last characters to detect "<!--" and "-->"
10841
     * split across chunks.
10842
     */
10843
0
    if ((state == 0) || (state == '-')) {
10844
0
        if (cur - start < 3)
10845
0
            cur = start;
10846
0
        else
10847
0
            cur -= 3;
10848
0
    }
10849
0
    index = cur - ctxt->input->cur;
10850
0
    if (index > LONG_MAX) {
10851
0
        ctxt->checkIndex = 0;
10852
0
        ctxt->endCheckState = 0;
10853
0
        return(1);
10854
0
    }
10855
0
    ctxt->checkIndex = index;
10856
0
    ctxt->endCheckState = state;
10857
0
    return(0);
10858
0
}
10859
10860
/**
10861
 * Try to progress on parsing
10862
 *
10863
 * @param ctxt  an XML parser context
10864
 * @param terminate  last chunk indicator
10865
 * @returns zero if no parsing was possible
10866
 */
10867
static int
10868
0
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10869
0
    int ret = 0;
10870
0
    size_t avail;
10871
0
    xmlChar cur, next;
10872
10873
0
    if (ctxt->input == NULL)
10874
0
        return(0);
10875
10876
0
    if ((ctxt->input != NULL) &&
10877
0
        (ctxt->input->cur - ctxt->input->base > 4096)) {
10878
0
        xmlParserShrink(ctxt);
10879
0
    }
10880
10881
0
    while (ctxt->disableSAX == 0) {
10882
0
        avail = ctxt->input->end - ctxt->input->cur;
10883
0
        if (avail < 1)
10884
0
      goto done;
10885
0
        switch (ctxt->instate) {
10886
0
            case XML_PARSER_EOF:
10887
          /*
10888
     * Document parsing is done !
10889
     */
10890
0
          goto done;
10891
0
            case XML_PARSER_START:
10892
                /*
10893
                 * Very first chars read from the document flow.
10894
                 */
10895
0
                if ((!terminate) && (avail < 4))
10896
0
                    goto done;
10897
10898
                /*
10899
                 * We need more bytes to detect EBCDIC code pages.
10900
                 * See xmlDetectEBCDIC.
10901
                 */
10902
0
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
10903
0
                    (!terminate) && (avail < 200))
10904
0
                    goto done;
10905
10906
0
                xmlDetectEncoding(ctxt);
10907
0
                ctxt->instate = XML_PARSER_XML_DECL;
10908
0
    break;
10909
10910
0
            case XML_PARSER_XML_DECL:
10911
0
    if ((!terminate) && (avail < 2))
10912
0
        goto done;
10913
0
    cur = ctxt->input->cur[0];
10914
0
    next = ctxt->input->cur[1];
10915
0
          if ((cur == '<') && (next == '?')) {
10916
        /* PI or XML decl */
10917
0
        if ((!terminate) &&
10918
0
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
10919
0
      goto done;
10920
0
        if ((ctxt->input->cur[2] == 'x') &&
10921
0
      (ctxt->input->cur[3] == 'm') &&
10922
0
      (ctxt->input->cur[4] == 'l') &&
10923
0
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
10924
0
      ret += 5;
10925
0
      xmlParseXMLDecl(ctxt);
10926
0
        } else {
10927
0
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10928
0
                        if (ctxt->version == NULL) {
10929
0
                            xmlErrMemory(ctxt);
10930
0
                            break;
10931
0
                        }
10932
0
        }
10933
0
    } else {
10934
0
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10935
0
        if (ctxt->version == NULL) {
10936
0
            xmlErrMemory(ctxt);
10937
0
      break;
10938
0
        }
10939
0
    }
10940
0
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10941
0
                    ctxt->sax->setDocumentLocator(ctxt->userData,
10942
0
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
10943
0
                }
10944
0
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10945
0
                    (!ctxt->disableSAX))
10946
0
                    ctxt->sax->startDocument(ctxt->userData);
10947
0
                ctxt->instate = XML_PARSER_MISC;
10948
0
    break;
10949
0
            case XML_PARSER_START_TAG: {
10950
0
          const xmlChar *name;
10951
0
    const xmlChar *prefix = NULL;
10952
0
    const xmlChar *URI = NULL;
10953
0
                int line = ctxt->input->line;
10954
0
    int nbNs = 0;
10955
10956
0
    if ((!terminate) && (avail < 2))
10957
0
        goto done;
10958
0
    cur = ctxt->input->cur[0];
10959
0
          if (cur != '<') {
10960
0
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10961
0
                                   "Start tag expected, '<' not found");
10962
0
                    ctxt->instate = XML_PARSER_EOF;
10963
0
                    xmlFinishDocument(ctxt);
10964
0
        goto done;
10965
0
    }
10966
0
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
10967
0
                    goto done;
10968
0
    if (ctxt->spaceNr == 0)
10969
0
        spacePush(ctxt, -1);
10970
0
    else if (*ctxt->space == -2)
10971
0
        spacePush(ctxt, -1);
10972
0
    else
10973
0
        spacePush(ctxt, *ctxt->space);
10974
0
#ifdef LIBXML_SAX1_ENABLED
10975
0
    if (ctxt->sax2)
10976
0
#endif /* LIBXML_SAX1_ENABLED */
10977
0
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
10978
0
#ifdef LIBXML_SAX1_ENABLED
10979
0
    else
10980
0
        name = xmlParseStartTag(ctxt);
10981
0
#endif /* LIBXML_SAX1_ENABLED */
10982
0
    if (name == NULL) {
10983
0
        spacePop(ctxt);
10984
0
                    ctxt->instate = XML_PARSER_EOF;
10985
0
                    xmlFinishDocument(ctxt);
10986
0
        goto done;
10987
0
    }
10988
0
#ifdef LIBXML_VALID_ENABLED
10989
    /*
10990
     * [ VC: Root Element Type ]
10991
     * The Name in the document type declaration must match
10992
     * the element type of the root element.
10993
     */
10994
0
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10995
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10996
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10997
0
#endif /* LIBXML_VALID_ENABLED */
10998
10999
    /*
11000
     * Check for an Empty Element.
11001
     */
11002
0
    if ((RAW == '/') && (NXT(1) == '>')) {
11003
0
        SKIP(2);
11004
11005
0
        if (ctxt->sax2) {
11006
0
      if ((ctxt->sax != NULL) &&
11007
0
          (ctxt->sax->endElementNs != NULL) &&
11008
0
          (!ctxt->disableSAX))
11009
0
          ctxt->sax->endElementNs(ctxt->userData, name,
11010
0
                                  prefix, URI);
11011
0
      if (nbNs > 0)
11012
0
          xmlParserNsPop(ctxt, nbNs);
11013
0
#ifdef LIBXML_SAX1_ENABLED
11014
0
        } else {
11015
0
      if ((ctxt->sax != NULL) &&
11016
0
          (ctxt->sax->endElement != NULL) &&
11017
0
          (!ctxt->disableSAX))
11018
0
          ctxt->sax->endElement(ctxt->userData, name);
11019
0
#endif /* LIBXML_SAX1_ENABLED */
11020
0
        }
11021
0
        spacePop(ctxt);
11022
0
    } else if (RAW == '>') {
11023
0
        NEXT;
11024
0
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11025
0
    } else {
11026
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11027
0
           "Couldn't find end of Start Tag %s\n",
11028
0
           name);
11029
0
        nodePop(ctxt);
11030
0
        spacePop(ctxt);
11031
0
                    if (nbNs > 0)
11032
0
                        xmlParserNsPop(ctxt, nbNs);
11033
0
    }
11034
11035
0
                if (ctxt->nameNr == 0)
11036
0
                    ctxt->instate = XML_PARSER_EPILOG;
11037
0
                else
11038
0
                    ctxt->instate = XML_PARSER_CONTENT;
11039
0
                break;
11040
0
      }
11041
0
            case XML_PARSER_CONTENT: {
11042
0
    cur = ctxt->input->cur[0];
11043
11044
0
    if (cur == '<') {
11045
0
                    if ((!terminate) && (avail < 2))
11046
0
                        goto done;
11047
0
        next = ctxt->input->cur[1];
11048
11049
0
                    if (next == '/') {
11050
0
                        ctxt->instate = XML_PARSER_END_TAG;
11051
0
                        break;
11052
0
                    } else if (next == '?') {
11053
0
                        if ((!terminate) &&
11054
0
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11055
0
                            goto done;
11056
0
                        xmlParsePI(ctxt);
11057
0
                        ctxt->instate = XML_PARSER_CONTENT;
11058
0
                        break;
11059
0
                    } else if (next == '!') {
11060
0
                        if ((!terminate) && (avail < 3))
11061
0
                            goto done;
11062
0
                        next = ctxt->input->cur[2];
11063
11064
0
                        if (next == '-') {
11065
0
                            if ((!terminate) && (avail < 4))
11066
0
                                goto done;
11067
0
                            if (ctxt->input->cur[3] == '-') {
11068
0
                                if ((!terminate) &&
11069
0
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11070
0
                                    goto done;
11071
0
                                xmlParseComment(ctxt);
11072
0
                                ctxt->instate = XML_PARSER_CONTENT;
11073
0
                                break;
11074
0
                            }
11075
0
                        } else if (next == '[') {
11076
0
                            if ((!terminate) && (avail < 9))
11077
0
                                goto done;
11078
0
                            if ((ctxt->input->cur[2] == '[') &&
11079
0
                                (ctxt->input->cur[3] == 'C') &&
11080
0
                                (ctxt->input->cur[4] == 'D') &&
11081
0
                                (ctxt->input->cur[5] == 'A') &&
11082
0
                                (ctxt->input->cur[6] == 'T') &&
11083
0
                                (ctxt->input->cur[7] == 'A') &&
11084
0
                                (ctxt->input->cur[8] == '[')) {
11085
0
                                if ((!terminate) &&
11086
0
                                    (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11087
0
                                    goto done;
11088
0
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11089
0
                                xmlParseCDSect(ctxt);
11090
0
                                ctxt->instate = XML_PARSER_CONTENT;
11091
0
                                break;
11092
0
                            }
11093
0
                        }
11094
0
                    }
11095
0
    } else if (cur == '&') {
11096
0
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11097
0
      goto done;
11098
0
        xmlParseReference(ctxt);
11099
0
                    break;
11100
0
    } else {
11101
        /* TODO Avoid the extra copy, handle directly !!! */
11102
        /*
11103
         * Goal of the following test is:
11104
         *  - minimize calls to the SAX 'character' callback
11105
         *    when they are mergeable
11106
         *  - handle an problem for isBlank when we only parse
11107
         *    a sequence of blank chars and the next one is
11108
         *    not available to check against '<' presence.
11109
         *  - tries to homogenize the differences in SAX
11110
         *    callbacks between the push and pull versions
11111
         *    of the parser.
11112
         */
11113
0
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11114
0
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11115
0
          goto done;
11116
0
                    }
11117
0
                    ctxt->checkIndex = 0;
11118
0
        xmlParseCharDataInternal(ctxt, !terminate);
11119
0
                    break;
11120
0
    }
11121
11122
0
                ctxt->instate = XML_PARSER_START_TAG;
11123
0
    break;
11124
0
      }
11125
0
            case XML_PARSER_END_TAG:
11126
0
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11127
0
        goto done;
11128
0
    if (ctxt->sax2) {
11129
0
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11130
0
        nameNsPop(ctxt);
11131
0
    }
11132
0
#ifdef LIBXML_SAX1_ENABLED
11133
0
      else
11134
0
        xmlParseEndTag1(ctxt, 0);
11135
0
#endif /* LIBXML_SAX1_ENABLED */
11136
0
    if (ctxt->nameNr == 0) {
11137
0
        ctxt->instate = XML_PARSER_EPILOG;
11138
0
    } else {
11139
0
        ctxt->instate = XML_PARSER_CONTENT;
11140
0
    }
11141
0
    break;
11142
0
            case XML_PARSER_MISC:
11143
0
            case XML_PARSER_PROLOG:
11144
0
            case XML_PARSER_EPILOG:
11145
0
    SKIP_BLANKS;
11146
0
                avail = ctxt->input->end - ctxt->input->cur;
11147
0
    if (avail < 1)
11148
0
        goto done;
11149
0
    if (ctxt->input->cur[0] == '<') {
11150
0
                    if ((!terminate) && (avail < 2))
11151
0
                        goto done;
11152
0
                    next = ctxt->input->cur[1];
11153
0
                    if (next == '?') {
11154
0
                        if ((!terminate) &&
11155
0
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11156
0
                            goto done;
11157
0
                        xmlParsePI(ctxt);
11158
0
                        break;
11159
0
                    } else if (next == '!') {
11160
0
                        if ((!terminate) && (avail < 3))
11161
0
                            goto done;
11162
11163
0
                        if (ctxt->input->cur[2] == '-') {
11164
0
                            if ((!terminate) && (avail < 4))
11165
0
                                goto done;
11166
0
                            if (ctxt->input->cur[3] == '-') {
11167
0
                                if ((!terminate) &&
11168
0
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11169
0
                                    goto done;
11170
0
                                xmlParseComment(ctxt);
11171
0
                                break;
11172
0
                            }
11173
0
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11174
0
                            if ((!terminate) && (avail < 9))
11175
0
                                goto done;
11176
0
                            if ((ctxt->input->cur[2] == 'D') &&
11177
0
                                (ctxt->input->cur[3] == 'O') &&
11178
0
                                (ctxt->input->cur[4] == 'C') &&
11179
0
                                (ctxt->input->cur[5] == 'T') &&
11180
0
                                (ctxt->input->cur[6] == 'Y') &&
11181
0
                                (ctxt->input->cur[7] == 'P') &&
11182
0
                                (ctxt->input->cur[8] == 'E')) {
11183
0
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11184
0
                                    goto done;
11185
0
                                ctxt->inSubset = 1;
11186
0
                                xmlParseDocTypeDecl(ctxt);
11187
0
                                if (RAW == '[') {
11188
0
                                    ctxt->instate = XML_PARSER_DTD;
11189
0
                                } else {
11190
0
                                    if (RAW == '>')
11191
0
                                        NEXT;
11192
                                    /*
11193
                                     * Create and update the external subset.
11194
                                     */
11195
0
                                    ctxt->inSubset = 2;
11196
0
                                    if ((ctxt->sax != NULL) &&
11197
0
                                        (!ctxt->disableSAX) &&
11198
0
                                        (ctxt->sax->externalSubset != NULL))
11199
0
                                        ctxt->sax->externalSubset(
11200
0
                                                ctxt->userData,
11201
0
                                                ctxt->intSubName,
11202
0
                                                ctxt->extSubSystem,
11203
0
                                                ctxt->extSubURI);
11204
0
                                    ctxt->inSubset = 0;
11205
0
                                    xmlCleanSpecialAttr(ctxt);
11206
0
                                    ctxt->instate = XML_PARSER_PROLOG;
11207
0
                                }
11208
0
                                break;
11209
0
                            }
11210
0
                        }
11211
0
                    }
11212
0
                }
11213
11214
0
                if (ctxt->instate == XML_PARSER_EPILOG) {
11215
0
                    if (ctxt->errNo == XML_ERR_OK)
11216
0
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11217
0
        ctxt->instate = XML_PARSER_EOF;
11218
0
                    xmlFinishDocument(ctxt);
11219
0
                } else {
11220
0
        ctxt->instate = XML_PARSER_START_TAG;
11221
0
    }
11222
0
    break;
11223
0
            case XML_PARSER_DTD: {
11224
0
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11225
0
                    goto done;
11226
0
    xmlParseInternalSubset(ctxt);
11227
0
    ctxt->inSubset = 2;
11228
0
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11229
0
        (ctxt->sax->externalSubset != NULL))
11230
0
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11231
0
          ctxt->extSubSystem, ctxt->extSubURI);
11232
0
    ctxt->inSubset = 0;
11233
0
    xmlCleanSpecialAttr(ctxt);
11234
0
    ctxt->instate = XML_PARSER_PROLOG;
11235
0
                break;
11236
0
      }
11237
0
            default:
11238
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11239
0
      "PP: internal error\n");
11240
0
    ctxt->instate = XML_PARSER_EOF;
11241
0
    break;
11242
0
  }
11243
0
    }
11244
0
done:
11245
0
    return(ret);
11246
0
}
11247
11248
/**
11249
 * Parse a chunk of memory in push parser mode.
11250
 *
11251
 * Assumes that the parser context was initialized with
11252
 * xmlCreatePushParserCtxt().
11253
 *
11254
 * The last chunk, which will often be empty, must be marked with
11255
 * the `terminate` flag. With the default SAX callbacks, the resulting
11256
 * document will be available in ctxt->myDoc. This pointer will not
11257
 * be freed when calling xmlFreeParserCtxt() and must be freed by the
11258
 * caller. If the document isn't well-formed, it will still be returned
11259
 * in ctxt->myDoc.
11260
 *
11261
 * As an exception, xmlCtxtResetPush() will free the document in
11262
 * ctxt->myDoc. So ctxt->myDoc should be set to NULL after extracting
11263
 * the document.
11264
 *
11265
 * @param ctxt  an XML parser context
11266
 * @param chunk  chunk of memory
11267
 * @param size  size of chunk in bytes
11268
 * @param terminate  last chunk indicator
11269
 * @returns an xmlParserErrors code (0 on success).
11270
 */
11271
int
11272
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11273
0
              int terminate) {
11274
0
    size_t curBase;
11275
0
    size_t maxLength;
11276
0
    size_t pos;
11277
0
    int end_in_lf = 0;
11278
0
    int res;
11279
11280
0
    if ((ctxt == NULL) || (size < 0))
11281
0
        return(XML_ERR_ARGUMENT);
11282
0
    if ((chunk == NULL) && (size > 0))
11283
0
        return(XML_ERR_ARGUMENT);
11284
0
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11285
0
        return(XML_ERR_ARGUMENT);
11286
0
    if (ctxt->disableSAX != 0)
11287
0
        return(ctxt->errNo);
11288
11289
0
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11290
0
    if (ctxt->instate == XML_PARSER_START)
11291
0
        xmlCtxtInitializeLate(ctxt);
11292
0
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11293
0
        (chunk[size - 1] == '\r')) {
11294
0
  end_in_lf = 1;
11295
0
  size--;
11296
0
    }
11297
11298
    /*
11299
     * Also push an empty chunk to make sure that the raw buffer
11300
     * will be flushed if there is an encoder.
11301
     */
11302
0
    pos = ctxt->input->cur - ctxt->input->base;
11303
0
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11304
0
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11305
0
    if (res < 0) {
11306
0
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11307
0
        xmlHaltParser(ctxt);
11308
0
        return(ctxt->errNo);
11309
0
    }
11310
11311
0
    xmlParseTryOrFinish(ctxt, terminate);
11312
11313
0
    curBase = ctxt->input->cur - ctxt->input->base;
11314
0
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11315
0
                XML_MAX_HUGE_LENGTH :
11316
0
                XML_MAX_LOOKUP_LIMIT;
11317
0
    if (curBase > maxLength) {
11318
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11319
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11320
0
        xmlHaltParser(ctxt);
11321
0
    }
11322
11323
0
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11324
0
        return(ctxt->errNo);
11325
11326
0
    if (end_in_lf == 1) {
11327
0
  pos = ctxt->input->cur - ctxt->input->base;
11328
0
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11329
0
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11330
0
        if (res < 0) {
11331
0
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11332
0
            xmlHaltParser(ctxt);
11333
0
            return(ctxt->errNo);
11334
0
        }
11335
0
    }
11336
0
    if (terminate) {
11337
  /*
11338
   * Check for termination
11339
   */
11340
0
        if ((ctxt->instate != XML_PARSER_EOF) &&
11341
0
            (ctxt->instate != XML_PARSER_EPILOG)) {
11342
0
            if (ctxt->nameNr > 0) {
11343
0
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11344
0
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11345
0
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11346
0
                        "Premature end of data in tag %s line %d\n",
11347
0
                        name, line, NULL);
11348
0
            } else if (ctxt->instate == XML_PARSER_START) {
11349
0
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11350
0
            } else {
11351
0
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11352
0
                               "Start tag expected, '<' not found\n");
11353
0
            }
11354
0
        } else {
11355
0
            xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
11356
0
        }
11357
0
  if (ctxt->instate != XML_PARSER_EOF) {
11358
0
            ctxt->instate = XML_PARSER_EOF;
11359
0
            xmlFinishDocument(ctxt);
11360
0
  }
11361
0
    }
11362
0
    if (ctxt->wellFormed == 0)
11363
0
  return((xmlParserErrors) ctxt->errNo);
11364
0
    else
11365
0
        return(0);
11366
0
}
11367
11368
/************************************************************************
11369
 *                  *
11370
 *    I/O front end functions to the parser     *
11371
 *                  *
11372
 ************************************************************************/
11373
11374
/**
11375
 * Create a parser context for using the XML parser in push mode.
11376
 * See xmlParseChunk().
11377
 *
11378
 * Passing an initial chunk is useless and deprecated.
11379
 *
11380
 * The push parser doesn't support recovery mode or the
11381
 * XML_PARSE_NOBLANKS option.
11382
 *
11383
 * `filename` is used as base URI to fetch external entities and for
11384
 * error reports.
11385
 *
11386
 * @param sax  a SAX handler (optional)
11387
 * @param user_data  user data for SAX callbacks (optional)
11388
 * @param chunk  initial chunk (optional, deprecated)
11389
 * @param size  size of initial chunk in bytes
11390
 * @param filename  file name or URI (optional)
11391
 * @returns the new parser context or NULL if a memory allocation
11392
 * failed.
11393
 */
11394
11395
xmlParserCtxtPtr
11396
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11397
0
                        const char *chunk, int size, const char *filename) {
11398
0
    xmlParserCtxtPtr ctxt;
11399
0
    xmlParserInputPtr input;
11400
11401
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11402
0
    if (ctxt == NULL)
11403
0
  return(NULL);
11404
11405
0
    ctxt->options &= ~XML_PARSE_NODICT;
11406
0
    ctxt->dictNames = 1;
11407
11408
0
    input = xmlNewPushInput(filename, chunk, size);
11409
0
    if (input == NULL) {
11410
0
  xmlFreeParserCtxt(ctxt);
11411
0
  return(NULL);
11412
0
    }
11413
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11414
0
        xmlFreeInputStream(input);
11415
0
        xmlFreeParserCtxt(ctxt);
11416
0
        return(NULL);
11417
0
    }
11418
11419
0
    return(ctxt);
11420
0
}
11421
#endif /* LIBXML_PUSH_ENABLED */
11422
11423
/**
11424
 * Blocks further parser processing
11425
 *
11426
 * @param ctxt  an XML parser context
11427
 */
11428
void
11429
0
xmlStopParser(xmlParserCtxtPtr ctxt) {
11430
0
    if (ctxt == NULL)
11431
0
        return;
11432
0
    xmlHaltParser(ctxt);
11433
0
    if (ctxt->errNo != XML_ERR_NO_MEMORY)
11434
0
        ctxt->errNo = XML_ERR_USER_STOP;
11435
0
}
11436
11437
/**
11438
 * Create a parser context for using the XML parser with an existing
11439
 * I/O stream
11440
 *
11441
 * @param sax  a SAX handler (optional)
11442
 * @param user_data  user data for SAX callbacks (optional)
11443
 * @param ioread  an I/O read function
11444
 * @param ioclose  an I/O close function (optional)
11445
 * @param ioctx  an I/O handler
11446
 * @param enc  the charset encoding if known (deprecated)
11447
 * @returns the new parser context or NULL
11448
 */
11449
xmlParserCtxtPtr
11450
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11451
                      xmlInputReadCallback ioread,
11452
                      xmlInputCloseCallback ioclose,
11453
0
                      void *ioctx, xmlCharEncoding enc) {
11454
0
    xmlParserCtxtPtr ctxt;
11455
0
    xmlParserInputPtr input;
11456
0
    const char *encoding;
11457
11458
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11459
0
    if (ctxt == NULL)
11460
0
  return(NULL);
11461
11462
0
    encoding = xmlGetCharEncodingName(enc);
11463
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11464
0
                                  encoding, 0);
11465
0
    if (input == NULL) {
11466
0
  xmlFreeParserCtxt(ctxt);
11467
0
        return (NULL);
11468
0
    }
11469
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11470
0
        xmlFreeInputStream(input);
11471
0
        xmlFreeParserCtxt(ctxt);
11472
0
        return(NULL);
11473
0
    }
11474
11475
0
    return(ctxt);
11476
0
}
11477
11478
#ifdef LIBXML_VALID_ENABLED
11479
/************************************************************************
11480
 *                  *
11481
 *    Front ends when parsing a DTD       *
11482
 *                  *
11483
 ************************************************************************/
11484
11485
/**
11486
 * Parse a DTD.
11487
 *
11488
 * Option XML_PARSE_DTDLOAD should be enabled in the parser context
11489
 * to make external entities work.
11490
 *
11491
 * @since 2.14.0
11492
 *
11493
 * @param ctxt  a parser context
11494
 * @param input  a parser input
11495
 * @param publicId  public ID of the DTD (optional)
11496
 * @param systemId  system ID of the DTD (optional)
11497
 * @returns the resulting xmlDtdPtr or NULL in case of error.
11498
 * `input` will be freed by the function in any case.
11499
 */
11500
xmlDtdPtr
11501
xmlCtxtParseDtd(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11502
0
                const xmlChar *publicId, const xmlChar *systemId) {
11503
0
    xmlDtdPtr ret = NULL;
11504
11505
0
    if ((ctxt == NULL) || (input == NULL)) {
11506
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
11507
0
        xmlFreeInputStream(input);
11508
0
        return(NULL);
11509
0
    }
11510
11511
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11512
0
        xmlFreeInputStream(input);
11513
0
        return(NULL);
11514
0
    }
11515
11516
0
    if (publicId == NULL)
11517
0
        publicId = BAD_CAST "none";
11518
0
    if (systemId == NULL)
11519
0
        systemId = BAD_CAST "none";
11520
11521
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11522
0
    if (ctxt->myDoc == NULL) {
11523
0
        xmlErrMemory(ctxt);
11524
0
        goto error;
11525
0
    }
11526
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11527
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11528
0
                                       publicId, systemId);
11529
0
    if (ctxt->myDoc->extSubset == NULL) {
11530
0
        xmlErrMemory(ctxt);
11531
0
        xmlFreeDoc(ctxt->myDoc);
11532
0
        goto error;
11533
0
    }
11534
11535
0
    xmlParseExternalSubset(ctxt, publicId, systemId);
11536
11537
0
    if (ctxt->wellFormed) {
11538
0
        ret = ctxt->myDoc->extSubset;
11539
0
        ctxt->myDoc->extSubset = NULL;
11540
0
        if (ret != NULL) {
11541
0
            xmlNodePtr tmp;
11542
11543
0
            ret->doc = NULL;
11544
0
            tmp = ret->children;
11545
0
            while (tmp != NULL) {
11546
0
                tmp->doc = NULL;
11547
0
                tmp = tmp->next;
11548
0
            }
11549
0
        }
11550
0
    } else {
11551
0
        ret = NULL;
11552
0
    }
11553
0
    xmlFreeDoc(ctxt->myDoc);
11554
0
    ctxt->myDoc = NULL;
11555
11556
0
error:
11557
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
11558
11559
0
    return(ret);
11560
0
}
11561
11562
/**
11563
 * Load and parse a DTD
11564
 *
11565
 * @deprecated Use xmlCtxtParseDtd().
11566
 *
11567
 * @param sax  the SAX handler block or NULL
11568
 * @param input  an Input Buffer
11569
 * @param enc  the charset encoding if known
11570
 * @returns the resulting xmlDtdPtr or NULL in case of error.
11571
 * `input` will be freed by the function in any case.
11572
 */
11573
11574
xmlDtdPtr
11575
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11576
0
        xmlCharEncoding enc) {
11577
0
    xmlDtdPtr ret = NULL;
11578
0
    xmlParserCtxtPtr ctxt;
11579
0
    xmlParserInputPtr pinput = NULL;
11580
11581
0
    if (input == NULL)
11582
0
  return(NULL);
11583
11584
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11585
0
    if (ctxt == NULL) {
11586
0
        xmlFreeParserInputBuffer(input);
11587
0
  return(NULL);
11588
0
    }
11589
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11590
11591
    /*
11592
     * generate a parser input from the I/O handler
11593
     */
11594
11595
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11596
0
    if (pinput == NULL) {
11597
0
        xmlFreeParserInputBuffer(input);
11598
0
  xmlFreeParserCtxt(ctxt);
11599
0
  return(NULL);
11600
0
    }
11601
11602
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11603
0
        xmlSwitchEncoding(ctxt, enc);
11604
0
    }
11605
11606
0
    ret = xmlCtxtParseDtd(ctxt, pinput, NULL, NULL);
11607
11608
0
    xmlFreeParserCtxt(ctxt);
11609
0
    return(ret);
11610
0
}
11611
11612
/**
11613
 * Load and parse an external subset.
11614
 *
11615
 * @deprecated Use xmlCtxtParseDtd().
11616
 *
11617
 * @param sax  the SAX handler block
11618
 * @param ExternalID  a NAME* containing the External ID of the DTD
11619
 * @param SystemID  a NAME* containing the URL to the DTD
11620
 * @returns the resulting xmlDtdPtr or NULL in case of error.
11621
 */
11622
11623
xmlDtdPtr
11624
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11625
0
                          const xmlChar *SystemID) {
11626
0
    xmlDtdPtr ret = NULL;
11627
0
    xmlParserCtxtPtr ctxt;
11628
0
    xmlParserInputPtr input = NULL;
11629
0
    xmlChar* systemIdCanonic;
11630
11631
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11632
11633
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11634
0
    if (ctxt == NULL) {
11635
0
  return(NULL);
11636
0
    }
11637
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11638
11639
    /*
11640
     * Canonicalise the system ID
11641
     */
11642
0
    systemIdCanonic = xmlCanonicPath(SystemID);
11643
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11644
0
  xmlFreeParserCtxt(ctxt);
11645
0
  return(NULL);
11646
0
    }
11647
11648
    /*
11649
     * Ask the Entity resolver to load the damn thing
11650
     */
11651
11652
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11653
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11654
0
                                   systemIdCanonic);
11655
0
    if (input == NULL) {
11656
0
  xmlFreeParserCtxt(ctxt);
11657
0
  if (systemIdCanonic != NULL)
11658
0
      xmlFree(systemIdCanonic);
11659
0
  return(NULL);
11660
0
    }
11661
11662
0
    if (input->filename == NULL)
11663
0
  input->filename = (char *) systemIdCanonic;
11664
0
    else
11665
0
  xmlFree(systemIdCanonic);
11666
11667
0
    ret = xmlCtxtParseDtd(ctxt, input, ExternalID, SystemID);
11668
11669
0
    xmlFreeParserCtxt(ctxt);
11670
0
    return(ret);
11671
0
}
11672
11673
11674
/**
11675
 * Load and parse an external subset.
11676
 *
11677
 * @param ExternalID  a NAME* containing the External ID of the DTD
11678
 * @param SystemID  a NAME* containing the URL to the DTD
11679
 * @returns the resulting xmlDtdPtr or NULL in case of error.
11680
 */
11681
11682
xmlDtdPtr
11683
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11684
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11685
0
}
11686
#endif /* LIBXML_VALID_ENABLED */
11687
11688
/************************************************************************
11689
 *                  *
11690
 *    Front ends when parsing an Entity     *
11691
 *                  *
11692
 ************************************************************************/
11693
11694
static xmlNodePtr
11695
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11696
1.03k
                            int hasTextDecl, int buildTree) {
11697
1.03k
    xmlNodePtr root = NULL;
11698
1.03k
    xmlNodePtr list = NULL;
11699
1.03k
    xmlChar *rootName = BAD_CAST "#root";
11700
1.03k
    int result;
11701
11702
1.03k
    if (buildTree) {
11703
1.03k
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11704
1.03k
        if (root == NULL) {
11705
0
            xmlErrMemory(ctxt);
11706
0
            goto error;
11707
0
        }
11708
1.03k
    }
11709
11710
1.03k
    if (xmlCtxtPushInput(ctxt, input) < 0)
11711
0
        goto error;
11712
11713
1.03k
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11714
1.03k
    spacePush(ctxt, -1);
11715
11716
1.03k
    if (buildTree)
11717
1.03k
        nodePush(ctxt, root);
11718
11719
1.03k
    if (hasTextDecl) {
11720
0
        xmlDetectEncoding(ctxt);
11721
11722
        /*
11723
         * Parse a possible text declaration first
11724
         */
11725
0
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11726
0
            (IS_BLANK_CH(NXT(5)))) {
11727
0
            xmlParseTextDecl(ctxt);
11728
            /*
11729
             * An XML-1.0 document can't reference an entity not XML-1.0
11730
             */
11731
0
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11732
0
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11733
0
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11734
0
                               "Version mismatch between document and "
11735
0
                               "entity\n");
11736
0
            }
11737
0
        }
11738
0
    }
11739
11740
1.03k
    xmlParseContentInternal(ctxt);
11741
11742
1.03k
    if (ctxt->input->cur < ctxt->input->end)
11743
48
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11744
11745
1.03k
    if ((ctxt->wellFormed) ||
11746
1.03k
        ((ctxt->recovery) && (!xmlCtxtIsCatastrophicError(ctxt)))) {
11747
858
        if (root != NULL) {
11748
858
            xmlNodePtr cur;
11749
11750
            /*
11751
             * Unlink newly created node list.
11752
             */
11753
858
            list = root->children;
11754
858
            root->children = NULL;
11755
858
            root->last = NULL;
11756
3.29k
            for (cur = list; cur != NULL; cur = cur->next)
11757
2.43k
                cur->parent = NULL;
11758
858
        }
11759
858
    }
11760
11761
    /*
11762
     * Read the rest of the stream in case of errors. We want
11763
     * to account for the whole entity size.
11764
     */
11765
1.03k
    do {
11766
1.03k
        ctxt->input->cur = ctxt->input->end;
11767
1.03k
        xmlParserShrink(ctxt);
11768
1.03k
        result = xmlParserGrow(ctxt);
11769
1.03k
    } while (result > 0);
11770
11771
1.03k
    if (buildTree)
11772
1.03k
        nodePop(ctxt);
11773
11774
1.03k
    namePop(ctxt);
11775
1.03k
    spacePop(ctxt);
11776
11777
1.03k
    xmlCtxtPopInput(ctxt);
11778
11779
1.03k
error:
11780
1.03k
    xmlFreeNode(root);
11781
11782
1.03k
    return(list);
11783
1.03k
}
11784
11785
static void
11786
1.04k
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
11787
1.04k
    xmlParserInputPtr input;
11788
1.04k
    xmlNodePtr list;
11789
1.04k
    unsigned long consumed;
11790
1.04k
    int isExternal;
11791
1.04k
    int buildTree;
11792
1.04k
    int oldMinNsIndex;
11793
1.04k
    int oldNodelen, oldNodemem;
11794
11795
1.04k
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
11796
1.04k
    buildTree = (ctxt->node != NULL);
11797
11798
    /*
11799
     * Recursion check
11800
     */
11801
1.04k
    if (ent->flags & XML_ENT_EXPANDING) {
11802
9
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
11803
9
        xmlHaltParser(ctxt);
11804
9
        goto error;
11805
9
    }
11806
11807
    /*
11808
     * Load entity
11809
     */
11810
1.03k
    input = xmlNewEntityInputStream(ctxt, ent);
11811
1.03k
    if (input == NULL)
11812
0
        goto error;
11813
11814
    /*
11815
     * When building a tree, we need to limit the scope of namespace
11816
     * declarations, so that entities don't reference xmlNs structs
11817
     * from the parent of a reference.
11818
     */
11819
1.03k
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
11820
1.03k
    if (buildTree)
11821
1.03k
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
11822
11823
1.03k
    oldNodelen = ctxt->nodelen;
11824
1.03k
    oldNodemem = ctxt->nodemem;
11825
1.03k
    ctxt->nodelen = 0;
11826
1.03k
    ctxt->nodemem = 0;
11827
11828
    /*
11829
     * Parse content
11830
     *
11831
     * This initiates a recursive call chain:
11832
     *
11833
     * - xmlCtxtParseContentInternal
11834
     * - xmlParseContentInternal
11835
     * - xmlParseReference
11836
     * - xmlCtxtParseEntity
11837
     *
11838
     * The nesting depth is limited by the maximum number of inputs,
11839
     * see xmlCtxtPushInput.
11840
     *
11841
     * It's possible to make this non-recursive (minNsIndex must be
11842
     * stored in the input struct) at the expense of code readability.
11843
     */
11844
11845
1.03k
    ent->flags |= XML_ENT_EXPANDING;
11846
11847
1.03k
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
11848
11849
1.03k
    ent->flags &= ~XML_ENT_EXPANDING;
11850
11851
1.03k
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
11852
1.03k
    ctxt->nodelen = oldNodelen;
11853
1.03k
    ctxt->nodemem = oldNodemem;
11854
11855
    /*
11856
     * Entity size accounting
11857
     */
11858
1.03k
    consumed = input->consumed;
11859
1.03k
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
11860
11861
1.03k
    if ((ent->flags & XML_ENT_CHECKED) == 0)
11862
552
        xmlSaturatedAdd(&ent->expandedSize, consumed);
11863
11864
1.03k
    if ((ent->flags & XML_ENT_PARSED) == 0) {
11865
552
        if (isExternal)
11866
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
11867
11868
552
        ent->children = list;
11869
11870
2.98k
        while (list != NULL) {
11871
2.43k
            list->parent = (xmlNodePtr) ent;
11872
11873
            /*
11874
             * Downstream code like the nginx xslt module can set
11875
             * ctxt->myDoc->extSubset to a separate DTD, so the entity
11876
             * might have a different or a NULL document.
11877
             */
11878
2.43k
            if (list->doc != ent->doc)
11879
0
                xmlSetTreeDoc(list, ent->doc);
11880
11881
2.43k
            if (list->next == NULL)
11882
356
                ent->last = list;
11883
2.43k
            list = list->next;
11884
2.43k
        }
11885
552
    } else {
11886
481
        xmlFreeNodeList(list);
11887
481
    }
11888
11889
1.03k
    xmlFreeInputStream(input);
11890
11891
1.04k
error:
11892
1.04k
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
11893
1.04k
}
11894
11895
/**
11896
 * Parse an external general entity within an existing parsing context
11897
 * An external general parsed entity is well-formed if it matches the
11898
 * production labeled extParsedEnt.
11899
 *
11900
 *     [78] extParsedEnt ::= TextDecl? content
11901
 *
11902
 * @param ctxt  the existing parsing context
11903
 * @param URL  the URL for the entity to load
11904
 * @param ID  the System ID for the entity to load
11905
 * @param listOut  the return value for the set of parsed nodes
11906
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11907
 *    the parser error code otherwise
11908
 */
11909
11910
int
11911
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt, const xmlChar *URL,
11912
0
                           const xmlChar *ID, xmlNodePtr *listOut) {
11913
0
    xmlParserInputPtr input;
11914
0
    xmlNodePtr list;
11915
11916
0
    if (listOut != NULL)
11917
0
        *listOut = NULL;
11918
11919
0
    if (ctxt == NULL)
11920
0
        return(XML_ERR_ARGUMENT);
11921
11922
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
11923
0
                            XML_RESOURCE_GENERAL_ENTITY);
11924
0
    if (input == NULL)
11925
0
        return(ctxt->errNo);
11926
11927
0
    xmlCtxtInitializeLate(ctxt);
11928
11929
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
11930
0
    if (listOut != NULL)
11931
0
        *listOut = list;
11932
0
    else
11933
0
        xmlFreeNodeList(list);
11934
11935
0
    xmlFreeInputStream(input);
11936
0
    return(ctxt->errNo);
11937
0
}
11938
11939
#ifdef LIBXML_SAX1_ENABLED
11940
/**
11941
 * Parse an external general entity
11942
 * An external general parsed entity is well-formed if it matches the
11943
 * production labeled extParsedEnt.
11944
 *
11945
 * @deprecated Use xmlParseCtxtExternalEntity().
11946
 *
11947
 *     [78] extParsedEnt ::= TextDecl? content
11948
 *
11949
 * @param doc  the document the chunk pertains to
11950
 * @param sax  the SAX handler block (possibly NULL)
11951
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11952
 * @param depth  Used for loop detection, use 0
11953
 * @param URL  the URL for the entity to load
11954
 * @param ID  the System ID for the entity to load
11955
 * @param list  the return value for the set of parsed nodes
11956
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11957
 *    the parser error code otherwise
11958
 */
11959
11960
int
11961
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
11962
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
11963
0
    xmlParserCtxtPtr ctxt;
11964
0
    int ret;
11965
11966
0
    if (list != NULL)
11967
0
        *list = NULL;
11968
11969
0
    if (doc == NULL)
11970
0
        return(XML_ERR_ARGUMENT);
11971
11972
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11973
0
    if (ctxt == NULL)
11974
0
        return(XML_ERR_NO_MEMORY);
11975
11976
0
    ctxt->depth = depth;
11977
0
    ctxt->myDoc = doc;
11978
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
11979
11980
0
    xmlFreeParserCtxt(ctxt);
11981
0
    return(ret);
11982
0
}
11983
11984
/**
11985
 * Parse a well-balanced chunk of an XML document
11986
 * called by the parser
11987
 * The allowed sequence for the Well Balanced Chunk is the one defined by
11988
 * the content production in the XML grammar:
11989
 *
11990
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
11991
 *                       Comment)*
11992
 *
11993
 * @param doc  the document the chunk pertains to (must not be NULL)
11994
 * @param sax  the SAX handler block (possibly NULL)
11995
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11996
 * @param depth  Used for loop detection, use 0
11997
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
11998
 * @param lst  the return value for the set of parsed nodes
11999
 * @returns 0 if the chunk is well balanced, -1 in case of args problem and
12000
 *    the parser error code otherwise
12001
 */
12002
12003
int
12004
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12005
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12006
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12007
0
                                                depth, string, lst, 0 );
12008
0
}
12009
#endif /* LIBXML_SAX1_ENABLED */
12010
12011
/**
12012
 * Parse a well-balanced chunk of XML matching the 'content' production.
12013
 *
12014
 * Namespaces in scope of `node` and entities of `node`'s document are
12015
 * recognized. When validating, the DTD of `node`'s document is used.
12016
 *
12017
 * Always consumes `input` even in error case.
12018
 *
12019
 * @since 2.14.0
12020
 *
12021
 * @param ctxt  parser context
12022
 * @param input  parser input
12023
 * @param node  target node or document
12024
 * @param hasTextDecl  whether to parse text declaration
12025
 * @returns a node list or NULL in case of error.
12026
 */
12027
xmlNodePtr
12028
xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
12029
0
                    xmlNodePtr node, int hasTextDecl) {
12030
0
    xmlDocPtr doc;
12031
0
    xmlNodePtr cur, list = NULL;
12032
0
    int nsnr = 0;
12033
0
    xmlDictPtr oldDict;
12034
0
    int oldOptions, oldDictNames, oldLoadSubset;
12035
12036
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12037
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12038
0
        goto exit;
12039
0
    }
12040
12041
0
    doc = node->doc;
12042
0
    if (doc == NULL) {
12043
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12044
0
        goto exit;
12045
0
    }
12046
12047
0
    switch (node->type) {
12048
0
        case XML_ELEMENT_NODE:
12049
0
        case XML_DOCUMENT_NODE:
12050
0
        case XML_HTML_DOCUMENT_NODE:
12051
0
            break;
12052
12053
0
        case XML_ATTRIBUTE_NODE:
12054
0
        case XML_TEXT_NODE:
12055
0
        case XML_CDATA_SECTION_NODE:
12056
0
        case XML_ENTITY_REF_NODE:
12057
0
        case XML_PI_NODE:
12058
0
        case XML_COMMENT_NODE:
12059
0
            for (cur = node->parent; cur != NULL; cur = node->parent) {
12060
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12061
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12062
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12063
0
                    node = cur;
12064
0
                    break;
12065
0
                }
12066
0
            }
12067
0
            break;
12068
12069
0
        default:
12070
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12071
0
            goto exit;
12072
0
    }
12073
12074
0
#ifdef LIBXML_HTML_ENABLED
12075
0
    if (ctxt->html)
12076
0
        htmlCtxtReset(ctxt);
12077
0
    else
12078
0
#endif
12079
0
        xmlCtxtReset(ctxt);
12080
12081
0
    oldDict = ctxt->dict;
12082
0
    oldOptions = ctxt->options;
12083
0
    oldDictNames = ctxt->dictNames;
12084
0
    oldLoadSubset = ctxt->loadsubset;
12085
12086
    /*
12087
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12088
     */
12089
0
    if (doc->dict != NULL) {
12090
0
        ctxt->dict = doc->dict;
12091
0
    } else {
12092
0
        ctxt->options |= XML_PARSE_NODICT;
12093
0
        ctxt->dictNames = 0;
12094
0
    }
12095
12096
    /*
12097
     * Disable IDs
12098
     */
12099
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12100
12101
0
    ctxt->myDoc = doc;
12102
12103
0
#ifdef LIBXML_HTML_ENABLED
12104
0
    if (ctxt->html) {
12105
        /*
12106
         * When parsing in context, it makes no sense to add implied
12107
         * elements like html/body/etc...
12108
         */
12109
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12110
12111
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12112
0
    } else
12113
0
#endif
12114
0
    {
12115
0
        xmlCtxtInitializeLate(ctxt);
12116
12117
        /*
12118
         * initialize the SAX2 namespaces stack
12119
         */
12120
0
        cur = node;
12121
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12122
0
            xmlNsPtr ns = cur->nsDef;
12123
0
            xmlHashedString hprefix, huri;
12124
12125
0
            while (ns != NULL) {
12126
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12127
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12128
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12129
0
                    nsnr++;
12130
0
                ns = ns->next;
12131
0
            }
12132
0
            cur = cur->parent;
12133
0
        }
12134
12135
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12136
12137
0
        if (nsnr > 0)
12138
0
            xmlParserNsPop(ctxt, nsnr);
12139
0
    }
12140
12141
0
    ctxt->dict = oldDict;
12142
0
    ctxt->options = oldOptions;
12143
0
    ctxt->dictNames = oldDictNames;
12144
0
    ctxt->loadsubset = oldLoadSubset;
12145
0
    ctxt->myDoc = NULL;
12146
0
    ctxt->node = NULL;
12147
12148
0
exit:
12149
0
    xmlFreeInputStream(input);
12150
0
    return(list);
12151
0
}
12152
12153
/**
12154
 * Parse a well-balanced chunk of an XML document
12155
 * within the context (DTD, namespaces, etc ...) of the given node.
12156
 *
12157
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12158
 * the content production in the XML grammar:
12159
 *
12160
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12161
 *                       Comment)*
12162
 *
12163
 * This function assumes the encoding of `node`'s document which is
12164
 * typically not what you want. A better alternative is
12165
 * xmlCtxtParseContent().
12166
 *
12167
 * @param node  the context node
12168
 * @param data  the input string
12169
 * @param datalen  the input string length in bytes
12170
 * @param options  a combination of xmlParserOption
12171
 * @param listOut  the return value for the set of parsed nodes
12172
 * @returns XML_ERR_OK if the chunk is well balanced, and the parser
12173
 * error code otherwise
12174
 */
12175
xmlParserErrors
12176
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12177
0
                      int options, xmlNodePtr *listOut) {
12178
0
    xmlParserCtxtPtr ctxt;
12179
0
    xmlParserInputPtr input;
12180
0
    xmlDocPtr doc;
12181
0
    xmlNodePtr list;
12182
0
    xmlParserErrors ret;
12183
12184
0
    if (listOut == NULL)
12185
0
        return(XML_ERR_INTERNAL_ERROR);
12186
0
    *listOut = NULL;
12187
12188
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12189
0
        return(XML_ERR_INTERNAL_ERROR);
12190
12191
0
    doc = node->doc;
12192
0
    if (doc == NULL)
12193
0
        return(XML_ERR_INTERNAL_ERROR);
12194
12195
0
#ifdef LIBXML_HTML_ENABLED
12196
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12197
0
        ctxt = htmlNewParserCtxt();
12198
0
    }
12199
0
    else
12200
0
#endif
12201
0
        ctxt = xmlNewParserCtxt();
12202
12203
0
    if (ctxt == NULL)
12204
0
        return(XML_ERR_NO_MEMORY);
12205
12206
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12207
0
                                      (const char *) doc->encoding,
12208
0
                                      XML_INPUT_BUF_STATIC);
12209
0
    if (input == NULL) {
12210
0
        xmlFreeParserCtxt(ctxt);
12211
0
        return(XML_ERR_NO_MEMORY);
12212
0
    }
12213
12214
0
    xmlCtxtUseOptions(ctxt, options);
12215
12216
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12217
12218
0
    if (list == NULL) {
12219
0
        ret = ctxt->errNo;
12220
0
        if (ret == XML_ERR_ARGUMENT)
12221
0
            ret = XML_ERR_INTERNAL_ERROR;
12222
0
    } else {
12223
0
        ret = XML_ERR_OK;
12224
0
        *listOut = list;
12225
0
    }
12226
12227
0
    xmlFreeParserCtxt(ctxt);
12228
12229
0
    return(ret);
12230
0
}
12231
12232
#ifdef LIBXML_SAX1_ENABLED
12233
/**
12234
 * Parse a well-balanced chunk of an XML document
12235
 *
12236
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12237
 * the content production in the XML grammar:
12238
 *
12239
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12240
 *                       Comment)*
12241
 *
12242
 * In case recover is set to 1, the nodelist will not be empty even if
12243
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12244
 * some extent.
12245
 *
12246
 * @param doc  the document the chunk pertains to (must not be NULL)
12247
 * @param sax  the SAX handler block (possibly NULL)
12248
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
12249
 * @param depth  Used for loop detection, use 0
12250
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
12251
 * @param listOut  the return value for the set of parsed nodes
12252
 * @param recover  return nodes even if the data is broken (use 0)
12253
 * @returns 0 if the chunk is well balanced, or thehe parser error code
12254
 * otherwise.
12255
 */
12256
int
12257
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12258
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *listOut,
12259
0
     int recover) {
12260
0
    xmlParserCtxtPtr ctxt;
12261
0
    xmlParserInputPtr input;
12262
0
    xmlNodePtr list;
12263
0
    int ret;
12264
12265
0
    if (listOut != NULL)
12266
0
        *listOut = NULL;
12267
12268
0
    if (string == NULL)
12269
0
        return(XML_ERR_ARGUMENT);
12270
12271
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12272
0
    if (ctxt == NULL)
12273
0
        return(XML_ERR_NO_MEMORY);
12274
12275
0
    xmlCtxtInitializeLate(ctxt);
12276
12277
0
    ctxt->depth = depth;
12278
0
    ctxt->myDoc = doc;
12279
0
    if (recover) {
12280
0
        ctxt->options |= XML_PARSE_RECOVER;
12281
0
        ctxt->recovery = 1;
12282
0
    }
12283
12284
0
    input = xmlNewStringInputStream(ctxt, string);
12285
0
    if (input == NULL) {
12286
0
        ret = ctxt->errNo;
12287
0
        goto error;
12288
0
    }
12289
12290
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12291
0
    if (listOut != NULL)
12292
0
        *listOut = list;
12293
0
    else
12294
0
        xmlFreeNodeList(list);
12295
12296
0
    if (!ctxt->wellFormed)
12297
0
        ret = ctxt->errNo;
12298
0
    else
12299
0
        ret = XML_ERR_OK;
12300
12301
0
error:
12302
0
    xmlFreeInputStream(input);
12303
0
    xmlFreeParserCtxt(ctxt);
12304
0
    return(ret);
12305
0
}
12306
12307
/**
12308
 * parse an XML external entity out of context and build a tree.
12309
 * It use the given SAX function block to handle the parsing callback.
12310
 * If sax is NULL, fallback to the default DOM tree building routines.
12311
 *
12312
 * @deprecated Don't use.
12313
 *
12314
 *     [78] extParsedEnt ::= TextDecl? content
12315
 *
12316
 * This correspond to a "Well Balanced" chunk
12317
 *
12318
 * @param sax  the SAX handler block
12319
 * @param filename  the filename
12320
 * @returns the resulting document tree
12321
 */
12322
12323
xmlDocPtr
12324
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12325
0
    xmlDocPtr ret;
12326
0
    xmlParserCtxtPtr ctxt;
12327
12328
0
    ctxt = xmlCreateFileParserCtxt(filename);
12329
0
    if (ctxt == NULL) {
12330
0
  return(NULL);
12331
0
    }
12332
0
    if (sax != NULL) {
12333
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12334
0
            *ctxt->sax = *sax;
12335
0
        } else {
12336
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12337
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12338
0
        }
12339
0
        ctxt->userData = NULL;
12340
0
    }
12341
12342
0
    xmlParseExtParsedEnt(ctxt);
12343
12344
0
    if (ctxt->wellFormed) {
12345
0
  ret = ctxt->myDoc;
12346
0
    } else {
12347
0
        ret = NULL;
12348
0
        xmlFreeDoc(ctxt->myDoc);
12349
0
    }
12350
12351
0
    xmlFreeParserCtxt(ctxt);
12352
12353
0
    return(ret);
12354
0
}
12355
12356
/**
12357
 * parse an XML external entity out of context and build a tree.
12358
 *
12359
 *     [78] extParsedEnt ::= TextDecl? content
12360
 *
12361
 * This correspond to a "Well Balanced" chunk
12362
 *
12363
 * @param filename  the filename
12364
 * @returns the resulting document tree
12365
 */
12366
12367
xmlDocPtr
12368
0
xmlParseEntity(const char *filename) {
12369
0
    return(xmlSAXParseEntity(NULL, filename));
12370
0
}
12371
#endif /* LIBXML_SAX1_ENABLED */
12372
12373
/**
12374
 * Create a parser context for an external entity
12375
 * Automatic support for ZLIB/Compress compressed document is provided
12376
 * by default if found at compile-time.
12377
 *
12378
 * @deprecated Don't use.
12379
 *
12380
 * @param URL  the entity URL
12381
 * @param ID  the entity PUBLIC ID
12382
 * @param base  a possible base for the target URI
12383
 * @returns the new parser context or NULL
12384
 */
12385
xmlParserCtxtPtr
12386
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12387
0
                    const xmlChar *base) {
12388
0
    xmlParserCtxtPtr ctxt;
12389
0
    xmlParserInputPtr input;
12390
0
    xmlChar *uri = NULL;
12391
12392
0
    ctxt = xmlNewParserCtxt();
12393
0
    if (ctxt == NULL)
12394
0
  return(NULL);
12395
12396
0
    if (base != NULL) {
12397
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12398
0
            goto error;
12399
0
        if (uri != NULL)
12400
0
            URL = uri;
12401
0
    }
12402
12403
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12404
0
                            XML_RESOURCE_UNKNOWN);
12405
0
    if (input == NULL)
12406
0
        goto error;
12407
12408
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12409
0
        xmlFreeInputStream(input);
12410
0
        goto error;
12411
0
    }
12412
12413
0
    xmlFree(uri);
12414
0
    return(ctxt);
12415
12416
0
error:
12417
0
    xmlFree(uri);
12418
0
    xmlFreeParserCtxt(ctxt);
12419
0
    return(NULL);
12420
0
}
12421
12422
/************************************************************************
12423
 *                  *
12424
 *    Front ends when parsing from a file     *
12425
 *                  *
12426
 ************************************************************************/
12427
12428
/**
12429
 * Create a parser context for a file or URL content.
12430
 * Automatic support for ZLIB/Compress compressed document is provided
12431
 * by default if found at compile-time and for file accesses
12432
 *
12433
 * @deprecated Use xmlNewParserCtxt() and xmlCtxtReadFile().
12434
 *
12435
 * @param filename  the filename or URL
12436
 * @param options  a combination of xmlParserOption
12437
 * @returns the new parser context or NULL
12438
 */
12439
xmlParserCtxtPtr
12440
xmlCreateURLParserCtxt(const char *filename, int options)
12441
0
{
12442
0
    xmlParserCtxtPtr ctxt;
12443
0
    xmlParserInputPtr input;
12444
12445
0
    ctxt = xmlNewParserCtxt();
12446
0
    if (ctxt == NULL)
12447
0
  return(NULL);
12448
12449
0
    options |= XML_PARSE_UNZIP;
12450
12451
0
    xmlCtxtUseOptions(ctxt, options);
12452
0
    ctxt->linenumbers = 1;
12453
12454
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12455
0
    if (input == NULL) {
12456
0
  xmlFreeParserCtxt(ctxt);
12457
0
  return(NULL);
12458
0
    }
12459
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12460
0
        xmlFreeInputStream(input);
12461
0
        xmlFreeParserCtxt(ctxt);
12462
0
        return(NULL);
12463
0
    }
12464
12465
0
    return(ctxt);
12466
0
}
12467
12468
/**
12469
 * Create a parser context for a file content.
12470
 * Automatic support for ZLIB/Compress compressed document is provided
12471
 * by default if found at compile-time.
12472
 *
12473
 * @deprecated Use xmlNewParserCtxt() and xmlCtxtReadFile().
12474
 *
12475
 * @param filename  the filename
12476
 * @returns the new parser context or NULL
12477
 */
12478
xmlParserCtxtPtr
12479
xmlCreateFileParserCtxt(const char *filename)
12480
0
{
12481
0
    return(xmlCreateURLParserCtxt(filename, 0));
12482
0
}
12483
12484
#ifdef LIBXML_SAX1_ENABLED
12485
/**
12486
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12487
 * compressed document is provided by default if found at compile-time.
12488
 * It use the given SAX function block to handle the parsing callback.
12489
 * If sax is NULL, fallback to the default DOM tree building routines.
12490
 *
12491
 * @deprecated Use xmlNewSAXParserCtxt() and xmlCtxtReadFile().
12492
 *
12493
 * User data (void *) is stored within the parser context in the
12494
 * context's _private member, so it is available nearly everywhere in libxml
12495
 *
12496
 * @param sax  the SAX handler block
12497
 * @param filename  the filename
12498
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12499
 *             documents
12500
 * @param data  the userdata
12501
 * @returns the resulting document tree
12502
 */
12503
12504
xmlDocPtr
12505
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12506
0
                        int recovery, void *data) {
12507
0
    xmlDocPtr ret = NULL;
12508
0
    xmlParserCtxtPtr ctxt;
12509
0
    xmlParserInputPtr input;
12510
12511
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12512
0
    if (ctxt == NULL)
12513
0
  return(NULL);
12514
12515
0
    if (data != NULL)
12516
0
  ctxt->_private = data;
12517
12518
0
    if (recovery) {
12519
0
        ctxt->options |= XML_PARSE_RECOVER;
12520
0
        ctxt->recovery = 1;
12521
0
    }
12522
12523
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12524
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12525
0
    else
12526
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12527
12528
0
    if (input != NULL)
12529
0
        ret = xmlCtxtParseDocument(ctxt, input);
12530
12531
0
    xmlFreeParserCtxt(ctxt);
12532
0
    return(ret);
12533
0
}
12534
12535
/**
12536
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12537
 * compressed document is provided by default if found at compile-time.
12538
 * It use the given SAX function block to handle the parsing callback.
12539
 * If sax is NULL, fallback to the default DOM tree building routines.
12540
 *
12541
 * @deprecated Use xmlNewSAXParserCtxt() and xmlCtxtReadFile().
12542
 *
12543
 * @param sax  the SAX handler block
12544
 * @param filename  the filename
12545
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12546
 *             documents
12547
 * @returns the resulting document tree
12548
 */
12549
12550
xmlDocPtr
12551
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12552
0
                          int recovery) {
12553
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12554
0
}
12555
12556
/**
12557
 * parse an XML in-memory document and build a tree.
12558
 * In the case the document is not Well Formed, a attempt to build a
12559
 * tree is tried anyway
12560
 *
12561
 * @deprecated Use xmlReadDoc() with XML_PARSE_RECOVER.
12562
 *
12563
 * @param cur  a pointer to an array of xmlChar
12564
 * @returns the resulting document tree or NULL in case of failure
12565
 */
12566
12567
xmlDocPtr
12568
0
xmlRecoverDoc(const xmlChar *cur) {
12569
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12570
0
}
12571
12572
/**
12573
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12574
 * compressed document is provided by default if found at compile-time.
12575
 *
12576
 * @deprecated Use xmlReadFile().
12577
 *
12578
 * @param filename  the filename
12579
 * @returns the resulting document tree if the file was wellformed,
12580
 * NULL otherwise.
12581
 */
12582
12583
xmlDocPtr
12584
0
xmlParseFile(const char *filename) {
12585
0
    return(xmlSAXParseFile(NULL, filename, 0));
12586
0
}
12587
12588
/**
12589
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12590
 * compressed document is provided by default if found at compile-time.
12591
 * In the case the document is not Well Formed, it attempts to build
12592
 * a tree anyway
12593
 *
12594
 * @deprecated Use xmlReadFile() with XML_PARSE_RECOVER.
12595
 *
12596
 * @param filename  the filename
12597
 * @returns the resulting document tree or NULL in case of failure
12598
 */
12599
12600
xmlDocPtr
12601
0
xmlRecoverFile(const char *filename) {
12602
0
    return(xmlSAXParseFile(NULL, filename, 1));
12603
0
}
12604
12605
12606
/**
12607
 * Setup the parser context to parse a new buffer; Clears any prior
12608
 * contents from the parser context. The buffer parameter must not be
12609
 * NULL, but the filename parameter can be
12610
 *
12611
 * @deprecated Don't use.
12612
 *
12613
 * @param ctxt  an XML parser context
12614
 * @param buffer  a xmlChar * buffer
12615
 * @param filename  a file name
12616
 */
12617
void
12618
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12619
                             const char* filename)
12620
0
{
12621
0
    xmlParserInputPtr input;
12622
12623
0
    if ((ctxt == NULL) || (buffer == NULL))
12624
0
        return;
12625
12626
0
    xmlCtxtReset(ctxt);
12627
12628
0
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12629
0
                                      NULL, 0);
12630
0
    if (input == NULL)
12631
0
        return;
12632
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
12633
0
        xmlFreeInputStream(input);
12634
0
}
12635
12636
/**
12637
 * parse an XML file and call the given SAX handler routines.
12638
 * Automatic support for ZLIB/Compress compressed document is provided
12639
 *
12640
 * @deprecated Use xmlNewSAXParserCtxt() and xmlCtxtReadFile().
12641
 *
12642
 * @param sax  a SAX handler
12643
 * @param user_data  The user data returned on SAX callbacks
12644
 * @param filename  a file name
12645
 * @returns 0 in case of success or a error number otherwise
12646
 */
12647
int
12648
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12649
0
                    const char *filename) {
12650
0
    int ret = 0;
12651
0
    xmlParserCtxtPtr ctxt;
12652
12653
0
    ctxt = xmlCreateFileParserCtxt(filename);
12654
0
    if (ctxt == NULL) return -1;
12655
0
    if (sax != NULL) {
12656
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12657
0
            *ctxt->sax = *sax;
12658
0
        } else {
12659
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12660
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12661
0
        }
12662
0
  ctxt->userData = user_data;
12663
0
    }
12664
12665
0
    xmlParseDocument(ctxt);
12666
12667
0
    if (ctxt->wellFormed)
12668
0
  ret = 0;
12669
0
    else {
12670
0
        if (ctxt->errNo != 0)
12671
0
      ret = ctxt->errNo;
12672
0
  else
12673
0
      ret = -1;
12674
0
    }
12675
0
    if (ctxt->myDoc != NULL) {
12676
0
        xmlFreeDoc(ctxt->myDoc);
12677
0
  ctxt->myDoc = NULL;
12678
0
    }
12679
0
    xmlFreeParserCtxt(ctxt);
12680
12681
0
    return ret;
12682
0
}
12683
#endif /* LIBXML_SAX1_ENABLED */
12684
12685
/************************************************************************
12686
 *                  *
12687
 *    Front ends when parsing from memory     *
12688
 *                  *
12689
 ************************************************************************/
12690
12691
/**
12692
 * Create a parser context for an XML in-memory document. The input buffer
12693
 * must not contain a terminating null byte.
12694
 *
12695
 * @param buffer  a pointer to a char array
12696
 * @param size  the size of the array
12697
 * @returns the new parser context or NULL
12698
 */
12699
xmlParserCtxtPtr
12700
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12701
0
    xmlParserCtxtPtr ctxt;
12702
0
    xmlParserInputPtr input;
12703
12704
0
    if (size < 0)
12705
0
  return(NULL);
12706
12707
0
    ctxt = xmlNewParserCtxt();
12708
0
    if (ctxt == NULL)
12709
0
  return(NULL);
12710
12711
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
12712
0
    if (input == NULL) {
12713
0
  xmlFreeParserCtxt(ctxt);
12714
0
  return(NULL);
12715
0
    }
12716
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12717
0
        xmlFreeInputStream(input);
12718
0
        xmlFreeParserCtxt(ctxt);
12719
0
        return(NULL);
12720
0
    }
12721
12722
0
    return(ctxt);
12723
0
}
12724
12725
#ifdef LIBXML_SAX1_ENABLED
12726
/**
12727
 * parse an XML in-memory block and use the given SAX function block
12728
 * to handle the parsing callback. If sax is NULL, fallback to the default
12729
 * DOM tree building routines.
12730
 *
12731
 * @deprecated Use xmlNewSAXParserCtxt() and xmlCtxtReadMemory().
12732
 *
12733
 * User data (void *) is stored within the parser context in the
12734
 * context's _private member, so it is available nearly everywhere in libxml
12735
 *
12736
 * @param sax  the SAX handler block
12737
 * @param buffer  an pointer to a char array
12738
 * @param size  the size of the array
12739
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12740
 *             documents
12741
 * @param data  the userdata
12742
 * @returns the resulting document tree
12743
 */
12744
12745
xmlDocPtr
12746
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12747
0
                          int size, int recovery, void *data) {
12748
0
    xmlDocPtr ret = NULL;
12749
0
    xmlParserCtxtPtr ctxt;
12750
0
    xmlParserInputPtr input;
12751
12752
0
    if (size < 0)
12753
0
        return(NULL);
12754
12755
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12756
0
    if (ctxt == NULL)
12757
0
        return(NULL);
12758
12759
0
    if (data != NULL)
12760
0
  ctxt->_private=data;
12761
12762
0
    if (recovery) {
12763
0
        ctxt->options |= XML_PARSE_RECOVER;
12764
0
        ctxt->recovery = 1;
12765
0
    }
12766
12767
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
12768
0
                                      XML_INPUT_BUF_STATIC);
12769
12770
0
    if (input != NULL)
12771
0
        ret = xmlCtxtParseDocument(ctxt, input);
12772
12773
0
    xmlFreeParserCtxt(ctxt);
12774
0
    return(ret);
12775
0
}
12776
12777
/**
12778
 * parse an XML in-memory block and use the given SAX function block
12779
 * to handle the parsing callback. If sax is NULL, fallback to the default
12780
 * DOM tree building routines.
12781
 *
12782
 * @deprecated Use xmlNewSAXParserCtxt() and xmlCtxtReadMemory().
12783
 *
12784
 * @param sax  the SAX handler block
12785
 * @param buffer  an pointer to a char array
12786
 * @param size  the size of the array
12787
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12788
 *             documents
12789
 * @returns the resulting document tree
12790
 */
12791
xmlDocPtr
12792
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12793
0
            int size, int recovery) {
12794
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12795
0
}
12796
12797
/**
12798
 * parse an XML in-memory block and build a tree.
12799
 *
12800
 * @deprecated Use xmlReadMemory().
12801
 *
12802
 * @param buffer  an pointer to a char array
12803
 * @param size  the size of the array
12804
 * @returns the resulting document tree
12805
 */
12806
12807
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
12808
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
12809
0
}
12810
12811
/**
12812
 * parse an XML in-memory block and build a tree.
12813
 * In the case the document is not Well Formed, an attempt to
12814
 * build a tree is tried anyway
12815
 *
12816
 * @deprecated Use xmlReadMemory() with XML_PARSE_RECOVER.
12817
 *
12818
 * @param buffer  an pointer to a char array
12819
 * @param size  the size of the array
12820
 * @returns the resulting document tree or NULL in case of error
12821
 */
12822
12823
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
12824
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
12825
0
}
12826
12827
/**
12828
 * parse an XML in-memory buffer and call the given SAX handler routines.
12829
 *
12830
 * @deprecated Use xmlNewSAXParserCtxt() and xmlCtxtReadMemory().
12831
 *
12832
 * @param sax  a SAX handler
12833
 * @param user_data  The user data returned on SAX callbacks
12834
 * @param buffer  an in-memory XML document input
12835
 * @param size  the length of the XML document in bytes
12836
 * @returns 0 in case of success or a error number otherwise
12837
 */
12838
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
12839
0
        const char *buffer, int size) {
12840
0
    int ret = 0;
12841
0
    xmlParserCtxtPtr ctxt;
12842
12843
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12844
0
    if (ctxt == NULL) return -1;
12845
0
    if (sax != NULL) {
12846
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12847
0
            *ctxt->sax = *sax;
12848
0
        } else {
12849
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12850
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12851
0
        }
12852
0
  ctxt->userData = user_data;
12853
0
    }
12854
12855
0
    xmlParseDocument(ctxt);
12856
12857
0
    if (ctxt->wellFormed)
12858
0
  ret = 0;
12859
0
    else {
12860
0
        if (ctxt->errNo != 0)
12861
0
      ret = ctxt->errNo;
12862
0
  else
12863
0
      ret = -1;
12864
0
    }
12865
0
    if (ctxt->myDoc != NULL) {
12866
0
        xmlFreeDoc(ctxt->myDoc);
12867
0
  ctxt->myDoc = NULL;
12868
0
    }
12869
0
    xmlFreeParserCtxt(ctxt);
12870
12871
0
    return ret;
12872
0
}
12873
#endif /* LIBXML_SAX1_ENABLED */
12874
12875
/**
12876
 * Creates a parser context for an XML in-memory document.
12877
 *
12878
 * @param str  a pointer to an array of xmlChar
12879
 * @returns the new parser context or NULL
12880
 */
12881
xmlParserCtxtPtr
12882
0
xmlCreateDocParserCtxt(const xmlChar *str) {
12883
0
    xmlParserCtxtPtr ctxt;
12884
0
    xmlParserInputPtr input;
12885
12886
0
    ctxt = xmlNewParserCtxt();
12887
0
    if (ctxt == NULL)
12888
0
  return(NULL);
12889
12890
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
12891
0
    if (input == NULL) {
12892
0
  xmlFreeParserCtxt(ctxt);
12893
0
  return(NULL);
12894
0
    }
12895
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12896
0
        xmlFreeInputStream(input);
12897
0
        xmlFreeParserCtxt(ctxt);
12898
0
        return(NULL);
12899
0
    }
12900
12901
0
    return(ctxt);
12902
0
}
12903
12904
#ifdef LIBXML_SAX1_ENABLED
12905
/**
12906
 * parse an XML in-memory document and build a tree.
12907
 * It use the given SAX function block to handle the parsing callback.
12908
 * If sax is NULL, fallback to the default DOM tree building routines.
12909
 *
12910
 * @deprecated Use xmlNewSAXParserCtxt() and xmlCtxtReadDoc().
12911
 *
12912
 * @param sax  the SAX handler block
12913
 * @param cur  a pointer to an array of xmlChar
12914
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12915
 *             documents
12916
 * @returns the resulting document tree
12917
 */
12918
12919
xmlDocPtr
12920
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
12921
0
    xmlDocPtr ret;
12922
0
    xmlParserCtxtPtr ctxt;
12923
0
    xmlSAXHandlerPtr oldsax = NULL;
12924
12925
0
    if (cur == NULL) return(NULL);
12926
12927
12928
0
    ctxt = xmlCreateDocParserCtxt(cur);
12929
0
    if (ctxt == NULL) return(NULL);
12930
0
    if (sax != NULL) {
12931
0
        oldsax = ctxt->sax;
12932
0
        ctxt->sax = sax;
12933
0
        ctxt->userData = NULL;
12934
0
    }
12935
12936
0
    xmlParseDocument(ctxt);
12937
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12938
0
    else {
12939
0
       ret = NULL;
12940
0
       xmlFreeDoc(ctxt->myDoc);
12941
0
       ctxt->myDoc = NULL;
12942
0
    }
12943
0
    if (sax != NULL)
12944
0
  ctxt->sax = oldsax;
12945
0
    xmlFreeParserCtxt(ctxt);
12946
12947
0
    return(ret);
12948
0
}
12949
12950
/**
12951
 * parse an XML in-memory document and build a tree.
12952
 *
12953
 * @deprecated Use xmlReadDoc().
12954
 *
12955
 * @param cur  a pointer to an array of xmlChar
12956
 * @returns the resulting document tree
12957
 */
12958
12959
xmlDocPtr
12960
0
xmlParseDoc(const xmlChar *cur) {
12961
0
    return(xmlSAXParseDoc(NULL, cur, 0));
12962
0
}
12963
#endif /* LIBXML_SAX1_ENABLED */
12964
12965
/************************************************************************
12966
 *                  *
12967
 *  New set (2.6.0) of simpler and more flexible APIs   *
12968
 *                  *
12969
 ************************************************************************/
12970
12971
/**
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named `dict` must be visible at the point
 * of use; when it is NULL every non-NULL string is freed.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves
 * like a single statement (safe inside unbraced if/else) and must be
 * followed by a semicolon at the call site.
 *
 * @param str  a string
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
12981
12982
/**
12983
 * Reset a parser context
12984
 *
12985
 * @param ctxt  an XML parser context
12986
 */
12987
void
12988
xmlCtxtReset(xmlParserCtxtPtr ctxt)
12989
0
{
12990
0
    xmlParserInputPtr input;
12991
0
    xmlDictPtr dict;
12992
12993
0
    if (ctxt == NULL)
12994
0
        return;
12995
12996
0
    dict = ctxt->dict;
12997
12998
0
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
12999
0
        xmlFreeInputStream(input);
13000
0
    }
13001
0
    ctxt->inputNr = 0;
13002
0
    ctxt->input = NULL;
13003
13004
0
    ctxt->spaceNr = 0;
13005
0
    if (ctxt->spaceTab != NULL) {
13006
0
  ctxt->spaceTab[0] = -1;
13007
0
  ctxt->space = &ctxt->spaceTab[0];
13008
0
    } else {
13009
0
        ctxt->space = NULL;
13010
0
    }
13011
13012
13013
0
    ctxt->nodeNr = 0;
13014
0
    ctxt->node = NULL;
13015
13016
0
    ctxt->nameNr = 0;
13017
0
    ctxt->name = NULL;
13018
13019
0
    ctxt->nsNr = 0;
13020
0
    xmlParserNsReset(ctxt->nsdb);
13021
13022
0
    DICT_FREE(ctxt->version);
13023
0
    ctxt->version = NULL;
13024
0
    DICT_FREE(ctxt->encoding);
13025
0
    ctxt->encoding = NULL;
13026
0
    DICT_FREE(ctxt->extSubURI);
13027
0
    ctxt->extSubURI = NULL;
13028
0
    DICT_FREE(ctxt->extSubSystem);
13029
0
    ctxt->extSubSystem = NULL;
13030
13031
0
    if (ctxt->directory != NULL) {
13032
0
        xmlFree(ctxt->directory);
13033
0
        ctxt->directory = NULL;
13034
0
    }
13035
13036
0
    if (ctxt->myDoc != NULL)
13037
0
        xmlFreeDoc(ctxt->myDoc);
13038
0
    ctxt->myDoc = NULL;
13039
13040
0
    ctxt->standalone = -1;
13041
0
    ctxt->hasExternalSubset = 0;
13042
0
    ctxt->hasPErefs = 0;
13043
0
    ctxt->html = 0;
13044
0
    ctxt->instate = XML_PARSER_START;
13045
13046
0
    ctxt->wellFormed = 1;
13047
0
    ctxt->nsWellFormed = 1;
13048
0
    ctxt->disableSAX = 0;
13049
0
    ctxt->valid = 1;
13050
0
    ctxt->record_info = 0;
13051
0
    ctxt->checkIndex = 0;
13052
0
    ctxt->endCheckState = 0;
13053
0
    ctxt->inSubset = 0;
13054
0
    ctxt->errNo = XML_ERR_OK;
13055
0
    ctxt->depth = 0;
13056
0
    ctxt->catalogs = NULL;
13057
0
    ctxt->sizeentities = 0;
13058
0
    ctxt->sizeentcopy = 0;
13059
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13060
13061
0
    if (ctxt->attsDefault != NULL) {
13062
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13063
0
        ctxt->attsDefault = NULL;
13064
0
    }
13065
0
    if (ctxt->attsSpecial != NULL) {
13066
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13067
0
        ctxt->attsSpecial = NULL;
13068
0
    }
13069
13070
0
#ifdef LIBXML_CATALOG_ENABLED
13071
0
    if (ctxt->catalogs != NULL)
13072
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13073
0
#endif
13074
0
    ctxt->nbErrors = 0;
13075
0
    ctxt->nbWarnings = 0;
13076
0
    if (ctxt->lastError.code != XML_ERR_OK)
13077
0
        xmlResetError(&ctxt->lastError);
13078
0
}
13079
13080
/**
13081
 * Reset a push parser context
13082
 *
13083
 * @param ctxt  an XML parser context
13084
 * @param chunk  a pointer to an array of chars
13085
 * @param size  number of chars in the array
13086
 * @param filename  an optional file name or URI
13087
 * @param encoding  the document encoding, or NULL
13088
 * @returns 0 in case of success and 1 in case of error
13089
 */
13090
int
13091
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13092
                 int size, const char *filename, const char *encoding)
13093
0
{
13094
0
    xmlParserInputPtr input;
13095
13096
0
    if (ctxt == NULL)
13097
0
        return(1);
13098
13099
0
    xmlCtxtReset(ctxt);
13100
13101
0
    input = xmlNewPushInput(filename, chunk, size);
13102
0
    if (input == NULL)
13103
0
        return(1);
13104
13105
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13106
0
        xmlFreeInputStream(input);
13107
0
        return(1);
13108
0
    }
13109
13110
0
    if (encoding != NULL)
13111
0
        xmlSwitchEncodingName(ctxt, encoding);
13112
13113
0
    return(0);
13114
0
}
13115
13116
static int
13117
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13118
21.9k
{
13119
21.9k
    int allMask;
13120
13121
21.9k
    if (ctxt == NULL)
13122
0
        return(-1);
13123
13124
    /*
13125
     * XInclude options aren't handled by the parser.
13126
     *
13127
     * XML_PARSE_XINCLUDE
13128
     * XML_PARSE_NOXINCNODE
13129
     * XML_PARSE_NOBASEFIX
13130
     */
13131
21.9k
    allMask = XML_PARSE_RECOVER |
13132
21.9k
              XML_PARSE_NOENT |
13133
21.9k
              XML_PARSE_DTDLOAD |
13134
21.9k
              XML_PARSE_DTDATTR |
13135
21.9k
              XML_PARSE_DTDVALID |
13136
21.9k
              XML_PARSE_NOERROR |
13137
21.9k
              XML_PARSE_NOWARNING |
13138
21.9k
              XML_PARSE_PEDANTIC |
13139
21.9k
              XML_PARSE_NOBLANKS |
13140
21.9k
#ifdef LIBXML_SAX1_ENABLED
13141
21.9k
              XML_PARSE_SAX1 |
13142
21.9k
#endif
13143
21.9k
              XML_PARSE_NONET |
13144
21.9k
              XML_PARSE_NODICT |
13145
21.9k
              XML_PARSE_NSCLEAN |
13146
21.9k
              XML_PARSE_NOCDATA |
13147
21.9k
              XML_PARSE_COMPACT |
13148
21.9k
              XML_PARSE_OLD10 |
13149
21.9k
              XML_PARSE_HUGE |
13150
21.9k
              XML_PARSE_OLDSAX |
13151
21.9k
              XML_PARSE_IGNORE_ENC |
13152
21.9k
              XML_PARSE_BIG_LINES |
13153
21.9k
              XML_PARSE_NO_XXE |
13154
21.9k
              XML_PARSE_UNZIP |
13155
21.9k
              XML_PARSE_NO_SYS_CATALOG |
13156
21.9k
              XML_PARSE_CATALOG_PI;
13157
13158
21.9k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13159
13160
    /*
13161
     * For some options, struct members are historically the source
13162
     * of truth. The values are initalized from global variables and
13163
     * old code could also modify them directly. Several older API
13164
     * functions that don't take an options argument rely on these
13165
     * deprecated mechanisms.
13166
     *
13167
     * Once public access to struct members and the globals are
13168
     * disabled, we can use the options bitmask as source of
13169
     * truth, making all these struct members obsolete.
13170
     *
13171
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13172
     * loading of the external subset.
13173
     */
13174
21.9k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13175
21.9k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13176
21.9k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13177
21.9k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13178
21.9k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13179
21.9k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13180
21.9k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13181
21.9k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13182
13183
21.9k
    if (options & XML_PARSE_HUGE) {
13184
0
        if (ctxt->dict != NULL)
13185
0
            xmlDictSetLimit(ctxt->dict, 0);
13186
0
    }
13187
13188
21.9k
    ctxt->linenumbers = 1;
13189
13190
21.9k
    return(options & ~allMask);
13191
21.9k
}
13192
13193
/**
13194
 * Applies the options to the parser context. Unset options are
13195
 * cleared.
13196
 *
13197
 * @since 2.13.0
13198
 *
13199
 * With older versions, you can use xmlCtxtUseOptions().
13200
 *
13201
 * @param ctxt  an XML parser context
13202
 * @param options  a bitmask of xmlParserOption values
13203
 * @returns 0 in case of success, the set of unknown or unimplemented options
13204
 *         in case of error.
13205
 */
13206
int
13207
xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
13208
0
{
13209
0
#ifdef LIBXML_HTML_ENABLED
13210
0
    if ((ctxt != NULL) && (ctxt->html))
13211
0
        return(htmlCtxtSetOptions(ctxt, options));
13212
0
#endif
13213
13214
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13215
0
}
13216
13217
/**
13218
 * Get the current options of the parser context.
13219
 *
13220
 * @since 2.14.0
13221
 *
13222
 * @param ctxt  an XML parser context
13223
 * @returns the current options set in the parser context, or -1 if ctxt is NULL.
13224
 */
13225
int
13226
xmlCtxtGetOptions(xmlParserCtxtPtr ctxt)
13227
0
{
13228
0
    if (ctxt == NULL)
13229
0
        return(-1);
13230
13231
0
    return(ctxt->options);
13232
0
}
13233
13234
/**
13235
 * Applies the options to the parser context. The following options
13236
 * are never cleared and can only be enabled:
13237
 *
13238
 * @deprecated Use xmlCtxtSetOptions().
13239
 *
13240
 * - XML_PARSE_NOERROR
13241
 * - XML_PARSE_NOWARNING
13242
 * - XML_PARSE_NONET
13243
 * - XML_PARSE_NSCLEAN
13244
 * - XML_PARSE_NOCDATA
13245
 * - XML_PARSE_COMPACT
13246
 * - XML_PARSE_OLD10
13247
 * - XML_PARSE_HUGE
13248
 * - XML_PARSE_OLDSAX
13249
 * - XML_PARSE_IGNORE_ENC
13250
 * - XML_PARSE_BIG_LINES
13251
 *
13252
 * @param ctxt  an XML parser context
13253
 * @param options  a combination of xmlParserOption
13254
 * @returns 0 in case of success, the set of unknown or unimplemented options
13255
 *         in case of error.
13256
 */
13257
int
13258
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13259
21.9k
{
13260
21.9k
    int keepMask;
13261
13262
21.9k
#ifdef LIBXML_HTML_ENABLED
13263
21.9k
    if ((ctxt != NULL) && (ctxt->html))
13264
0
        return(htmlCtxtUseOptions(ctxt, options));
13265
21.9k
#endif
13266
13267
    /*
13268
     * For historic reasons, some options can only be enabled.
13269
     */
13270
21.9k
    keepMask = XML_PARSE_NOERROR |
13271
21.9k
               XML_PARSE_NOWARNING |
13272
21.9k
               XML_PARSE_NONET |
13273
21.9k
               XML_PARSE_NSCLEAN |
13274
21.9k
               XML_PARSE_NOCDATA |
13275
21.9k
               XML_PARSE_COMPACT |
13276
21.9k
               XML_PARSE_OLD10 |
13277
21.9k
               XML_PARSE_HUGE |
13278
21.9k
               XML_PARSE_OLDSAX |
13279
21.9k
               XML_PARSE_IGNORE_ENC |
13280
21.9k
               XML_PARSE_BIG_LINES;
13281
13282
21.9k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13283
21.9k
}
13284
13285
/**
13286
 * To protect against exponential entity expansion ("billion laughs"), the
13287
 * size of serialized output is (roughly) limited to the input size
13288
 * multiplied by this factor. The default value is 5.
13289
 *
13290
 * When working with documents making heavy use of entity expansion, it can
13291
 * be necessary to increase the value. For security reasons, this should only
13292
 * be considered when processing trusted input.
13293
 *
13294
 * @param ctxt  an XML parser context
13295
 * @param maxAmpl  maximum amplification factor
13296
 */
13297
void
13298
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
13299
0
{
13300
0
    if (ctxt == NULL)
13301
0
        return;
13302
0
    ctxt->maxAmpl = maxAmpl;
13303
0
}
13304
13305
/**
13306
 * Parse an XML document and return the resulting document tree.
13307
 * Takes ownership of the input object.
13308
 *
13309
 * @since 2.13.0
13310
 *
13311
 * @param ctxt  an XML parser context
13312
 * @param input  parser input
13313
 * @returns the resulting document tree or NULL
13314
 */
13315
xmlDocPtr
13316
xmlCtxtParseDocument(xmlParserCtxtPtr ctxt, xmlParserInputPtr input)
13317
21.9k
{
13318
21.9k
    xmlDocPtr ret = NULL;
13319
13320
21.9k
    if ((ctxt == NULL) || (input == NULL)) {
13321
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
13322
0
        xmlFreeInputStream(input);
13323
0
        return(NULL);
13324
0
    }
13325
13326
    /* assert(ctxt->inputNr == 0); */
13327
21.9k
    while (ctxt->inputNr > 0)
13328
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13329
13330
21.9k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13331
0
        xmlFreeInputStream(input);
13332
0
        return(NULL);
13333
0
    }
13334
13335
21.9k
    xmlParseDocument(ctxt);
13336
13337
21.9k
    ret = xmlCtxtGetDocument(ctxt);
13338
13339
    /* assert(ctxt->inputNr == 1); */
13340
43.9k
    while (ctxt->inputNr > 0)
13341
21.9k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13342
13343
21.9k
    return(ret);
13344
21.9k
}
13345
13346
/**
13347
 * Convenience function to parse an XML document from a
13348
 * zero-terminated string.
13349
 *
13350
 * See xmlCtxtReadDoc() for details.
13351
 *
13352
 * @param cur  a pointer to a zero terminated string
13353
 * @param URL  base URL (optional)
13354
 * @param encoding  the document encoding (optional)
13355
 * @param options  a combination of xmlParserOption
13356
 * @returns the resulting document tree
13357
 */
13358
xmlDocPtr
13359
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13360
           int options)
13361
0
{
13362
0
    xmlParserCtxtPtr ctxt;
13363
0
    xmlParserInputPtr input;
13364
0
    xmlDocPtr doc = NULL;
13365
13366
0
    ctxt = xmlNewParserCtxt();
13367
0
    if (ctxt == NULL)
13368
0
        return(NULL);
13369
13370
0
    xmlCtxtUseOptions(ctxt, options);
13371
13372
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13373
0
                                      XML_INPUT_BUF_STATIC);
13374
13375
0
    if (input != NULL)
13376
0
        doc = xmlCtxtParseDocument(ctxt, input);
13377
13378
0
    xmlFreeParserCtxt(ctxt);
13379
0
    return(doc);
13380
0
}
13381
13382
/**
13383
 * Convenience function to parse an XML file from the filesystem,
13384
 * the network or a global user-defined resource loader.
13385
 *
13386
 * This function always enables the XML_PARSE_UNZIP option for
13387
 * backward compatibility. If a "-" filename is passed, it will
13388
 * read from stdin. Both of these features are potentially
13389
 * insecure and might be removed from later versions.
13390
 *
13391
 * See xmlCtxtReadFile() for details.
13392
 *
13393
 * @param filename  a file or URL
13394
 * @param encoding  the document encoding (optional)
13395
 * @param options  a combination of xmlParserOption
13396
 * @returns the resulting document tree
13397
 */
13398
xmlDocPtr
13399
xmlReadFile(const char *filename, const char *encoding, int options)
13400
0
{
13401
0
    xmlParserCtxtPtr ctxt;
13402
0
    xmlParserInputPtr input;
13403
0
    xmlDocPtr doc = NULL;
13404
13405
0
    ctxt = xmlNewParserCtxt();
13406
0
    if (ctxt == NULL)
13407
0
        return(NULL);
13408
13409
0
    options |= XML_PARSE_UNZIP;
13410
13411
0
    xmlCtxtUseOptions(ctxt, options);
13412
13413
    /*
13414
     * Backward compatibility for users of command line utilities like
13415
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13416
     * should be removed at some point.
13417
     */
13418
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13419
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13420
0
                                      encoding, 0);
13421
0
    else
13422
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13423
13424
0
    if (input != NULL)
13425
0
        doc = xmlCtxtParseDocument(ctxt, input);
13426
13427
0
    xmlFreeParserCtxt(ctxt);
13428
0
    return(doc);
13429
0
}
13430
13431
/**
13432
 * Parse an XML in-memory document and build a tree. The input buffer must
13433
 * not contain a terminating null byte.
13434
 *
13435
 * See xmlCtxtReadMemory() for details.
13436
 *
13437
 * @param buffer  a pointer to a char array
13438
 * @param size  the size of the array
13439
 * @param url  base URL (optional)
13440
 * @param encoding  the document encoding (optional)
13441
 * @param options  a combination of xmlParserOption
13442
 * @returns the resulting document tree
13443
 */
13444
xmlDocPtr
13445
xmlReadMemory(const char *buffer, int size, const char *url,
13446
              const char *encoding, int options)
13447
21.9k
{
13448
21.9k
    xmlParserCtxtPtr ctxt;
13449
21.9k
    xmlParserInputPtr input;
13450
21.9k
    xmlDocPtr doc = NULL;
13451
13452
21.9k
    if (size < 0)
13453
0
  return(NULL);
13454
13455
21.9k
    ctxt = xmlNewParserCtxt();
13456
21.9k
    if (ctxt == NULL)
13457
0
        return(NULL);
13458
13459
21.9k
    xmlCtxtUseOptions(ctxt, options);
13460
13461
21.9k
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13462
21.9k
                                      XML_INPUT_BUF_STATIC);
13463
13464
21.9k
    if (input != NULL)
13465
21.9k
        doc = xmlCtxtParseDocument(ctxt, input);
13466
13467
21.9k
    xmlFreeParserCtxt(ctxt);
13468
21.9k
    return(doc);
13469
21.9k
}
13470
13471
/**
13472
 * Parse an XML from a file descriptor and build a tree.
13473
 *
13474
 * See xmlCtxtReadFd() for details.
13475
 *
13476
 * NOTE that the file descriptor will not be closed when the
13477
 * context is freed or reset.
13478
 *
13479
 * @param fd  an open file descriptor
13480
 * @param URL  base URL (optional)
13481
 * @param encoding  the document encoding (optional)
13482
 * @param options  a combination of xmlParserOption
13483
 * @returns the resulting document tree
13484
 */
13485
xmlDocPtr
13486
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13487
0
{
13488
0
    xmlParserCtxtPtr ctxt;
13489
0
    xmlParserInputPtr input;
13490
0
    xmlDocPtr doc = NULL;
13491
13492
0
    ctxt = xmlNewParserCtxt();
13493
0
    if (ctxt == NULL)
13494
0
        return(NULL);
13495
13496
0
    xmlCtxtUseOptions(ctxt, options);
13497
13498
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13499
13500
0
    if (input != NULL)
13501
0
        doc = xmlCtxtParseDocument(ctxt, input);
13502
13503
0
    xmlFreeParserCtxt(ctxt);
13504
0
    return(doc);
13505
0
}
13506
13507
/**
13508
 * Parse an XML document from I/O functions and context and build a tree.
13509
 *
13510
 * See xmlCtxtReadIO() for details.
13511
 *
13512
 * @param ioread  an I/O read function
13513
 * @param ioclose  an I/O close function (optional)
13514
 * @param ioctx  an I/O handler
13515
 * @param URL  base URL (optional)
13516
 * @param encoding  the document encoding (optional)
13517
 * @param options  a combination of xmlParserOption
13518
 * @returns the resulting document tree
13519
 */
13520
xmlDocPtr
13521
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13522
          void *ioctx, const char *URL, const char *encoding, int options)
13523
0
{
13524
0
    xmlParserCtxtPtr ctxt;
13525
0
    xmlParserInputPtr input;
13526
0
    xmlDocPtr doc = NULL;
13527
13528
0
    ctxt = xmlNewParserCtxt();
13529
0
    if (ctxt == NULL)
13530
0
        return(NULL);
13531
13532
0
    xmlCtxtUseOptions(ctxt, options);
13533
13534
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13535
0
                                  encoding, 0);
13536
13537
0
    if (input != NULL)
13538
0
        doc = xmlCtxtParseDocument(ctxt, input);
13539
13540
0
    xmlFreeParserCtxt(ctxt);
13541
0
    return(doc);
13542
0
}
13543
13544
/**
13545
 * Parse an XML in-memory document and build a tree.
13546
 *
13547
 * `URL` is used as base to resolve external entities and for error
13548
 * reporting.
13549
 *
13550
 * See xmlCtxtUseOptions() for details.
13551
 *
13552
 * @param ctxt  an XML parser context
13553
 * @param str  a pointer to a zero terminated string
13554
 * @param URL  base URL (optional)
13555
 * @param encoding  the document encoding (optional)
13556
 * @param options  a combination of xmlParserOption
13557
 * @returns the resulting document tree
13558
 */
13559
xmlDocPtr
13560
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
13561
               const char *URL, const char *encoding, int options)
13562
0
{
13563
0
    xmlParserInputPtr input;
13564
13565
0
    if (ctxt == NULL)
13566
0
        return(NULL);
13567
13568
0
    xmlCtxtReset(ctxt);
13569
0
    xmlCtxtUseOptions(ctxt, options);
13570
13571
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
13572
0
                                      XML_INPUT_BUF_STATIC);
13573
0
    if (input == NULL)
13574
0
        return(NULL);
13575
13576
0
    return(xmlCtxtParseDocument(ctxt, input));
13577
0
}
13578
13579
/**
13580
 * Parse an XML file from the filesystem, the network or a user-defined
13581
 * resource loader.
13582
 *
13583
 * This function always enables the XML_PARSE_UNZIP option for
13584
 * backward compatibility. This feature is potentially insecure
13585
 * and might be removed from later versions.
13586
 *
13587
 * @param ctxt  an XML parser context
13588
 * @param filename  a file or URL
13589
 * @param encoding  the document encoding (optional)
13590
 * @param options  a combination of xmlParserOption
13591
 * @returns the resulting document tree
13592
 */
13593
xmlDocPtr
13594
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13595
                const char *encoding, int options)
13596
0
{
13597
0
    xmlParserInputPtr input;
13598
13599
0
    if (ctxt == NULL)
13600
0
        return(NULL);
13601
13602
0
    options |= XML_PARSE_UNZIP;
13603
13604
0
    xmlCtxtReset(ctxt);
13605
0
    xmlCtxtUseOptions(ctxt, options);
13606
13607
0
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13608
0
    if (input == NULL)
13609
0
        return(NULL);
13610
13611
0
    return(xmlCtxtParseDocument(ctxt, input));
13612
0
}
13613
13614
/**
13615
 * Parse an XML in-memory document and build a tree. The input buffer must
13616
 * not contain a terminating null byte.
13617
 *
13618
 * `URL` is used as base to resolve external entities and for error
13619
 * reporting.
13620
 *
13621
 * See xmlCtxtUseOptions() for details.
13622
 *
13623
 * @param ctxt  an XML parser context
13624
 * @param buffer  a pointer to a char array
13625
 * @param size  the size of the array
13626
 * @param URL  base URL (optional)
13627
 * @param encoding  the document encoding (optional)
13628
 * @param options  a combination of xmlParserOption
13629
 * @returns the resulting document tree
13630
 */
13631
xmlDocPtr
13632
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
13633
                  const char *URL, const char *encoding, int options)
13634
0
{
13635
0
    xmlParserInputPtr input;
13636
13637
0
    if ((ctxt == NULL) || (size < 0))
13638
0
        return(NULL);
13639
13640
0
    xmlCtxtReset(ctxt);
13641
0
    xmlCtxtUseOptions(ctxt, options);
13642
13643
0
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
13644
0
                                      XML_INPUT_BUF_STATIC);
13645
0
    if (input == NULL)
13646
0
        return(NULL);
13647
13648
0
    return(xmlCtxtParseDocument(ctxt, input));
13649
0
}
13650
13651
/**
13652
 * Parse an XML document from a file descriptor and build a tree.
13653
 *
13654
 * NOTE that the file descriptor will not be closed when the
13655
 * context is freed or reset.
13656
 *
13657
 * `URL` is used as base to resolve external entities and for error
13658
 * reporting.
13659
 *
13660
 * See xmlCtxtUseOptions() for details.
13661
 *
13662
 * @param ctxt  an XML parser context
13663
 * @param fd  an open file descriptor
13664
 * @param URL  base URL (optional)
13665
 * @param encoding  the document encoding (optional)
13666
 * @param options  a combination of xmlParserOption
13667
 * @returns the resulting document tree
13668
 */
13669
xmlDocPtr
13670
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13671
              const char *URL, const char *encoding, int options)
13672
0
{
13673
0
    xmlParserInputPtr input;
13674
13675
0
    if (ctxt == NULL)
13676
0
        return(NULL);
13677
13678
0
    xmlCtxtReset(ctxt);
13679
0
    xmlCtxtUseOptions(ctxt, options);
13680
13681
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13682
0
    if (input == NULL)
13683
0
        return(NULL);
13684
13685
0
    return(xmlCtxtParseDocument(ctxt, input));
13686
0
}
13687
13688
/**
13689
 * parse an XML document from I/O functions and source and build a tree.
13690
 * This reuses the existing `ctxt` parser context
13691
 *
13692
 * `URL` is used as base to resolve external entities and for error
13693
 * reporting.
13694
 *
13695
 * See xmlCtxtUseOptions() for details.
13696
 *
13697
 * @param ctxt  an XML parser context
13698
 * @param ioread  an I/O read function
13699
 * @param ioclose  an I/O close function
13700
 * @param ioctx  an I/O handler
13701
 * @param URL  the base URL to use for the document
13702
 * @param encoding  the document encoding, or NULL
13703
 * @param options  a combination of xmlParserOption
13704
 * @returns the resulting document tree
13705
 */
13706
xmlDocPtr
13707
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13708
              xmlInputCloseCallback ioclose, void *ioctx,
13709
        const char *URL,
13710
              const char *encoding, int options)
13711
0
{
13712
0
    xmlParserInputPtr input;
13713
13714
0
    if (ctxt == NULL)
13715
0
        return(NULL);
13716
13717
0
    xmlCtxtReset(ctxt);
13718
0
    xmlCtxtUseOptions(ctxt, options);
13719
13720
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13721
0
                                  encoding, 0);
13722
0
    if (input == NULL)
13723
0
        return(NULL);
13724
13725
0
    return(xmlCtxtParseDocument(ctxt, input));
13726
0
}
13727