Coverage Report

Created: 2025-07-01 06:27

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * Author: Daniel Veillard
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#include <libxml/HTMLparser.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
70
#include "private/buf.h"
71
#include "private/dict.h"
72
#include "private/entities.h"
73
#include "private/error.h"
74
#include "private/html.h"
75
#include "private/io.h"
76
#include "private/memory.h"
77
#include "private/parser.h"
78
#include "private/tree.h"
79
80
110k
#define NS_INDEX_EMPTY  INT_MAX
81
53.6k
#define NS_INDEX_XML    (INT_MAX - 1)
82
44.2k
#define URI_HASH_EMPTY  0xD943A04E
83
12.0k
#define URI_HASH_XML    0xF0451F02
84
85
#ifndef STDIN_FILENO
86
0
  #define STDIN_FILENO 0
87
#endif
88
89
#ifndef SIZE_MAX
90
  #define SIZE_MAX ((size_t) -1)
91
#endif
92
93
101k
#define XML_MAX_ATTRS 100000000 /* 100 million */
94
95
273k
#define XML_SPECIAL_EXTERNAL    (1 << 20)
96
260k
#define XML_SPECIAL_TYPE_MASK   (XML_SPECIAL_EXTERNAL - 1)
97
98
293k
#define XML_ATTVAL_ALLOC        (1 << 0)
99
3.95k
#define XML_ATTVAL_NORM_CHANGE  (1 << 1)
100
101
struct _xmlStartTag {
102
    const xmlChar *prefix;
103
    const xmlChar *URI;
104
    int line;
105
    int nsNr;
106
};
107
108
/*
 * Extra per-namespace data kept by the parser.
 *
 * NOTE(review): semantics inferred from field names only; verify against
 * the namespace table code.
 */
typedef struct {
    void *saxData;                  /* opaque pointer, presumably SAX-owned */
    unsigned int prefixHashValue;   /* presumably the hash of the prefix */
    unsigned int uriHashValue;      /* presumably the hash of the URI */
    unsigned int elementId;         /* presumably the declaring element's id */
    int oldIndex;                   /* presumably the index being shadowed */
} xmlParserNsExtra;
115
116
/*
 * One slot of the namespace hash table (see the "hash" member of
 * struct _xmlParserNsData).
 */
typedef struct {
    unsigned int hashValue;     /* hash value stored in this slot */
    int index;                  /* presumably an index into the ns arrays */
} xmlParserNsBucket;
120
121
struct _xmlParserNsData {
122
    xmlParserNsExtra *extra;
123
124
    unsigned hashSize;
125
    unsigned hashElems;
126
    xmlParserNsBucket *hash;
127
128
    unsigned elementId;
129
    int defaultNsIndex;
130
    int minNsIndex;
131
};
132
133
static int
134
xmlParseElementStart(xmlParserCtxtPtr ctxt);
135
136
static void
137
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
138
139
static xmlEntityPtr
140
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
141
142
static const xmlChar *
143
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
144
145
/************************************************************************
146
 *                  *
147
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
148
 *                  *
149
 ************************************************************************/
150
151
#define XML_PARSER_BIG_ENTITY 1000
152
#define XML_PARSER_LOT_ENTITY 5000
153
154
/*
155
 * Constants for protection against abusive entity expansion
156
 * ("billion laughs").
157
 */
158
159
/*
160
 * A certain amount of entity expansion which is always allowed.
161
 */
162
66.7k
#define XML_PARSER_ALLOWED_EXPANSION 1000000
163
164
/*
165
 * Fixed cost for each entity reference. This crudely models processing time
166
 * as well to protect, for example, against exponential expansion of empty
167
 * or very short entities.
168
 */
169
67.3k
#define XML_ENT_FIXED_COST 20
170
171
470k
#define XML_PARSER_BIG_BUFFER_SIZE 300
172
261k
#define XML_PARSER_BUFFER_SIZE 100
173
172k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
174
175
/**
176
 * XML_PARSER_CHUNK_SIZE
177
 *
178
 * When calling GROW that's the minimal amount of data
179
 * the parser expects to have received. It is not a hard
180
 * limit but an optimization when reading strings like Names
181
 * It is not strictly needed as long as the input's available characters
182
 * are followed by 0, which should be provided by the I/O level
183
 */
184
#define XML_PARSER_CHUNK_SIZE 100
185
186
/**
187
 * Constant string describing the version of the library used at
188
 * run-time.
189
 */
190
const char *const
191
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
192
193
/*
194
 * List of XML prefixed PI allowed by W3C specs
195
 */
196
197
static const char* const xmlW3CPIs[] = {
198
    "xml-stylesheet",
199
    "xml-model",
200
    NULL
201
};
202
203
204
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
205
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206
                                              const xmlChar **str);
207
208
static void
209
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
210
211
static int
212
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
213
214
static void
215
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl);
216
217
/************************************************************************
218
 *                  *
219
 *    Some factorized error routines        *
220
 *                  *
221
 ************************************************************************/
222
223
static void
224
432
xmlErrMemory(xmlParserCtxtPtr ctxt) {
225
432
    xmlCtxtErrMemory(ctxt);
226
432
}
227
228
/**
229
 * Handle a redefinition of attribute error
230
 *
231
 * @param ctxt  an XML parser context
232
 * @param prefix  the attribute prefix
233
 * @param localname  the attribute localname
234
 */
235
static void
236
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
237
                   const xmlChar * localname)
238
8.02k
{
239
8.02k
    if (prefix == NULL)
240
4.51k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
4.51k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
242
4.51k
                   "Attribute %s redefined\n", localname);
243
3.51k
    else
244
3.51k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
245
3.51k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
246
3.51k
                   "Attribute %s:%s redefined\n", prefix, localname);
247
8.02k
}
248
249
/**
250
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
251
 *
252
 * @param ctxt  an XML parser context
253
 * @param error  the error number
254
 * @param msg  the error message
255
 */
256
static void LIBXML_ATTR_FORMAT(3,0)
257
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
258
               const char *msg)
259
263k
{
260
263k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
261
263k
               NULL, NULL, NULL, 0, "%s", msg);
262
263k
}
263
264
/**
265
 * Handle a warning.
266
 *
267
 * @param ctxt  an XML parser context
268
 * @param error  the error number
269
 * @param msg  the error message
270
 * @param str1  extra data
271
 * @param str2  extra data
272
 */
273
void LIBXML_ATTR_FORMAT(3,0)
274
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
275
              const char *msg, const xmlChar *str1, const xmlChar *str2)
276
108k
{
277
108k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
278
108k
               str1, str2, NULL, 0, msg, str1, str2);
279
108k
}
280
281
#ifdef LIBXML_VALID_ENABLED
282
/**
283
 * Handle a validity error.
284
 *
285
 * @param ctxt  an XML parser context
286
 * @param error  the error number
287
 * @param msg  the error message
288
 * @param str1  extra data
289
 * @param str2  extra data
290
 */
291
static void LIBXML_ATTR_FORMAT(3,0)
292
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
293
              const char *msg, const xmlChar *str1, const xmlChar *str2)
294
449
{
295
449
    ctxt->valid = 0;
296
297
449
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
298
449
               str1, str2, NULL, 0, msg, str1, str2);
299
449
}
300
#endif
301
302
/**
303
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
304
 *
305
 * @param ctxt  an XML parser context
306
 * @param error  the error number
307
 * @param msg  the error message
308
 * @param val  an integer value
309
 */
310
static void LIBXML_ATTR_FORMAT(3,0)
311
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
312
                  const char *msg, int val)
313
1.46M
{
314
1.46M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
315
1.46M
               NULL, NULL, NULL, val, msg, val);
316
1.46M
}
317
318
/**
319
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
320
 *
321
 * @param ctxt  an XML parser context
322
 * @param error  the error number
323
 * @param msg  the error message
324
 * @param str1  an string info
325
 * @param val  an integer value
326
 * @param str2  an string info
327
 */
328
static void LIBXML_ATTR_FORMAT(3,0)
329
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
330
                  const char *msg, const xmlChar *str1, int val,
331
      const xmlChar *str2)
332
49.4k
{
333
49.4k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
334
49.4k
               str1, str2, NULL, val, msg, str1, val, str2);
335
49.4k
}
336
337
/**
338
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
339
 *
340
 * @param ctxt  an XML parser context
341
 * @param error  the error number
342
 * @param msg  the error message
343
 * @param val  a string value
344
 */
345
static void LIBXML_ATTR_FORMAT(3,0)
346
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
347
                  const char *msg, const xmlChar * val)
348
42.6k
{
349
42.6k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
350
42.6k
               val, NULL, NULL, 0, msg, val);
351
42.6k
}
352
353
/**
354
 * Handle a non fatal parser error
355
 *
356
 * @param ctxt  an XML parser context
357
 * @param error  the error number
358
 * @param msg  the error message
359
 * @param val  a string value
360
 */
361
static void LIBXML_ATTR_FORMAT(3,0)
362
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363
                  const char *msg, const xmlChar * val)
364
638
{
365
638
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366
638
               val, NULL, NULL, 0, msg, val);
367
638
}
368
369
/**
370
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
371
 *
372
 * @param ctxt  an XML parser context
373
 * @param error  the error number
374
 * @param msg  the message
375
 * @param info1  extra information string
376
 * @param info2  extra information string
377
 * @param info3  extra information string
378
 */
379
static void LIBXML_ATTR_FORMAT(3,0)
380
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381
         const char *msg,
382
         const xmlChar * info1, const xmlChar * info2,
383
         const xmlChar * info3)
384
120k
{
385
120k
    ctxt->nsWellFormed = 0;
386
387
120k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388
120k
               info1, info2, info3, 0, msg, info1, info2, info3);
389
120k
}
390
391
/**
392
 * Handle a namespace warning error
393
 *
394
 * @param ctxt  an XML parser context
395
 * @param error  the error number
396
 * @param msg  the message
397
 * @param info1  extra information string
398
 * @param info2  extra information string
399
 * @param info3  extra information string
400
 */
401
static void LIBXML_ATTR_FORMAT(3,0)
402
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403
         const char *msg,
404
         const xmlChar * info1, const xmlChar * info2,
405
         const xmlChar * info3)
406
17.3k
{
407
17.3k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408
17.3k
               info1, info2, info3, 0, msg, info1, info2, info3);
409
17.3k
}
410
411
/**
 * Add val to *dst, clamping the result to ULONG_MAX instead of wrapping.
 *
 * @param dst  accumulator, updated in place
 * @param val  amount to add
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without wrapping. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : *dst + val;
}
418
419
/**
 * Add a size_t amount to *dst, clamping the result to ULONG_MAX instead
 * of wrapping.
 *
 * The amount is taken as size_t so that, on platforms where size_t is
 * wider than unsigned long, an oversized value saturates the accumulator
 * rather than being silently truncated at the call boundary.
 *
 * @param dst  accumulator, updated in place
 * @param val  amount to add
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        /* val <= ULONG_MAX - *dst here, so the cast cannot lose bits. */
        *dst += (unsigned long) val;
}
426
427
/**
428
 * Check for non-linear entity expansion behaviour.
429
 *
430
 * In some cases like xmlExpandEntityInAttValue, this function is called
431
 * for each, possibly nested entity and its unexpanded content length.
432
 *
433
 * In other cases like #xmlParseReference, it's only called for each
434
 * top-level entity with its unexpanded content length plus the sum of
435
 * the unexpanded content lengths (plus fixed cost) of all nested
436
 * entities.
437
 *
438
 * Summing the unexpanded lengths also adds the length of the reference.
439
 * This is by design. Taking the length of the entity name into account
440
 * discourages attacks that try to waste CPU time with abusively long
441
 * entity names. See test/recurse/lol6.xml for example. Each call also
442
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
443
 * short entities.
444
 *
445
 * @param ctxt  parser context
446
 * @param extra  sum of unexpanded entity sizes
447
 * @returns 1 on error, 0 on success.
448
 */
449
static int
450
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
451
67.3k
{
452
67.3k
    unsigned long consumed;
453
67.3k
    unsigned long *expandedSize;
454
67.3k
    xmlParserInputPtr input = ctxt->input;
455
67.3k
    xmlEntityPtr entity = input->entity;
456
457
67.3k
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
458
677
        return(0);
459
460
    /*
461
     * Compute total consumed bytes so far, including input streams of
462
     * external entities.
463
     */
464
66.7k
    consumed = input->consumed;
465
66.7k
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
466
66.7k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
467
468
66.7k
    if (entity)
469
5.15k
        expandedSize = &entity->expandedSize;
470
61.5k
    else
471
61.5k
        expandedSize = &ctxt->sizeentcopy;
472
473
    /*
474
     * Add extra cost and some fixed cost.
475
     */
476
66.7k
    xmlSaturatedAdd(expandedSize, extra);
477
66.7k
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
478
479
    /*
480
     * It's important to always use saturation arithmetic when tracking
481
     * entity sizes to make the size checks reliable. If "sizeentcopy"
482
     * overflows, we have to abort.
483
     */
484
66.7k
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
485
66.7k
        ((*expandedSize >= ULONG_MAX) ||
486
13
         (*expandedSize / ctxt->maxAmpl > consumed))) {
487
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
488
0
                       "Maximum entity amplification factor exceeded, see "
489
0
                       "xmlCtxtSetMaxAmplification.\n");
490
0
        xmlHaltParser(ctxt);
491
0
        return(1);
492
0
    }
493
494
66.7k
    return(0);
495
66.7k
}
496
497
/************************************************************************
498
 *                  *
499
 *    Library wide options          *
500
 *                  *
501
 ************************************************************************/
502
503
/**
504
 * Examines if the library has been compiled with a given feature.
505
 *
506
 * @param feature  the feature to be examined
507
 * @returns zero (0) if the feature does not exist or an unknown
508
 * feature is requested, non-zero otherwise.
509
 */
510
int
511
xmlHasFeature(xmlFeature feature)
512
0
{
513
0
    switch (feature) {
514
0
  case XML_WITH_THREAD:
515
0
#ifdef LIBXML_THREAD_ENABLED
516
0
      return(1);
517
#else
518
      return(0);
519
#endif
520
0
        case XML_WITH_TREE:
521
0
            return(1);
522
0
        case XML_WITH_OUTPUT:
523
0
#ifdef LIBXML_OUTPUT_ENABLED
524
0
            return(1);
525
#else
526
            return(0);
527
#endif
528
0
        case XML_WITH_PUSH:
529
0
#ifdef LIBXML_PUSH_ENABLED
530
0
            return(1);
531
#else
532
            return(0);
533
#endif
534
0
        case XML_WITH_READER:
535
0
#ifdef LIBXML_READER_ENABLED
536
0
            return(1);
537
#else
538
            return(0);
539
#endif
540
0
        case XML_WITH_PATTERN:
541
0
#ifdef LIBXML_PATTERN_ENABLED
542
0
            return(1);
543
#else
544
            return(0);
545
#endif
546
0
        case XML_WITH_WRITER:
547
0
#ifdef LIBXML_WRITER_ENABLED
548
0
            return(1);
549
#else
550
            return(0);
551
#endif
552
0
        case XML_WITH_SAX1:
553
0
#ifdef LIBXML_SAX1_ENABLED
554
0
            return(1);
555
#else
556
            return(0);
557
#endif
558
0
        case XML_WITH_HTTP:
559
0
            return(0);
560
0
        case XML_WITH_VALID:
561
0
#ifdef LIBXML_VALID_ENABLED
562
0
            return(1);
563
#else
564
            return(0);
565
#endif
566
0
        case XML_WITH_HTML:
567
0
#ifdef LIBXML_HTML_ENABLED
568
0
            return(1);
569
#else
570
            return(0);
571
#endif
572
0
        case XML_WITH_LEGACY:
573
0
            return(0);
574
0
        case XML_WITH_C14N:
575
0
#ifdef LIBXML_C14N_ENABLED
576
0
            return(1);
577
#else
578
            return(0);
579
#endif
580
0
        case XML_WITH_CATALOG:
581
0
#ifdef LIBXML_CATALOG_ENABLED
582
0
            return(1);
583
#else
584
            return(0);
585
#endif
586
0
        case XML_WITH_XPATH:
587
0
#ifdef LIBXML_XPATH_ENABLED
588
0
            return(1);
589
#else
590
            return(0);
591
#endif
592
0
        case XML_WITH_XPTR:
593
0
#ifdef LIBXML_XPTR_ENABLED
594
0
            return(1);
595
#else
596
            return(0);
597
#endif
598
0
        case XML_WITH_XINCLUDE:
599
0
#ifdef LIBXML_XINCLUDE_ENABLED
600
0
            return(1);
601
#else
602
            return(0);
603
#endif
604
0
        case XML_WITH_ICONV:
605
0
#ifdef LIBXML_ICONV_ENABLED
606
0
            return(1);
607
#else
608
            return(0);
609
#endif
610
0
        case XML_WITH_ISO8859X:
611
0
#ifdef LIBXML_ISO8859X_ENABLED
612
0
            return(1);
613
#else
614
            return(0);
615
#endif
616
0
        case XML_WITH_UNICODE:
617
0
            return(0);
618
0
        case XML_WITH_REGEXP:
619
0
#ifdef LIBXML_REGEXP_ENABLED
620
0
            return(1);
621
#else
622
            return(0);
623
#endif
624
0
        case XML_WITH_AUTOMATA:
625
0
#ifdef LIBXML_REGEXP_ENABLED
626
0
            return(1);
627
#else
628
            return(0);
629
#endif
630
0
        case XML_WITH_EXPR:
631
0
            return(0);
632
0
        case XML_WITH_RELAXNG:
633
0
#ifdef LIBXML_RELAXNG_ENABLED
634
0
            return(1);
635
#else
636
            return(0);
637
#endif
638
0
        case XML_WITH_SCHEMAS:
639
0
#ifdef LIBXML_SCHEMAS_ENABLED
640
0
            return(1);
641
#else
642
            return(0);
643
#endif
644
0
        case XML_WITH_SCHEMATRON:
645
#ifdef LIBXML_SCHEMATRON_ENABLED
646
            return(1);
647
#else
648
0
            return(0);
649
0
#endif
650
0
        case XML_WITH_MODULES:
651
0
#ifdef LIBXML_MODULES_ENABLED
652
0
            return(1);
653
#else
654
            return(0);
655
#endif
656
0
        case XML_WITH_DEBUG:
657
#ifdef LIBXML_DEBUG_ENABLED
658
            return(1);
659
#else
660
0
            return(0);
661
0
#endif
662
0
        case XML_WITH_DEBUG_MEM:
663
0
            return(0);
664
0
        case XML_WITH_ZLIB:
665
0
#ifdef LIBXML_ZLIB_ENABLED
666
0
            return(1);
667
#else
668
            return(0);
669
#endif
670
0
        case XML_WITH_LZMA:
671
0
#ifdef LIBXML_LZMA_ENABLED
672
0
            return(1);
673
#else
674
            return(0);
675
#endif
676
0
        case XML_WITH_ICU:
677
#ifdef LIBXML_ICU_ENABLED
678
            return(1);
679
#else
680
0
            return(0);
681
0
#endif
682
0
        default:
683
0
      break;
684
0
     }
685
0
     return(0);
686
0
}
687
688
/************************************************************************
689
 *                  *
690
 *      Simple string buffer        *
691
 *                  *
692
 ************************************************************************/
693
694
typedef struct {
695
    xmlChar *mem;
696
    unsigned size;
697
    unsigned cap; /* size < cap */
698
    unsigned max; /* size <= max */
699
    xmlParserErrors code;
700
} xmlSBuf;
701
702
static void
703
293k
xmlSBufInit(xmlSBuf *buf, unsigned max) {
704
293k
    buf->mem = NULL;
705
293k
    buf->size = 0;
706
293k
    buf->cap = 0;
707
293k
    buf->max = max;
708
293k
    buf->code = XML_ERR_OK;
709
293k
}
710
711
static int
712
174k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
713
174k
    xmlChar *mem;
714
174k
    unsigned cap;
715
716
174k
    if (len >= UINT_MAX / 2 - buf->size) {
717
0
        if (buf->code == XML_ERR_OK)
718
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
719
0
        return(-1);
720
0
    }
721
722
174k
    cap = (buf->size + len) * 2;
723
174k
    if (cap < 240)
724
159k
        cap = 240;
725
726
174k
    mem = xmlRealloc(buf->mem, cap);
727
174k
    if (mem == NULL) {
728
62
        buf->code = XML_ERR_NO_MEMORY;
729
62
        return(-1);
730
62
    }
731
732
174k
    buf->mem = mem;
733
174k
    buf->cap = cap;
734
735
174k
    return(0);
736
174k
}
737
738
static void
739
1.37M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
740
1.37M
    if (buf->max - buf->size < len) {
741
0
        if (buf->code == XML_ERR_OK)
742
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
743
0
        return;
744
0
    }
745
746
1.37M
    if (buf->cap - buf->size <= len) {
747
169k
        if (xmlSBufGrow(buf, len) < 0)
748
58
            return;
749
169k
    }
750
751
1.37M
    if (len > 0)
752
1.37M
        memcpy(buf->mem + buf->size, str, len);
753
1.37M
    buf->size += len;
754
1.37M
}
755
756
static void
757
442k
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
758
442k
    xmlSBufAddString(buf, (const xmlChar *) str, len);
759
442k
}
760
761
static void
762
110k
xmlSBufAddChar(xmlSBuf *buf, int c) {
763
110k
    xmlChar *end;
764
765
110k
    if (buf->max - buf->size < 4) {
766
0
        if (buf->code == XML_ERR_OK)
767
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
768
0
        return;
769
0
    }
770
771
110k
    if (buf->cap - buf->size <= 4) {
772
4.92k
        if (xmlSBufGrow(buf, 4) < 0)
773
4
            return;
774
4.92k
    }
775
776
110k
    end = buf->mem + buf->size;
777
778
110k
    if (c < 0x80) {
779
75.2k
        *end = (xmlChar) c;
780
75.2k
        buf->size += 1;
781
75.2k
    } else {
782
35.2k
        buf->size += xmlCopyCharMultiByte(end, c);
783
35.2k
    }
784
110k
}
785
786
static void
787
333k
xmlSBufAddReplChar(xmlSBuf *buf) {
788
333k
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
789
333k
}
790
791
static void
792
67
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
793
67
    if (buf->code == XML_ERR_NO_MEMORY)
794
67
        xmlCtxtErrMemory(ctxt);
795
0
    else
796
0
        xmlFatalErr(ctxt, buf->code, errMsg);
797
67
}
798
799
static xmlChar *
800
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
801
176k
              const char *errMsg) {
802
176k
    if (buf->mem == NULL) {
803
14.4k
        buf->mem = xmlMalloc(1);
804
14.4k
        if (buf->mem == NULL) {
805
5
            buf->code = XML_ERR_NO_MEMORY;
806
14.4k
        } else {
807
14.4k
            buf->mem[0] = 0;
808
14.4k
        }
809
161k
    } else {
810
161k
        buf->mem[buf->size] = 0;
811
161k
    }
812
813
176k
    if (buf->code == XML_ERR_OK) {
814
175k
        if (sizeOut != NULL)
815
42.3k
            *sizeOut = buf->size;
816
175k
        return(buf->mem);
817
175k
    }
818
819
61
    xmlSBufReportError(buf, ctxt, errMsg);
820
821
61
    xmlFree(buf->mem);
822
823
61
    if (sizeOut != NULL)
824
17
        *sizeOut = 0;
825
61
    return(NULL);
826
176k
}
827
828
static void
829
111k
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
830
111k
    if (buf->code != XML_ERR_OK)
831
6
        xmlSBufReportError(buf, ctxt, errMsg);
832
833
111k
    xmlFree(buf->mem);
834
111k
}
835
836
static int
837
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
838
287k
                    const char *errMsg) {
839
287k
    int c = str[0];
840
287k
    int c1 = str[1];
841
842
287k
    if ((c1 & 0xC0) != 0x80)
843
207k
        goto encoding_error;
844
845
80.3k
    if (c < 0xE0) {
846
        /* 2-byte sequence */
847
68.5k
        if (c < 0xC2)
848
57.1k
            goto encoding_error;
849
850
11.4k
        return(2);
851
68.5k
    } else {
852
11.8k
        int c2 = str[2];
853
854
11.8k
        if ((c2 & 0xC0) != 0x80)
855
4.72k
            goto encoding_error;
856
857
7.07k
        if (c < 0xF0) {
858
            /* 3-byte sequence */
859
3.76k
            if (c == 0xE0) {
860
                /* overlong */
861
1.03k
                if (c1 < 0xA0)
862
810
                    goto encoding_error;
863
2.73k
            } else if (c == 0xED) {
864
                /* surrogate */
865
438
                if (c1 >= 0xA0)
866
200
                    goto encoding_error;
867
2.29k
            } else if (c == 0xEF) {
868
                /* U+FFFE and U+FFFF are invalid Chars */
869
1.11k
                if ((c1 == 0xBF) && (c2 >= 0xBE))
870
307
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
871
1.11k
            }
872
873
2.75k
            return(3);
874
3.76k
        } else {
875
            /* 4-byte sequence */
876
3.31k
            if ((str[3] & 0xC0) != 0x80)
877
1.07k
                goto encoding_error;
878
2.23k
            if (c == 0xF0) {
879
                /* overlong */
880
865
                if (c1 < 0x90)
881
671
                    goto encoding_error;
882
1.37k
            } else if (c >= 0xF4) {
883
                /* greater than 0x10FFFF */
884
1.10k
                if ((c > 0xF4) || (c1 >= 0x90))
885
871
                    goto encoding_error;
886
1.10k
            }
887
888
695
            return(4);
889
2.23k
        }
890
7.07k
    }
891
892
272k
encoding_error:
893
    /* Only report the first error */
894
272k
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
895
3.88k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
896
3.88k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
897
3.88k
    }
898
899
272k
    return(0);
900
80.3k
}
901
902
/************************************************************************
903
 *                  *
904
 *    SAX2 defaulted attributes handling      *
905
 *                  *
906
 ************************************************************************/
907
908
/**
909
 * Final initialization of the parser context before starting to parse.
910
 *
911
 * This accounts for users modifying struct members of parser context
912
 * directly.
913
 *
914
 * @param ctxt  an XML parser context
915
 */
916
static void
917
178k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
918
178k
    xmlSAXHandlerPtr sax;
919
920
    /* Avoid unused variable warning if features are disabled. */
921
178k
    (void) sax;
922
923
    /*
924
     * Changing the SAX struct directly is still widespread practice
925
     * in internal and external code.
926
     */
927
178k
    if (ctxt == NULL) return;
928
178k
    sax = ctxt->sax;
929
178k
#ifdef LIBXML_SAX1_ENABLED
930
    /*
931
     * Only enable SAX2 if there SAX2 element handlers, except when there
932
     * are no element handlers at all.
933
     */
934
178k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
935
178k
        (sax) &&
936
178k
        (sax->initialized == XML_SAX2_MAGIC) &&
937
178k
        ((sax->startElementNs != NULL) ||
938
121k
         (sax->endElementNs != NULL) ||
939
121k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
940
121k
        ctxt->sax2 = 1;
941
#else
942
    ctxt->sax2 = 1;
943
#endif /* LIBXML_SAX1_ENABLED */
944
945
    /*
946
     * Some users replace the dictionary directly in the context struct.
947
     * We really need an API function to do that cleanly.
948
     */
949
178k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
950
178k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
951
178k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
952
178k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
953
178k
    (ctxt->str_xml_ns == NULL)) {
954
44
        xmlErrMemory(ctxt);
955
44
    }
956
957
178k
    xmlDictSetLimit(ctxt->dict,
958
178k
                    (ctxt->options & XML_PARSE_HUGE) ?
959
46.3k
                        0 :
960
178k
                        XML_MAX_DICTIONARY_LIMIT);
961
962
178k
#ifdef LIBXML_VALID_ENABLED
963
178k
    if (ctxt->validate)
964
58.7k
        ctxt->vctxt.flags |= XML_VCTXT_VALIDATE;
965
120k
    else
966
120k
        ctxt->vctxt.flags &= ~XML_VCTXT_VALIDATE;
967
178k
#endif /* LIBXML_VALID_ENABLED */
968
178k
}
969
970
typedef struct {
971
    xmlHashedString prefix;
972
    xmlHashedString name;
973
    xmlHashedString value;
974
    const xmlChar *valueEnd;
975
    int external;
976
    int expandedSize;
977
} xmlDefAttr;
978
979
typedef struct _xmlDefAttrs xmlDefAttrs;
980
typedef xmlDefAttrs *xmlDefAttrsPtr;
981
struct _xmlDefAttrs {
982
    int nbAttrs;  /* number of defaulted attributes on that element */
983
    int maxAttrs;       /* the size of the array */
984
#if __STDC_VERSION__ >= 199901L
985
    /* Using a C99 flexible array member avoids UBSan errors. */
986
    xmlDefAttr attrs[] ATTRIBUTE_COUNTED_BY(maxAttrs);
987
#else
988
    xmlDefAttr attrs[1];
989
#endif
990
};
991
992
/**
993
 * Normalize the space in non CDATA attribute values:
994
 * If the attribute type is not CDATA, then the XML processor MUST further
995
 * process the normalized attribute value by discarding any leading and
996
 * trailing space (\#x20) characters, and by replacing sequences of space
997
 * (\#x20) characters by a single space (\#x20) character.
998
 * Note that the size of dst need to be at least src, and if one doesn't need
999
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1000
 * passing src as dst is just fine.
1001
 *
1002
 * @param src  the source string
1003
 * @param dst  the target string
1004
 * @returns a pointer to the normalized value (dst) or NULL if no conversion
1005
 *         is needed.
1006
 */
1007
static xmlChar *
1008
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1009
27.0k
{
1010
27.0k
    if ((src == NULL) || (dst == NULL))
1011
0
        return(NULL);
1012
1013
30.6k
    while (*src == 0x20) src++;
1014
146k
    while (*src != 0) {
1015
119k
  if (*src == 0x20) {
1016
10.9k
      while (*src == 0x20) src++;
1017
4.18k
      if (*src != 0)
1018
3.56k
    *dst++ = 0x20;
1019
115k
  } else {
1020
115k
      *dst++ = *src++;
1021
115k
  }
1022
119k
    }
1023
27.0k
    *dst = 0;
1024
27.0k
    if (dst == src)
1025
25.4k
       return(NULL);
1026
1.54k
    return(dst);
1027
27.0k
}
1028
1029
/**
1030
 * Add a defaulted attribute for an element
1031
 *
1032
 * @param ctxt  an XML parser context
1033
 * @param fullname  the element fullname
1034
 * @param fullattr  the attribute fullname
1035
 * @param value  the attribute value
1036
 */
1037
static void
1038
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1039
               const xmlChar *fullname,
1040
               const xmlChar *fullattr,
1041
26.7k
               const xmlChar *value) {
1042
26.7k
    xmlDefAttrsPtr defaults;
1043
26.7k
    xmlDefAttr *attr;
1044
26.7k
    int len, expandedSize;
1045
26.7k
    xmlHashedString name;
1046
26.7k
    xmlHashedString prefix;
1047
26.7k
    xmlHashedString hvalue;
1048
26.7k
    const xmlChar *localname;
1049
1050
    /*
1051
     * Allows to detect attribute redefinitions
1052
     */
1053
26.7k
    if (ctxt->attsSpecial != NULL) {
1054
13.0k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1055
480
      return;
1056
13.0k
    }
1057
1058
26.2k
    if (ctxt->attsDefault == NULL) {
1059
13.7k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1060
13.7k
  if (ctxt->attsDefault == NULL)
1061
5
      goto mem_error;
1062
13.7k
    }
1063
1064
    /*
1065
     * split the element name into prefix:localname , the string found
1066
     * are within the DTD and then not associated to namespace names.
1067
     */
1068
26.2k
    localname = xmlSplitQName3(fullname, &len);
1069
26.2k
    if (localname == NULL) {
1070
24.8k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1071
24.8k
  prefix.name = NULL;
1072
24.8k
    } else {
1073
1.44k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1074
1.44k
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1075
1.44k
        if (prefix.name == NULL)
1076
1
            goto mem_error;
1077
1.44k
    }
1078
26.2k
    if (name.name == NULL)
1079
1
        goto mem_error;
1080
1081
    /*
1082
     * make sure there is some storage
1083
     */
1084
26.2k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1085
26.2k
    if ((defaults == NULL) ||
1086
26.2k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1087
17.9k
        xmlDefAttrsPtr temp;
1088
17.9k
        int newSize;
1089
1090
17.9k
        if (defaults == NULL) {
1091
17.6k
            newSize = 4;
1092
17.6k
        } else {
1093
314
            if ((defaults->maxAttrs >= XML_MAX_ATTRS) ||
1094
314
                ((size_t) defaults->maxAttrs >
1095
314
                     SIZE_MAX / 2 / sizeof(temp[0]) - sizeof(*defaults)))
1096
0
                goto mem_error;
1097
1098
314
            if (defaults->maxAttrs > XML_MAX_ATTRS / 2)
1099
0
                newSize = XML_MAX_ATTRS;
1100
314
            else
1101
314
                newSize = defaults->maxAttrs * 2;
1102
314
        }
1103
17.9k
        temp = xmlRealloc(defaults,
1104
17.9k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1105
17.9k
  if (temp == NULL)
1106
1
      goto mem_error;
1107
17.9k
        if (defaults == NULL)
1108
17.6k
            temp->nbAttrs = 0;
1109
17.9k
  temp->maxAttrs = newSize;
1110
17.9k
        defaults = temp;
1111
17.9k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1112
17.9k
                          defaults, NULL) < 0) {
1113
0
      xmlFree(defaults);
1114
0
      goto mem_error;
1115
0
  }
1116
17.9k
    }
1117
1118
    /*
1119
     * Split the attribute name into prefix:localname , the string found
1120
     * are within the DTD and hen not associated to namespace names.
1121
     */
1122
26.2k
    localname = xmlSplitQName3(fullattr, &len);
1123
26.2k
    if (localname == NULL) {
1124
12.0k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1125
12.0k
  prefix.name = NULL;
1126
14.2k
    } else {
1127
14.2k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1128
14.2k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1129
14.2k
        if (prefix.name == NULL)
1130
1
            goto mem_error;
1131
14.2k
    }
1132
26.2k
    if (name.name == NULL)
1133
0
        goto mem_error;
1134
1135
    /* intern the string and precompute the end */
1136
26.2k
    len = strlen((const char *) value);
1137
26.2k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1138
26.2k
    if (hvalue.name == NULL)
1139
1
        goto mem_error;
1140
1141
26.2k
    expandedSize = strlen((const char *) name.name);
1142
26.2k
    if (prefix.name != NULL)
1143
14.2k
        expandedSize += strlen((const char *) prefix.name);
1144
26.2k
    expandedSize += len;
1145
1146
26.2k
    attr = &defaults->attrs[defaults->nbAttrs++];
1147
26.2k
    attr->name = name;
1148
26.2k
    attr->prefix = prefix;
1149
26.2k
    attr->value = hvalue;
1150
26.2k
    attr->valueEnd = hvalue.name + len;
1151
26.2k
    attr->external = PARSER_EXTERNAL(ctxt);
1152
26.2k
    attr->expandedSize = expandedSize;
1153
1154
26.2k
    return;
1155
1156
10
mem_error:
1157
10
    xmlErrMemory(ctxt);
1158
10
}
1159
1160
/**
1161
 * Register this attribute type
1162
 *
1163
 * @param ctxt  an XML parser context
1164
 * @param fullname  the element fullname
1165
 * @param fullattr  the attribute fullname
1166
 * @param type  the attribute type
1167
 */
1168
static void
1169
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1170
      const xmlChar *fullname,
1171
      const xmlChar *fullattr,
1172
      int type)
1173
27.8k
{
1174
27.8k
    if (ctxt->attsSpecial == NULL) {
1175
14.6k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1176
14.6k
  if (ctxt->attsSpecial == NULL)
1177
4
      goto mem_error;
1178
14.6k
    }
1179
1180
27.8k
    if (PARSER_EXTERNAL(ctxt))
1181
0
        type |= XML_SPECIAL_EXTERNAL;
1182
1183
27.8k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1184
27.8k
                    XML_INT_TO_PTR(type)) < 0)
1185
0
        goto mem_error;
1186
27.8k
    return;
1187
1188
27.8k
mem_error:
1189
4
    xmlErrMemory(ctxt);
1190
4
}
1191
1192
/**
1193
 * Removes CDATA attributes from the special attribute table
1194
 */
1195
static void
1196
xmlCleanSpecialAttrCallback(void *payload, void *data,
1197
                            const xmlChar *fullname, const xmlChar *fullattr,
1198
27.2k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1199
27.2k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1200
1201
27.2k
    if (XML_PTR_TO_INT(payload) == XML_ATTRIBUTE_CDATA) {
1202
6.64k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1203
6.64k
    }
1204
27.2k
}
1205
1206
/**
1207
 * Trim the list of attributes defined to remove all those of type
1208
 * CDATA as they are not special. This call should be done when finishing
1209
 * to parse the DTD and before starting to parse the document root.
1210
 *
1211
 * @param ctxt  an XML parser context
1212
 */
1213
static void
1214
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1215
109k
{
1216
109k
    if (ctxt->attsSpecial == NULL)
1217
94.8k
        return;
1218
1219
14.6k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1220
1221
14.6k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1222
4.71k
        xmlHashFree(ctxt->attsSpecial, NULL);
1223
4.71k
        ctxt->attsSpecial = NULL;
1224
4.71k
    }
1225
14.6k
}
1226
1227
/**
1228
 * Checks that the value conforms to the LanguageID production:
1229
 *
1230
 * @deprecated Internal function, do not use.
1231
 *
1232
 * NOTE: this is somewhat deprecated, those productions were removed from
1233
 * the XML Second edition.
1234
 *
1235
 *     [33] LanguageID ::= Langcode ('-' Subcode)*
1236
 *     [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1237
 *     [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1238
 *     [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1239
 *     [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1240
 *     [38] Subcode ::= ([a-z] | [A-Z])+
1241
 *
1242
 * The current REC reference the successors of RFC 1766, currently 5646
1243
 *
1244
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1245
 *
1246
 *     langtag       = language
1247
 *                     ["-" script]
1248
 *                     ["-" region]
1249
 *                     *("-" variant)
1250
 *                     *("-" extension)
1251
 *                     ["-" privateuse]
1252
 *     language      = 2*3ALPHA            ; shortest ISO 639 code
1253
 *                     ["-" extlang]       ; sometimes followed by
1254
 *                                         ; extended language subtags
1255
 *                   / 4ALPHA              ; or reserved for future use
1256
 *                   / 5*8ALPHA            ; or registered language subtag
1257
 *
1258
 *     extlang       = 3ALPHA              ; selected ISO 639 codes
1259
 *                     *2("-" 3ALPHA)      ; permanently reserved
1260
 *
1261
 *     script        = 4ALPHA              ; ISO 15924 code
1262
 *
1263
 *     region        = 2ALPHA              ; ISO 3166-1 code
1264
 *                   / 3DIGIT              ; UN M.49 code
1265
 *
1266
 *     variant       = 5*8alphanum         ; registered variants
1267
 *                   / (DIGIT 3alphanum)
1268
 *
1269
 *     extension     = singleton 1*("-" (2*8alphanum))
1270
 *
1271
 *                                         ; Single alphanumerics
1272
 *                                         ; "x" reserved for private use
1273
 *     singleton     = DIGIT               ; 0 - 9
1274
 *                   / %x41-57             ; A - W
1275
 *                   / %x59-5A             ; Y - Z
1276
 *                   / %x61-77             ; a - w
1277
 *                   / %x79-7A             ; y - z
1278
 *
1279
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1280
 * The parser below doesn't try to cope with extension or privateuse
1281
 * that could be added but that's not interoperable anyway
1282
 *
1283
 * @param lang  pointer to the string value
1284
 * @returns 1 if correct 0 otherwise
1285
 **/
1286
int
1287
xmlCheckLanguageID(const xmlChar * lang)
1288
17.3k
{
1289
17.3k
    const xmlChar *cur = lang, *nxt;
1290
1291
17.3k
    if (cur == NULL)
1292
697
        return (0);
1293
16.6k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1294
16.6k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1295
16.6k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1296
16.6k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1297
        /*
1298
         * Still allow IANA code and user code which were coming
1299
         * from the previous version of the XML-1.0 specification
1300
         * it's deprecated but we should not fail
1301
         */
1302
1.19k
        cur += 2;
1303
2.45k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1304
2.45k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1305
1.26k
            cur++;
1306
1.19k
        return(cur[0] == 0);
1307
1.19k
    }
1308
15.4k
    nxt = cur;
1309
51.9k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1310
51.9k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1311
36.4k
           nxt++;
1312
15.4k
    if (nxt - cur >= 4) {
1313
        /*
1314
         * Reserved
1315
         */
1316
3.11k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1317
1.26k
            return(0);
1318
1.85k
        return(1);
1319
3.11k
    }
1320
12.3k
    if (nxt - cur < 2)
1321
3.79k
        return(0);
1322
    /* we got an ISO 639 code */
1323
8.56k
    if (nxt[0] == 0)
1324
478
        return(1);
1325
8.08k
    if (nxt[0] != '-')
1326
1.70k
        return(0);
1327
1328
6.38k
    nxt++;
1329
6.38k
    cur = nxt;
1330
    /* now we can have extlang or script or region or variant */
1331
6.38k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1332
979
        goto region_m49;
1333
1334
23.8k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1335
23.8k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1336
18.4k
           nxt++;
1337
5.40k
    if (nxt - cur == 4)
1338
1.40k
        goto script;
1339
4.00k
    if (nxt - cur == 2)
1340
1.01k
        goto region;
1341
2.98k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1342
618
        goto variant;
1343
2.37k
    if (nxt - cur != 3)
1344
556
        return(0);
1345
    /* we parsed an extlang */
1346
1.81k
    if (nxt[0] == 0)
1347
199
        return(1);
1348
1.61k
    if (nxt[0] != '-')
1349
194
        return(0);
1350
1351
1.42k
    nxt++;
1352
1.42k
    cur = nxt;
1353
    /* now we can have script or region or variant */
1354
1.42k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1355
194
        goto region_m49;
1356
1357
6.09k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1358
6.09k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1359
4.86k
           nxt++;
1360
1.22k
    if (nxt - cur == 2)
1361
194
        goto region;
1362
1.03k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1363
202
        goto variant;
1364
832
    if (nxt - cur != 4)
1365
617
        return(0);
1366
    /* we parsed a script */
1367
1.61k
script:
1368
1.61k
    if (nxt[0] == 0)
1369
246
        return(1);
1370
1.37k
    if (nxt[0] != '-')
1371
216
        return(0);
1372
1373
1.15k
    nxt++;
1374
1.15k
    cur = nxt;
1375
    /* now we can have region or variant */
1376
1.15k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1377
196
        goto region_m49;
1378
1379
5.16k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1380
5.16k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1381
4.20k
           nxt++;
1382
1383
961
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1384
205
        goto variant;
1385
756
    if (nxt - cur != 2)
1386
558
        return(0);
1387
    /* we parsed a region */
1388
1.60k
region:
1389
1.60k
    if (nxt[0] == 0)
1390
456
        return(1);
1391
1.15k
    if (nxt[0] != '-')
1392
489
        return(0);
1393
1394
664
    nxt++;
1395
664
    cur = nxt;
1396
    /* now we can just have a variant */
1397
5.77k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1398
5.77k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1399
5.10k
           nxt++;
1400
1401
664
    if ((nxt - cur < 5) || (nxt - cur > 8))
1402
460
        return(0);
1403
1404
    /* we parsed a variant */
1405
1.22k
variant:
1406
1.22k
    if (nxt[0] == 0)
1407
796
        return(1);
1408
433
    if (nxt[0] != '-')
1409
235
        return(0);
1410
    /* extensions and private use subtags not checked */
1411
198
    return (1);
1412
1413
1.36k
region_m49:
1414
1.36k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1415
1.36k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1416
201
        nxt += 3;
1417
201
        goto region;
1418
201
    }
1419
1.16k
    return(0);
1420
1.36k
}
1421
1422
/************************************************************************
1423
 *                  *
1424
 *    Parser stacks related functions and macros    *
1425
 *                  *
1426
 ************************************************************************/
1427
1428
static xmlChar *
1429
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1430
1431
/**
1432
 * Create a new namespace database.
1433
 *
1434
 * @returns the new obejct.
1435
 */
1436
xmlParserNsData *
1437
192k
xmlParserNsCreate(void) {
1438
192k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1439
1440
192k
    if (nsdb == NULL)
1441
23
        return(NULL);
1442
192k
    memset(nsdb, 0, sizeof(*nsdb));
1443
192k
    nsdb->defaultNsIndex = INT_MAX;
1444
1445
192k
    return(nsdb);
1446
192k
}
1447
1448
/**
1449
 * Free a namespace database.
1450
 *
1451
 * @param nsdb  namespace database
1452
 */
1453
void
1454
192k
xmlParserNsFree(xmlParserNsData *nsdb) {
1455
192k
    if (nsdb == NULL)
1456
0
        return;
1457
1458
192k
    xmlFree(nsdb->extra);
1459
192k
    xmlFree(nsdb->hash);
1460
192k
    xmlFree(nsdb);
1461
192k
}
1462
1463
/**
1464
 * Reset a namespace database.
1465
 *
1466
 * @param nsdb  namespace database
1467
 */
1468
static void
1469
0
xmlParserNsReset(xmlParserNsData *nsdb) {
1470
0
    if (nsdb == NULL)
1471
0
        return;
1472
1473
0
    nsdb->hashElems = 0;
1474
0
    nsdb->elementId = 0;
1475
0
    nsdb->defaultNsIndex = INT_MAX;
1476
1477
0
    if (nsdb->hash)
1478
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1479
0
}
1480
1481
/**
1482
 * Signal that a new element has started.
1483
 *
1484
 * @param nsdb  namespace database
1485
 * @returns 0 on success, -1 if the element counter overflowed.
1486
 */
1487
static int
1488
221k
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1489
221k
    if (nsdb->elementId == UINT_MAX)
1490
0
        return(-1);
1491
221k
    nsdb->elementId++;
1492
1493
221k
    return(0);
1494
221k
}
1495
1496
/**
1497
 * Lookup namespace with given prefix. If `bucketPtr` is non-NULL, it will
1498
 * be set to the matching bucket, or the first empty bucket if no match
1499
 * was found.
1500
 *
1501
 * @param ctxt  parser context
1502
 * @param prefix  namespace prefix
1503
 * @param bucketPtr  optional bucket (return value)
1504
 * @returns the namespace index on success, INT_MAX if no namespace was
1505
 * found.
1506
 */
1507
static int
1508
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1509
450k
                  xmlParserNsBucket **bucketPtr) {
1510
450k
    xmlParserNsBucket *bucket, *tombstone;
1511
450k
    unsigned index, hashValue;
1512
1513
450k
    if (prefix->name == NULL)
1514
150k
        return(ctxt->nsdb->defaultNsIndex);
1515
1516
300k
    if (ctxt->nsdb->hashSize == 0)
1517
43.9k
        return(INT_MAX);
1518
1519
256k
    hashValue = prefix->hashValue;
1520
256k
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1521
256k
    bucket = &ctxt->nsdb->hash[index];
1522
256k
    tombstone = NULL;
1523
1524
363k
    while (bucket->hashValue) {
1525
323k
        if (bucket->index == INT_MAX) {
1526
39.3k
            if (tombstone == NULL)
1527
32.1k
                tombstone = bucket;
1528
284k
        } else if (bucket->hashValue == hashValue) {
1529
216k
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1530
216k
                if (bucketPtr != NULL)
1531
101k
                    *bucketPtr = bucket;
1532
216k
                return(bucket->index);
1533
216k
            }
1534
216k
        }
1535
1536
107k
        index++;
1537
107k
        bucket++;
1538
107k
        if (index == ctxt->nsdb->hashSize) {
1539
36.0k
            index = 0;
1540
36.0k
            bucket = ctxt->nsdb->hash;
1541
36.0k
        }
1542
107k
    }
1543
1544
39.2k
    if (bucketPtr != NULL)
1545
32.4k
        *bucketPtr = tombstone ? tombstone : bucket;
1546
39.2k
    return(INT_MAX);
1547
256k
}
1548
1549
/**
1550
 * Lookup namespace URI with given prefix.
1551
 *
1552
 * @param ctxt  parser context
1553
 * @param prefix  namespace prefix
1554
 * @returns the namespace URI on success, NULL if no namespace was found.
1555
 */
1556
static const xmlChar *
1557
191k
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1558
191k
    const xmlChar *ret;
1559
191k
    int nsIndex;
1560
1561
191k
    if (prefix->name == ctxt->str_xml)
1562
3.06k
        return(ctxt->str_xml_ns);
1563
1564
    /*
1565
     * minNsIndex is used when building an entity tree. We must
1566
     * ignore namespaces declared outside the entity.
1567
     */
1568
188k
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1569
188k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1570
130k
        return(NULL);
1571
1572
57.0k
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1573
57.0k
    if (ret[0] == 0)
1574
1.54k
        ret = NULL;
1575
57.0k
    return(ret);
1576
188k
}
1577
1578
/**
1579
 * Lookup extra data for the given prefix. This returns data stored
1580
 * with xmlParserNsUdpateSax.
1581
 *
1582
 * @param ctxt  parser context
1583
 * @param prefix  namespace prefix
1584
 * @returns the data on success, NULL if no namespace was found.
1585
 */
1586
void *
1587
42.6k
xmlParserNsLookupSax(xmlParserCtxt *ctxt, const xmlChar *prefix) {
1588
42.6k
    xmlHashedString hprefix;
1589
42.6k
    int nsIndex;
1590
1591
42.6k
    if (prefix == ctxt->str_xml)
1592
18.7k
        return(NULL);
1593
1594
23.9k
    hprefix.name = prefix;
1595
23.9k
    if (prefix != NULL)
1596
19.5k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1597
4.36k
    else
1598
4.36k
        hprefix.hashValue = 0;
1599
23.9k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1600
23.9k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1601
0
        return(NULL);
1602
1603
23.9k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1604
23.9k
}
1605
1606
/**
1607
 * Sets or updates extra data for the given prefix. This value will be
1608
 * returned by xmlParserNsLookupSax as long as the namespace with the
1609
 * given prefix is in scope.
1610
 *
1611
 * @param ctxt  parser context
1612
 * @param prefix  namespace prefix
1613
 * @param saxData  extra data for SAX handler
1614
 * @returns the data on success, NULL if no namespace was found.
1615
 */
1616
int
1617
xmlParserNsUpdateSax(xmlParserCtxt *ctxt, const xmlChar *prefix,
1618
53.4k
                     void *saxData) {
1619
53.4k
    xmlHashedString hprefix;
1620
53.4k
    int nsIndex;
1621
1622
53.4k
    if (prefix == ctxt->str_xml)
1623
0
        return(-1);
1624
1625
53.4k
    hprefix.name = prefix;
1626
53.4k
    if (prefix != NULL)
1627
43.0k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1628
10.4k
    else
1629
10.4k
        hprefix.hashValue = 0;
1630
53.4k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1631
53.4k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1632
0
        return(-1);
1633
1634
53.4k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1635
53.4k
    return(0);
1636
53.4k
}
1637
1638
/**
1639
 * Grows the namespace tables.
1640
 *
1641
 * @param ctxt  parser context
1642
 * @returns 0 on success, -1 if a memory allocation failed.
1643
 */
1644
static int
1645
39.6k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1646
39.6k
    const xmlChar **table;
1647
39.6k
    xmlParserNsExtra *extra;
1648
39.6k
    int newSize;
1649
1650
39.6k
    newSize = xmlGrowCapacity(ctxt->nsMax,
1651
39.6k
                              sizeof(table[0]) + sizeof(extra[0]),
1652
39.6k
                              16, XML_MAX_ITEMS);
1653
39.6k
    if (newSize < 0)
1654
0
        goto error;
1655
1656
39.6k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1657
39.6k
    if (table == NULL)
1658
4
        goto error;
1659
39.6k
    ctxt->nsTab = table;
1660
1661
39.6k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1662
39.6k
    if (extra == NULL)
1663
9
        goto error;
1664
39.6k
    ctxt->nsdb->extra = extra;
1665
1666
39.6k
    ctxt->nsMax = newSize;
1667
39.6k
    return(0);
1668
1669
13
error:
1670
13
    xmlErrMemory(ctxt);
1671
13
    return(-1);
1672
39.6k
}
1673
1674
/**
1675
 * Push a new namespace on the table.
1676
 *
1677
 * @param ctxt  parser context
1678
 * @param prefix  prefix with hash value
1679
 * @param uri  uri with hash value
1680
 * @param saxData  extra data for SAX handler
1681
 * @param defAttr  whether the namespace comes from a default attribute
1682
 * @returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1683
 * -1 if a memory allocation failed.
1684
 */
1685
static int
1686
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1687
92.1k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1688
92.1k
    xmlParserNsBucket *bucket = NULL;
1689
92.1k
    xmlParserNsExtra *extra;
1690
92.1k
    const xmlChar **ns;
1691
92.1k
    unsigned hashValue, nsIndex, oldIndex;
1692
1693
92.1k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1694
277
        return(0);
1695
1696
91.8k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1697
13
        xmlErrMemory(ctxt);
1698
13
        return(-1);
1699
13
    }
1700
1701
    /*
1702
     * Default namespace and 'xml' namespace
1703
     */
1704
91.8k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1705
16.1k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1706
1707
16.1k
        if (oldIndex != INT_MAX) {
1708
8.96k
            extra = &ctxt->nsdb->extra[oldIndex];
1709
1710
8.96k
            if (extra->elementId == ctxt->nsdb->elementId) {
1711
483
                if (defAttr == 0)
1712
202
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1713
483
                return(0);
1714
483
            }
1715
1716
8.47k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1717
8.47k
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1718
1.15k
                return(0);
1719
8.47k
        }
1720
1721
14.5k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1722
14.5k
        goto populate_entry;
1723
16.1k
    }
1724
1725
    /*
1726
     * Hash table lookup
1727
     */
1728
75.7k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1729
75.7k
    if (oldIndex != INT_MAX) {
1730
30.2k
        extra = &ctxt->nsdb->extra[oldIndex];
1731
1732
        /*
1733
         * Check for duplicate definitions on the same element.
1734
         */
1735
30.2k
        if (extra->elementId == ctxt->nsdb->elementId) {
1736
411
            if (defAttr == 0)
1737
199
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1738
411
            return(0);
1739
411
        }
1740
1741
29.7k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1742
29.7k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1743
2.84k
            return(0);
1744
1745
26.9k
        bucket->index = ctxt->nsNr;
1746
26.9k
        goto populate_entry;
1747
29.7k
    }
1748
1749
    /*
1750
     * Insert new bucket
1751
     */
1752
1753
45.5k
    hashValue = prefix->hashValue;
1754
1755
    /*
1756
     * Grow hash table, 50% fill factor
1757
     */
1758
45.5k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1759
15.9k
        xmlParserNsBucket *newHash;
1760
15.9k
        unsigned newSize, i, index;
1761
1762
15.9k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1763
0
            xmlErrMemory(ctxt);
1764
0
            return(-1);
1765
0
        }
1766
15.9k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1767
15.9k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1768
15.9k
        if (newHash == NULL) {
1769
1
            xmlErrMemory(ctxt);
1770
1
            return(-1);
1771
1
        }
1772
15.9k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1773
1774
63.0k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1775
47.0k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1776
47.0k
            unsigned newIndex;
1777
1778
47.0k
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1779
42.0k
                continue;
1780
5.06k
            newIndex = hv & (newSize - 1);
1781
1782
6.90k
            while (newHash[newIndex].hashValue != 0) {
1783
1.84k
                newIndex++;
1784
1.84k
                if (newIndex == newSize)
1785
828
                    newIndex = 0;
1786
1.84k
            }
1787
1788
5.06k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1789
5.06k
        }
1790
1791
15.9k
        xmlFree(ctxt->nsdb->hash);
1792
15.9k
        ctxt->nsdb->hash = newHash;
1793
15.9k
        ctxt->nsdb->hashSize = newSize;
1794
1795
        /*
1796
         * Relookup
1797
         */
1798
15.9k
        index = hashValue & (newSize - 1);
1799
1800
17.4k
        while (newHash[index].hashValue != 0) {
1801
1.47k
            index++;
1802
1.47k
            if (index == newSize)
1803
214
                index = 0;
1804
1.47k
        }
1805
1806
15.9k
        bucket = &newHash[index];
1807
15.9k
    }
1808
1809
45.5k
    bucket->hashValue = hashValue;
1810
45.5k
    bucket->index = ctxt->nsNr;
1811
45.5k
    ctxt->nsdb->hashElems++;
1812
45.5k
    oldIndex = INT_MAX;
1813
1814
86.9k
populate_entry:
1815
86.9k
    nsIndex = ctxt->nsNr;
1816
1817
86.9k
    ns = &ctxt->nsTab[nsIndex * 2];
1818
86.9k
    ns[0] = prefix ? prefix->name : NULL;
1819
86.9k
    ns[1] = uri->name;
1820
1821
86.9k
    extra = &ctxt->nsdb->extra[nsIndex];
1822
86.9k
    extra->saxData = saxData;
1823
86.9k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1824
86.9k
    extra->uriHashValue = uri->hashValue;
1825
86.9k
    extra->elementId = ctxt->nsdb->elementId;
1826
86.9k
    extra->oldIndex = oldIndex;
1827
1828
86.9k
    ctxt->nsNr++;
1829
1830
86.9k
    return(1);
1831
45.5k
}
1832
1833
/**
1834
 * Pops the top `nr` namespaces and restores the hash table.
1835
 *
1836
 * @param ctxt  an XML parser context
1837
 * @param nr  the number to pop
1838
 * @returns the number of namespaces popped.
1839
 */
1840
static int
1841
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1842
64.4k
{
1843
64.4k
    int i;
1844
1845
    /* assert(nr <= ctxt->nsNr); */
1846
1847
150k
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1848
85.7k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1849
85.7k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1850
1851
85.7k
        if (prefix == NULL) {
1852
14.3k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1853
71.4k
        } else {
1854
71.4k
            xmlHashedString hprefix;
1855
71.4k
            xmlParserNsBucket *bucket = NULL;
1856
1857
71.4k
            hprefix.name = prefix;
1858
71.4k
            hprefix.hashValue = extra->prefixHashValue;
1859
71.4k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1860
            /* assert(bucket && bucket->hashValue); */
1861
71.4k
            bucket->index = extra->oldIndex;
1862
71.4k
        }
1863
85.7k
    }
1864
1865
64.4k
    ctxt->nsNr -= nr;
1866
64.4k
    return(nr);
1867
64.4k
}
1868
1869
static int
1870
40.9k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt) {
1871
40.9k
    const xmlChar **atts;
1872
40.9k
    unsigned *attallocs;
1873
40.9k
    int newSize;
1874
1875
40.9k
    newSize = xmlGrowCapacity(ctxt->maxatts / 5,
1876
40.9k
                              sizeof(atts[0]) * 5 + sizeof(attallocs[0]),
1877
40.9k
                              10, XML_MAX_ATTRS);
1878
40.9k
    if (newSize < 0) {
1879
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
1880
0
                    "Maximum number of attributes exceeded");
1881
0
        return(-1);
1882
0
    }
1883
1884
40.9k
    atts = xmlRealloc(ctxt->atts, newSize * sizeof(atts[0]) * 5);
1885
40.9k
    if (atts == NULL)
1886
5
        goto mem_error;
1887
40.9k
    ctxt->atts = atts;
1888
1889
40.9k
    attallocs = xmlRealloc(ctxt->attallocs,
1890
40.9k
                           newSize * sizeof(attallocs[0]));
1891
40.9k
    if (attallocs == NULL)
1892
10
        goto mem_error;
1893
40.9k
    ctxt->attallocs = attallocs;
1894
1895
40.9k
    ctxt->maxatts = newSize * 5;
1896
1897
40.9k
    return(0);
1898
1899
15
mem_error:
1900
15
    xmlErrMemory(ctxt);
1901
15
    return(-1);
1902
40.9k
}
1903
1904
/**
1905
 * Pushes a new parser input on top of the input stack
1906
 *
1907
 * @param ctxt  an XML parser context
1908
 * @param value  the parser input
1909
 * @returns -1 in case of error, the index in the stack otherwise
1910
 */
1911
int
1912
xmlCtxtPushInput(xmlParserCtxt *ctxt, xmlParserInput *value)
1913
182k
{
1914
182k
    char *directory = NULL;
1915
182k
    int maxDepth;
1916
1917
182k
    if ((ctxt == NULL) || (value == NULL))
1918
0
        return(-1);
1919
1920
182k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
1921
1922
182k
    if (ctxt->inputNr >= ctxt->inputMax) {
1923
2.98k
        xmlParserInputPtr *tmp;
1924
2.98k
        int newSize;
1925
1926
2.98k
        newSize = xmlGrowCapacity(ctxt->inputMax, sizeof(tmp[0]),
1927
2.98k
                                  5, maxDepth);
1928
2.98k
        if (newSize < 0) {
1929
0
            xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
1930
0
                           "Maximum entity nesting depth exceeded");
1931
0
            xmlHaltParser(ctxt);
1932
0
            return(-1);
1933
0
        }
1934
2.98k
        tmp = xmlRealloc(ctxt->inputTab, newSize * sizeof(tmp[0]));
1935
2.98k
        if (tmp == NULL) {
1936
2
            xmlErrMemory(ctxt);
1937
2
            return(-1);
1938
2
        }
1939
2.98k
        ctxt->inputTab = tmp;
1940
2.98k
        ctxt->inputMax = newSize;
1941
2.98k
    }
1942
1943
182k
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1944
140k
        directory = xmlParserGetDirectory(value->filename);
1945
140k
        if (directory == NULL) {
1946
12
            xmlErrMemory(ctxt);
1947
12
            return(-1);
1948
12
        }
1949
140k
    }
1950
1951
182k
    if (ctxt->input_id >= INT_MAX) {
1952
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1953
0
        return(-1);
1954
0
    }
1955
1956
182k
    ctxt->inputTab[ctxt->inputNr] = value;
1957
182k
    ctxt->input = value;
1958
1959
182k
    if (ctxt->inputNr == 0) {
1960
178k
        xmlFree(ctxt->directory);
1961
178k
        ctxt->directory = directory;
1962
178k
    }
1963
1964
    /*
1965
     * The input ID is unused internally, but there are entity
1966
     * loaders in downstream code that detect the main document
1967
     * by checking for "input_id == 1".
1968
     */
1969
182k
    value->id = ctxt->input_id++;
1970
1971
182k
    return(ctxt->inputNr++);
1972
182k
}
1973
1974
/**
1975
 * Pops the top parser input from the input stack
1976
 *
1977
 * @param ctxt  an XML parser context
1978
 * @returns the input just removed
1979
 */
1980
xmlParserInput *
1981
xmlCtxtPopInput(xmlParserCtxt *ctxt)
1982
565k
{
1983
565k
    xmlParserInputPtr ret;
1984
1985
565k
    if (ctxt == NULL)
1986
0
        return(NULL);
1987
565k
    if (ctxt->inputNr <= 0)
1988
384k
        return (NULL);
1989
180k
    ctxt->inputNr--;
1990
180k
    if (ctxt->inputNr > 0)
1991
3.78k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1992
176k
    else
1993
176k
        ctxt->input = NULL;
1994
180k
    ret = ctxt->inputTab[ctxt->inputNr];
1995
180k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1996
180k
    return (ret);
1997
565k
}
1998
1999
/**
2000
 * Pushes a new element node on top of the node stack
2001
 *
2002
 * @deprecated Internal function, do not use.
2003
 *
2004
 * @param ctxt  an XML parser context
2005
 * @param value  the element node
2006
 * @returns -1 in case of error, the index in the stack otherwise
2007
 */
2008
int
2009
nodePush(xmlParserCtxt *ctxt, xmlNode *value)
2010
235k
{
2011
235k
    if (ctxt == NULL)
2012
0
        return(0);
2013
2014
235k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
2015
77.3k
        int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2016
77.3k
        xmlNodePtr *tmp;
2017
77.3k
        int newSize;
2018
2019
77.3k
        newSize = xmlGrowCapacity(ctxt->nodeMax, sizeof(tmp[0]),
2020
77.3k
                                  10, maxDepth);
2021
77.3k
        if (newSize < 0) {
2022
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2023
0
                    "Excessive depth in document: %d,"
2024
0
                    " use XML_PARSE_HUGE option\n",
2025
0
                    ctxt->nodeNr);
2026
0
            xmlHaltParser(ctxt);
2027
0
            return(-1);
2028
0
        }
2029
2030
77.3k
  tmp = xmlRealloc(ctxt->nodeTab, newSize * sizeof(tmp[0]));
2031
77.3k
        if (tmp == NULL) {
2032
15
            xmlErrMemory(ctxt);
2033
15
            return (-1);
2034
15
        }
2035
77.3k
        ctxt->nodeTab = tmp;
2036
77.3k
  ctxt->nodeMax = newSize;
2037
77.3k
    }
2038
2039
235k
    ctxt->nodeTab[ctxt->nodeNr] = value;
2040
235k
    ctxt->node = value;
2041
235k
    return (ctxt->nodeNr++);
2042
235k
}
2043
2044
/**
2045
 * Pops the top element node from the node stack
2046
 *
2047
 * @deprecated Internal function, do not use.
2048
 *
2049
 * @param ctxt  an XML parser context
2050
 * @returns the node just removed
2051
 */
2052
xmlNode *
2053
nodePop(xmlParserCtxt *ctxt)
2054
227k
{
2055
227k
    xmlNodePtr ret;
2056
2057
227k
    if (ctxt == NULL) return(NULL);
2058
227k
    if (ctxt->nodeNr <= 0)
2059
5.50k
        return (NULL);
2060
222k
    ctxt->nodeNr--;
2061
222k
    if (ctxt->nodeNr > 0)
2062
167k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2063
54.9k
    else
2064
54.9k
        ctxt->node = NULL;
2065
222k
    ret = ctxt->nodeTab[ctxt->nodeNr];
2066
222k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2067
222k
    return (ret);
2068
227k
}
2069
2070
/**
2071
 * Pushes a new element name/prefix/URL on top of the name stack
2072
 *
2073
 * @param ctxt  an XML parser context
2074
 * @param value  the element name
2075
 * @param prefix  the element prefix
2076
 * @param URI  the element namespace name
2077
 * @param line  the current line number for error messages
2078
 * @param nsNr  the number of namespaces pushed on the namespace table
2079
 * @returns -1 in case of error, the index in the stack otherwise
2080
 */
2081
static int
2082
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2083
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2084
294k
{
2085
294k
    xmlStartTag *tag;
2086
2087
294k
    if (ctxt->nameNr >= ctxt->nameMax) {
2088
89.8k
        const xmlChar **tmp;
2089
89.8k
        xmlStartTag *tmp2;
2090
89.8k
        int newSize;
2091
2092
89.8k
        newSize = xmlGrowCapacity(ctxt->nameMax,
2093
89.8k
                                  sizeof(tmp[0]) + sizeof(tmp2[0]),
2094
89.8k
                                  10, XML_MAX_ITEMS);
2095
89.8k
        if (newSize < 0)
2096
0
            goto mem_error;
2097
2098
89.8k
        tmp = xmlRealloc(ctxt->nameTab, newSize * sizeof(tmp[0]));
2099
89.8k
        if (tmp == NULL)
2100
22
      goto mem_error;
2101
89.8k
  ctxt->nameTab = tmp;
2102
2103
89.8k
        tmp2 = xmlRealloc(ctxt->pushTab, newSize * sizeof(tmp2[0]));
2104
89.8k
        if (tmp2 == NULL)
2105
24
      goto mem_error;
2106
89.8k
  ctxt->pushTab = tmp2;
2107
2108
89.8k
        ctxt->nameMax = newSize;
2109
204k
    } else if (ctxt->pushTab == NULL) {
2110
72.9k
        ctxt->pushTab = xmlMalloc(ctxt->nameMax * sizeof(ctxt->pushTab[0]));
2111
72.9k
        if (ctxt->pushTab == NULL)
2112
17
            goto mem_error;
2113
72.9k
    }
2114
294k
    ctxt->nameTab[ctxt->nameNr] = value;
2115
294k
    ctxt->name = value;
2116
294k
    tag = &ctxt->pushTab[ctxt->nameNr];
2117
294k
    tag->prefix = prefix;
2118
294k
    tag->URI = URI;
2119
294k
    tag->line = line;
2120
294k
    tag->nsNr = nsNr;
2121
294k
    return (ctxt->nameNr++);
2122
63
mem_error:
2123
63
    xmlErrMemory(ctxt);
2124
63
    return (-1);
2125
294k
}
2126
#ifdef LIBXML_PUSH_ENABLED
/**
 * Removes the top-most element name from the name stack.
 *
 * `ctxt->name` is updated to the new top of the stack, or to NULL when
 * the stack becomes empty.
 *
 * @param ctxt  an XML parser context
 * @returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    top = --ctxt->nameNr;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    return (popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2150
2151
/**
2152
 * Pops the top element name from the name stack
2153
 *
2154
 * @deprecated Internal function, do not use.
2155
 *
2156
 * @param ctxt  an XML parser context
2157
 * @returns the name just removed
2158
 */
2159
static const xmlChar *
2160
namePop(xmlParserCtxtPtr ctxt)
2161
286k
{
2162
286k
    const xmlChar *ret;
2163
2164
286k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2165
9
        return (NULL);
2166
286k
    ctxt->nameNr--;
2167
286k
    if (ctxt->nameNr > 0)
2168
221k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2169
64.3k
    else
2170
64.3k
        ctxt->name = NULL;
2171
286k
    ret = ctxt->nameTab[ctxt->nameNr];
2172
286k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2173
286k
    return (ret);
2174
286k
}
2175
2176
339k
/*
 * Pushes `val` on the stack behind `ctxt->spaceTab` and points
 * `ctxt->space` at the new top entry.
 *
 * Returns the index of the new entry, or -1 if the stack could not be
 * grown (a memory error is raised in that case).
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int pos;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;
        int capacity;

        capacity = xmlGrowCapacity(ctxt->spaceMax, sizeof(grown[0]),
                                   10, XML_MAX_ITEMS);
        if (capacity >= 0)
            grown = xmlRealloc(ctxt->spaceTab, capacity * sizeof(grown[0]));
        else
            grown = NULL;

        if (grown == NULL) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        ctxt->spaceTab = grown;
        ctxt->spaceMax = capacity;
    }

    pos = ctxt->spaceNr;
    ctxt->spaceTab[pos] = val;
    ctxt->space = &ctxt->spaceTab[pos];
    ctxt->spaceNr = pos + 1;

    return(pos);
}
2201
2202
329k
/*
 * Pops the top entry of the stack behind `ctxt->spaceTab`.
 *
 * `ctxt->space` is moved to the new top entry; when the stack becomes
 * empty it is left pointing at slot 0. The vacated slot is set to -1.
 *
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;

    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;

    return(popped);
}
2214
2215
/*
2216
 * Macros for accessing the content. Those should be used only by the parser,
2217
 * and not exported.
2218
 *
2219
 * Dirty macros, i.e. one often needs to make assumptions about the context to
2220
 * use them
2221
 *
2222
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2223
 *           To be used with extreme caution since operations consuming
2224
 *           characters may move the input buffer to a different location !
2225
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2226
 *           This should be used internally by the parser
2227
 *           only to compare to ASCII values otherwise it would break when
2228
 *           running with UTF-8 encoding.
2229
 *   RAW     same as CUR but in the input buffer, bypass any token
2230
 *           extraction that may have been done
2231
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
2232
 *           to compare on ASCII based substring.
2233
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2234
 *           strings without newlines within the parser.
2235
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2236
 *           defined char within the parser.
2237
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2238
 *
2239
 *   NEXT    Skip to the next character, this does the proper decoding
2240
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2241
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2242
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2243
 *            the index
2244
 *   GROW, SHRINK  handling of input buffers
2245
 */
2246
2247
9.04M
/*
 * Direct accessors for the current input of the parser context. All of
 * them expect a variable named `ctxt` to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true if the first n bytes at `s` equal c1..cn.
 * The comparison short-circuits at the first mismatch.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == c1 && ((unsigned char *) (s))[ 1 ] == c2 && \
    ((unsigned char *) (s))[ 2 ] == c3 && ((unsigned char *) (s))[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == c10 )

/*
 * Advance by `val` bytes, adding `val` to the column counter without
 * any newline handling. `val` is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/* Advance by `val` bytes one at a time, tracking line and column. */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * SHRINK and GROW expand to a bare `if` statement that already ends in
 * a semicolon. Do not use them as the unbraced body of an if/else.
 */
#define SHRINK \
    if (!PARSER_PROGRESSIVE(ctxt)) \
	xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

/* Expands to a brace block, not a do/while(0) statement. */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);					\
    }

/*
 * Advance past the current character, which is `l` bytes long. Only
 * the byte at the current position is inspected for a newline.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

/*
 * Append code point `v` to buffer `b` at index `i` and advance `i`.
 * Expands to a bare if/else without a trailing semicolon.
 */
#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2321
2322
static int
2323
4.35M
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2324
4.35M
    int c = xmlCurrentChar(ctxt, len);
2325
2326
4.35M
    if (c == XML_INVALID_CHAR)
2327
284k
        c = 0xFFFD; /* replacement character */
2328
2329
4.35M
    return(c);
2330
4.35M
}
2331
2332
/**
2333
 * Skip whitespace in the input stream.
2334
 *
2335
 * @deprecated Internal function, do not use.
2336
 *
2337
 * @param ctxt  the XML parser context
2338
 * @returns the number of space chars skipped
2339
 */
2340
int
2341
3.82M
xmlSkipBlankChars(xmlParserCtxt *ctxt) {
2342
3.82M
    const xmlChar *cur;
2343
3.82M
    int res = 0;
2344
2345
3.82M
    cur = ctxt->input->cur;
2346
3.82M
    while (IS_BLANK_CH(*cur)) {
2347
1.81M
        if (*cur == '\n') {
2348
432k
            ctxt->input->line++; ctxt->input->col = 1;
2349
1.38M
        } else {
2350
1.38M
            ctxt->input->col++;
2351
1.38M
        }
2352
1.81M
        cur++;
2353
1.81M
        if (res < INT_MAX)
2354
1.81M
            res++;
2355
1.81M
        if (*cur == 0) {
2356
51.6k
            ctxt->input->cur = cur;
2357
51.6k
            xmlParserGrow(ctxt);
2358
51.6k
            cur = ctxt->input->cur;
2359
51.6k
        }
2360
1.81M
    }
2361
3.82M
    ctxt->input->cur = cur;
2362
2363
3.82M
    if (res > 4)
2364
3.15k
        GROW;
2365
2366
3.82M
    return(res);
2367
3.82M
}
2368
2369
static void
2370
0
xmlPopPE(xmlParserCtxtPtr ctxt) {
2371
0
    unsigned long consumed;
2372
0
    xmlEntityPtr ent;
2373
2374
0
    ent = ctxt->input->entity;
2375
2376
0
    ent->flags &= ~XML_ENT_EXPANDING;
2377
2378
0
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2379
0
        int result;
2380
2381
        /*
2382
         * Read the rest of the stream in case of errors. We want
2383
         * to account for the whole entity size.
2384
         */
2385
0
        do {
2386
0
            ctxt->input->cur = ctxt->input->end;
2387
0
            xmlParserShrink(ctxt);
2388
0
            result = xmlParserGrow(ctxt);
2389
0
        } while (result > 0);
2390
2391
0
        consumed = ctxt->input->consumed;
2392
0
        xmlSaturatedAddSizeT(&consumed,
2393
0
                             ctxt->input->end - ctxt->input->base);
2394
2395
0
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2396
2397
        /*
2398
         * Add to sizeentities when parsing an external entity
2399
         * for the first time.
2400
         */
2401
0
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2402
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2403
0
        }
2404
2405
0
        ent->flags |= XML_ENT_CHECKED;
2406
0
    }
2407
2408
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
2409
2410
0
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2411
2412
0
    GROW;
2413
0
}
2414
2415
/**
2416
 * Skip whitespace in the input stream, also handling parameter
2417
 * entities.
2418
 *
2419
 * @param ctxt  the XML parser context
2420
 * @returns the number of space chars skipped
2421
 */
2422
static int
2423
406k
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2424
406k
    int res = 0;
2425
406k
    int inParam;
2426
406k
    int expandParam;
2427
2428
406k
    inParam = PARSER_IN_PE(ctxt);
2429
406k
    expandParam = PARSER_EXTERNAL(ctxt);
2430
2431
406k
    if (!inParam && !expandParam)
2432
406k
        return(xmlSkipBlankChars(ctxt));
2433
2434
    /*
2435
     * It's Okay to use CUR/NEXT here since all the blanks are on
2436
     * the ASCII range.
2437
     */
2438
0
    while (PARSER_STOPPED(ctxt) == 0) {
2439
0
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2440
0
            NEXT;
2441
0
        } else if (CUR == '%') {
2442
0
            if ((expandParam == 0) ||
2443
0
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2444
0
                break;
2445
2446
            /*
2447
             * Expand parameter entity. We continue to consume
2448
             * whitespace at the start of the entity and possible
2449
             * even consume the whole entity and pop it. We might
2450
             * even pop multiple PEs in this loop.
2451
             */
2452
0
            xmlParsePERefInternal(ctxt, 0);
2453
2454
0
            inParam = PARSER_IN_PE(ctxt);
2455
0
            expandParam = PARSER_EXTERNAL(ctxt);
2456
0
        } else if (CUR == 0) {
2457
0
            if (inParam == 0)
2458
0
                break;
2459
2460
            /*
2461
             * Don't pop parameter entities that start a markup
2462
             * declaration to detect Well-formedness constraint:
2463
             * PE Between Declarations.
2464
             */
2465
0
            if (ctxt->input->flags & XML_INPUT_MARKUP_DECL)
2466
0
                break;
2467
2468
0
            xmlPopPE(ctxt);
2469
2470
0
            inParam = PARSER_IN_PE(ctxt);
2471
0
            expandParam = PARSER_EXTERNAL(ctxt);
2472
0
        } else {
2473
0
            break;
2474
0
        }
2475
2476
        /*
2477
         * Also increase the counter when entering or exiting a PERef.
2478
         * The spec says: "When a parameter-entity reference is recognized
2479
         * in the DTD and included, its replacement text MUST be enlarged
2480
         * by the attachment of one leading and one following space (#x20)
2481
         * character."
2482
         */
2483
0
        if (res < INT_MAX)
2484
0
            res++;
2485
0
    }
2486
2487
0
    return(res);
2488
406k
}
2489
2490
/************************************************************************
2491
 *                  *
2492
 *    Commodity functions to handle entities      *
2493
 *                  *
2494
 ************************************************************************/
2495
2496
/**
2497
 * @deprecated Internal function, don't use.
2498
 *
2499
 * @param ctxt  an XML parser context
2500
 * @returns the current xmlChar in the parser context
2501
 */
2502
xmlChar
2503
0
xmlPopInput(xmlParserCtxt *ctxt) {
2504
0
    xmlParserInputPtr input;
2505
2506
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2507
0
    input = xmlCtxtPopInput(ctxt);
2508
0
    xmlFreeInputStream(input);
2509
0
    if (*ctxt->input->cur == 0)
2510
0
        xmlParserGrow(ctxt);
2511
0
    return(CUR);
2512
0
}
2513
2514
/**
2515
 * Push an input stream onto the stack.
2516
 *
2517
 * @deprecated Internal function, don't use.
2518
 *
2519
 * @param ctxt  an XML parser context
2520
 * @param input  an XML parser input fragment (entity, XML fragment ...).
2521
 * @returns -1 in case of error or the index in the input stack
2522
 */
2523
int
2524
0
xmlPushInput(xmlParserCtxt *ctxt, xmlParserInput *input) {
2525
0
    int ret;
2526
2527
0
    if ((ctxt == NULL) || (input == NULL))
2528
0
        return(-1);
2529
2530
0
    ret = xmlCtxtPushInput(ctxt, input);
2531
0
    if (ret >= 0)
2532
0
        GROW;
2533
0
    return(ret);
2534
0
}
2535
2536
/**
2537
 * Parse a numeric character reference. Always consumes '&'.
2538
 *
2539
 * @deprecated Internal function, don't use.
2540
 *
2541
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2542
 *                      '&#x' [0-9a-fA-F]+ ';'
2543
 *
2544
 * [ WFC: Legal Character ]
2545
 * Characters referred to using character references must match the
2546
 * production for Char.
2547
 *
2548
 * @param ctxt  an XML parser context
2549
 * @returns the value parsed (as an int), 0 in case of error
2550
 */
2551
int
2552
136k
xmlParseCharRef(xmlParserCtxt *ctxt) {
2553
136k
    int val = 0;
2554
136k
    int count = 0;
2555
2556
    /*
2557
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2558
     */
2559
136k
    if ((RAW == '&') && (NXT(1) == '#') &&
2560
136k
        (NXT(2) == 'x')) {
2561
60.3k
  SKIP(3);
2562
60.3k
  GROW;
2563
288k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2564
229k
      if (count++ > 20) {
2565
537
    count = 0;
2566
537
    GROW;
2567
537
      }
2568
229k
      if ((RAW >= '0') && (RAW <= '9'))
2569
71.2k
          val = val * 16 + (CUR - '0');
2570
158k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2571
586
          val = val * 16 + (CUR - 'a') + 10;
2572
158k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2573
156k
          val = val * 16 + (CUR - 'A') + 10;
2574
1.57k
      else {
2575
1.57k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2576
1.57k
    val = 0;
2577
1.57k
    break;
2578
1.57k
      }
2579
228k
      if (val > 0x110000)
2580
5.85k
          val = 0x110000;
2581
2582
228k
      NEXT;
2583
228k
      count++;
2584
228k
  }
2585
60.3k
  if (RAW == ';') {
2586
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2587
58.7k
      ctxt->input->col++;
2588
58.7k
      ctxt->input->cur++;
2589
58.7k
  }
2590
75.9k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2591
75.9k
  SKIP(2);
2592
75.9k
  GROW;
2593
236k
  while (RAW != ';') { /* loop blocked by count */
2594
162k
      if (count++ > 20) {
2595
421
    count = 0;
2596
421
    GROW;
2597
421
      }
2598
162k
      if ((RAW >= '0') && (RAW <= '9'))
2599
160k
          val = val * 10 + (CUR - '0');
2600
1.31k
      else {
2601
1.31k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2602
1.31k
    val = 0;
2603
1.31k
    break;
2604
1.31k
      }
2605
160k
      if (val > 0x110000)
2606
2.24k
          val = 0x110000;
2607
2608
160k
      NEXT;
2609
160k
      count++;
2610
160k
  }
2611
75.9k
  if (RAW == ';') {
2612
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2613
74.6k
      ctxt->input->col++;
2614
74.6k
      ctxt->input->cur++;
2615
74.6k
  }
2616
75.9k
    } else {
2617
0
        if (RAW == '&')
2618
0
            SKIP(1);
2619
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2620
0
    }
2621
2622
    /*
2623
     * [ WFC: Legal Character ]
2624
     * Characters referred to using character references must match the
2625
     * production for Char.
2626
     */
2627
136k
    if (val >= 0x110000) {
2628
348
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2629
348
                "xmlParseCharRef: character reference out of bounds\n",
2630
348
          val);
2631
135k
    } else if (IS_CHAR(val)) {
2632
132k
        return(val);
2633
132k
    } else {
2634
3.51k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2635
3.51k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2636
3.51k
                    val);
2637
3.51k
    }
2638
3.85k
    return(0);
2639
136k
}
2640
2641
/**
2642
 * parse Reference declarations, variant parsing from a string rather
2643
 * than an an input flow.
2644
 *
2645
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2646
 *                      '&#x' [0-9a-fA-F]+ ';'
2647
 *
2648
 * [ WFC: Legal Character ]
2649
 * Characters referred to using character references must match the
2650
 * production for Char.
2651
 *
2652
 * @param ctxt  an XML parser context
2653
 * @param str  a pointer to an index in the string
2654
 * @returns the value parsed (as an int), 0 in case of error, str will be
2655
 *         updated to the current value of the index
2656
 */
2657
static int
2658
29.7k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2659
29.7k
    const xmlChar *ptr;
2660
29.7k
    xmlChar cur;
2661
29.7k
    int val = 0;
2662
2663
29.7k
    if ((str == NULL) || (*str == NULL)) return(0);
2664
29.7k
    ptr = *str;
2665
29.7k
    cur = *ptr;
2666
29.7k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2667
6.13k
  ptr += 3;
2668
6.13k
  cur = *ptr;
2669
26.7k
  while (cur != ';') { /* Non input consuming loop */
2670
21.0k
      if ((cur >= '0') && (cur <= '9'))
2671
2.39k
          val = val * 16 + (cur - '0');
2672
18.6k
      else if ((cur >= 'a') && (cur <= 'f'))
2673
276
          val = val * 16 + (cur - 'a') + 10;
2674
18.3k
      else if ((cur >= 'A') && (cur <= 'F'))
2675
17.9k
          val = val * 16 + (cur - 'A') + 10;
2676
425
      else {
2677
425
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2678
425
    val = 0;
2679
425
    break;
2680
425
      }
2681
20.5k
      if (val > 0x110000)
2682
197
          val = 0x110000;
2683
2684
20.5k
      ptr++;
2685
20.5k
      cur = *ptr;
2686
20.5k
  }
2687
6.13k
  if (cur == ';')
2688
5.71k
      ptr++;
2689
23.6k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2690
23.6k
  ptr += 2;
2691
23.6k
  cur = *ptr;
2692
76.3k
  while (cur != ';') { /* Non input consuming loops */
2693
53.3k
      if ((cur >= '0') && (cur <= '9'))
2694
52.7k
          val = val * 10 + (cur - '0');
2695
601
      else {
2696
601
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2697
601
    val = 0;
2698
601
    break;
2699
601
      }
2700
52.7k
      if (val > 0x110000)
2701
380
          val = 0x110000;
2702
2703
52.7k
      ptr++;
2704
52.7k
      cur = *ptr;
2705
52.7k
  }
2706
23.6k
  if (cur == ';')
2707
23.0k
      ptr++;
2708
23.6k
    } else {
2709
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2710
0
  return(0);
2711
0
    }
2712
29.7k
    *str = ptr;
2713
2714
    /*
2715
     * [ WFC: Legal Character ]
2716
     * Characters referred to using character references must match the
2717
     * production for Char.
2718
     */
2719
29.7k
    if (val >= 0x110000) {
2720
209
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2721
209
                "xmlParseStringCharRef: character reference out of bounds\n",
2722
209
                val);
2723
29.5k
    } else if (IS_CHAR(val)) {
2724
27.9k
        return(val);
2725
27.9k
    } else {
2726
1.62k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2727
1.62k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2728
1.62k
        val);
2729
1.62k
    }
2730
1.83k
    return(0);
2731
29.7k
}
2732
2733
/**
2734
 *     [69] PEReference ::= '%' Name ';'
2735
 *
2736
 * @deprecated Internal function, do not use.
2737
 *
2738
 * [ WFC: No Recursion ]
2739
 * A parsed entity must not contain a recursive
2740
 * reference to itself, either directly or indirectly.
2741
 *
2742
 * [ WFC: Entity Declared ]
2743
 * In a document without any DTD, a document with only an internal DTD
2744
 * subset which contains no parameter entity references, or a document
2745
 * with "standalone='yes'", ...  ... The declaration of a parameter
2746
 * entity must precede any reference to it...
2747
 *
2748
 * [ VC: Entity Declared ]
2749
 * In a document with an external subset or external parameter entities
2750
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2751
 * must precede any reference to it...
2752
 *
2753
 * [ WFC: In DTD ]
2754
 * Parameter-entity references may only appear in the DTD.
2755
 * NOTE: misleading but this is handled.
2756
 *
2757
 * A PEReference may have been detected in the current input stream
2758
 * the handling is done accordingly to
2759
 *      http://www.w3.org/TR/REC-xml#entproc
2760
 * i.e.
2761
 *   - Included in literal in entity values
2762
 *   - Included as Parameter Entity reference within DTDs
2763
 * @param ctxt  the parser context
2764
 */
2765
void
2766
0
xmlParserHandlePEReference(xmlParserCtxt *ctxt) {
2767
0
    xmlParsePERefInternal(ctxt, 0);
2768
0
}
2769
2770
/**
2771
 * @deprecated Internal function, don't use.
2772
 *
2773
 * @param ctxt  the parser context
2774
 * @param str  the input string
2775
 * @param len  the string length
2776
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2777
 * @param end  an end marker xmlChar, 0 if none
2778
 * @param end2  an end marker xmlChar, 0 if none
2779
 * @param end3  an end marker xmlChar, 0 if none
2780
 * @returns A newly allocated string with the substitution done. The caller
2781
 *      must deallocate it !
2782
 */
2783
xmlChar *
2784
xmlStringLenDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str, int len,
2785
                           int what ATTRIBUTE_UNUSED,
2786
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2787
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2788
0
        return(NULL);
2789
2790
0
    if ((str[len] != 0) ||
2791
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2792
0
        return(NULL);
2793
2794
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2795
0
}
2796
2797
/**
2798
 * @deprecated Internal function, don't use.
2799
 *
2800
 * @param ctxt  the parser context
2801
 * @param str  the input string
2802
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2803
 * @param end  an end marker xmlChar, 0 if none
2804
 * @param end2  an end marker xmlChar, 0 if none
2805
 * @param end3  an end marker xmlChar, 0 if none
2806
 * @returns A newly allocated string with the substitution done. The caller
2807
 *      must deallocate it !
2808
 */
2809
xmlChar *
2810
xmlStringDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str,
2811
                        int what ATTRIBUTE_UNUSED,
2812
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2813
0
    if ((ctxt == NULL) || (str == NULL))
2814
0
        return(NULL);
2815
2816
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2817
0
        return(NULL);
2818
2819
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2820
0
}
2821
2822
/************************************************************************
2823
 *                  *
2824
 *    Commodity functions, cleanup needed ?     *
2825
 *                  *
2826
 ************************************************************************/
2827
2828
/**
2829
 * Is this a sequence of blank chars that one can ignore ?
2830
 *
2831
 * @param ctxt  an XML parser context
2832
 * @param str  a xmlChar *
2833
 * @param len  the size of `str`
2834
 * @param blank_chars  we know the chars are blanks
2835
 * @returns 1 if ignorable 0 otherwise.
2836
 */
2837
2838
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2839
115k
                     int blank_chars) {
2840
115k
    int i;
2841
115k
    xmlNodePtr lastChild;
2842
2843
    /*
2844
     * Check for xml:space value.
2845
     */
2846
115k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2847
115k
        (*(ctxt->space) == -2))
2848
52.2k
  return(0);
2849
2850
    /*
2851
     * Check that the string is made of blanks
2852
     */
2853
63.4k
    if (blank_chars == 0) {
2854
47.6k
  for (i = 0;i < len;i++)
2855
46.5k
      if (!(IS_BLANK_CH(str[i]))) return(0);
2856
43.0k
    }
2857
2858
    /*
2859
     * Look if the element is mixed content in the DTD if available
2860
     */
2861
21.5k
    if (ctxt->node == NULL) return(0);
2862
21.5k
    if (ctxt->myDoc != NULL) {
2863
21.5k
        xmlElementPtr elemDecl = NULL;
2864
21.5k
        xmlDocPtr doc = ctxt->myDoc;
2865
21.5k
        const xmlChar *prefix = NULL;
2866
2867
21.5k
        if (ctxt->node->ns)
2868
4.78k
            prefix = ctxt->node->ns->prefix;
2869
21.5k
        if (doc->intSubset != NULL)
2870
17.1k
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2871
17.1k
                                      prefix);
2872
21.5k
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2873
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2874
0
                                      prefix);
2875
21.5k
        if (elemDecl != NULL) {
2876
5.21k
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2877
1.59k
                return(1);
2878
3.62k
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2879
3.62k
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2880
475
                return(0);
2881
3.62k
        }
2882
21.5k
    }
2883
2884
    /*
2885
     * Otherwise, heuristic :-\
2886
     *
2887
     * When push parsing, we could be at the end of a chunk.
2888
     * This makes the look-ahead and consequently the NOBLANKS
2889
     * option unreliable.
2890
     */
2891
19.4k
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2892
18.9k
    if ((ctxt->node->children == NULL) &&
2893
18.9k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2894
2895
18.3k
    lastChild = xmlGetLastChild(ctxt->node);
2896
18.3k
    if (lastChild == NULL) {
2897
9.11k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2898
9.11k
            (ctxt->node->content != NULL)) return(0);
2899
9.26k
    } else if (xmlNodeIsText(lastChild))
2900
235
        return(0);
2901
9.02k
    else if ((ctxt->node->children != NULL) &&
2902
9.02k
             (xmlNodeIsText(ctxt->node->children)))
2903
203
        return(0);
2904
17.9k
    return(1);
2905
18.3k
}
2906
2907
/************************************************************************
2908
 *                  *
2909
 *    Extra stuff for namespace support     *
2910
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2911
 *                  *
2912
 ************************************************************************/
2913
2914
/**
2915
 * parse an UTF8 encoded XML qualified name string
2916
 *
2917
 * @deprecated Don't use.
2918
 *
2919
 * @param ctxt  an XML parser context
2920
 * @param name  an XML parser context
2921
 * @param prefixOut  a xmlChar **
2922
 * @returns the local part, and prefix is updated
2923
 *   to get the Prefix if any.
2924
 */
2925
2926
xmlChar *
2927
0
xmlSplitQName(xmlParserCtxt *ctxt, const xmlChar *name, xmlChar **prefixOut) {
2928
0
    xmlChar *ret;
2929
0
    const xmlChar *localname;
2930
2931
0
    localname = xmlSplitQName4(name, prefixOut);
2932
0
    if (localname == NULL) {
2933
0
        xmlCtxtErrMemory(ctxt);
2934
0
        return(NULL);
2935
0
    }
2936
2937
0
    ret = xmlStrdup(localname);
2938
0
    if (ret == NULL) {
2939
0
        xmlCtxtErrMemory(ctxt);
2940
0
        xmlFree(*prefixOut);
2941
0
    }
2942
2943
0
    return(ret);
2944
0
}
2945
2946
/************************************************************************
2947
 *                  *
2948
 *      The parser itself       *
2949
 *  Relates to http://www.w3.org/TR/REC-xml       *
2950
 *                  *
2951
 ************************************************************************/
2952
2953
/************************************************************************
2954
 *                  *
2955
 *  Routines to parse Name, NCName and NmToken      *
2956
 *                  *
2957
 ************************************************************************/
2958
2959
/*
2960
 * The two following functions are related to the change of accepted
2961
 * characters for Name and NmToken in the Revision 5 of XML-1.0
2962
 * They correspond to the modified production [4] and the new production [4a]
2963
 * changes in that revision. Also note that the macros used for the
2964
 * productions Letter, Digit, CombiningChar and Extender are not needed
2965
 * anymore.
2966
 * We still keep compatibility with pre-revision5 parsing semantics if the
2967
 * new XML_PARSE_OLD10 option is given to the parser.
2968
 */
2969
2970
static int
xmlIsNameStartCharNew(int c) {
    /*
     * NameStartChar according to productions [4], [4a] and [5] of
     * Update 5 of XML-1.0.
     */

    /* ASCII (and negative values): letters, '_' and ':' only. */
    if (c < 0x80) {
        if (((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')))
            return(1);
        return((c == '_') || (c == ':'));
    }

    /* #xC0-#x2FF without the multiplication and division signs. */
    if (c <= 0x2FF)
        return((c >= 0xC0) && (c != 0xD7) && (c != 0xF7));

    /* #x370-#x1FFF without #x37E; #x300-#x36F is not a start char. */
    if (c <= 0x1FFF)
        return((c >= 0x370) && (c != 0x37E));

    /* Remaining ranges of the Basic Multilingual Plane. */
    if (c <= 0xFFFF)
        return(((c >= 0x200C) && (c <= 0x200D)) ||
               ((c >= 0x2070) && (c <= 0x218F)) ||
               ((c >= 0x2C00) && (c <= 0x2FEF)) ||
               ((c >= 0x3001) && (c <= 0xD7FF)) ||
               ((c >= 0xF900) && (c <= 0xFDCF)) ||
               ((c >= 0xFDF0) && (c <= 0xFFFD)));

    /* Supplementary planes. */
    return(c <= 0xEFFFF);
}
2995
2996
static int
xmlIsNameCharNew(int c) {
    /*
     * NameChar according to productions [4], [4a] and [5] of
     * Update 5 of XML-1.0.
     */

    /* ASCII (and negative values): letters, digits, '_', ':', '-', '.'. */
    if (c < 0x80) {
        if (((c >= 'a') && (c <= 'z')) ||
            ((c >= 'A') && (c <= 'Z')) ||
            ((c >= '0') && (c <= '9')))
            return(1);
        return((c == '_') || (c == ':') || (c == '-') || (c == '.'));
    }

    /*
     * Up to #x1FFF: the middle dot, then everything from #xC0 on
     * except #xD7, #xF7 and #x37E. Unlike for start chars, the
     * combining range #x300-#x36F is accepted.
     */
    if (c <= 0x1FFF) {
        if (c == 0xB7)
            return(1);
        return((c >= 0xC0) && (c != 0xD7) && (c != 0xF7) && (c != 0x37E));
    }

    /* Remaining ranges of the Basic Multilingual Plane. */
    if (c <= 0xFFFF)
        return(((c >= 0x200C) && (c <= 0x200D)) ||
               ((c >= 0x203F) && (c <= 0x2040)) ||
               ((c >= 0x2070) && (c <= 0x218F)) ||
               ((c >= 0x2C00) && (c <= 0x2FEF)) ||
               ((c >= 0x3001) && (c <= 0xD7FF)) ||
               ((c >= 0xF900) && (c <= 0xFDCF)) ||
               ((c >= 0xFDF0) && (c <= 0xFFFD)));

    /* Supplementary planes. */
    return(c <= 0xEFFFF);
}
3025
3026
static int
xmlIsNameStartCharOld(int c) {
    /* Accelerators: the most frequent terminators are rejected first. */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    /* Pre-revision-5 rule: a Letter, '_' or ':'. */
    if (IS_LETTER(c) || (c == '_') || (c == ':'))
        return(1);

    return(0);
}
3033
3034
static int
xmlIsNameCharOld(int c) {
    /* Accelerators: the most frequent terminators are rejected first. */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    /*
     * Pre-revision-5 rule: Letter, Digit, '.', '-', '_', ':',
     * CombiningChar or Extender.
     */
    if ((IS_LETTER(c)) || (IS_DIGIT(c)))
        return(1);
    if ((c == '.') || (c == '-') || (c == '_') || (c == ':'))
        return(1);
    if ((IS_COMBINING(c)) || (IS_EXTENDER(c)))
        return(1);

    return(0);
}
3045
3046
static int
xmlIsNameStartChar(int c, int old10) {
    /* A non-zero old10 selects the pre-revision-5 character classes. */
    return(old10 ? xmlIsNameStartCharOld(c) : xmlIsNameStartCharNew(c));
}
3053
3054
static int
xmlIsNameChar(int c, int old10) {
    /* A non-zero old10 selects the pre-revision-5 character classes. */
    return(old10 ? xmlIsNameCharOld(c) : xmlIsNameCharNew(c));
}
3061
3062
/*
3063
 * Scan an XML Name, NCName or Nmtoken.
3064
 *
3065
 * Returns a pointer to the end of the name on success. If the
3066
 * name is invalid, returns `ptr`. If the name is longer than
3067
 * `maxSize` bytes, returns NULL.
3068
 *
3069
 * @param ptr  pointer to the start of the name
3070
 * @param maxSize  maximum size in bytes
3071
 * @param flags  XML_SCAN_* flags
3072
 * @returns a pointer to the end of the name or NULL
3073
 */
3074
const xmlChar *
3075
317k
xmlScanName(const xmlChar *ptr, size_t maxSize, int flags) {
3076
317k
    int stop = flags & XML_SCAN_NC ? ':' : 0;
3077
317k
    int old10 = flags & XML_SCAN_OLD10 ? 1 : 0;
3078
3079
1.24M
    while (1) {
3080
1.24M
        int c, len;
3081
3082
1.24M
        c = *ptr;
3083
1.24M
        if (c < 0x80) {
3084
1.23M
            if (c == stop)
3085
46.5k
                break;
3086
1.18M
            len = 1;
3087
1.18M
        } else {
3088
11.9k
            len = 4;
3089
11.9k
            c = xmlGetUTF8Char(ptr, &len);
3090
11.9k
            if (c < 0)
3091
1.35k
                break;
3092
11.9k
        }
3093
3094
1.19M
        if (flags & XML_SCAN_NMTOKEN ?
3095
891k
                !xmlIsNameChar(c, old10) :
3096
1.19M
                !xmlIsNameStartChar(c, old10))
3097
269k
            break;
3098
3099
927k
        if ((size_t) len > maxSize)
3100
0
            return(NULL);
3101
927k
        ptr += len;
3102
927k
        maxSize -= len;
3103
927k
        flags |= XML_SCAN_NMTOKEN;
3104
927k
    }
3105
3106
317k
    return(ptr);
3107
317k
}
3108
3109
static const xmlChar *
3110
114k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3111
114k
    const xmlChar *ret;
3112
114k
    int len = 0, l;
3113
114k
    int c;
3114
114k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3115
50.3k
                    XML_MAX_TEXT_LENGTH :
3116
114k
                    XML_MAX_NAME_LENGTH;
3117
114k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3118
3119
    /*
3120
     * Handler for more complex cases
3121
     */
3122
114k
    c = xmlCurrentChar(ctxt, &l);
3123
114k
    if (!xmlIsNameStartChar(c, old10))
3124
73.9k
        return(NULL);
3125
40.1k
    len += l;
3126
40.1k
    NEXTL(l);
3127
40.1k
    c = xmlCurrentChar(ctxt, &l);
3128
151k
    while (xmlIsNameChar(c, old10)) {
3129
110k
        if (len <= INT_MAX - l)
3130
110k
            len += l;
3131
110k
        NEXTL(l);
3132
110k
        c = xmlCurrentChar(ctxt, &l);
3133
110k
    }
3134
40.1k
    if (len > maxLength) {
3135
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3136
0
        return(NULL);
3137
0
    }
3138
40.1k
    if (ctxt->input->cur - ctxt->input->base < len) {
3139
        /*
3140
         * There were a couple of bugs where PERefs lead to to a change
3141
         * of the buffer. Check the buffer size to avoid passing an invalid
3142
         * pointer to xmlDictLookup.
3143
         */
3144
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3145
0
                    "unexpected change of input buffer");
3146
0
        return (NULL);
3147
0
    }
3148
40.1k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3149
213
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3150
39.9k
    else
3151
39.9k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3152
40.1k
    if (ret == NULL)
3153
1
        xmlErrMemory(ctxt);
3154
40.1k
    return(ret);
3155
40.1k
}
3156
3157
/**
3158
 * parse an XML name.
3159
 *
3160
 * @deprecated Internal function, don't use.
3161
 *
3162
 *     [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3163
 *                      CombiningChar | Extender
3164
 *
3165
 *     [5] Name ::= (Letter | '_' | ':') (NameChar)*
3166
 *
3167
 *     [6] Names ::= Name (#x20 Name)*
3168
 *
3169
 * @param ctxt  an XML parser context
3170
 * @returns the Name parsed or NULL
3171
 */
3172
3173
const xmlChar *
3174
1.02M
xmlParseName(xmlParserCtxt *ctxt) {
3175
1.02M
    const xmlChar *in;
3176
1.02M
    const xmlChar *ret;
3177
1.02M
    size_t count = 0;
3178
1.02M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3179
374k
                       XML_MAX_TEXT_LENGTH :
3180
1.02M
                       XML_MAX_NAME_LENGTH;
3181
3182
1.02M
    GROW;
3183
3184
    /*
3185
     * Accelerator for simple ASCII names
3186
     */
3187
1.02M
    in = ctxt->input->cur;
3188
1.02M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3189
1.02M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3190
1.02M
  (*in == '_') || (*in == ':')) {
3191
932k
  in++;
3192
3.54M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3193
3.54M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3194
3.54M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3195
3.54M
         (*in == '_') || (*in == '-') ||
3196
3.54M
         (*in == ':') || (*in == '.'))
3197
2.61M
      in++;
3198
932k
  if ((*in > 0) && (*in < 0x80)) {
3199
908k
      count = in - ctxt->input->cur;
3200
908k
            if (count > maxLength) {
3201
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3202
0
                return(NULL);
3203
0
            }
3204
908k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3205
908k
      ctxt->input->cur = in;
3206
908k
      ctxt->input->col += count;
3207
908k
      if (ret == NULL)
3208
1
          xmlErrMemory(ctxt);
3209
908k
      return(ret);
3210
908k
  }
3211
932k
    }
3212
    /* accelerator for special cases */
3213
114k
    return(xmlParseNameComplex(ctxt));
3214
1.02M
}
3215
3216
static xmlHashedString
3217
95.9k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3218
95.9k
    xmlHashedString ret;
3219
95.9k
    int len = 0, l;
3220
95.9k
    int c;
3221
95.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3222
10.8k
                    XML_MAX_TEXT_LENGTH :
3223
95.9k
                    XML_MAX_NAME_LENGTH;
3224
95.9k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3225
95.9k
    size_t startPosition = 0;
3226
3227
95.9k
    ret.name = NULL;
3228
95.9k
    ret.hashValue = 0;
3229
3230
    /*
3231
     * Handler for more complex cases
3232
     */
3233
95.9k
    startPosition = CUR_PTR - BASE_PTR;
3234
95.9k
    c = xmlCurrentChar(ctxt, &l);
3235
95.9k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3236
95.9k
  (!xmlIsNameStartChar(c, old10) || (c == ':'))) {
3237
74.3k
  return(ret);
3238
74.3k
    }
3239
3240
95.2k
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3241
95.2k
     (xmlIsNameChar(c, old10) && (c != ':'))) {
3242
73.6k
        if (len <= INT_MAX - l)
3243
73.6k
      len += l;
3244
73.6k
  NEXTL(l);
3245
73.6k
  c = xmlCurrentChar(ctxt, &l);
3246
73.6k
    }
3247
21.6k
    if (len > maxLength) {
3248
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3249
0
        return(ret);
3250
0
    }
3251
21.6k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3252
21.6k
    if (ret.name == NULL)
3253
1
        xmlErrMemory(ctxt);
3254
21.6k
    return(ret);
3255
21.6k
}
3256
3257
/**
3258
 * parse an XML name.
3259
 *
3260
 *     [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3261
 *                          CombiningChar | Extender
3262
 *
3263
 *     [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3264
 *
3265
 * @param ctxt  an XML parser context
3266
 * @returns the Name parsed or NULL
3267
 */
3268
3269
static xmlHashedString
3270
598k
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3271
598k
    const xmlChar *in, *e;
3272
598k
    xmlHashedString ret;
3273
598k
    size_t count = 0;
3274
598k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3275
110k
                       XML_MAX_TEXT_LENGTH :
3276
598k
                       XML_MAX_NAME_LENGTH;
3277
3278
598k
    ret.name = NULL;
3279
3280
    /*
3281
     * Accelerator for simple ASCII names
3282
     */
3283
598k
    in = ctxt->input->cur;
3284
598k
    e = ctxt->input->end;
3285
598k
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3286
598k
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3287
598k
   (*in == '_')) && (in < e)) {
3288
518k
  in++;
3289
1.81M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3290
1.81M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3291
1.81M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3292
1.81M
          (*in == '_') || (*in == '-') ||
3293
1.81M
          (*in == '.')) && (in < e))
3294
1.29M
      in++;
3295
518k
  if (in >= e)
3296
1.04k
      goto complex;
3297
516k
  if ((*in > 0) && (*in < 0x80)) {
3298
502k
      count = in - ctxt->input->cur;
3299
502k
            if (count > maxLength) {
3300
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3301
0
                return(ret);
3302
0
            }
3303
502k
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3304
502k
      ctxt->input->cur = in;
3305
502k
      ctxt->input->col += count;
3306
502k
      if (ret.name == NULL) {
3307
1
          xmlErrMemory(ctxt);
3308
1
      }
3309
502k
      return(ret);
3310
502k
  }
3311
516k
    }
3312
95.9k
complex:
3313
95.9k
    return(xmlParseNCNameComplex(ctxt));
3314
598k
}
3315
3316
/**
3317
 * parse an XML name and compares for match
3318
 * (specialized for endtag parsing)
3319
 *
3320
 * @param ctxt  an XML parser context
3321
 * @param other  the name to compare with
3322
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
3323
 * and the name for mismatch
3324
 */
3325
3326
static const xmlChar *
3327
158k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3328
158k
    register const xmlChar *cmp = other;
3329
158k
    register const xmlChar *in;
3330
158k
    const xmlChar *ret;
3331
3332
158k
    GROW;
3333
3334
158k
    in = ctxt->input->cur;
3335
750k
    while (*in != 0 && *in == *cmp) {
3336
592k
  ++in;
3337
592k
  ++cmp;
3338
592k
    }
3339
158k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3340
  /* success */
3341
141k
  ctxt->input->col += in - ctxt->input->cur;
3342
141k
  ctxt->input->cur = in;
3343
141k
  return (const xmlChar*) 1;
3344
141k
    }
3345
    /* failure (or end of input buffer), check with full function */
3346
17.2k
    ret = xmlParseName (ctxt);
3347
    /* strings coming from the dictionary direct compare possible */
3348
17.2k
    if (ret == other) {
3349
1.08k
  return (const xmlChar*) 1;
3350
1.08k
    }
3351
16.1k
    return ret;
3352
17.2k
}
3353
3354
/**
3355
 * Parse an XML name.
3356
 *
3357
 * @param ctxt  an XML parser context
3358
 * @param str  a pointer to the string pointer (IN/OUT)
3359
 * @returns the Name parsed or NULL. The `str` pointer
3360
 * is updated to the current location in the string.
3361
 */
3362
3363
static xmlChar *
3364
260k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3365
260k
    xmlChar *ret;
3366
260k
    const xmlChar *cur = *str;
3367
260k
    int flags = 0;
3368
260k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3369
103k
                    XML_MAX_TEXT_LENGTH :
3370
260k
                    XML_MAX_NAME_LENGTH;
3371
3372
260k
    if (ctxt->options & XML_PARSE_OLD10)
3373
88.6k
        flags |= XML_SCAN_OLD10;
3374
3375
260k
    cur = xmlScanName(*str, maxLength, flags);
3376
260k
    if (cur == NULL) {
3377
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3378
0
        return(NULL);
3379
0
    }
3380
260k
    if (cur == *str)
3381
1.79k
        return(NULL);
3382
3383
258k
    ret = xmlStrndup(*str, cur - *str);
3384
258k
    if (ret == NULL)
3385
24
        xmlErrMemory(ctxt);
3386
258k
    *str = cur;
3387
258k
    return(ret);
3388
260k
}
3389
3390
/**
3391
 * parse an XML Nmtoken.
3392
 *
3393
 * @deprecated Internal function, don't use.
3394
 *
3395
 *     [7] Nmtoken ::= (NameChar)+
3396
 *
3397
 *     [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3398
 *
3399
 * @param ctxt  an XML parser context
3400
 * @returns the Nmtoken parsed or NULL
3401
 */
3402
3403
xmlChar *
3404
44.6k
xmlParseNmtoken(xmlParserCtxt *ctxt) {
3405
44.6k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3406
44.6k
    xmlChar *ret;
3407
44.6k
    int len = 0, l;
3408
44.6k
    int c;
3409
44.6k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3410
6.82k
                    XML_MAX_TEXT_LENGTH :
3411
44.6k
                    XML_MAX_NAME_LENGTH;
3412
44.6k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3413
3414
44.6k
    c = xmlCurrentChar(ctxt, &l);
3415
3416
145k
    while (xmlIsNameChar(c, old10)) {
3417
101k
  COPY_BUF(buf, len, c);
3418
101k
  NEXTL(l);
3419
101k
  c = xmlCurrentChar(ctxt, &l);
3420
101k
  if (len >= XML_MAX_NAMELEN) {
3421
      /*
3422
       * Okay someone managed to make a huge token, so he's ready to pay
3423
       * for the processing speed.
3424
       */
3425
619
      xmlChar *buffer;
3426
619
      int max = len * 2;
3427
3428
619
      buffer = xmlMalloc(max);
3429
619
      if (buffer == NULL) {
3430
1
          xmlErrMemory(ctxt);
3431
1
    return(NULL);
3432
1
      }
3433
618
      memcpy(buffer, buf, len);
3434
13.7k
      while (xmlIsNameChar(c, old10)) {
3435
13.1k
    if (len + 10 > max) {
3436
308
        xmlChar *tmp;
3437
308
                    int newSize;
3438
3439
308
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3440
308
                    if (newSize < 0) {
3441
0
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3442
0
                        xmlFree(buffer);
3443
0
                        return(NULL);
3444
0
                    }
3445
308
        tmp = xmlRealloc(buffer, newSize);
3446
308
        if (tmp == NULL) {
3447
1
      xmlErrMemory(ctxt);
3448
1
      xmlFree(buffer);
3449
1
      return(NULL);
3450
1
        }
3451
307
        buffer = tmp;
3452
307
                    max = newSize;
3453
307
    }
3454
13.1k
    COPY_BUF(buffer, len, c);
3455
13.1k
    NEXTL(l);
3456
13.1k
    c = xmlCurrentChar(ctxt, &l);
3457
13.1k
      }
3458
617
      buffer[len] = 0;
3459
617
      return(buffer);
3460
618
  }
3461
101k
    }
3462
43.9k
    if (len == 0)
3463
21.0k
        return(NULL);
3464
22.9k
    if (len > maxLength) {
3465
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3466
0
        return(NULL);
3467
0
    }
3468
22.9k
    ret = xmlStrndup(buf, len);
3469
22.9k
    if (ret == NULL)
3470
3
        xmlErrMemory(ctxt);
3471
22.9k
    return(ret);
3472
22.9k
}
3473
3474
/**
3475
 * Validate an entity value and expand parameter entities.
3476
 *
3477
 * @param ctxt  parser context
3478
 * @param buf  string buffer
3479
 * @param str  entity value
3480
 * @param length  size of entity value
3481
 * @param depth  nesting depth
3482
 */
3483
static void
3484
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3485
18.9k
                          const xmlChar *str, int length, int depth) {
3486
18.9k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3487
18.9k
    const xmlChar *end, *chunk;
3488
18.9k
    int c, l;
3489
3490
18.9k
    if (str == NULL)
3491
0
        return;
3492
3493
18.9k
    depth += 1;
3494
18.9k
    if (depth > maxDepth) {
3495
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3496
0
                       "Maximum entity nesting depth exceeded");
3497
0
  return;
3498
0
    }
3499
3500
18.9k
    end = str + length;
3501
18.9k
    chunk = str;
3502
3503
1.02M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3504
1.01M
        c = *str;
3505
3506
1.01M
        if (c >= 0x80) {
3507
24.5k
            l = xmlUTF8MultibyteLen(ctxt, str,
3508
24.5k
                    "invalid character in entity value\n");
3509
24.5k
            if (l == 0) {
3510
23.9k
                if (chunk < str)
3511
3.98k
                    xmlSBufAddString(buf, chunk, str - chunk);
3512
23.9k
                xmlSBufAddReplChar(buf);
3513
23.9k
                str += 1;
3514
23.9k
                chunk = str;
3515
23.9k
            } else {
3516
581
                str += l;
3517
581
            }
3518
987k
        } else if (c == '&') {
3519
99.1k
            if (str[1] == '#') {
3520
11.6k
                if (chunk < str)
3521
6.80k
                    xmlSBufAddString(buf, chunk, str - chunk);
3522
3523
11.6k
                c = xmlParseStringCharRef(ctxt, &str);
3524
11.6k
                if (c == 0)
3525
1.83k
                    return;
3526
3527
9.77k
                xmlSBufAddChar(buf, c);
3528
3529
9.77k
                chunk = str;
3530
87.5k
            } else {
3531
87.5k
                xmlChar *name;
3532
3533
                /*
3534
                 * General entity references are checked for
3535
                 * syntactic validity.
3536
                 */
3537
87.5k
                str++;
3538
87.5k
                name = xmlParseStringName(ctxt, &str);
3539
3540
87.5k
                if ((name == NULL) || (*str++ != ';')) {
3541
519
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3542
519
                            "EntityValue: '&' forbidden except for entities "
3543
519
                            "references\n");
3544
519
                    xmlFree(name);
3545
519
                    return;
3546
519
                }
3547
3548
87.0k
                xmlFree(name);
3549
87.0k
            }
3550
887k
        } else if (c == '%') {
3551
2.38k
            xmlEntityPtr ent;
3552
3553
2.38k
            if (chunk < str)
3554
2.02k
                xmlSBufAddString(buf, chunk, str - chunk);
3555
3556
2.38k
            ent = xmlParseStringPEReference(ctxt, &str);
3557
2.38k
            if (ent == NULL)
3558
2.38k
                return;
3559
3560
0
            if (!PARSER_EXTERNAL(ctxt)) {
3561
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3562
0
                return;
3563
0
            }
3564
3565
0
            if (ent->content == NULL) {
3566
                /*
3567
                 * Note: external parsed entities will not be loaded,
3568
                 * it is not required for a non-validating parser to
3569
                 * complete external PEReferences coming from the
3570
                 * internal subset
3571
                 */
3572
0
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3573
0
                    ((ctxt->replaceEntities) ||
3574
0
                     (ctxt->validate))) {
3575
0
                    xmlLoadEntityContent(ctxt, ent);
3576
0
                } else {
3577
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3578
0
                                  "not validating will not read content for "
3579
0
                                  "PE entity %s\n", ent->name, NULL);
3580
0
                }
3581
0
            }
3582
3583
            /*
3584
             * TODO: Skip if ent->content is still NULL.
3585
             */
3586
3587
0
            if (xmlParserEntityCheck(ctxt, ent->length))
3588
0
                return;
3589
3590
0
            if (ent->flags & XML_ENT_EXPANDING) {
3591
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3592
0
                xmlHaltParser(ctxt);
3593
0
                return;
3594
0
            }
3595
3596
0
            ent->flags |= XML_ENT_EXPANDING;
3597
0
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3598
0
                                      depth);
3599
0
            ent->flags &= ~XML_ENT_EXPANDING;
3600
3601
0
            chunk = str;
3602
885k
        } else {
3603
            /* Normal ASCII char */
3604
885k
            if (!IS_BYTE_CHAR(c)) {
3605
11.0k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3606
11.0k
                        "invalid character in entity value\n");
3607
11.0k
                if (chunk < str)
3608
2.86k
                    xmlSBufAddString(buf, chunk, str - chunk);
3609
11.0k
                xmlSBufAddReplChar(buf);
3610
11.0k
                str += 1;
3611
11.0k
                chunk = str;
3612
874k
            } else {
3613
874k
                str += 1;
3614
874k
            }
3615
885k
        }
3616
1.01M
    }
3617
3618
14.2k
    if (chunk < str)
3619
10.8k
        xmlSBufAddString(buf, chunk, str - chunk);
3620
14.2k
}
3621
3622
/**
3623
 * parse a value for ENTITY declarations
3624
 *
3625
 * @deprecated Internal function, don't use.
3626
 *
3627
 *     [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3628
 *                         "'" ([^%&'] | PEReference | Reference)* "'"
3629
 *
3630
 * @param ctxt  an XML parser context
3631
 * @param orig  if non-NULL store a copy of the original entity value
3632
 * @returns the EntityValue parsed with reference substituted or NULL
3633
 */
3634
xmlChar *
3635
19.4k
xmlParseEntityValue(xmlParserCtxt *ctxt, xmlChar **orig) {
3636
19.4k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3637
3.47k
                         XML_MAX_HUGE_LENGTH :
3638
19.4k
                         XML_MAX_TEXT_LENGTH;
3639
19.4k
    xmlSBuf buf;
3640
19.4k
    const xmlChar *start;
3641
19.4k
    int quote, length;
3642
3643
19.4k
    xmlSBufInit(&buf, maxLength);
3644
3645
19.4k
    GROW;
3646
3647
19.4k
    quote = CUR;
3648
19.4k
    if ((quote != '"') && (quote != '\'')) {
3649
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3650
0
  return(NULL);
3651
0
    }
3652
19.4k
    CUR_PTR++;
3653
3654
19.4k
    length = 0;
3655
3656
    /*
3657
     * Copy raw content of the entity into a buffer
3658
     */
3659
1.88M
    while (1) {
3660
1.88M
        int c;
3661
3662
1.88M
        if (PARSER_STOPPED(ctxt))
3663
1
            goto error;
3664
3665
1.88M
        if (CUR_PTR >= ctxt->input->end) {
3666
546
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3667
546
            goto error;
3668
546
        }
3669
3670
1.88M
        c = CUR;
3671
3672
1.88M
        if (c == 0) {
3673
0
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3674
0
                    "invalid character in entity value\n");
3675
0
            goto error;
3676
0
        }
3677
1.88M
        if (c == quote)
3678
18.9k
            break;
3679
1.87M
        NEXTL(1);
3680
1.87M
        length += 1;
3681
3682
        /*
3683
         * TODO: Check growth threshold
3684
         */
3685
1.87M
        if (ctxt->input->end - CUR_PTR < 10)
3686
15.2k
            GROW;
3687
1.87M
    }
3688
3689
18.9k
    start = CUR_PTR - length;
3690
3691
18.9k
    if (orig != NULL) {
3692
18.9k
        *orig = xmlStrndup(start, length);
3693
18.9k
        if (*orig == NULL)
3694
4
            xmlErrMemory(ctxt);
3695
18.9k
    }
3696
3697
18.9k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3698
3699
18.9k
    NEXTL(1);
3700
3701
18.9k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3702
3703
547
error:
3704
547
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3705
547
    return(NULL);
3706
19.4k
}
3707
3708
/**
3709
 * Check an entity reference in an attribute value for validity
3710
 * without expanding it.
3711
 *
3712
 * @param ctxt  parser context
3713
 * @param pent  entity
3714
 * @param depth  nesting depth
3715
 */
3716
static void
3717
3.17k
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3718
3.17k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3719
3.17k
    const xmlChar *str;
3720
3.17k
    unsigned long expandedSize = pent->length;
3721
3.17k
    int c, flags;
3722
3723
3.17k
    depth += 1;
3724
3.17k
    if (depth > maxDepth) {
3725
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3726
0
                       "Maximum entity nesting depth exceeded");
3727
0
  return;
3728
0
    }
3729
3730
3.17k
    if (pent->flags & XML_ENT_EXPANDING) {
3731
598
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3732
598
        xmlHaltParser(ctxt);
3733
598
        return;
3734
598
    }
3735
3736
    /*
3737
     * If we're parsing a default attribute value in DTD content,
3738
     * the entity might reference other entities which weren't
3739
     * defined yet, so the check isn't reliable.
3740
     */
3741
2.58k
    if (ctxt->inSubset == 0)
3742
2.37k
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3743
202
    else
3744
202
        flags = XML_ENT_VALIDATED;
3745
3746
2.58k
    str = pent->content;
3747
2.58k
    if (str == NULL)
3748
0
        goto done;
3749
3750
    /*
3751
     * Note that entity values are already validated. We only check
3752
     * for illegal less-than signs and compute the expanded size
3753
     * of the entity. No special handling for multi-byte characters
3754
     * is needed.
3755
     */
3756
82.8k
    while (!PARSER_STOPPED(ctxt)) {
3757
82.2k
        c = *str;
3758
3759
82.2k
  if (c != '&') {
3760
64.2k
            if (c == 0)
3761
1.98k
                break;
3762
3763
62.3k
            if (c == '<')
3764
233
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3765
233
                        "'<' in entity '%s' is not allowed in attributes "
3766
233
                        "values\n", pent->name);
3767
3768
62.3k
            str += 1;
3769
62.3k
        } else if (str[1] == '#') {
3770
0
            int val;
3771
3772
0
      val = xmlParseStringCharRef(ctxt, &str);
3773
0
      if (val == 0) {
3774
0
                pent->content[0] = 0;
3775
0
                break;
3776
0
            }
3777
17.9k
  } else {
3778
17.9k
            xmlChar *name;
3779
17.9k
            xmlEntityPtr ent;
3780
3781
17.9k
      name = xmlParseStringEntityRef(ctxt, &str);
3782
17.9k
      if (name == NULL) {
3783
3
                pent->content[0] = 0;
3784
3
                break;
3785
3
            }
3786
3787
17.9k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3788
17.9k
            xmlFree(name);
3789
3790
17.9k
            if ((ent != NULL) &&
3791
17.9k
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
3792
598
                if ((ent->flags & flags) != flags) {
3793
598
                    pent->flags |= XML_ENT_EXPANDING;
3794
598
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
3795
598
                    pent->flags &= ~XML_ENT_EXPANDING;
3796
598
                }
3797
3798
598
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
3799
598
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
3800
598
            }
3801
17.9k
        }
3802
82.2k
    }
3803
3804
2.58k
done:
3805
2.58k
    if (ctxt->inSubset == 0)
3806
2.37k
        pent->expandedSize = expandedSize;
3807
3808
2.58k
    pent->flags |= flags;
3809
2.58k
}
3810
3811
/**
3812
 * Expand general entity references in an entity or attribute value.
3813
 * Perform attribute value normalization.
3814
 *
3815
 * @param ctxt  parser context
3816
 * @param buf  string buffer
3817
 * @param str  entity or attribute value
3818
 * @param pent  entity for entity value, NULL for attribute values
3819
 * @param normalize  whether to collapse whitespace
3820
 * @param inSpace  whitespace state
3821
 * @param depth  nesting depth
3822
 * @param check  whether to check for amplification
3823
 * @returns  whether there was a normalization change
3824
 */
3825
static int
3826
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3827
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
3828
23.2k
                          int *inSpace, int depth, int check) {
3829
23.2k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3830
23.2k
    int c, chunkSize;
3831
23.2k
    int normChange = 0;
3832
3833
23.2k
    if (str == NULL)
3834
0
        return(0);
3835
3836
23.2k
    depth += 1;
3837
23.2k
    if (depth > maxDepth) {
3838
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3839
0
                       "Maximum entity nesting depth exceeded");
3840
0
  return(0);
3841
0
    }
3842
3843
23.2k
    if (pent != NULL) {
3844
9.18k
        if (pent->flags & XML_ENT_EXPANDING) {
3845
286
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3846
286
            xmlHaltParser(ctxt);
3847
286
            return(0);
3848
286
        }
3849
3850
8.90k
        if (check) {
3851
8.02k
            if (xmlParserEntityCheck(ctxt, pent->length))
3852
0
                return(0);
3853
8.02k
        }
3854
8.90k
    }
3855
3856
22.9k
    chunkSize = 0;
3857
3858
    /*
3859
     * Note that entity values are already validated. No special
3860
     * handling for multi-byte characters is needed.
3861
     */
3862
1.49M
    while (!PARSER_STOPPED(ctxt)) {
3863
1.49M
        c = *str;
3864
3865
1.49M
  if (c != '&') {
3866
1.32M
            if (c == 0)
3867
22.4k
                break;
3868
3869
            /*
3870
             * If this function is called without an entity, it is used to
3871
             * expand entities in an attribute content where less-than was
3872
             * already unescaped and is allowed.
3873
             */
3874
1.30M
            if ((pent != NULL) && (c == '<')) {
3875
205
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3876
205
                        "'<' in entity '%s' is not allowed in attributes "
3877
205
                        "values\n", pent->name);
3878
205
                break;
3879
205
            }
3880
3881
1.30M
            if (c <= 0x20) {
3882
67.2k
                if ((normalize) && (*inSpace)) {
3883
                    /* Skip char */
3884
0
                    if (chunkSize > 0) {
3885
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3886
0
                        chunkSize = 0;
3887
0
                    }
3888
0
                    normChange = 1;
3889
67.2k
                } else if (c < 0x20) {
3890
7.62k
                    if (chunkSize > 0) {
3891
6.30k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3892
6.30k
                        chunkSize = 0;
3893
6.30k
                    }
3894
3895
7.62k
                    xmlSBufAddCString(buf, " ", 1);
3896
59.6k
                } else {
3897
59.6k
                    chunkSize += 1;
3898
59.6k
                }
3899
3900
67.2k
                *inSpace = 1;
3901
1.23M
            } else {
3902
1.23M
                chunkSize += 1;
3903
1.23M
                *inSpace = 0;
3904
1.23M
            }
3905
3906
1.30M
            str += 1;
3907
1.30M
        } else if (str[1] == '#') {
3908
18.1k
            int val;
3909
3910
18.1k
            if (chunkSize > 0) {
3911
17.4k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3912
17.4k
                chunkSize = 0;
3913
17.4k
            }
3914
3915
18.1k
      val = xmlParseStringCharRef(ctxt, &str);
3916
18.1k
      if (val == 0) {
3917
0
                if (pent != NULL)
3918
0
                    pent->content[0] = 0;
3919
0
                break;
3920
0
            }
3921
3922
18.1k
            if (val == ' ') {
3923
0
                if ((normalize) && (*inSpace))
3924
0
                    normChange = 1;
3925
0
                else
3926
0
                    xmlSBufAddCString(buf, " ", 1);
3927
0
                *inSpace = 1;
3928
18.1k
            } else {
3929
18.1k
                xmlSBufAddChar(buf, val);
3930
18.1k
                *inSpace = 0;
3931
18.1k
            }
3932
152k
  } else {
3933
152k
            xmlChar *name;
3934
152k
            xmlEntityPtr ent;
3935
3936
152k
            if (chunkSize > 0) {
3937
103k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3938
103k
                chunkSize = 0;
3939
103k
            }
3940
3941
152k
      name = xmlParseStringEntityRef(ctxt, &str);
3942
152k
            if (name == NULL) {
3943
9
                if (pent != NULL)
3944
8
                    pent->content[0] = 0;
3945
9
                break;
3946
9
            }
3947
3948
152k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3949
152k
            xmlFree(name);
3950
3951
152k
      if ((ent != NULL) &&
3952
152k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3953
151k
    if (ent->content == NULL) {
3954
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
3955
0
          "predefined entity has no content\n");
3956
0
                    break;
3957
0
                }
3958
3959
151k
                xmlSBufAddString(buf, ent->content, ent->length);
3960
3961
151k
                *inSpace = 0;
3962
151k
      } else if ((ent != NULL) && (ent->content != NULL)) {
3963
1.16k
                if (pent != NULL)
3964
286
                    pent->flags |= XML_ENT_EXPANDING;
3965
1.16k
    normChange |= xmlExpandEntityInAttValue(ctxt, buf,
3966
1.16k
                        ent->content, ent, normalize, inSpace, depth, check);
3967
1.16k
                if (pent != NULL)
3968
286
                    pent->flags &= ~XML_ENT_EXPANDING;
3969
1.16k
      }
3970
152k
        }
3971
1.49M
    }
3972
3973
22.9k
    if (chunkSize > 0)
3974
18.0k
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3975
3976
22.9k
    return(normChange);
3977
23.2k
}
3978
3979
/**
3980
 * Expand general entity references in an entity or attribute value.
3981
 * Perform attribute value normalization.
3982
 *
3983
 * @param ctxt  parser context
3984
 * @param str  entity or attribute value
3985
 * @param normalize  whether to collapse whitespace
3986
 * @returns the expanded attribute value.
3987
 */
3988
xmlChar *
3989
xmlExpandEntitiesInAttValue(xmlParserCtxt *ctxt, const xmlChar *str,
3990
14.0k
                            int normalize) {
3991
14.0k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3992
4.14k
                         XML_MAX_HUGE_LENGTH :
3993
14.0k
                         XML_MAX_TEXT_LENGTH;
3994
14.0k
    xmlSBuf buf;
3995
14.0k
    int inSpace = 1;
3996
3997
14.0k
    xmlSBufInit(&buf, maxLength);
3998
3999
14.0k
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4000
14.0k
                              ctxt->inputNr, /* check */ 0);
4001
4002
14.0k
    if ((normalize) && (inSpace) && (buf.size > 0))
4003
0
        buf.size--;
4004
4005
14.0k
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4006
14.0k
}
4007
4008
/**
4009
 * parse a value for an attribute.
4010
 *
4011
 * NOTE: if no normalization is needed, the routine will return pointers
4012
 * directly from the data buffer.
4013
 *
4014
 * 3.3.3 Attribute-Value Normalization:
4015
 *
4016
 * Before the value of an attribute is passed to the application or
4017
 * checked for validity, the XML processor must normalize it as follows:
4018
 *
4019
 * - a character reference is processed by appending the referenced
4020
 *   character to the attribute value
4021
 * - an entity reference is processed by recursively processing the
4022
 *   replacement text of the entity
4023
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4024
 *   appending \#x20 to the normalized value, except that only a single
4025
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4026
 *   parsed entity or the literal entity value of an internal parsed entity
4027
 * - other characters are processed by appending them to the normalized value
4028
 *
4029
 * If the declared value is not CDATA, then the XML processor must further
4030
 * process the normalized attribute value by discarding any leading and
4031
 * trailing space (\#x20) characters, and by replacing sequences of space
4032
 * (\#x20) characters by a single space (\#x20) character.
4033
 * All attributes for which no declaration has been read should be treated
4034
 * by a non-validating parser as if declared CDATA.
4035
 *
4036
 * @param ctxt  an XML parser context
4037
 * @param attlen  attribute len result
4038
 * @param outFlags  resulting XML_ATTVAL_* flags
4039
 * @param special  value from attsSpecial
4040
 * @param isNamespace  whether this is a namespace declaration
4041
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4042
 *     caller if it was copied, this can be detected by val[*len] == 0.
4043
 */
4044
static xmlChar *
4045
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *outFlags,
4046
260k
                         int special, int isNamespace) {
4047
260k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4048
92.9k
                         XML_MAX_HUGE_LENGTH :
4049
260k
                         XML_MAX_TEXT_LENGTH;
4050
260k
    xmlSBuf buf;
4051
260k
    xmlChar *ret;
4052
260k
    int c, l, quote, entFlags, chunkSize;
4053
260k
    int inSpace = 1;
4054
260k
    int replaceEntities;
4055
260k
    int normalize = (special & XML_SPECIAL_TYPE_MASK) != 0;
4056
260k
    int attvalFlags = 0;
4057
4058
    /* Always expand namespace URIs */
4059
260k
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4060
4061
260k
    xmlSBufInit(&buf, maxLength);
4062
4063
260k
    GROW;
4064
4065
260k
    quote = CUR;
4066
260k
    if ((quote != '"') && (quote != '\'')) {
4067
5.80k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4068
5.80k
  return(NULL);
4069
5.80k
    }
4070
254k
    NEXTL(1);
4071
4072
254k
    if (ctxt->inSubset == 0)
4073
218k
        entFlags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4074
36.2k
    else
4075
36.2k
        entFlags = XML_ENT_VALIDATED;
4076
4077
254k
    inSpace = 1;
4078
254k
    chunkSize = 0;
4079
4080
3.68M
    while (1) {
4081
3.68M
        if (PARSER_STOPPED(ctxt))
4082
930
            goto error;
4083
4084
3.68M
        if (CUR_PTR >= ctxt->input->end) {
4085
1.38k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4086
1.38k
                           "AttValue: ' expected\n");
4087
1.38k
            goto error;
4088
1.38k
        }
4089
4090
        /*
4091
         * TODO: Check growth threshold
4092
         */
4093
3.68M
        if (ctxt->input->end - CUR_PTR < 10)
4094
64.9k
            GROW;
4095
4096
3.68M
        c = CUR;
4097
4098
3.68M
        if (c >= 0x80) {
4099
262k
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4100
262k
                    "invalid character in attribute value\n");
4101
262k
            if (l == 0) {
4102
248k
                if (chunkSize > 0) {
4103
30.7k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4104
30.7k
                    chunkSize = 0;
4105
30.7k
                }
4106
248k
                xmlSBufAddReplChar(&buf);
4107
248k
                NEXTL(1);
4108
248k
            } else {
4109
14.3k
                chunkSize += l;
4110
14.3k
                NEXTL(l);
4111
14.3k
            }
4112
4113
262k
            inSpace = 0;
4114
3.42M
        } else if (c != '&') {
4115
3.02M
            if (c > 0x20) {
4116
2.84M
                if (c == quote)
4117
251k
                    break;
4118
4119
2.58M
                if (c == '<')
4120
16.1k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4121
4122
2.58M
                chunkSize += 1;
4123
2.58M
                inSpace = 0;
4124
2.58M
            } else if (!IS_BYTE_CHAR(c)) {
4125
50.3k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4126
50.3k
                        "invalid character in attribute value\n");
4127
50.3k
                if (chunkSize > 0) {
4128
4.75k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4129
4.75k
                    chunkSize = 0;
4130
4.75k
                }
4131
50.3k
                xmlSBufAddReplChar(&buf);
4132
50.3k
                inSpace = 0;
4133
129k
            } else {
4134
                /* Whitespace */
4135
129k
                if ((normalize) && (inSpace)) {
4136
                    /* Skip char */
4137
3.44k
                    if (chunkSize > 0) {
4138
628
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4139
628
                        chunkSize = 0;
4140
628
                    }
4141
3.44k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4142
126k
                } else if (c < 0x20) {
4143
                    /* Convert to space */
4144
16.2k
                    if (chunkSize > 0) {
4145
10.9k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4146
10.9k
                        chunkSize = 0;
4147
10.9k
                    }
4148
4149
16.2k
                    xmlSBufAddCString(&buf, " ", 1);
4150
110k
                } else {
4151
110k
                    chunkSize += 1;
4152
110k
                }
4153
4154
129k
                inSpace = 1;
4155
4156
129k
                if ((c == 0xD) && (NXT(1) == 0xA))
4157
233
                    CUR_PTR++;
4158
129k
            }
4159
4160
2.76M
            NEXTL(1);
4161
2.76M
        } else if (NXT(1) == '#') {
4162
89.4k
            int val;
4163
4164
89.4k
            if (chunkSize > 0) {
4165
55.1k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4166
55.1k
                chunkSize = 0;
4167
55.1k
            }
4168
4169
89.4k
            val = xmlParseCharRef(ctxt);
4170
89.4k
            if (val == 0)
4171
505
                goto error;
4172
4173
88.9k
            if ((val == '&') && (!replaceEntities)) {
4174
                /*
4175
                 * The reparsing will be done in xmlNodeParseContent()
4176
                 * called from SAX2.c
4177
                 */
4178
5.41k
                xmlSBufAddCString(&buf, "&#38;", 5);
4179
5.41k
                inSpace = 0;
4180
83.5k
            } else if (val == ' ') {
4181
1.04k
                if ((normalize) && (inSpace))
4182
0
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4183
1.04k
                else
4184
1.04k
                    xmlSBufAddCString(&buf, " ", 1);
4185
1.04k
                inSpace = 1;
4186
82.5k
            } else {
4187
82.5k
                xmlSBufAddChar(&buf, val);
4188
82.5k
                inSpace = 0;
4189
82.5k
            }
4190
309k
        } else {
4191
309k
            const xmlChar *name;
4192
309k
            xmlEntityPtr ent;
4193
4194
309k
            if (chunkSize > 0) {
4195
177k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4196
177k
                chunkSize = 0;
4197
177k
            }
4198
4199
309k
            name = xmlParseEntityRefInternal(ctxt);
4200
309k
            if (name == NULL) {
4201
                /*
4202
                 * Probably a literal '&' which wasn't escaped.
4203
                 * TODO: Handle gracefully in recovery mode.
4204
                 */
4205
4.78k
                continue;
4206
4.78k
            }
4207
4208
304k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4209
304k
            if (ent == NULL)
4210
297
                continue;
4211
4212
304k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4213
281k
                if ((ent->content[0] == '&') && (!replaceEntities))
4214
48.7k
                    xmlSBufAddCString(&buf, "&#38;", 5);
4215
232k
                else
4216
232k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4217
281k
                inSpace = 0;
4218
281k
            } else if (replaceEntities) {
4219
8.02k
                if (xmlExpandEntityInAttValue(ctxt, &buf,
4220
8.02k
                        ent->content, ent, normalize, &inSpace, ctxt->inputNr,
4221
8.02k
                        /* check */ 1) > 0)
4222
0
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4223
14.6k
            } else {
4224
14.6k
                if ((ent->flags & entFlags) != entFlags)
4225
2.58k
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4226
4227
14.6k
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4228
0
                    ent->content[0] = 0;
4229
0
                    goto error;
4230
0
                }
4231
4232
                /*
4233
                 * Just output the reference
4234
                 */
4235
14.6k
                xmlSBufAddCString(&buf, "&", 1);
4236
14.6k
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4237
14.6k
                xmlSBufAddCString(&buf, ";", 1);
4238
4239
14.6k
                inSpace = 0;
4240
14.6k
            }
4241
304k
  }
4242
3.68M
    }
4243
4244
251k
    if ((buf.mem == NULL) && (outFlags != NULL)) {
4245
108k
        ret = (xmlChar *) CUR_PTR - chunkSize;
4246
4247
108k
        if (attlen != NULL)
4248
108k
            *attlen = chunkSize;
4249
108k
        if ((normalize) && (inSpace) && (chunkSize > 0)) {
4250
232
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4251
232
            *attlen -= 1;
4252
232
        }
4253
4254
        /* Report potential error */
4255
108k
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4256
143k
    } else {
4257
143k
        if (chunkSize > 0)
4258
86.7k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4259
4260
143k
        if ((normalize) && (inSpace) && (buf.size > 0)) {
4261
280
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4262
280
            buf.size--;
4263
280
        }
4264
4265
143k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4266
143k
        attvalFlags |= XML_ATTVAL_ALLOC;
4267
4268
143k
        if (ret != NULL) {
4269
143k
            if (attlen != NULL)
4270
42.3k
                *attlen = buf.size;
4271
143k
        }
4272
143k
    }
4273
4274
251k
    if (outFlags != NULL)
4275
150k
        *outFlags = attvalFlags;
4276
4277
251k
    NEXTL(1);
4278
4279
251k
    return(ret);
4280
4281
2.82k
error:
4282
2.82k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4283
2.82k
    return(NULL);
4284
254k
}
4285
4286
/**
4287
 * parse a value for an attribute
4288
 * Note: the parser won't do substitution of entities here, this
4289
 * will be handled later in #xmlStringGetNodeList
4290
 *
4291
 * @deprecated Internal function, don't use.
4292
 *
4293
 *     [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4294
 *                       "'" ([^<&'] | Reference)* "'"
4295
 *
4296
 * 3.3.3 Attribute-Value Normalization:
4297
 *
4298
 * Before the value of an attribute is passed to the application or
4299
 * checked for validity, the XML processor must normalize it as follows:
4300
 *
4301
 * - a character reference is processed by appending the referenced
4302
 *   character to the attribute value
4303
 * - an entity reference is processed by recursively processing the
4304
 *   replacement text of the entity
4305
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4306
 *   appending \#x20 to the normalized value, except that only a single
4307
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4308
 *   parsed entity or the literal entity value of an internal parsed entity
4309
 * - other characters are processed by appending them to the normalized value
4310
 *
4311
 * If the declared value is not CDATA, then the XML processor must further
4312
 * process the normalized attribute value by discarding any leading and
4313
 * trailing space (\#x20) characters, and by replacing sequences of space
4314
 * (\#x20) characters by a single space (\#x20) character.
4315
 * All attributes for which no declaration has been read should be treated
4316
 * by a non-validating parser as if declared CDATA.
4317
 *
4318
 * @param ctxt  an XML parser context
4319
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4320
 * caller.
4321
 */
4322
xmlChar *
4323
106k
xmlParseAttValue(xmlParserCtxt *ctxt) {
4324
106k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4325
106k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4326
106k
}
4327
4328
/**
4329
 * parse an XML Literal
4330
 *
4331
 * @deprecated Internal function, don't use.
4332
 *
4333
 *     [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4334
 *
4335
 * @param ctxt  an XML parser context
4336
 * @returns the SystemLiteral parsed or NULL
4337
 */
4338
4339
xmlChar *
4340
114k
xmlParseSystemLiteral(xmlParserCtxt *ctxt) {
4341
114k
    xmlChar *buf = NULL;
4342
114k
    int len = 0;
4343
114k
    int size = XML_PARSER_BUFFER_SIZE;
4344
114k
    int cur, l;
4345
114k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4346
28.8k
                    XML_MAX_TEXT_LENGTH :
4347
114k
                    XML_MAX_NAME_LENGTH;
4348
114k
    xmlChar stop;
4349
4350
114k
    if (RAW == '"') {
4351
109k
        NEXT;
4352
109k
  stop = '"';
4353
109k
    } else if (RAW == '\'') {
4354
3.35k
        NEXT;
4355
3.35k
  stop = '\'';
4356
3.35k
    } else {
4357
2.07k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4358
2.07k
  return(NULL);
4359
2.07k
    }
4360
4361
112k
    buf = xmlMalloc(size);
4362
112k
    if (buf == NULL) {
4363
13
        xmlErrMemory(ctxt);
4364
13
  return(NULL);
4365
13
    }
4366
112k
    cur = xmlCurrentCharRecover(ctxt, &l);
4367
1.12M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4368
1.00M
  if (len + 5 >= size) {
4369
1.53k
      xmlChar *tmp;
4370
1.53k
            int newSize;
4371
4372
1.53k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4373
1.53k
            if (newSize < 0) {
4374
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4375
0
                xmlFree(buf);
4376
0
                return(NULL);
4377
0
            }
4378
1.53k
      tmp = xmlRealloc(buf, newSize);
4379
1.53k
      if (tmp == NULL) {
4380
1
          xmlFree(buf);
4381
1
    xmlErrMemory(ctxt);
4382
1
    return(NULL);
4383
1
      }
4384
1.52k
      buf = tmp;
4385
1.52k
            size = newSize;
4386
1.52k
  }
4387
1.00M
  COPY_BUF(buf, len, cur);
4388
1.00M
  NEXTL(l);
4389
1.00M
  cur = xmlCurrentCharRecover(ctxt, &l);
4390
1.00M
    }
4391
112k
    buf[len] = 0;
4392
112k
    if (!IS_CHAR(cur)) {
4393
890
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4394
111k
    } else {
4395
111k
  NEXT;
4396
111k
    }
4397
112k
    return(buf);
4398
112k
}
4399
4400
/**
4401
 * parse an XML public literal
4402
 *
4403
 * @deprecated Internal function, don't use.
4404
 *
4405
 *     [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4406
 *
4407
 * @param ctxt  an XML parser context
4408
 * @returns the PubidLiteral parsed or NULL.
4409
 */
4410
4411
xmlChar *
4412
64.0k
xmlParsePubidLiteral(xmlParserCtxt *ctxt) {
4413
64.0k
    xmlChar *buf = NULL;
4414
64.0k
    int len = 0;
4415
64.0k
    int size = XML_PARSER_BUFFER_SIZE;
4416
64.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4417
16.3k
                    XML_MAX_TEXT_LENGTH :
4418
64.0k
                    XML_MAX_NAME_LENGTH;
4419
64.0k
    xmlChar cur;
4420
64.0k
    xmlChar stop;
4421
4422
64.0k
    if (RAW == '"') {
4423
62.9k
        NEXT;
4424
62.9k
  stop = '"';
4425
62.9k
    } else if (RAW == '\'') {
4426
868
        NEXT;
4427
868
  stop = '\'';
4428
868
    } else {
4429
259
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4430
259
  return(NULL);
4431
259
    }
4432
63.7k
    buf = xmlMalloc(size);
4433
63.7k
    if (buf == NULL) {
4434
7
  xmlErrMemory(ctxt);
4435
7
  return(NULL);
4436
7
    }
4437
63.7k
    cur = CUR;
4438
444k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4439
444k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4440
380k
  if (len + 1 >= size) {
4441
239
      xmlChar *tmp;
4442
239
            int newSize;
4443
4444
239
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4445
239
            if (newSize < 0) {
4446
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4447
0
                xmlFree(buf);
4448
0
                return(NULL);
4449
0
            }
4450
239
      tmp = xmlRealloc(buf, newSize);
4451
239
      if (tmp == NULL) {
4452
1
    xmlErrMemory(ctxt);
4453
1
    xmlFree(buf);
4454
1
    return(NULL);
4455
1
      }
4456
238
      buf = tmp;
4457
238
            size = newSize;
4458
238
  }
4459
380k
  buf[len++] = cur;
4460
380k
  NEXT;
4461
380k
  cur = CUR;
4462
380k
    }
4463
63.7k
    buf[len] = 0;
4464
63.7k
    if (cur != stop) {
4465
1.68k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4466
62.0k
    } else {
4467
62.0k
  NEXTL(1);
4468
62.0k
    }
4469
63.7k
    return(buf);
4470
63.7k
}
4471
4472
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4473
4474
/*
4475
 * used for the test in the inner loop of the char data testing
4476
 */
4477
static const unsigned char test_char_data[256] = {
4478
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4479
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4480
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4481
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4482
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4483
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4484
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4485
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4486
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4487
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4488
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4489
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4490
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4491
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4492
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4493
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4494
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4495
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4496
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4497
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4498
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4499
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4500
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4501
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4502
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4503
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4504
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4505
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4506
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4507
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4508
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4509
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4510
};
4511
4512
static void
4513
xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size,
4514
582k
              int isBlank) {
4515
582k
    int checkBlanks;
4516
4517
582k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
4518
296k
        return;
4519
4520
285k
    checkBlanks = (!ctxt->keepBlanks) ||
4521
285k
                  (ctxt->sax->ignorableWhitespace != ctxt->sax->characters);
4522
4523
    /*
4524
     * Calling areBlanks with only parts of a text node
4525
     * is fundamentally broken, making the NOBLANKS option
4526
     * essentially unusable.
4527
     */
4528
285k
    if ((checkBlanks) &&
4529
285k
        (areBlanks(ctxt, buf, size, isBlank))) {
4530
19.5k
        if ((ctxt->sax->ignorableWhitespace != NULL) &&
4531
19.5k
            (ctxt->keepBlanks))
4532
0
            ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
4533
266k
    } else {
4534
266k
        if (ctxt->sax->characters != NULL)
4535
266k
            ctxt->sax->characters(ctxt->userData, buf, size);
4536
4537
        /*
4538
         * The old code used to update this value for "complex" data
4539
         * even if checkBlanks was false. This was probably a bug.
4540
         */
4541
266k
        if ((checkBlanks) && (*ctxt->space == -1))
4542
40.9k
            *ctxt->space = -2;
4543
266k
    }
4544
285k
}
4545
4546
/**
4547
 * Parse character data. Always makes progress if the first char isn't
4548
 * '<' or '&'.
4549
 *
4550
 * The right angle bracket (>) may be represented using the string "&gt;",
4551
 * and must, for compatibility, be escaped using "&gt;" or a character
4552
 * reference when it appears in the string "]]>" in content, when that
4553
 * string is not marking the end of a CDATA section.
4554
 *
4555
 *     [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4556
 * @param ctxt  an XML parser context
4557
 * @param partial  buffer may contain partial UTF-8 sequences
4558
 */
4559
static void
4560
1.81M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4561
1.81M
    const xmlChar *in;
4562
1.81M
    int line = ctxt->input->line;
4563
1.81M
    int col = ctxt->input->col;
4564
1.81M
    int ccol;
4565
1.81M
    int terminate = 0;
4566
4567
1.81M
    GROW;
4568
    /*
4569
     * Accelerated common case where input don't need to be
4570
     * modified before passing it to the handler.
4571
     */
4572
1.81M
    in = ctxt->input->cur;
4573
1.81M
    do {
4574
1.92M
get_more_space:
4575
2.21M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4576
1.92M
        if (*in == 0xA) {
4577
115k
            do {
4578
115k
                ctxt->input->line++; ctxt->input->col = 1;
4579
115k
                in++;
4580
115k
            } while (*in == 0xA);
4581
111k
            goto get_more_space;
4582
111k
        }
4583
1.81M
        if (*in == '<') {
4584
155k
            while (in > ctxt->input->cur) {
4585
77.7k
                const xmlChar *tmp = ctxt->input->cur;
4586
77.7k
                size_t nbchar = in - tmp;
4587
4588
77.7k
                if (nbchar > XML_MAX_ITEMS)
4589
0
                    nbchar = XML_MAX_ITEMS;
4590
77.7k
                ctxt->input->cur += nbchar;
4591
4592
77.7k
                xmlCharacters(ctxt, tmp, nbchar, 1);
4593
77.7k
            }
4594
77.7k
            return;
4595
77.7k
        }
4596
4597
1.79M
get_more:
4598
1.79M
        ccol = ctxt->input->col;
4599
4.03M
        while (test_char_data[*in]) {
4600
2.23M
            in++;
4601
2.23M
            ccol++;
4602
2.23M
        }
4603
1.79M
        ctxt->input->col = ccol;
4604
1.79M
        if (*in == 0xA) {
4605
58.1k
            do {
4606
58.1k
                ctxt->input->line++; ctxt->input->col = 1;
4607
58.1k
                in++;
4608
58.1k
            } while (*in == 0xA);
4609
55.8k
            goto get_more;
4610
55.8k
        }
4611
1.74M
        if (*in == ']') {
4612
7.19k
            size_t avail = ctxt->input->end - in;
4613
4614
7.19k
            if (partial && avail < 2) {
4615
0
                terminate = 1;
4616
0
                goto invoke_callback;
4617
0
            }
4618
7.19k
            if (in[1] == ']') {
4619
821
                if (partial && avail < 3) {
4620
0
                    terminate = 1;
4621
0
                    goto invoke_callback;
4622
0
                }
4623
821
                if (in[2] == '>')
4624
243
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4625
821
            }
4626
4627
7.19k
            in++;
4628
7.19k
            ctxt->input->col++;
4629
7.19k
            goto get_more;
4630
7.19k
        }
4631
4632
1.73M
invoke_callback:
4633
2.09M
        while (in > ctxt->input->cur) {
4634
354k
            const xmlChar *tmp = ctxt->input->cur;
4635
354k
            size_t nbchar = in - tmp;
4636
4637
354k
            if (nbchar > XML_MAX_ITEMS)
4638
0
                nbchar = XML_MAX_ITEMS;
4639
354k
            ctxt->input->cur += nbchar;
4640
4641
354k
            xmlCharacters(ctxt, tmp, nbchar, 0);
4642
4643
354k
            line = ctxt->input->line;
4644
354k
            col = ctxt->input->col;
4645
354k
        }
4646
1.73M
        ctxt->input->cur = in;
4647
1.73M
        if (*in == 0xD) {
4648
3.89k
            in++;
4649
3.89k
            if (*in == 0xA) {
4650
479
                ctxt->input->cur = in;
4651
479
                in++;
4652
479
                ctxt->input->line++; ctxt->input->col = 1;
4653
479
                continue; /* while */
4654
479
            }
4655
3.41k
            in--;
4656
3.41k
        }
4657
1.73M
        if (*in == '<') {
4658
157k
            return;
4659
157k
        }
4660
1.57M
        if (*in == '&') {
4661
94.4k
            return;
4662
94.4k
        }
4663
1.48M
        if (terminate) {
4664
0
            return;
4665
0
        }
4666
1.48M
        SHRINK;
4667
1.48M
        GROW;
4668
1.48M
        in = ctxt->input->cur;
4669
1.48M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4670
1.48M
             (*in == 0x09) || (*in == 0x0a));
4671
1.48M
    ctxt->input->line = line;
4672
1.48M
    ctxt->input->col = col;
4673
1.48M
    xmlParseCharDataComplex(ctxt, partial);
4674
1.48M
}
4675
4676
/**
4677
 * Always makes progress if the first char isn't '<' or '&'.
4678
 *
4679
 * parse a CharData section.this is the fallback function
4680
 * of #xmlParseCharData when the parsing requires handling
4681
 * of non-ASCII characters.
4682
 *
4683
 * @param ctxt  an XML parser context
4684
 * @param partial  whether the input can end with truncated UTF-8
4685
 */
4686
static void
4687
1.48M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4688
1.48M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4689
1.48M
    int nbchar = 0;
4690
1.48M
    int cur, l;
4691
4692
1.48M
    cur = xmlCurrentCharRecover(ctxt, &l);
4693
1.95M
    while ((cur != '<') && /* checked */
4694
1.95M
           (cur != '&') &&
4695
1.95M
     (IS_CHAR(cur))) {
4696
470k
        if (cur == ']') {
4697
1.96k
            size_t avail = ctxt->input->end - ctxt->input->cur;
4698
4699
1.96k
            if (partial && avail < 2)
4700
0
                break;
4701
1.96k
            if (NXT(1) == ']') {
4702
729
                if (partial && avail < 3)
4703
0
                    break;
4704
729
                if (NXT(2) == '>')
4705
432
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4706
729
            }
4707
1.96k
        }
4708
4709
470k
  COPY_BUF(buf, nbchar, cur);
4710
  /* move current position before possible calling of ctxt->sax->characters */
4711
470k
  NEXTL(l);
4712
470k
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4713
200
      buf[nbchar] = 0;
4714
4715
200
            xmlCharacters(ctxt, buf, nbchar, 0);
4716
200
      nbchar = 0;
4717
200
            SHRINK;
4718
200
  }
4719
470k
  cur = xmlCurrentCharRecover(ctxt, &l);
4720
470k
    }
4721
1.48M
    if (nbchar != 0) {
4722
149k
        buf[nbchar] = 0;
4723
4724
149k
        xmlCharacters(ctxt, buf, nbchar, 0);
4725
149k
    }
4726
    /*
4727
     * cur == 0 can mean
4728
     *
4729
     * - End of buffer.
4730
     * - An actual 0 character.
4731
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4732
     */
4733
1.48M
    if (ctxt->input->cur < ctxt->input->end) {
4734
1.47M
        if ((cur == 0) && (CUR != 0)) {
4735
746
            if (partial == 0) {
4736
746
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4737
746
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4738
746
                NEXTL(1);
4739
746
            }
4740
1.47M
        } else if ((cur != '<') && (cur != '&') && (cur != ']')) {
4741
            /* Generate the error and skip the offending character */
4742
1.45M
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4743
1.45M
                              "PCDATA invalid Char value %d\n", cur);
4744
1.45M
            NEXTL(l);
4745
1.45M
        }
4746
1.47M
    }
4747
1.48M
}
4748
4749
/**
4750
 * @deprecated Internal function, don't use.
4751
 * @param ctxt  an XML parser context
4752
 * @param cdata  unused
4753
 */
4754
void
4755
0
xmlParseCharData(xmlParserCtxt *ctxt, ATTRIBUTE_UNUSED int cdata) {
4756
0
    xmlParseCharDataInternal(ctxt, 0);
4757
0
}
4758
4759
/**
4760
 * Parse an External ID or a Public ID
4761
 *
4762
 * @deprecated Internal function, don't use.
4763
 *
4764
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4765
 * `'PUBLIC' S PubidLiteral S SystemLiteral`
4766
 *
4767
 *     [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4768
 *                       | 'PUBLIC' S PubidLiteral S SystemLiteral
4769
 *
4770
 *     [83] PublicID ::= 'PUBLIC' S PubidLiteral
4771
 *
4772
 * @param ctxt  an XML parser context
4773
 * @param publicId  a xmlChar** receiving PubidLiteral
4774
 * @param strict  indicate whether we should restrict parsing to only
4775
 *          production [75], see NOTE below
4776
 * @returns the function returns SystemLiteral and in the second
4777
 *                case publicID receives PubidLiteral, is strict is off
4778
 *                it is possible to return NULL and have publicID set.
4779
 */
4780
4781
xmlChar *
4782
122k
xmlParseExternalID(xmlParserCtxt *ctxt, xmlChar **publicId, int strict) {
4783
122k
    xmlChar *URI = NULL;
4784
4785
122k
    *publicId = NULL;
4786
122k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4787
52.4k
        SKIP(6);
4788
52.4k
  if (SKIP_BLANKS == 0) {
4789
194
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4790
194
                     "Space required after 'SYSTEM'\n");
4791
194
  }
4792
52.4k
  URI = xmlParseSystemLiteral(ctxt);
4793
52.4k
  if (URI == NULL) {
4794
201
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4795
201
        }
4796
70.0k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4797
64.0k
        SKIP(6);
4798
64.0k
  if (SKIP_BLANKS == 0) {
4799
259
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4800
259
        "Space required after 'PUBLIC'\n");
4801
259
  }
4802
64.0k
  *publicId = xmlParsePubidLiteral(ctxt);
4803
64.0k
  if (*publicId == NULL) {
4804
267
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4805
267
  }
4806
64.0k
  if (strict) {
4807
      /*
4808
       * We don't handle [83] so "S SystemLiteral" is required.
4809
       */
4810
60.5k
      if (SKIP_BLANKS == 0) {
4811
1.89k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4812
1.89k
      "Space required after the Public Identifier\n");
4813
1.89k
      }
4814
60.5k
  } else {
4815
      /*
4816
       * We handle [83] so we return immediately, if
4817
       * "S SystemLiteral" is not detected. We skip blanks if no
4818
             * system literal was found, but this is harmless since we must
4819
             * be at the end of a NotationDecl.
4820
       */
4821
3.44k
      if (SKIP_BLANKS == 0) return(NULL);
4822
3.23k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4823
3.23k
  }
4824
61.9k
  URI = xmlParseSystemLiteral(ctxt);
4825
61.9k
  if (URI == NULL) {
4826
1.89k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4827
1.89k
        }
4828
61.9k
    }
4829
120k
    return(URI);
4830
122k
}
4831
4832
/**
4833
 * Skip an XML (SGML) comment <!-- .... -->
4834
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4835
 *  must not occur within comments. "
4836
 * This is the slow routine in case the accelerator for ascii didn't work
4837
 *
4838
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4839
 * @param ctxt  an XML parser context
4840
 * @param buf  the already parsed part of the buffer
4841
 * @param len  number of bytes in the buffer
4842
 * @param size  allocated size of the buffer
4843
 */
4844
static void
4845
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4846
6.12k
                       size_t len, size_t size) {
4847
6.12k
    int q, ql;
4848
6.12k
    int r, rl;
4849
6.12k
    int cur, l;
4850
6.12k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4851
534
                    XML_MAX_HUGE_LENGTH :
4852
6.12k
                    XML_MAX_TEXT_LENGTH;
4853
4854
6.12k
    if (buf == NULL) {
4855
2.94k
        len = 0;
4856
2.94k
  size = XML_PARSER_BUFFER_SIZE;
4857
2.94k
  buf = xmlMalloc(size);
4858
2.94k
  if (buf == NULL) {
4859
1
      xmlErrMemory(ctxt);
4860
1
      return;
4861
1
  }
4862
2.94k
    }
4863
6.12k
    q = xmlCurrentCharRecover(ctxt, &ql);
4864
6.12k
    if (q == 0)
4865
267
        goto not_terminated;
4866
5.86k
    if (!IS_CHAR(q)) {
4867
1.39k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4868
1.39k
                          "xmlParseComment: invalid xmlChar value %d\n",
4869
1.39k
                    q);
4870
1.39k
  xmlFree (buf);
4871
1.39k
  return;
4872
1.39k
    }
4873
4.46k
    NEXTL(ql);
4874
4.46k
    r = xmlCurrentCharRecover(ctxt, &rl);
4875
4.46k
    if (r == 0)
4876
194
        goto not_terminated;
4877
4.26k
    if (!IS_CHAR(r)) {
4878
539
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4879
539
                          "xmlParseComment: invalid xmlChar value %d\n",
4880
539
                    r);
4881
539
  xmlFree (buf);
4882
539
  return;
4883
539
    }
4884
3.73k
    NEXTL(rl);
4885
3.73k
    cur = xmlCurrentCharRecover(ctxt, &l);
4886
3.73k
    if (cur == 0)
4887
194
        goto not_terminated;
4888
66.1k
    while (IS_CHAR(cur) && /* checked */
4889
66.1k
           ((cur != '>') ||
4890
64.7k
      (r != '-') || (q != '-'))) {
4891
62.6k
  if ((r == '-') && (q == '-')) {
4892
1.24k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4893
1.24k
  }
4894
62.6k
  if (len + 5 >= size) {
4895
499
      xmlChar *tmp;
4896
499
            int newSize;
4897
4898
499
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4899
499
            if (newSize < 0) {
4900
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4901
0
                             "Comment too big found", NULL);
4902
0
                xmlFree (buf);
4903
0
                return;
4904
0
            }
4905
499
      tmp = xmlRealloc(buf, newSize);
4906
499
      if (tmp == NULL) {
4907
1
    xmlErrMemory(ctxt);
4908
1
    xmlFree(buf);
4909
1
    return;
4910
1
      }
4911
498
      buf = tmp;
4912
498
            size = newSize;
4913
498
  }
4914
62.5k
  COPY_BUF(buf, len, q);
4915
4916
62.5k
  q = r;
4917
62.5k
  ql = rl;
4918
62.5k
  r = cur;
4919
62.5k
  rl = l;
4920
4921
62.5k
  NEXTL(l);
4922
62.5k
  cur = xmlCurrentCharRecover(ctxt, &l);
4923
4924
62.5k
    }
4925
3.53k
    buf[len] = 0;
4926
3.53k
    if (cur == 0) {
4927
328
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4928
328
                       "Comment not terminated \n<!--%.50s\n", buf);
4929
3.20k
    } else if (!IS_CHAR(cur)) {
4930
1.10k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4931
1.10k
                          "xmlParseComment: invalid xmlChar value %d\n",
4932
1.10k
                    cur);
4933
2.10k
    } else {
4934
2.10k
        NEXT;
4935
2.10k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4936
2.10k
      (!ctxt->disableSAX))
4937
899
      ctxt->sax->comment(ctxt->userData, buf);
4938
2.10k
    }
4939
3.53k
    xmlFree(buf);
4940
3.53k
    return;
4941
655
not_terminated:
4942
655
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4943
655
       "Comment not terminated\n", NULL);
4944
655
    xmlFree(buf);
4945
655
}
4946
4947
/**
4948
 * Parse an XML (SGML) comment. Always consumes '<!'.
4949
 *
4950
 * @deprecated Internal function, don't use.
4951
 *
4952
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4953
 *  must not occur within comments. "
4954
 *
4955
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4956
 * @param ctxt  an XML parser context
4957
 */
4958
void
4959
9.22k
xmlParseComment(xmlParserCtxt *ctxt) {
4960
9.22k
    xmlChar *buf = NULL;
4961
9.22k
    size_t size = XML_PARSER_BUFFER_SIZE;
4962
9.22k
    size_t len = 0;
4963
9.22k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4964
2.12k
                       XML_MAX_HUGE_LENGTH :
4965
9.22k
                       XML_MAX_TEXT_LENGTH;
4966
9.22k
    const xmlChar *in;
4967
9.22k
    size_t nbchar = 0;
4968
9.22k
    int ccol;
4969
4970
    /*
4971
     * Check that there is a comment right here.
4972
     */
4973
9.22k
    if ((RAW != '<') || (NXT(1) != '!'))
4974
0
        return;
4975
9.22k
    SKIP(2);
4976
9.22k
    if ((RAW != '-') || (NXT(1) != '-'))
4977
194
        return;
4978
9.02k
    SKIP(2);
4979
9.02k
    GROW;
4980
4981
    /*
4982
     * Accelerated common case where input don't need to be
4983
     * modified before passing it to the handler.
4984
     */
4985
9.02k
    in = ctxt->input->cur;
4986
9.02k
    do {
4987
9.02k
  if (*in == 0xA) {
4988
922
      do {
4989
922
    ctxt->input->line++; ctxt->input->col = 1;
4990
922
    in++;
4991
922
      } while (*in == 0xA);
4992
725
  }
4993
13.6k
get_more:
4994
13.6k
        ccol = ctxt->input->col;
4995
75.7k
  while (((*in > '-') && (*in <= 0x7F)) ||
4996
75.7k
         ((*in >= 0x20) && (*in < '-')) ||
4997
75.7k
         (*in == 0x09)) {
4998
62.0k
        in++;
4999
62.0k
        ccol++;
5000
62.0k
  }
5001
13.6k
  ctxt->input->col = ccol;
5002
13.6k
  if (*in == 0xA) {
5003
1.40k
      do {
5004
1.40k
    ctxt->input->line++; ctxt->input->col = 1;
5005
1.40k
    in++;
5006
1.40k
      } while (*in == 0xA);
5007
1.16k
      goto get_more;
5008
1.16k
  }
5009
12.4k
  nbchar = in - ctxt->input->cur;
5010
  /*
5011
   * save current set of data
5012
   */
5013
12.4k
  if (nbchar > 0) {
5014
7.43k
            if (nbchar > maxLength - len) {
5015
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5016
0
                                  "Comment too big found", NULL);
5017
0
                xmlFree(buf);
5018
0
                return;
5019
0
            }
5020
7.43k
            if (buf == NULL) {
5021
4.26k
                if ((*in == '-') && (in[1] == '-'))
5022
892
                    size = nbchar + 1;
5023
3.37k
                else
5024
3.37k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5025
4.26k
                buf = xmlMalloc(size);
5026
4.26k
                if (buf == NULL) {
5027
2
                    xmlErrMemory(ctxt);
5028
2
                    return;
5029
2
                }
5030
4.26k
                len = 0;
5031
4.26k
            } else if (len + nbchar + 1 >= size) {
5032
436
                xmlChar *new_buf;
5033
436
                size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5034
436
                new_buf = xmlRealloc(buf, size);
5035
436
                if (new_buf == NULL) {
5036
1
                    xmlErrMemory(ctxt);
5037
1
                    xmlFree(buf);
5038
1
                    return;
5039
1
                }
5040
435
                buf = new_buf;
5041
435
            }
5042
7.42k
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5043
7.42k
            len += nbchar;
5044
7.42k
            buf[len] = 0;
5045
7.42k
  }
5046
12.4k
  ctxt->input->cur = in;
5047
12.4k
  if (*in == 0xA) {
5048
0
      in++;
5049
0
      ctxt->input->line++; ctxt->input->col = 1;
5050
0
  }
5051
12.4k
  if (*in == 0xD) {
5052
604
      in++;
5053
604
      if (*in == 0xA) {
5054
249
    ctxt->input->cur = in;
5055
249
    in++;
5056
249
    ctxt->input->line++; ctxt->input->col = 1;
5057
249
    goto get_more;
5058
249
      }
5059
355
      in--;
5060
355
  }
5061
12.2k
  SHRINK;
5062
12.2k
  GROW;
5063
12.2k
  in = ctxt->input->cur;
5064
12.2k
  if (*in == '-') {
5065
6.07k
      if (in[1] == '-') {
5066
4.06k
          if (in[2] == '>') {
5067
2.89k
        SKIP(3);
5068
2.89k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5069
2.89k
            (!ctxt->disableSAX)) {
5070
2.61k
      if (buf != NULL)
5071
808
          ctxt->sax->comment(ctxt->userData, buf);
5072
1.81k
      else
5073
1.81k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5074
2.61k
        }
5075
2.89k
        if (buf != NULL)
5076
1.07k
            xmlFree(buf);
5077
2.89k
        return;
5078
2.89k
    }
5079
1.17k
    if (buf != NULL) {
5080
947
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5081
947
                          "Double hyphen within comment: "
5082
947
                                      "<!--%.50s\n",
5083
947
              buf);
5084
947
    } else
5085
224
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5086
224
                          "Double hyphen within comment\n", NULL);
5087
1.17k
    in++;
5088
1.17k
    ctxt->input->col++;
5089
1.17k
      }
5090
3.18k
      in++;
5091
3.18k
      ctxt->input->col++;
5092
3.18k
      goto get_more;
5093
6.07k
  }
5094
12.2k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5095
6.12k
    xmlParseCommentComplex(ctxt, buf, len, size);
5096
6.12k
}
5097
5098
5099
/**
5100
 * parse the name of a PI
5101
 *
5102
 * @deprecated Internal function, don't use.
5103
 *
5104
 *     [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5105
 *
5106
 * @param ctxt  an XML parser context
5107
 * @returns the PITarget name or NULL
5108
 */
5109
5110
const xmlChar *
5111
60.9k
xmlParsePITarget(xmlParserCtxt *ctxt) {
5112
60.9k
    const xmlChar *name;
5113
5114
60.9k
    name = xmlParseName(ctxt);
5115
60.9k
    if ((name != NULL) &&
5116
60.9k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5117
60.9k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5118
60.9k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5119
35.1k
  int i;
5120
35.1k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5121
35.1k
      (name[2] == 'l') && (name[3] == 0)) {
5122
33.9k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5123
33.9k
     "XML declaration allowed only at the start of the document\n");
5124
33.9k
      return(name);
5125
33.9k
  } else if (name[3] == 0) {
5126
585
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5127
585
      return(name);
5128
585
  }
5129
1.63k
  for (i = 0;;i++) {
5130
1.63k
      if (xmlW3CPIs[i] == NULL) break;
5131
1.22k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5132
194
          return(name);
5133
1.22k
  }
5134
416
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
416
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5136
416
          NULL, NULL);
5137
416
    }
5138
26.1k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5139
447
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5140
447
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5141
447
    }
5142
26.1k
    return(name);
5143
60.9k
}
5144
5145
#ifdef LIBXML_CATALOG_ENABLED
/**
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A syntax error in the PI value is reported as a warning. The one
 * exception is a missing '=' after the "catalog" keyword, which makes
 * the function return without reporting anything.
 *
 * @param ctxt  an XML parser context
 * @param catalog  the PI value string
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* "catalog" keyword, optionally preceded by blanks */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' sign, optionally surrounded by blanks */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=') {
        return;
    }
    p++;
    for (; IS_BLANK_CH(*p); p++)
        ;

    /* quoted value, terminated by the same quote that opened it */
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        /*
         * Unfortunately, the catalog API doesn't report OOM errors.
         * xmlGetLastError isn't very helpful since we don't know
         * where the last error came from. We'd have to reset it
         * before this call and restore it afterwards.
         */
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5211
5212
/**
5213
 * parse an XML Processing Instruction.
5214
 *
5215
 * @deprecated Internal function, don't use.
5216
 *
5217
 *     [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5218
 *
5219
 * The processing is transferred to SAX once parsed.
5220
 *
5221
 * @param ctxt  an XML parser context
5222
 */
5223
5224
void
5225
60.9k
xmlParsePI(xmlParserCtxt *ctxt) {
5226
60.9k
    xmlChar *buf = NULL;
5227
60.9k
    size_t len = 0;
5228
60.9k
    size_t size = XML_PARSER_BUFFER_SIZE;
5229
60.9k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5230
13.1k
                       XML_MAX_HUGE_LENGTH :
5231
60.9k
                       XML_MAX_TEXT_LENGTH;
5232
60.9k
    int cur, l;
5233
60.9k
    const xmlChar *target;
5234
5235
60.9k
    if ((RAW == '<') && (NXT(1) == '?')) {
5236
  /*
5237
   * this is a Processing Instruction.
5238
   */
5239
60.9k
  SKIP(2);
5240
5241
  /*
5242
   * Parse the target name and check for special support like
5243
   * namespace.
5244
   */
5245
60.9k
        target = xmlParsePITarget(ctxt);
5246
60.9k
  if (target != NULL) {
5247
40.9k
      if ((RAW == '?') && (NXT(1) == '>')) {
5248
572
    SKIP(2);
5249
5250
    /*
5251
     * SAX: PI detected.
5252
     */
5253
572
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5254
572
        (ctxt->sax->processingInstruction != NULL))
5255
375
        ctxt->sax->processingInstruction(ctxt->userData,
5256
375
                                         target, NULL);
5257
572
    return;
5258
572
      }
5259
40.3k
      buf = xmlMalloc(size);
5260
40.3k
      if (buf == NULL) {
5261
15
    xmlErrMemory(ctxt);
5262
15
    return;
5263
15
      }
5264
40.3k
      if (SKIP_BLANKS == 0) {
5265
5.19k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5266
5.19k
        "ParsePI: PI %s space expected\n", target);
5267
5.19k
      }
5268
40.3k
      cur = xmlCurrentCharRecover(ctxt, &l);
5269
1.09M
      while (IS_CHAR(cur) && /* checked */
5270
1.09M
       ((cur != '?') || (NXT(1) != '>'))) {
5271
1.04M
    if (len + 5 >= size) {
5272
301
        xmlChar *tmp;
5273
301
                    int newSize;
5274
5275
301
                    newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5276
301
                    if (newSize < 0) {
5277
0
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5278
0
                                          "PI %s too big found", target);
5279
0
                        xmlFree(buf);
5280
0
                        return;
5281
0
                    }
5282
301
        tmp = xmlRealloc(buf, newSize);
5283
301
        if (tmp == NULL) {
5284
1
      xmlErrMemory(ctxt);
5285
1
      xmlFree(buf);
5286
1
      return;
5287
1
        }
5288
300
        buf = tmp;
5289
300
                    size = newSize;
5290
300
    }
5291
1.04M
    COPY_BUF(buf, len, cur);
5292
1.04M
    NEXTL(l);
5293
1.04M
    cur = xmlCurrentCharRecover(ctxt, &l);
5294
1.04M
      }
5295
40.3k
      buf[len] = 0;
5296
40.3k
      if (cur != '?') {
5297
4.86k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5298
4.86k
          "ParsePI: PI %s never end ...\n", target);
5299
35.4k
      } else {
5300
35.4k
    SKIP(2);
5301
5302
35.4k
#ifdef LIBXML_CATALOG_ENABLED
5303
35.4k
    if ((ctxt->inSubset == 0) &&
5304
35.4k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5305
389
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5306
5307
389
        if ((ctxt->options & XML_PARSE_CATALOG_PI) &&
5308
389
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5309
194
       (allow == XML_CATA_ALLOW_ALL)))
5310
0
      xmlParseCatalogPI(ctxt, buf);
5311
389
    }
5312
35.4k
#endif
5313
5314
    /*
5315
     * SAX: PI detected.
5316
     */
5317
35.4k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5318
35.4k
        (ctxt->sax->processingInstruction != NULL))
5319
24.0k
        ctxt->sax->processingInstruction(ctxt->userData,
5320
24.0k
                                         target, buf);
5321
35.4k
      }
5322
40.3k
      xmlFree(buf);
5323
40.3k
  } else {
5324
20.0k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5325
20.0k
  }
5326
60.9k
    }
5327
60.9k
}
5328
5329
/**
5330
 * Parse a notation declaration. Always consumes '<!'.
5331
 *
5332
 * @deprecated Internal function, don't use.
5333
 *
5334
 *     [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID)
5335
 *                           S? '>'
5336
 *
5337
 * Hence there is actually 3 choices:
5338
 *
5339
 *     'PUBLIC' S PubidLiteral
5340
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5341
 *     'SYSTEM' S SystemLiteral
5342
 *
5343
 * See the NOTE on #xmlParseExternalID.
5344
 *
5345
 * @param ctxt  an XML parser context
5346
 */
5347
5348
void
5349
7.69k
xmlParseNotationDecl(xmlParserCtxt *ctxt) {
5350
7.69k
    const xmlChar *name;
5351
7.69k
    xmlChar *Pubid;
5352
7.69k
    xmlChar *Systemid;
5353
5354
7.69k
    if ((CUR != '<') || (NXT(1) != '!'))
5355
0
        return;
5356
7.69k
    SKIP(2);
5357
5358
7.69k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5359
6.32k
#ifdef LIBXML_VALID_ENABLED
5360
6.32k
  int oldInputNr = ctxt->inputNr;
5361
6.32k
#endif
5362
5363
6.32k
  SKIP(8);
5364
6.32k
  if (SKIP_BLANKS_PE == 0) {
5365
194
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5366
194
         "Space required after '<!NOTATION'\n");
5367
194
      return;
5368
194
  }
5369
5370
6.12k
        name = xmlParseName(ctxt);
5371
6.12k
  if (name == NULL) {
5372
1.02k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5373
1.02k
      return;
5374
1.02k
  }
5375
5.09k
  if (xmlStrchr(name, ':') != NULL) {
5376
509
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5377
509
         "colons are forbidden from notation names '%s'\n",
5378
509
         name, NULL, NULL);
5379
509
  }
5380
5.09k
  if (SKIP_BLANKS_PE == 0) {
5381
197
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5382
197
         "Space required after the NOTATION name'\n");
5383
197
      return;
5384
197
  }
5385
5386
  /*
5387
   * Parse the IDs.
5388
   */
5389
4.90k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5390
4.90k
  SKIP_BLANKS_PE;
5391
5392
4.90k
  if (RAW == '>') {
5393
4.31k
#ifdef LIBXML_VALID_ENABLED
5394
4.31k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5395
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5396
0
                           "Notation declaration doesn't start and stop"
5397
0
                                 " in the same entity\n",
5398
0
                                 NULL, NULL);
5399
0
      }
5400
4.31k
#endif
5401
4.31k
      NEXT;
5402
4.31k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5403
4.31k
    (ctxt->sax->notationDecl != NULL))
5404
4.07k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5405
4.31k
  } else {
5406
591
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5407
591
  }
5408
4.90k
  if (Systemid != NULL) xmlFree(Systemid);
5409
4.90k
  if (Pubid != NULL) xmlFree(Pubid);
5410
4.90k
    }
5411
7.69k
}
5412
5413
/**
5414
 * Parse an entity declaration. Always consumes '<!'.
5415
 *
5416
 * @deprecated Internal function, don't use.
5417
 *
5418
 *     [70] EntityDecl ::= GEDecl | PEDecl
5419
 *
5420
 *     [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5421
 *
5422
 *     [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5423
 *
5424
 *     [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5425
 *
5426
 *     [74] PEDef ::= EntityValue | ExternalID
5427
 *
5428
 *     [76] NDataDecl ::= S 'NDATA' S Name
5429
 *
5430
 * [ VC: Notation Declared ]
5431
 * The Name must match the declared name of a notation.
5432
 *
5433
 * @param ctxt  an XML parser context
5434
 */
5435
5436
void
5437
31.7k
xmlParseEntityDecl(xmlParserCtxt *ctxt) {
5438
31.7k
    const xmlChar *name = NULL;
5439
31.7k
    xmlChar *value = NULL;
5440
31.7k
    xmlChar *URI = NULL, *literal = NULL;
5441
31.7k
    const xmlChar *ndata = NULL;
5442
31.7k
    int isParameter = 0;
5443
31.7k
    xmlChar *orig = NULL;
5444
5445
31.7k
    if ((CUR != '<') || (NXT(1) != '!'))
5446
0
        return;
5447
31.7k
    SKIP(2);
5448
5449
    /* GROW; done in the caller */
5450
31.7k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5451
31.0k
#ifdef LIBXML_VALID_ENABLED
5452
31.0k
  int oldInputNr = ctxt->inputNr;
5453
31.0k
#endif
5454
5455
31.0k
  SKIP(6);
5456
31.0k
  if (SKIP_BLANKS_PE == 0) {
5457
195
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5458
195
         "Space required after '<!ENTITY'\n");
5459
195
  }
5460
5461
31.0k
  if (RAW == '%') {
5462
5.25k
      NEXT;
5463
5.25k
      if (SKIP_BLANKS_PE == 0) {
5464
481
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5465
481
             "Space required after '%%'\n");
5466
481
      }
5467
5.25k
      isParameter = 1;
5468
5.25k
  }
5469
5470
31.0k
        name = xmlParseName(ctxt);
5471
31.0k
  if (name == NULL) {
5472
3.46k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5473
3.46k
                     "xmlParseEntityDecl: no name\n");
5474
3.46k
            return;
5475
3.46k
  }
5476
27.5k
  if (xmlStrchr(name, ':') != NULL) {
5477
788
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5478
788
         "colons are forbidden from entities names '%s'\n",
5479
788
         name, NULL, NULL);
5480
788
  }
5481
27.5k
  if (SKIP_BLANKS_PE == 0) {
5482
838
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5483
838
         "Space required after the entity name\n");
5484
838
  }
5485
5486
  /*
5487
   * handle the various case of definitions...
5488
   */
5489
27.5k
  if (isParameter) {
5490
4.71k
      if ((RAW == '"') || (RAW == '\'')) {
5491
3.28k
          value = xmlParseEntityValue(ctxt, &orig);
5492
3.28k
    if (value) {
5493
3.06k
        if ((ctxt->sax != NULL) &&
5494
3.06k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5495
1.53k
      ctxt->sax->entityDecl(ctxt->userData, name,
5496
1.53k
                        XML_INTERNAL_PARAMETER_ENTITY,
5497
1.53k
            NULL, NULL, value);
5498
3.06k
    }
5499
3.28k
      } else {
5500
1.42k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5501
1.42k
    if ((URI == NULL) && (literal == NULL)) {
5502
204
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5503
204
    }
5504
1.42k
    if (URI) {
5505
1.02k
                    if (xmlStrchr(URI, '#')) {
5506
196
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5507
827
                    } else {
5508
827
                        if ((ctxt->sax != NULL) &&
5509
827
                            (!ctxt->disableSAX) &&
5510
827
                            (ctxt->sax->entityDecl != NULL))
5511
630
                            ctxt->sax->entityDecl(ctxt->userData, name,
5512
630
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5513
630
                                        literal, URI, NULL);
5514
827
                    }
5515
1.02k
    }
5516
1.42k
      }
5517
22.8k
  } else {
5518
22.8k
      if ((RAW == '"') || (RAW == '\'')) {
5519
16.2k
          value = xmlParseEntityValue(ctxt, &orig);
5520
16.2k
    if ((ctxt->sax != NULL) &&
5521
16.2k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5522
12.9k
        ctxt->sax->entityDecl(ctxt->userData, name,
5523
12.9k
        XML_INTERNAL_GENERAL_ENTITY,
5524
12.9k
        NULL, NULL, value);
5525
    /*
5526
     * For expat compatibility in SAX mode.
5527
     */
5528
16.2k
    if ((ctxt->myDoc == NULL) ||
5529
16.2k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5530
2.23k
        if (ctxt->myDoc == NULL) {
5531
1.73k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5532
1.73k
      if (ctxt->myDoc == NULL) {
5533
2
          xmlErrMemory(ctxt);
5534
2
          goto done;
5535
2
      }
5536
1.73k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5537
1.73k
        }
5538
2.22k
        if (ctxt->myDoc->intSubset == NULL) {
5539
1.73k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5540
1.73k
              BAD_CAST "fake", NULL, NULL);
5541
1.73k
                        if (ctxt->myDoc->intSubset == NULL) {
5542
1
                            xmlErrMemory(ctxt);
5543
1
                            goto done;
5544
1
                        }
5545
1.73k
                    }
5546
5547
2.22k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5548
2.22k
                    NULL, NULL, value);
5549
2.22k
    }
5550
16.2k
      } else {
5551
6.63k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5552
6.63k
    if ((URI == NULL) && (literal == NULL)) {
5553
2.14k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5554
2.14k
    }
5555
6.63k
    if (URI) {
5556
4.29k
                    if (xmlStrchr(URI, '#')) {
5557
320
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5558
320
                    }
5559
4.29k
    }
5560
6.63k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5561
685
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5562
685
           "Space required before 'NDATA'\n");
5563
685
    }
5564
6.63k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5565
1.14k
        SKIP(5);
5566
1.14k
        if (SKIP_BLANKS_PE == 0) {
5567
194
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5568
194
               "Space required after 'NDATA'\n");
5569
194
        }
5570
1.14k
        ndata = xmlParseName(ctxt);
5571
1.14k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5572
1.14k
            (ctxt->sax->unparsedEntityDecl != NULL))
5573
894
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5574
894
            literal, URI, ndata);
5575
5.49k
    } else {
5576
5.49k
        if ((ctxt->sax != NULL) &&
5577
5.49k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
3.83k
      ctxt->sax->entityDecl(ctxt->userData, name,
5579
3.83k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5580
3.83k
            literal, URI, NULL);
5581
        /*
5582
         * For expat compatibility in SAX mode.
5583
         * assuming the entity replacement was asked for
5584
         */
5585
5.49k
        if ((ctxt->replaceEntities != 0) &&
5586
5.49k
      ((ctxt->myDoc == NULL) ||
5587
2.69k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5588
1.00k
      if (ctxt->myDoc == NULL) {
5589
535
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5590
535
          if (ctxt->myDoc == NULL) {
5591
1
              xmlErrMemory(ctxt);
5592
1
        goto done;
5593
1
          }
5594
534
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5595
534
      }
5596
5597
1.00k
      if (ctxt->myDoc->intSubset == NULL) {
5598
534
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5599
534
            BAD_CAST "fake", NULL, NULL);
5600
534
                            if (ctxt->myDoc->intSubset == NULL) {
5601
1
                                xmlErrMemory(ctxt);
5602
1
                                goto done;
5603
1
                            }
5604
534
                        }
5605
1.00k
      xmlSAX2EntityDecl(ctxt, name,
5606
1.00k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5607
1.00k
                  literal, URI, NULL);
5608
1.00k
        }
5609
5.49k
    }
5610
6.63k
      }
5611
22.8k
  }
5612
27.5k
  SKIP_BLANKS_PE;
5613
27.5k
  if (RAW != '>') {
5614
4.88k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5615
4.88k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5616
4.88k
      xmlHaltParser(ctxt);
5617
22.6k
  } else {
5618
22.6k
#ifdef LIBXML_VALID_ENABLED
5619
22.6k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5620
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5621
0
                           "Entity declaration doesn't start and stop in"
5622
0
                                 " the same entity\n",
5623
0
                                 NULL, NULL);
5624
0
      }
5625
22.6k
#endif
5626
22.6k
      NEXT;
5627
22.6k
  }
5628
27.5k
  if (orig != NULL) {
5629
      /*
5630
       * Ugly mechanism to save the raw entity value.
5631
       */
5632
18.9k
      xmlEntityPtr cur = NULL;
5633
5634
18.9k
      if (isParameter) {
5635
3.06k
          if ((ctxt->sax != NULL) &&
5636
3.06k
        (ctxt->sax->getParameterEntity != NULL))
5637
3.06k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5638
15.8k
      } else {
5639
15.8k
          if ((ctxt->sax != NULL) &&
5640
15.8k
        (ctxt->sax->getEntity != NULL))
5641
15.8k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5642
15.8k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5643
1.02k
        cur = xmlSAX2GetEntity(ctxt, name);
5644
1.02k
    }
5645
15.8k
      }
5646
18.9k
            if ((cur != NULL) && (cur->orig == NULL)) {
5647
14.5k
    cur->orig = orig;
5648
14.5k
                orig = NULL;
5649
14.5k
      }
5650
18.9k
  }
5651
5652
27.5k
done:
5653
27.5k
  if (value != NULL) xmlFree(value);
5654
27.5k
  if (URI != NULL) xmlFree(URI);
5655
27.5k
  if (literal != NULL) xmlFree(literal);
5656
27.5k
        if (orig != NULL) xmlFree(orig);
5657
27.5k
    }
5658
31.7k
}
5659
5660
/**
5661
 * Parse an attribute default declaration
5662
 *
5663
 * @deprecated Internal function, don't use.
5664
 *
5665
 *     [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5666
 *
5667
 * [ VC: Required Attribute ]
5668
 * if the default declaration is the keyword \#REQUIRED, then the
5669
 * attribute must be specified for all elements of the type in the
5670
 * attribute-list declaration.
5671
 *
5672
 * [ VC: Attribute Default Legal ]
5673
 * The declared default value must meet the lexical constraints of
5674
 * the declared attribute type c.f. #xmlValidateAttributeDecl
5675
 *
5676
 * [ VC: Fixed Attribute Default ]
5677
 * if an attribute has a default value declared with the \#FIXED
5678
 * keyword, instances of that attribute must match the default value.
5679
 *
5680
 * [ WFC: No < in Attribute Values ]
5681
 * handled in #xmlParseAttValue
5682
 *
5683
 * @param ctxt  an XML parser context
5684
 * @param value  Receive a possible fixed default value for the attribute
5685
 * @returns XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5686
 *          or XML_ATTRIBUTE_FIXED.
5687
 */
5688
5689
int
5690
41.5k
xmlParseDefaultDecl(xmlParserCtxt *ctxt, xmlChar **value) {
5691
41.5k
    int val;
5692
41.5k
    xmlChar *ret;
5693
5694
41.5k
    *value = NULL;
5695
41.5k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5696
980
  SKIP(9);
5697
980
  return(XML_ATTRIBUTE_REQUIRED);
5698
980
    }
5699
40.5k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5700
478
  SKIP(8);
5701
478
  return(XML_ATTRIBUTE_IMPLIED);
5702
478
    }
5703
40.1k
    val = XML_ATTRIBUTE_NONE;
5704
40.1k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5705
1.44k
  SKIP(6);
5706
1.44k
  val = XML_ATTRIBUTE_FIXED;
5707
1.44k
  if (SKIP_BLANKS_PE == 0) {
5708
477
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5709
477
         "Space required after '#FIXED'\n");
5710
477
  }
5711
1.44k
    }
5712
40.1k
    ret = xmlParseAttValue(ctxt);
5713
40.1k
    if (ret == NULL) {
5714
4.19k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5715
4.19k
           "Attribute default value declaration error\n");
5716
4.19k
    } else
5717
35.9k
        *value = ret;
5718
40.1k
    return(val);
5719
40.5k
}
5720
5721
/**
5722
 * parse an Notation attribute type.
5723
 *
5724
 * @deprecated Internal function, don't use.
5725
 *
5726
 * Note: the leading 'NOTATION' S part has already being parsed...
5727
 *
5728
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5729
 *
5730
 * [ VC: Notation Attributes ]
5731
 * Values of this type must match one of the notation names included
5732
 * in the declaration; all notation names in the declaration must be declared.
5733
 *
5734
 * @param ctxt  an XML parser context
5735
 * @returns the notation attribute tree built while parsing
5736
 */
5737
5738
xmlEnumeration *
5739
788
xmlParseNotationType(xmlParserCtxt *ctxt) {
5740
788
    const xmlChar *name;
5741
788
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5742
5743
788
    if (RAW != '(') {
5744
200
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5745
200
  return(NULL);
5746
200
    }
5747
588
    do {
5748
588
        NEXT;
5749
588
  SKIP_BLANKS_PE;
5750
588
        name = xmlParseName(ctxt);
5751
588
  if (name == NULL) {
5752
197
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5753
197
         "Name expected in NOTATION declaration\n");
5754
197
            xmlFreeEnumeration(ret);
5755
197
      return(NULL);
5756
197
  }
5757
391
        tmp = NULL;
5758
391
#ifdef LIBXML_VALID_ENABLED
5759
391
        if (ctxt->validate) {
5760
194
            tmp = ret;
5761
194
            while (tmp != NULL) {
5762
0
                if (xmlStrEqual(name, tmp->name)) {
5763
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5764
0
              "standalone: attribute notation value token %s duplicated\n",
5765
0
                                     name, NULL);
5766
0
                    if (!xmlDictOwns(ctxt->dict, name))
5767
0
                        xmlFree((xmlChar *) name);
5768
0
                    break;
5769
0
                }
5770
0
                tmp = tmp->next;
5771
0
            }
5772
194
        }
5773
391
#endif /* LIBXML_VALID_ENABLED */
5774
391
  if (tmp == NULL) {
5775
391
      cur = xmlCreateEnumeration(name);
5776
391
      if (cur == NULL) {
5777
1
                xmlErrMemory(ctxt);
5778
1
                xmlFreeEnumeration(ret);
5779
1
                return(NULL);
5780
1
            }
5781
390
      if (last == NULL) ret = last = cur;
5782
0
      else {
5783
0
    last->next = cur;
5784
0
    last = cur;
5785
0
      }
5786
390
  }
5787
390
  SKIP_BLANKS_PE;
5788
390
    } while (RAW == '|');
5789
390
    if (RAW != ')') {
5790
390
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5791
390
        xmlFreeEnumeration(ret);
5792
390
  return(NULL);
5793
390
    }
5794
0
    NEXT;
5795
0
    return(ret);
5796
390
}
5797
5798
/**
5799
 * parse an Enumeration attribute type.
5800
 *
5801
 * @deprecated Internal function, don't use.
5802
 *
5803
 *     [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5804
 *
5805
 * [ VC: Enumeration ]
5806
 * Values of this type must match one of the Nmtoken tokens in
5807
 * the declaration
5808
 *
5809
 * @param ctxt  an XML parser context
5810
 * @returns the enumeration attribute tree built while parsing
5811
 */
5812
5813
xmlEnumeration *
5814
7.39k
xmlParseEnumerationType(xmlParserCtxt *ctxt) {
5815
7.39k
    xmlChar *name;
5816
7.39k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5817
5818
7.39k
    if (RAW != '(') {
5819
4.94k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5820
4.94k
  return(NULL);
5821
4.94k
    }
5822
4.02k
    do {
5823
4.02k
        NEXT;
5824
4.02k
  SKIP_BLANKS_PE;
5825
4.02k
        name = xmlParseNmtoken(ctxt);
5826
4.02k
  if (name == NULL) {
5827
289
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5828
289
      return(ret);
5829
289
  }
5830
3.73k
        tmp = NULL;
5831
3.73k
#ifdef LIBXML_VALID_ENABLED
5832
3.73k
        if (ctxt->validate) {
5833
2.84k
            tmp = ret;
5834
4.35k
            while (tmp != NULL) {
5835
1.74k
                if (xmlStrEqual(name, tmp->name)) {
5836
226
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5837
226
              "standalone: attribute enumeration value token %s duplicated\n",
5838
226
                                     name, NULL);
5839
226
                    if (!xmlDictOwns(ctxt->dict, name))
5840
226
                        xmlFree(name);
5841
226
                    break;
5842
226
                }
5843
1.51k
                tmp = tmp->next;
5844
1.51k
            }
5845
2.84k
        }
5846
3.73k
#endif /* LIBXML_VALID_ENABLED */
5847
3.73k
  if (tmp == NULL) {
5848
3.50k
      cur = xmlCreateEnumeration(name);
5849
3.50k
      if (!xmlDictOwns(ctxt->dict, name))
5850
3.50k
    xmlFree(name);
5851
3.50k
      if (cur == NULL) {
5852
1
                xmlErrMemory(ctxt);
5853
1
                xmlFreeEnumeration(ret);
5854
1
                return(NULL);
5855
1
            }
5856
3.50k
      if (last == NULL) ret = last = cur;
5857
1.25k
      else {
5858
1.25k
    last->next = cur;
5859
1.25k
    last = cur;
5860
1.25k
      }
5861
3.50k
  }
5862
3.73k
  SKIP_BLANKS_PE;
5863
3.73k
    } while (RAW == '|');
5864
2.16k
    if (RAW != ')') {
5865
391
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5866
391
  return(ret);
5867
391
    }
5868
1.76k
    NEXT;
5869
1.76k
    return(ret);
5870
2.16k
}
5871
5872
/**
5873
 * parse an Enumerated attribute type.
5874
 *
5875
 * @deprecated Internal function, don't use.
5876
 *
5877
 *     [57] EnumeratedType ::= NotationType | Enumeration
5878
 *
5879
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5880
 *
5881
 * @param ctxt  an XML parser context
5882
 * @param tree  the enumeration tree built while parsing
5883
 * @returns XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5884
 */
5885
5886
int
5887
8.38k
xmlParseEnumeratedType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5888
8.38k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5889
985
  SKIP(8);
5890
985
  if (SKIP_BLANKS_PE == 0) {
5891
197
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5892
197
         "Space required after 'NOTATION'\n");
5893
197
      return(0);
5894
197
  }
5895
788
  *tree = xmlParseNotationType(ctxt);
5896
788
  if (*tree == NULL) return(0);
5897
0
  return(XML_ATTRIBUTE_NOTATION);
5898
788
    }
5899
7.39k
    *tree = xmlParseEnumerationType(ctxt);
5900
7.39k
    if (*tree == NULL) return(0);
5901
2.25k
    return(XML_ATTRIBUTE_ENUMERATION);
5902
7.39k
}
5903
5904
/**
5905
 * parse the Attribute list def for an element
5906
 *
5907
 * @deprecated Internal function, don't use.
5908
 *
5909
 *     [54] AttType ::= StringType | TokenizedType | EnumeratedType
5910
 *
5911
 *     [55] StringType ::= 'CDATA'
5912
 *
5913
 *     [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5914
 *                            'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5915
 *
5916
 * Validity constraints for attribute values syntax are checked in
5917
 * #xmlValidateAttributeValue
5918
 *
5919
 * [ VC: ID ]
5920
 * Values of type ID must match the Name production. A name must not
5921
 * appear more than once in an XML document as a value of this type;
5922
 * i.e., ID values must uniquely identify the elements which bear them.
5923
 *
5924
 * [ VC: One ID per Element Type ]
5925
 * No element type may have more than one ID attribute specified.
5926
 *
5927
 * [ VC: ID Attribute Default ]
5928
 * An ID attribute must have a declared default of \#IMPLIED or \#REQUIRED.
5929
 *
5930
 * [ VC: IDREF ]
5931
 * Values of type IDREF must match the Name production, and values
5932
 * of type IDREFS must match Names; each IDREF Name must match the value
5933
 * of an ID attribute on some element in the XML document; i.e. IDREF
5934
 * values must match the value of some ID attribute.
5935
 *
5936
 * [ VC: Entity Name ]
5937
 * Values of type ENTITY must match the Name production, values
5938
 * of type ENTITIES must match Names; each Entity Name must match the
5939
 * name of an unparsed entity declared in the DTD.
5940
 *
5941
 * [ VC: Name Token ]
5942
 * Values of type NMTOKEN must match the Nmtoken production; values
5943
 * of type NMTOKENS must match Nmtokens.
5944
 *
5945
 * @param ctxt  an XML parser context
5946
 * @param tree  the enumeration tree built while parsing
5947
 * @returns the attribute type
5948
 */
5949
int
5950
51.2k
xmlParseAttributeType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5951
51.2k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5952
12.4k
  SKIP(5);
5953
12.4k
  return(XML_ATTRIBUTE_CDATA);
5954
38.7k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5955
3.34k
  SKIP(6);
5956
3.34k
  return(XML_ATTRIBUTE_IDREFS);
5957
35.3k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5958
3.75k
  SKIP(5);
5959
3.75k
  return(XML_ATTRIBUTE_IDREF);
5960
31.6k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5961
4.79k
        SKIP(2);
5962
4.79k
  return(XML_ATTRIBUTE_ID);
5963
26.8k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5964
12.7k
  SKIP(6);
5965
12.7k
  return(XML_ATTRIBUTE_ENTITY);
5966
14.0k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5967
2.07k
  SKIP(8);
5968
2.07k
  return(XML_ATTRIBUTE_ENTITIES);
5969
11.9k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5970
754
  SKIP(8);
5971
754
  return(XML_ATTRIBUTE_NMTOKENS);
5972
11.2k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5973
2.84k
  SKIP(7);
5974
2.84k
  return(XML_ATTRIBUTE_NMTOKEN);
5975
2.84k
     }
5976
8.38k
     return(xmlParseEnumeratedType(ctxt, tree));
5977
51.2k
}
5978
5979
/**
5980
 * Parse an attribute list declaration for an element. Always consumes '<!'.
5981
 *
5982
 * @deprecated Internal function, don't use.
5983
 *
5984
 *     [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5985
 *
5986
 *     [53] AttDef ::= S Name S AttType S DefaultDecl
5987
 * @param ctxt  an XML parser context
5988
 */
5989
void
5990
55.7k
xmlParseAttributeListDecl(xmlParserCtxt *ctxt) {
5991
55.7k
    const xmlChar *elemName;
5992
55.7k
    const xmlChar *attrName;
5993
55.7k
    xmlEnumerationPtr tree;
5994
5995
55.7k
    if ((CUR != '<') || (NXT(1) != '!'))
5996
0
        return;
5997
55.7k
    SKIP(2);
5998
5999
55.7k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6000
54.5k
#ifdef LIBXML_VALID_ENABLED
6001
54.5k
  int oldInputNr = ctxt->inputNr;
6002
54.5k
#endif
6003
6004
54.5k
  SKIP(7);
6005
54.5k
  if (SKIP_BLANKS_PE == 0) {
6006
197
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6007
197
                     "Space required after '<!ATTLIST'\n");
6008
197
  }
6009
54.5k
        elemName = xmlParseName(ctxt);
6010
54.5k
  if (elemName == NULL) {
6011
1.33k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6012
1.33k
         "ATTLIST: no name for Element\n");
6013
1.33k
      return;
6014
1.33k
  }
6015
53.2k
  SKIP_BLANKS_PE;
6016
53.2k
  GROW;
6017
90.1k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6018
54.5k
      int type;
6019
54.5k
      int def;
6020
54.5k
      xmlChar *defaultValue = NULL;
6021
6022
54.5k
      GROW;
6023
54.5k
            tree = NULL;
6024
54.5k
      attrName = xmlParseName(ctxt);
6025
54.5k
      if (attrName == NULL) {
6026
2.63k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6027
2.63k
             "ATTLIST: no name for Attribute\n");
6028
2.63k
    break;
6029
2.63k
      }
6030
51.9k
      GROW;
6031
51.9k
      if (SKIP_BLANKS_PE == 0) {
6032
722
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6033
722
            "Space required after the attribute name\n");
6034
722
    break;
6035
722
      }
6036
6037
51.2k
      type = xmlParseAttributeType(ctxt, &tree);
6038
51.2k
      if (type <= 0) {
6039
6.12k
          break;
6040
6.12k
      }
6041
6042
45.0k
      GROW;
6043
45.0k
      if (SKIP_BLANKS_PE == 0) {
6044
3.52k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6045
3.52k
             "Space required after the attribute type\n");
6046
3.52k
          if (tree != NULL)
6047
492
        xmlFreeEnumeration(tree);
6048
3.52k
    break;
6049
3.52k
      }
6050
6051
41.5k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6052
41.5k
      if (def <= 0) {
6053
0
                if (defaultValue != NULL)
6054
0
        xmlFree(defaultValue);
6055
0
          if (tree != NULL)
6056
0
        xmlFreeEnumeration(tree);
6057
0
          break;
6058
0
      }
6059
41.5k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6060
27.0k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6061
6062
41.5k
      GROW;
6063
41.5k
            if (RAW != '>') {
6064
6.21k
    if (SKIP_BLANKS_PE == 0) {
6065
4.62k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6066
4.62k
      "Space required after the attribute default value\n");
6067
4.62k
        if (defaultValue != NULL)
6068
325
      xmlFree(defaultValue);
6069
4.62k
        if (tree != NULL)
6070
220
      xmlFreeEnumeration(tree);
6071
4.62k
        break;
6072
4.62k
    }
6073
6.21k
      }
6074
36.9k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6075
36.9k
    (ctxt->sax->attributeDecl != NULL))
6076
35.5k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6077
35.5k
                          type, def, defaultValue, tree);
6078
1.42k
      else if (tree != NULL)
6079
202
    xmlFreeEnumeration(tree);
6080
6081
36.9k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6082
36.9k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6083
36.9k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6084
26.7k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6085
26.7k
      }
6086
36.9k
      if (ctxt->sax2) {
6087
27.8k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6088
27.8k
      }
6089
36.9k
      if (defaultValue != NULL)
6090
35.5k
          xmlFree(defaultValue);
6091
36.9k
      GROW;
6092
36.9k
  }
6093
53.2k
  if (RAW == '>') {
6094
37.4k
#ifdef LIBXML_VALID_ENABLED
6095
37.4k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6096
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6097
0
                                 "Attribute list declaration doesn't start and"
6098
0
                                 " stop in the same entity\n",
6099
0
                                 NULL, NULL);
6100
0
      }
6101
37.4k
#endif
6102
37.4k
      NEXT;
6103
37.4k
  }
6104
53.2k
    }
6105
55.7k
}
6106
6107
/**
6108
 * Handle PEs and check that we don't pop the entity that started
6109
 * a balanced group.
6110
 *
6111
 * @param ctxt  parser context
6112
 * @param openInputNr  input nr of the entity with opening '('
6113
 */
6114
static void
6115
99.2k
xmlSkipBlankCharsPEBalanced(xmlParserCtxt *ctxt, int openInputNr) {
6116
99.2k
    SKIP_BLANKS;
6117
99.2k
    GROW;
6118
6119
99.2k
    (void) openInputNr;
6120
6121
99.2k
    if (!PARSER_EXTERNAL(ctxt) && !PARSER_IN_PE(ctxt))
6122
99.2k
        return;
6123
6124
0
    while (!PARSER_STOPPED(ctxt)) {
6125
0
        if (ctxt->input->cur >= ctxt->input->end) {
6126
0
#ifdef LIBXML_VALID_ENABLED
6127
0
            if ((ctxt->validate) && (ctxt->inputNr <= openInputNr)) {
6128
0
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6129
0
                                 "Element content declaration doesn't start "
6130
0
                                 "and stop in the same entity\n",
6131
0
                                 NULL, NULL);
6132
0
            }
6133
0
#endif
6134
0
            if (PARSER_IN_PE(ctxt))
6135
0
                xmlPopPE(ctxt);
6136
0
            else
6137
0
                break;
6138
0
        } else if (RAW == '%') {
6139
0
            xmlParsePERefInternal(ctxt, 0);
6140
0
        } else {
6141
0
            break;
6142
0
        }
6143
6144
0
        SKIP_BLANKS;
6145
0
        GROW;
6146
0
    }
6147
0
}
6148
6149
/**
6150
 * parse the declaration for a Mixed Element content
6151
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6152
 *
6153
 * @deprecated Internal function, don't use.
6154
 *
6155
 *     [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6156
 *                    '(' S? '#PCDATA' S? ')'
6157
 *
6158
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6159
 *
6160
 * [ VC: No Duplicate Types ]
6161
 * The same name must not appear more than once in a single
6162
 * mixed-content declaration.
6163
 *
6164
 * @param ctxt  an XML parser context
6165
 * @param openInputNr  the input used for the current entity, needed for
6166
 * boundary checks
6167
 * @returns the list of the xmlElementContent describing the element choices
6168
 */
6169
xmlElementContent *
6170
3.80k
xmlParseElementMixedContentDecl(xmlParserCtxt *ctxt, int openInputNr) {
6171
3.80k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6172
3.80k
    const xmlChar *elem = NULL;
6173
6174
3.80k
    GROW;
6175
3.80k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6176
3.80k
  SKIP(7);
6177
3.80k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6178
3.80k
  if (RAW == ')') {
6179
713
#ifdef LIBXML_VALID_ENABLED
6180
713
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6181
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6182
0
                                 "Element content declaration doesn't start "
6183
0
                                 "and stop in the same entity\n",
6184
0
                                 NULL, NULL);
6185
0
      }
6186
713
#endif
6187
713
      NEXT;
6188
713
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6189
713
      if (ret == NULL)
6190
3
                goto mem_error;
6191
710
      if (RAW == '*') {
6192
462
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6193
462
    NEXT;
6194
462
      }
6195
710
      return(ret);
6196
713
  }
6197
3.09k
  if ((RAW == '(') || (RAW == '|')) {
6198
2.56k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6199
2.56k
      if (ret == NULL)
6200
0
                goto mem_error;
6201
2.56k
  }
6202
10.7k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6203
7.84k
      NEXT;
6204
7.84k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6205
7.84k
            if (n == NULL)
6206
4
                goto mem_error;
6207
7.83k
      if (elem == NULL) {
6208
2.35k
    n->c1 = cur;
6209
2.35k
    if (cur != NULL)
6210
2.35k
        cur->parent = n;
6211
2.35k
    ret = cur = n;
6212
5.47k
      } else {
6213
5.47k
          cur->c2 = n;
6214
5.47k
    n->parent = cur;
6215
5.47k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6216
5.47k
                if (n->c1 == NULL)
6217
2
                    goto mem_error;
6218
5.47k
    n->c1->parent = n;
6219
5.47k
    cur = n;
6220
5.47k
      }
6221
7.83k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6222
7.83k
      elem = xmlParseName(ctxt);
6223
7.83k
      if (elem == NULL) {
6224
194
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6225
194
      "xmlParseElementMixedContentDecl : Name expected\n");
6226
194
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6227
194
    return(NULL);
6228
194
      }
6229
7.64k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6230
7.64k
  }
6231
2.89k
  if ((RAW == ')') && (NXT(1) == '*')) {
6232
1.76k
      if (elem != NULL) {
6233
1.76k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6234
1.76k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6235
1.76k
    if (cur->c2 == NULL)
6236
1
                    goto mem_error;
6237
1.76k
    cur->c2->parent = cur;
6238
1.76k
            }
6239
1.76k
            if (ret != NULL)
6240
1.76k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6241
1.76k
#ifdef LIBXML_VALID_ENABLED
6242
1.76k
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6243
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6244
0
                                 "Element content declaration doesn't start "
6245
0
                                 "and stop in the same entity\n",
6246
0
                                 NULL, NULL);
6247
0
      }
6248
1.76k
#endif
6249
1.76k
      SKIP(2);
6250
1.76k
  } else {
6251
1.12k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6252
1.12k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6253
1.12k
      return(NULL);
6254
1.12k
  }
6255
6256
2.89k
    } else {
6257
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6258
0
    }
6259
1.76k
    return(ret);
6260
6261
10
mem_error:
6262
10
    xmlErrMemory(ctxt);
6263
10
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6264
10
    return(NULL);
6265
3.80k
}
6266
6267
/**
6268
 * parse the declaration for a Mixed Element content
6269
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6270
 *
6271
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6272
 *
6273
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6274
 *
6275
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6276
 *
6277
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6278
 *
6279
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6280
 * TODO Parameter-entity replacement text must be properly nested
6281
 *  with parenthesized groups. That is to say, if either of the
6282
 *  opening or closing parentheses in a choice, seq, or Mixed
6283
 *  construct is contained in the replacement text for a parameter
6284
 *  entity, both must be contained in the same replacement text. For
6285
 *  interoperability, if a parameter-entity reference appears in a
6286
 *  choice, seq, or Mixed construct, its replacement text should not
6287
 *  be empty, and neither the first nor last non-blank character of
6288
 *  the replacement text should be a connector (| or ,).
6289
 *
6290
 * @param ctxt  an XML parser context
6291
 * @param openInputNr  the input used for the current entity, needed for
6292
 * boundary checks
6293
 * @param depth  the level of recursion
6294
 * @returns the tree of xmlElementContent describing the element
6295
 *          hierarchy.
6296
 */
6297
static xmlElementContentPtr
6298
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int openInputNr,
6299
15.5k
                                       int depth) {
6300
15.5k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6301
15.5k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6302
15.5k
    const xmlChar *elem;
6303
15.5k
    xmlChar type = 0;
6304
6305
15.5k
    if (depth > maxDepth) {
6306
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6307
0
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6308
0
                "use XML_PARSE_HUGE\n", depth);
6309
0
  return(NULL);
6310
0
    }
6311
15.5k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6312
15.5k
    if (RAW == '(') {
6313
2.01k
        int newInputNr = ctxt->inputNr;
6314
6315
        /* Recurse on first child */
6316
2.01k
  NEXT;
6317
2.01k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6318
2.01k
                                                           depth + 1);
6319
2.01k
        if (cur == NULL)
6320
1.25k
            return(NULL);
6321
13.4k
    } else {
6322
13.4k
  elem = xmlParseName(ctxt);
6323
13.4k
  if (elem == NULL) {
6324
1.21k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6325
1.21k
      return(NULL);
6326
1.21k
  }
6327
12.2k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6328
12.2k
  if (cur == NULL) {
6329
4
      xmlErrMemory(ctxt);
6330
4
      return(NULL);
6331
4
  }
6332
12.2k
  GROW;
6333
12.2k
  if (RAW == '?') {
6334
2.86k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6335
2.86k
      NEXT;
6336
9.41k
  } else if (RAW == '*') {
6337
1.08k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6338
1.08k
      NEXT;
6339
8.33k
  } else if (RAW == '+') {
6340
866
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6341
866
      NEXT;
6342
7.46k
  } else {
6343
7.46k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6344
7.46k
  }
6345
12.2k
  GROW;
6346
12.2k
    }
6347
30.1k
    while (!PARSER_STOPPED(ctxt)) {
6348
29.9k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6349
29.9k
        if (RAW == ')')
6350
9.54k
            break;
6351
        /*
6352
   * Each loop we parse one separator and one element.
6353
   */
6354
20.4k
        if (RAW == ',') {
6355
8.33k
      if (type == 0) type = CUR;
6356
6357
      /*
6358
       * Detect "Name | Name , Name" error
6359
       */
6360
4.18k
      else if (type != CUR) {
6361
209
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6362
209
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6363
209
                      type);
6364
209
    if ((last != NULL) && (last != ret))
6365
209
        xmlFreeDocElementContent(ctxt->myDoc, last);
6366
209
    if (ret != NULL)
6367
209
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6368
209
    return(NULL);
6369
209
      }
6370
8.12k
      NEXT;
6371
6372
8.12k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6373
8.12k
      if (op == NULL) {
6374
2
                xmlErrMemory(ctxt);
6375
2
    if ((last != NULL) && (last != ret))
6376
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6377
2
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6378
2
    return(NULL);
6379
2
      }
6380
8.12k
      if (last == NULL) {
6381
4.15k
    op->c1 = ret;
6382
4.15k
    if (ret != NULL)
6383
4.15k
        ret->parent = op;
6384
4.15k
    ret = cur = op;
6385
4.15k
      } else {
6386
3.97k
          cur->c2 = op;
6387
3.97k
    if (op != NULL)
6388
3.97k
        op->parent = cur;
6389
3.97k
    op->c1 = last;
6390
3.97k
    if (last != NULL)
6391
3.97k
        last->parent = op;
6392
3.97k
    cur =op;
6393
3.97k
    last = NULL;
6394
3.97k
      }
6395
12.0k
  } else if (RAW == '|') {
6396
9.98k
      if (type == 0) type = CUR;
6397
6398
      /*
6399
       * Detect "Name , Name | Name" error
6400
       */
6401
4.75k
      else if (type != CUR) {
6402
220
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6403
220
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6404
220
          type);
6405
220
    if ((last != NULL) && (last != ret))
6406
220
        xmlFreeDocElementContent(ctxt->myDoc, last);
6407
220
    if (ret != NULL)
6408
220
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6409
220
    return(NULL);
6410
220
      }
6411
9.76k
      NEXT;
6412
6413
9.76k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6414
9.76k
      if (op == NULL) {
6415
2
                xmlErrMemory(ctxt);
6416
2
    if ((last != NULL) && (last != ret))
6417
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6418
2
    if (ret != NULL)
6419
2
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6420
2
    return(NULL);
6421
2
      }
6422
9.75k
      if (last == NULL) {
6423
5.23k
    op->c1 = ret;
6424
5.23k
    if (ret != NULL)
6425
5.23k
        ret->parent = op;
6426
5.23k
    ret = cur = op;
6427
5.23k
      } else {
6428
4.52k
          cur->c2 = op;
6429
4.52k
    if (op != NULL)
6430
4.52k
        op->parent = cur;
6431
4.52k
    op->c1 = last;
6432
4.52k
    if (last != NULL)
6433
4.52k
        last->parent = op;
6434
4.52k
    cur =op;
6435
4.52k
    last = NULL;
6436
4.52k
      }
6437
9.75k
  } else {
6438
2.08k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6439
2.08k
      if ((last != NULL) && (last != ret))
6440
1.05k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6441
2.08k
      if (ret != NULL)
6442
2.08k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6443
2.08k
      return(NULL);
6444
2.08k
  }
6445
17.8k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6446
17.8k
        if (RAW == '(') {
6447
666
            int newInputNr = ctxt->inputNr;
6448
6449
      /* Recurse on second child */
6450
666
      NEXT;
6451
666
      last = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6452
666
                                                          depth + 1);
6453
666
            if (last == NULL) {
6454
364
    if (ret != NULL)
6455
364
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6456
364
    return(NULL);
6457
364
            }
6458
17.2k
  } else {
6459
17.2k
      elem = xmlParseName(ctxt);
6460
17.2k
      if (elem == NULL) {
6461
414
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6462
414
    if (ret != NULL)
6463
414
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6464
414
    return(NULL);
6465
414
      }
6466
16.8k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6467
16.8k
      if (last == NULL) {
6468
3
                xmlErrMemory(ctxt);
6469
3
    if (ret != NULL)
6470
3
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6471
3
    return(NULL);
6472
3
      }
6473
16.8k
      if (RAW == '?') {
6474
4.13k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6475
4.13k
    NEXT;
6476
12.6k
      } else if (RAW == '*') {
6477
1.39k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6478
1.39k
    NEXT;
6479
11.2k
      } else if (RAW == '+') {
6480
1.11k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6481
1.11k
    NEXT;
6482
10.1k
      } else {
6483
10.1k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6484
10.1k
      }
6485
16.8k
  }
6486
17.8k
    }
6487
9.73k
    if ((cur != NULL) && (last != NULL)) {
6488
7.12k
        cur->c2 = last;
6489
7.12k
  if (last != NULL)
6490
7.12k
      last->parent = cur;
6491
7.12k
    }
6492
9.73k
#ifdef LIBXML_VALID_ENABLED
6493
9.73k
    if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6494
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6495
0
                         "Element content declaration doesn't start "
6496
0
                         "and stop in the same entity\n",
6497
0
                         NULL, NULL);
6498
0
    }
6499
9.73k
#endif
6500
9.73k
    NEXT;
6501
9.73k
    if (RAW == '?') {
6502
1.11k
  if (ret != NULL) {
6503
1.11k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6504
1.11k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6505
389
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6506
724
      else
6507
724
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6508
1.11k
  }
6509
1.11k
  NEXT;
6510
8.62k
    } else if (RAW == '*') {
6511
2.17k
  if (ret != NULL) {
6512
2.17k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6513
2.17k
      cur = ret;
6514
      /*
6515
       * Some normalization:
6516
       * (a | b* | c?)* == (a | b | c)*
6517
       */
6518
4.00k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6519
1.83k
    if ((cur->c1 != NULL) &&
6520
1.83k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6521
1.83k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6522
679
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6523
1.83k
    if ((cur->c2 != NULL) &&
6524
1.83k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6525
1.83k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6526
505
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6527
1.83k
    cur = cur->c2;
6528
1.83k
      }
6529
2.17k
  }
6530
2.17k
  NEXT;
6531
6.44k
    } else if (RAW == '+') {
6532
2.57k
  if (ret != NULL) {
6533
2.57k
      int found = 0;
6534
6535
2.57k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6536
2.57k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6537
392
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6538
2.18k
      else
6539
2.18k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6540
      /*
6541
       * Some normalization:
6542
       * (a | b*)+ == (a | b)*
6543
       * (a | b?)+ == (a | b)*
6544
       */
6545
4.18k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6546
1.60k
    if ((cur->c1 != NULL) &&
6547
1.60k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
1.60k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6549
634
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
634
        found = 1;
6551
634
    }
6552
1.60k
    if ((cur->c2 != NULL) &&
6553
1.60k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6554
1.60k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6555
441
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6556
441
        found = 1;
6557
441
    }
6558
1.60k
    cur = cur->c2;
6559
1.60k
      }
6560
2.57k
      if (found)
6561
809
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6562
2.57k
  }
6563
2.57k
  NEXT;
6564
2.57k
    }
6565
9.73k
    return(ret);
6566
13.0k
}
6567
6568
/**
6569
 * parse the declaration for a Mixed Element content
6570
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6571
 *
6572
 * @deprecated Internal function, don't use.
6573
 *
6574
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6575
 *
6576
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6577
 *
6578
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6579
 *
6580
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6581
 *
6582
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6583
 * TODO Parameter-entity replacement text must be properly nested
6584
 *  with parenthesized groups. That is to say, if either of the
6585
 *  opening or closing parentheses in a choice, seq, or Mixed
6586
 *  construct is contained in the replacement text for a parameter
6587
 *  entity, both must be contained in the same replacement text. For
6588
 *  interoperability, if a parameter-entity reference appears in a
6589
 *  choice, seq, or Mixed construct, its replacement text should not
6590
 *  be empty, and neither the first nor last non-blank character of
6591
 *  the replacement text should be a connector (| or ,).
6592
 *
6593
 * @param ctxt  an XML parser context
6594
 * @param inputchk  the input used for the current entity, needed for boundary checks
6595
 * @returns the tree of xmlElementContent describing the element
6596
 *          hierarchy.
6597
 */
6598
xmlElementContent *
6599
0
xmlParseElementChildrenContentDecl(xmlParserCtxt *ctxt, int inputchk) {
6600
    /* stub left for API/ABI compat */
6601
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6602
0
}
6603
6604
/**
6605
 * parse the declaration for an Element content either Mixed or Children,
6606
 * the cases EMPTY and ANY are handled directly in #xmlParseElementDecl
6607
 *
6608
 * @deprecated Internal function, don't use.
6609
 *
6610
 *     [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6611
 *
6612
 * @param ctxt  an XML parser context
6613
 * @param name  the name of the element being defined.
6614
 * @param result  the Element Content pointer will be stored here if any
6615
 * @returns an xmlElementTypeVal value or -1 on error
6616
 */
6617
6618
int
6619
xmlParseElementContentDecl(xmlParserCtxt *ctxt, const xmlChar *name,
6620
16.6k
                           xmlElementContent **result) {
6621
6622
16.6k
    xmlElementContentPtr tree = NULL;
6623
16.6k
    int openInputNr = ctxt->inputNr;
6624
16.6k
    int res;
6625
6626
16.6k
    *result = NULL;
6627
6628
16.6k
    if (RAW != '(') {
6629
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6630
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6631
0
  return(-1);
6632
0
    }
6633
16.6k
    NEXT;
6634
16.6k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6635
16.6k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6636
3.80k
        tree = xmlParseElementMixedContentDecl(ctxt, openInputNr);
6637
3.80k
  res = XML_ELEMENT_TYPE_MIXED;
6638
12.8k
    } else {
6639
12.8k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, openInputNr, 1);
6640
12.8k
  res = XML_ELEMENT_TYPE_ELEMENT;
6641
12.8k
    }
6642
16.6k
    if (tree == NULL)
6643
5.48k
        return(-1);
6644
11.1k
    SKIP_BLANKS_PE;
6645
11.1k
    *result = tree;
6646
11.1k
    return(res);
6647
16.6k
}
6648
6649
/**
6650
 * Parse an element declaration. Always consumes '<!'.
6651
 *
6652
 * @deprecated Internal function, don't use.
6653
 *
6654
 *     [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6655
 *
6656
 * [ VC: Unique Element Type Declaration ]
6657
 * No element type may be declared more than once
6658
 *
6659
 * @param ctxt  an XML parser context
6660
 * @returns the type of the element, or -1 in case of error
6661
 */
6662
int
6663
24.4k
xmlParseElementDecl(xmlParserCtxt *ctxt) {
6664
24.4k
    const xmlChar *name;
6665
24.4k
    int ret = -1;
6666
24.4k
    xmlElementContentPtr content  = NULL;
6667
6668
24.4k
    if ((CUR != '<') || (NXT(1) != '!'))
6669
0
        return(ret);
6670
24.4k
    SKIP(2);
6671
6672
    /* GROW; done in the caller */
6673
24.4k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6674
23.4k
#ifdef LIBXML_VALID_ENABLED
6675
23.4k
  int oldInputNr = ctxt->inputNr;
6676
23.4k
#endif
6677
6678
23.4k
  SKIP(7);
6679
23.4k
  if (SKIP_BLANKS_PE == 0) {
6680
195
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6681
195
               "Space required after 'ELEMENT'\n");
6682
195
      return(-1);
6683
195
  }
6684
23.2k
        name = xmlParseName(ctxt);
6685
23.2k
  if (name == NULL) {
6686
389
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6687
389
         "xmlParseElementDecl: no name for Element\n");
6688
389
      return(-1);
6689
389
  }
6690
22.8k
  if (SKIP_BLANKS_PE == 0) {
6691
10.3k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6692
10.3k
         "Space required after the element name\n");
6693
10.3k
  }
6694
22.8k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6695
2.37k
      SKIP(5);
6696
      /*
6697
       * Element must always be empty.
6698
       */
6699
2.37k
      ret = XML_ELEMENT_TYPE_EMPTY;
6700
20.4k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6701
20.4k
             (NXT(2) == 'Y')) {
6702
2.59k
      SKIP(3);
6703
      /*
6704
       * Element is a generic container.
6705
       */
6706
2.59k
      ret = XML_ELEMENT_TYPE_ANY;
6707
17.8k
  } else if (RAW == '(') {
6708
16.6k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6709
16.6k
            if (ret <= 0)
6710
5.48k
                return(-1);
6711
16.6k
  } else {
6712
      /*
6713
       * [ WFC: PEs in Internal Subset ] error handling.
6714
       */
6715
1.24k
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6716
1.24k
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6717
1.24k
      return(-1);
6718
1.24k
  }
6719
6720
16.1k
  SKIP_BLANKS_PE;
6721
6722
16.1k
  if (RAW != '>') {
6723
3.10k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6724
3.10k
      if (content != NULL) {
6725
2.71k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6726
2.71k
      }
6727
13.0k
  } else {
6728
13.0k
#ifdef LIBXML_VALID_ENABLED
6729
13.0k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6730
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6731
0
                                 "Element declaration doesn't start and stop in"
6732
0
                                 " the same entity\n",
6733
0
                                 NULL, NULL);
6734
0
      }
6735
13.0k
#endif
6736
6737
13.0k
      NEXT;
6738
13.0k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6739
13.0k
    (ctxt->sax->elementDecl != NULL)) {
6740
12.2k
    if (content != NULL)
6741
7.98k
        content->parent = NULL;
6742
12.2k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6743
12.2k
                           content);
6744
12.2k
    if ((content != NULL) && (content->parent == NULL)) {
6745
        /*
6746
         * this is a trick: if xmlAddElementDecl is called,
6747
         * instead of copying the full tree it is plugged directly
6748
         * if called from the parser. Avoid duplicating the
6749
         * interfaces or change the API/ABI
6750
         */
6751
198
        xmlFreeDocElementContent(ctxt->myDoc, content);
6752
198
    }
6753
12.2k
      } else if (content != NULL) {
6754
450
    xmlFreeDocElementContent(ctxt->myDoc, content);
6755
450
      }
6756
13.0k
  }
6757
16.1k
    }
6758
17.0k
    return(ret);
6759
24.4k
}
6760
6761
/**
6762
 * Parse a conditional section. Always consumes '<!['.
6763
 *
6764
 *     [61] conditionalSect ::= includeSect | ignoreSect
6765
 *     [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6766
 *     [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6767
 *     [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>'
6768
 *                                 Ignore)*
6769
 *     [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6770
 * @param ctxt  an XML parser context
6771
 */
6772
6773
static void
6774
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6775
0
    size_t depth = 0;
6776
0
    int isFreshPE = 0;
6777
0
    int oldInputNr = ctxt->inputNr;
6778
0
    int declInputNr = ctxt->inputNr;
6779
6780
0
    while (!PARSER_STOPPED(ctxt)) {
6781
0
        if (ctxt->input->cur >= ctxt->input->end) {
6782
0
            if (ctxt->inputNr <= oldInputNr) {
6783
0
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6784
0
                return;
6785
0
            }
6786
6787
0
            xmlPopPE(ctxt);
6788
0
            declInputNr = ctxt->inputNr;
6789
0
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6790
0
            SKIP(3);
6791
0
            SKIP_BLANKS_PE;
6792
6793
0
            isFreshPE = 0;
6794
6795
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6796
0
                SKIP(7);
6797
0
                SKIP_BLANKS_PE;
6798
0
                if (RAW != '[') {
6799
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6800
0
                    return;
6801
0
                }
6802
0
#ifdef LIBXML_VALID_ENABLED
6803
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6804
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6805
0
                                     "All markup of the conditional section is"
6806
0
                                     " not in the same entity\n",
6807
0
                                     NULL, NULL);
6808
0
                }
6809
0
#endif
6810
0
                NEXT;
6811
6812
0
                depth++;
6813
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6814
0
                size_t ignoreDepth = 0;
6815
6816
0
                SKIP(6);
6817
0
                SKIP_BLANKS_PE;
6818
0
                if (RAW != '[') {
6819
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6820
0
                    return;
6821
0
                }
6822
0
#ifdef LIBXML_VALID_ENABLED
6823
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6824
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6825
0
                                     "All markup of the conditional section is"
6826
0
                                     " not in the same entity\n",
6827
0
                                     NULL, NULL);
6828
0
                }
6829
0
#endif
6830
0
                NEXT;
6831
6832
0
                while (PARSER_STOPPED(ctxt) == 0) {
6833
0
                    if (RAW == 0) {
6834
0
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6835
0
                        return;
6836
0
                    }
6837
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6838
0
                        SKIP(3);
6839
0
                        ignoreDepth++;
6840
                        /* Check for integer overflow */
6841
0
                        if (ignoreDepth == 0) {
6842
0
                            xmlErrMemory(ctxt);
6843
0
                            return;
6844
0
                        }
6845
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6846
0
                               (NXT(2) == '>')) {
6847
0
                        SKIP(3);
6848
0
                        if (ignoreDepth == 0)
6849
0
                            break;
6850
0
                        ignoreDepth--;
6851
0
                    } else {
6852
0
                        NEXT;
6853
0
                    }
6854
0
                }
6855
6856
0
#ifdef LIBXML_VALID_ENABLED
6857
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6858
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6859
0
                                     "All markup of the conditional section is"
6860
0
                                     " not in the same entity\n",
6861
0
                                     NULL, NULL);
6862
0
                }
6863
0
#endif
6864
0
            } else {
6865
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6866
0
                return;
6867
0
            }
6868
0
        } else if ((depth > 0) &&
6869
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6870
0
            if (isFreshPE) {
6871
0
                xmlFatalErrMsg(ctxt, XML_ERR_CONDSEC_INVALID,
6872
0
                               "Parameter entity must match "
6873
0
                               "extSubsetDecl\n");
6874
0
                return;
6875
0
            }
6876
6877
0
            depth--;
6878
0
#ifdef LIBXML_VALID_ENABLED
6879
0
            if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6880
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6881
0
                                 "All markup of the conditional section is not"
6882
0
                                 " in the same entity\n",
6883
0
                                 NULL, NULL);
6884
0
            }
6885
0
#endif
6886
0
            SKIP(3);
6887
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6888
0
            isFreshPE = 0;
6889
0
            xmlParseMarkupDecl(ctxt);
6890
0
        } else if (RAW == '%') {
6891
0
            xmlParsePERefInternal(ctxt, 1);
6892
0
            if (ctxt->inputNr > declInputNr) {
6893
0
                isFreshPE = 1;
6894
0
                declInputNr = ctxt->inputNr;
6895
0
            }
6896
0
        } else {
6897
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6898
0
            return;
6899
0
        }
6900
6901
0
        if (depth == 0)
6902
0
            break;
6903
6904
0
        SKIP_BLANKS;
6905
0
        SHRINK;
6906
0
        GROW;
6907
0
    }
6908
0
}
6909
6910
/**
6911
 * Parse markup declarations. Always consumes '<!' or '<?'.
6912
 *
6913
 * @deprecated Internal function, don't use.
6914
 *
6915
 *     [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6916
 *                         NotationDecl | PI | Comment
6917
 *
6918
 * [ VC: Proper Declaration/PE Nesting ]
6919
 * Parameter-entity replacement text must be properly nested with
6920
 * markup declarations. That is to say, if either the first character
6921
 * or the last character of a markup declaration (markupdecl above) is
6922
 * contained in the replacement text for a parameter-entity reference,
6923
 * both must be contained in the same replacement text.
6924
 *
6925
 * [ WFC: PEs in Internal Subset ]
6926
 * In the internal DTD subset, parameter-entity references can occur
6927
 * only where markup declarations can occur, not within markup declarations.
6928
 * (This does not apply to references that occur in external parameter
6929
 * entities or to the external subset.)
6930
 *
6931
 * @param ctxt  an XML parser context
6932
 */
6933
void
6934
124k
xmlParseMarkupDecl(xmlParserCtxt *ctxt) {
6935
124k
    GROW;
6936
124k
    if (CUR == '<') {
6937
124k
        if (NXT(1) == '!') {
6938
122k
      switch (NXT(2)) {
6939
56.3k
          case 'E':
6940
56.3k
        if (NXT(3) == 'L')
6941
24.4k
      xmlParseElementDecl(ctxt);
6942
31.9k
        else if (NXT(3) == 'N')
6943
31.7k
      xmlParseEntityDecl(ctxt);
6944
194
                    else
6945
194
                        SKIP(2);
6946
56.3k
        break;
6947
55.7k
          case 'A':
6948
55.7k
        xmlParseAttributeListDecl(ctxt);
6949
55.7k
        break;
6950
7.69k
          case 'N':
6951
7.69k
        xmlParseNotationDecl(ctxt);
6952
7.69k
        break;
6953
450
          case '-':
6954
450
        xmlParseComment(ctxt);
6955
450
        break;
6956
1.76k
    default:
6957
1.76k
                    xmlFatalErr(ctxt,
6958
1.76k
                                ctxt->inSubset == 2 ?
6959
0
                                    XML_ERR_EXT_SUBSET_NOT_FINISHED :
6960
1.76k
                                    XML_ERR_INT_SUBSET_NOT_FINISHED,
6961
1.76k
                                NULL);
6962
1.76k
                    SKIP(2);
6963
1.76k
        break;
6964
122k
      }
6965
122k
  } else if (NXT(1) == '?') {
6966
2.03k
      xmlParsePI(ctxt);
6967
2.03k
  }
6968
124k
    }
6969
124k
}
6970
6971
/**
6972
 * parse an XML declaration header for external entities
6973
 *
6974
 * @deprecated Internal function, don't use.
6975
 *
6976
 *     [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6977
 * @param ctxt  an XML parser context
6978
 */
6979
6980
void
6981
0
xmlParseTextDecl(xmlParserCtxt *ctxt) {
6982
0
    xmlChar *version;
6983
6984
    /*
6985
     * We know that '<?xml' is here.
6986
     */
6987
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6988
0
  SKIP(5);
6989
0
    } else {
6990
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6991
0
  return;
6992
0
    }
6993
6994
0
    if (SKIP_BLANKS == 0) {
6995
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6996
0
           "Space needed after '<?xml'\n");
6997
0
    }
6998
6999
    /*
7000
     * We may have the VersionInfo here.
7001
     */
7002
0
    version = xmlParseVersionInfo(ctxt);
7003
0
    if (version == NULL) {
7004
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7005
0
        if (version == NULL) {
7006
0
            xmlErrMemory(ctxt);
7007
0
            return;
7008
0
        }
7009
0
    } else {
7010
0
  if (SKIP_BLANKS == 0) {
7011
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7012
0
               "Space needed here\n");
7013
0
  }
7014
0
    }
7015
0
    ctxt->input->version = version;
7016
7017
    /*
7018
     * We must have the encoding declaration
7019
     */
7020
0
    xmlParseEncodingDecl(ctxt);
7021
7022
0
    SKIP_BLANKS;
7023
0
    if ((RAW == '?') && (NXT(1) == '>')) {
7024
0
        SKIP(2);
7025
0
    } else if (RAW == '>') {
7026
        /* Deprecated old WD ... */
7027
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7028
0
  NEXT;
7029
0
    } else {
7030
0
        int c;
7031
7032
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7033
0
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7034
0
            NEXT;
7035
0
            if (c == '>')
7036
0
                break;
7037
0
        }
7038
0
    }
7039
0
}
7040
7041
/**
7042
 * parse Markup declarations from an external subset
7043
 *
7044
 * @deprecated Internal function, don't use.
7045
 *
7046
 *     [30] extSubset ::= textDecl? extSubsetDecl
7047
 *
7048
 *     [31] extSubsetDecl ::= (markupdecl | conditionalSect |
7049
 *                             PEReference | S) *
7050
 * @param ctxt  an XML parser context
7051
 * @param publicId  the public identifier
7052
 * @param systemId  the system identifier (URL)
7053
 */
7054
void
7055
xmlParseExternalSubset(xmlParserCtxt *ctxt, const xmlChar *publicId,
7056
2.88k
                       const xmlChar *systemId) {
7057
2.88k
    int oldInputNr;
7058
7059
2.88k
    xmlCtxtInitializeLate(ctxt);
7060
7061
2.88k
    xmlDetectEncoding(ctxt);
7062
7063
2.88k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7064
0
  xmlParseTextDecl(ctxt);
7065
0
    }
7066
2.88k
    if (ctxt->myDoc == NULL) {
7067
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7068
0
  if (ctxt->myDoc == NULL) {
7069
0
      xmlErrMemory(ctxt);
7070
0
      return;
7071
0
  }
7072
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7073
0
    }
7074
2.88k
    if ((ctxt->myDoc->intSubset == NULL) &&
7075
2.88k
        (xmlCreateIntSubset(ctxt->myDoc, NULL, publicId, systemId) == NULL)) {
7076
6
        xmlErrMemory(ctxt);
7077
6
    }
7078
7079
2.88k
    ctxt->inSubset = 2;
7080
2.88k
    oldInputNr = ctxt->inputNr;
7081
7082
2.88k
    SKIP_BLANKS;
7083
2.88k
    while (!PARSER_STOPPED(ctxt)) {
7084
1.32k
        if (ctxt->input->cur >= ctxt->input->end) {
7085
0
            if (ctxt->inputNr <= oldInputNr) {
7086
0
                xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
7087
0
                break;
7088
0
            }
7089
7090
0
            xmlPopPE(ctxt);
7091
1.32k
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7092
0
            xmlParseConditionalSections(ctxt);
7093
1.32k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7094
0
            xmlParseMarkupDecl(ctxt);
7095
1.32k
        } else if (RAW == '%') {
7096
0
            xmlParsePERefInternal(ctxt, 1);
7097
1.32k
        } else {
7098
1.32k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7099
7100
1.32k
            while (ctxt->inputNr > oldInputNr)
7101
0
                xmlPopPE(ctxt);
7102
1.32k
            break;
7103
1.32k
        }
7104
0
        SKIP_BLANKS;
7105
0
        SHRINK;
7106
0
        GROW;
7107
0
    }
7108
2.88k
}
7109
7110
/**
7111
 * Parse and handle entity references in content. Depending on the SAX
7112
 * interface, this may end up in a call to characters() if this is a
7113
 * CharRef or a predefined entity, if there is no reference() callback,
7114
 * or if the parser was asked to switch to that mode.
7115
 *
7116
 * @deprecated Internal function, don't use.
7117
 *
7118
 * Always consumes '&'.
7119
 *
7120
 *     [67] Reference ::= EntityRef | CharRef
7121
 * @param ctxt  an XML parser context
7122
 */
7123
void
7124
164k
xmlParseReference(xmlParserCtxt *ctxt) {
7125
164k
    xmlEntityPtr ent = NULL;
7126
164k
    const xmlChar *name;
7127
164k
    xmlChar *val;
7128
7129
164k
    if (RAW != '&')
7130
0
        return;
7131
7132
    /*
7133
     * Simple case of a CharRef
7134
     */
7135
164k
    if (NXT(1) == '#') {
7136
46.8k
  int i = 0;
7137
46.8k
  xmlChar out[16];
7138
46.8k
  int value = xmlParseCharRef(ctxt);
7139
7140
46.8k
  if (value == 0)
7141
3.35k
      return;
7142
7143
        /*
7144
         * Just encode the value in UTF-8
7145
         */
7146
43.4k
        COPY_BUF(out, i, value);
7147
43.4k
        out[i] = 0;
7148
43.4k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7149
43.4k
            (!ctxt->disableSAX))
7150
34.9k
            ctxt->sax->characters(ctxt->userData, out, i);
7151
43.4k
  return;
7152
46.8k
    }
7153
7154
    /*
7155
     * We are seeing an entity reference
7156
     */
7157
118k
    name = xmlParseEntityRefInternal(ctxt);
7158
118k
    if (name == NULL)
7159
8.69k
        return;
7160
109k
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7161
109k
    if (ent == NULL) {
7162
        /*
7163
         * Create a reference for undeclared entities.
7164
         */
7165
3.66k
        if ((ctxt->replaceEntities == 0) &&
7166
3.66k
            (ctxt->sax != NULL) &&
7167
3.66k
            (ctxt->disableSAX == 0) &&
7168
3.66k
            (ctxt->sax->reference != NULL)) {
7169
1.22k
            ctxt->sax->reference(ctxt->userData, name);
7170
1.22k
        }
7171
3.66k
        return;
7172
3.66k
    }
7173
105k
    if (!ctxt->wellFormed)
7174
58.3k
  return;
7175
7176
    /* special case of predefined entities */
7177
47.3k
    if ((ent->name == NULL) ||
7178
47.3k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7179
41.1k
  val = ent->content;
7180
41.1k
  if (val == NULL) return;
7181
  /*
7182
   * inline the entity.
7183
   */
7184
41.1k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7185
41.1k
      (!ctxt->disableSAX))
7186
41.1k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7187
41.1k
  return;
7188
41.1k
    }
7189
7190
    /*
7191
     * Some users try to parse entities on their own and used to set
7192
     * the renamed "checked" member. Fix the flags to cover this
7193
     * case.
7194
     */
7195
6.17k
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7196
0
        ent->flags |= XML_ENT_PARSED;
7197
7198
    /*
7199
     * The first reference to the entity triggers a parsing phase
7200
     * where the ent->children is filled with the result from
7201
     * the parsing.
7202
     * Note: external parsed entities will not be loaded, it is not
7203
     * required for a non-validating parser, unless the parsing option
7204
     * of validating, or substituting entities were given. Doing so is
7205
     * far more secure as the parser will only process data coming from
7206
     * the document entity by default.
7207
     *
7208
     * FIXME: This doesn't work correctly since entities can be
7209
     * expanded with different namespace declarations in scope.
7210
     * For example:
7211
     *
7212
     * <!DOCTYPE doc [
7213
     *   <!ENTITY ent "<ns:elem/>">
7214
     * ]>
7215
     * <doc>
7216
     *   <decl1 xmlns:ns="urn:ns1">
7217
     *     &ent;
7218
     *   </decl1>
7219
     *   <decl2 xmlns:ns="urn:ns2">
7220
     *     &ent;
7221
     *   </decl2>
7222
     * </doc>
7223
     *
7224
     * Proposed fix:
7225
     *
7226
     * - Ignore current namespace declarations when parsing the
7227
     *   entity. If a prefix can't be resolved, don't report an error
7228
     *   but mark it as unresolved.
7229
     * - Try to resolve these prefixes when expanding the entity.
7230
     *   This will require a specialized version of xmlStaticCopyNode
7231
     *   which can also make use of the namespace hash table to avoid
7232
     *   quadratic behavior.
7233
     *
7234
     * Alternatively, we could simply reparse the entity on each
7235
     * expansion like we already do with custom SAX callbacks.
7236
     * External entity content should be cached in this case.
7237
     */
7238
6.17k
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7239
6.17k
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7240
1.36k
         ((ctxt->replaceEntities) ||
7241
5.58k
          (ctxt->validate)))) {
7242
5.58k
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7243
3.67k
            xmlCtxtParseEntity(ctxt, ent);
7244
3.67k
        } else if (ent->children == NULL) {
7245
            /*
7246
             * Probably running in SAX mode and the callbacks don't
7247
             * build the entity content. Parse the entity again.
7248
             *
7249
             * This will also be triggered in normal tree builder mode
7250
             * if an entity happens to be empty, causing unnecessary
7251
             * reloads. It's hard to come up with a reliable check in
7252
             * which mode we're running.
7253
             */
7254
1.17k
            xmlCtxtParseEntity(ctxt, ent);
7255
1.17k
        }
7256
5.58k
    }
7257
7258
    /*
7259
     * We also check for amplification if entities aren't substituted.
7260
     * They might be expanded later.
7261
     */
7262
6.17k
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7263
0
        return;
7264
7265
6.17k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7266
2.34k
        return;
7267
7268
3.83k
    if (ctxt->replaceEntities == 0) {
7269
  /*
7270
   * Create a reference
7271
   */
7272
1.39k
        if (ctxt->sax->reference != NULL)
7273
1.39k
      ctxt->sax->reference(ctxt->userData, ent->name);
7274
2.43k
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7275
1.46k
        xmlNodePtr copy, cur;
7276
7277
        /*
7278
         * Seems we are generating the DOM content, copy the tree
7279
   */
7280
1.46k
        cur = ent->children;
7281
7282
        /*
7283
         * Handle first text node with SAX to coalesce text efficiently
7284
         */
7285
1.46k
        if ((cur->type == XML_TEXT_NODE) ||
7286
1.46k
            (cur->type == XML_CDATA_SECTION_NODE)) {
7287
662
            int len = xmlStrlen(cur->content);
7288
7289
662
            if ((cur->type == XML_TEXT_NODE) ||
7290
662
                (ctxt->options & XML_PARSE_NOCDATA)) {
7291
374
                if (ctxt->sax->characters != NULL)
7292
374
                    ctxt->sax->characters(ctxt, cur->content, len);
7293
374
            } else {
7294
288
                if (ctxt->sax->cdataBlock != NULL)
7295
288
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7296
288
            }
7297
7298
662
            cur = cur->next;
7299
662
        }
7300
7301
3.52k
        while (cur != NULL) {
7302
2.92k
            xmlNodePtr last;
7303
7304
            /*
7305
             * Handle last text node with SAX to coalesce text efficiently
7306
             */
7307
2.92k
            if ((cur->next == NULL) &&
7308
2.92k
                ((cur->type == XML_TEXT_NODE) ||
7309
1.09k
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7310
870
                int len = xmlStrlen(cur->content);
7311
7312
870
                if ((cur->type == XML_TEXT_NODE) ||
7313
870
                    (ctxt->options & XML_PARSE_NOCDATA)) {
7314
582
                    if (ctxt->sax->characters != NULL)
7315
582
                        ctxt->sax->characters(ctxt, cur->content, len);
7316
582
                } else {
7317
288
                    if (ctxt->sax->cdataBlock != NULL)
7318
288
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7319
288
                }
7320
7321
870
                break;
7322
870
            }
7323
7324
            /*
7325
             * Reset coalesce buffer stats only for non-text nodes.
7326
             */
7327
2.05k
            ctxt->nodemem = 0;
7328
2.05k
            ctxt->nodelen = 0;
7329
7330
2.05k
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7331
7332
2.05k
            if (copy == NULL) {
7333
1
                xmlErrMemory(ctxt);
7334
1
                break;
7335
1
            }
7336
7337
2.05k
            if (ctxt->parseMode == XML_PARSE_READER) {
7338
                /* Needed for reader */
7339
0
                copy->extra = cur->extra;
7340
                /* Maybe needed for reader */
7341
0
                copy->_private = cur->_private;
7342
0
            }
7343
7344
2.05k
            copy->parent = ctxt->node;
7345
2.05k
            last = ctxt->node->last;
7346
2.05k
            if (last == NULL) {
7347
790
                ctxt->node->children = copy;
7348
1.26k
            } else {
7349
1.26k
                last->next = copy;
7350
1.26k
                copy->prev = last;
7351
1.26k
            }
7352
2.05k
            ctxt->node->last = copy;
7353
7354
2.05k
            cur = cur->next;
7355
2.05k
        }
7356
1.46k
    }
7357
3.83k
}
7358
7359
static void
7360
4.51k
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7361
    /*
7362
     * [ WFC: Entity Declared ]
7363
     * In a document without any DTD, a document with only an
7364
     * internal DTD subset which contains no parameter entity
7365
     * references, or a document with "standalone='yes'", the
7366
     * Name given in the entity reference must match that in an
7367
     * entity declaration, except that well-formed documents
7368
     * need not declare any of the following entities: amp, lt,
7369
     * gt, apos, quot.
7370
     * The declaration of a parameter entity must precede any
7371
     * reference to it.
7372
     * Similarly, the declaration of a general entity must
7373
     * precede any reference to it which appears in a default
7374
     * value in an attribute-list declaration. Note that if
7375
     * entities are declared in the external subset or in
7376
     * external parameter entities, a non-validating processor
7377
     * is not obligated to read and process their declarations;
7378
     * for such documents, the rule that an entity must be
7379
     * declared is a well-formedness constraint only if
7380
     * standalone='yes'.
7381
     */
7382
4.51k
    if ((ctxt->standalone == 1) ||
7383
4.51k
        ((ctxt->hasExternalSubset == 0) &&
7384
3.25k
         (ctxt->hasPErefs == 0))) {
7385
2.68k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7386
2.68k
                          "Entity '%s' not defined\n", name);
7387
2.68k
#ifdef LIBXML_VALID_ENABLED
7388
2.68k
    } else if (ctxt->validate) {
7389
        /*
7390
         * [ VC: Entity Declared ]
7391
         * In a document with an external subset or external
7392
         * parameter entities with "standalone='no'", ...
7393
         * ... The declaration of a parameter entity must
7394
         * precede any reference to it...
7395
         */
7396
223
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7397
223
                         "Entity '%s' not defined\n", name, NULL);
7398
223
#endif
7399
1.60k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7400
1.60k
               ((ctxt->replaceEntities) &&
7401
1.17k
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7402
        /*
7403
         * Also raise a non-fatal error
7404
         *
7405
         * - if the external subset is loaded and all entity declarations
7406
         *   should be available, or
7407
         * - entity substitution was requested without restricting
7408
         *   external entity access.
7409
         */
7410
638
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7411
638
                     "Entity '%s' not defined\n", name);
7412
970
    } else {
7413
970
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7414
970
                      "Entity '%s' not defined\n", name, NULL);
7415
970
    }
7416
7417
4.51k
    ctxt->valid = 0;
7418
4.51k
}
7419
7420
static xmlEntityPtr
7421
584k
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7422
584k
    xmlEntityPtr ent = NULL;
7423
7424
    /*
7425
     * Predefined entities override any extra definition
7426
     */
7427
584k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7428
386k
        ent = xmlGetPredefinedEntity(name);
7429
386k
        if (ent != NULL)
7430
360k
            return(ent);
7431
386k
    }
7432
7433
    /*
7434
     * Ask first SAX for entity resolution, otherwise try the
7435
     * entities which may have stored in the parser context.
7436
     */
7437
224k
    if (ctxt->sax != NULL) {
7438
224k
  if (ctxt->sax->getEntity != NULL)
7439
224k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7440
224k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7441
224k
      (ctxt->options & XML_PARSE_OLDSAX))
7442
205
      ent = xmlGetPredefinedEntity(name);
7443
224k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7444
224k
      (ctxt->userData==ctxt)) {
7445
892
      ent = xmlSAX2GetEntity(ctxt, name);
7446
892
  }
7447
224k
    }
7448
7449
224k
    if (ent == NULL) {
7450
3.96k
        xmlHandleUndeclaredEntity(ctxt, name);
7451
3.96k
    }
7452
7453
    /*
7454
     * [ WFC: Parsed Entity ]
7455
     * An entity reference must not contain the name of an
7456
     * unparsed entity
7457
     */
7458
220k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7459
196
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7460
196
     "Entity reference to unparsed entity %s\n", name);
7461
196
        ent = NULL;
7462
196
    }
7463
7464
    /*
7465
     * [ WFC: No External Entity References ]
7466
     * Attribute values cannot contain direct or indirect
7467
     * entity references to external entities.
7468
     */
7469
220k
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7470
2.07k
        if (inAttr) {
7471
222
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7472
222
                 "Attribute references external entity '%s'\n", name);
7473
222
            ent = NULL;
7474
222
        }
7475
2.07k
    }
7476
7477
224k
    return(ent);
7478
584k
}
7479
7480
/**
7481
 * Parse an entity reference. Always consumes '&'.
7482
 *
7483
 *     [68] EntityRef ::= '&' Name ';'
7484
 *
7485
 * @param ctxt  an XML parser context
7486
 * @returns the name, or NULL in case of error.
7487
 */
7488
static const xmlChar *
7489
427k
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7490
427k
    const xmlChar *name;
7491
7492
427k
    GROW;
7493
7494
427k
    if (RAW != '&')
7495
0
        return(NULL);
7496
427k
    NEXT;
7497
427k
    name = xmlParseName(ctxt);
7498
427k
    if (name == NULL) {
7499
12.6k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7500
12.6k
           "xmlParseEntityRef: no name\n");
7501
12.6k
        return(NULL);
7502
12.6k
    }
7503
414k
    if (RAW != ';') {
7504
833
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7505
833
  return(NULL);
7506
833
    }
7507
413k
    NEXT;
7508
7509
413k
    return(name);
7510
414k
}
7511
7512
/**
7513
 * @deprecated Internal function, don't use.
7514
 *
7515
 * @param ctxt  an XML parser context
7516
 * @returns the xmlEntity if found, or NULL otherwise.
7517
 */
7518
xmlEntity *
7519
0
xmlParseEntityRef(xmlParserCtxt *ctxt) {
7520
0
    const xmlChar *name;
7521
7522
0
    if (ctxt == NULL)
7523
0
        return(NULL);
7524
7525
0
    name = xmlParseEntityRefInternal(ctxt);
7526
0
    if (name == NULL)
7527
0
        return(NULL);
7528
7529
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7530
0
}
7531
7532
/**
7533
 * Parse an entity reference; this version parses it from
7534
 * a string value.
7535
 *
7536
 *     [68] EntityRef ::= '&' Name ';'
7537
 *
7538
 * [ WFC: Entity Declared ]
7539
 * In a document without any DTD, a document with only an internal DTD
7540
 * subset which contains no parameter entity references, or a document
7541
 * with "standalone='yes'", the Name given in the entity reference
7542
 * must match that in an entity declaration, except that well-formed
7543
 * documents need not declare any of the following entities: amp, lt,
7544
 * gt, apos, quot.  The declaration of a parameter entity must precede
7545
 * any reference to it.  Similarly, the declaration of a general entity
7546
 * must precede any reference to it which appears in a default value in an
7547
 * attribute-list declaration. Note that if entities are declared in the
7548
 * external subset or in external parameter entities, a non-validating
7549
 * processor is not obligated to read and process their declarations;
7550
 * for such documents, the rule that an entity must be declared is a
7551
 * well-formedness constraint only if standalone='yes'.
7552
 *
7553
 * [ WFC: Parsed Entity ]
7554
 * An entity reference must not contain the name of an unparsed entity
7555
 *
7556
 * @param ctxt  an XML parser context
7557
 * @param str  a pointer to an index in the string
7558
 * @returns the entity name, or NULL in case of error. The str pointer
7559
 * is updated to the current location in the string.
7560
 */
7561
static xmlChar *
7562
170k
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7563
170k
    xmlChar *name;
7564
170k
    const xmlChar *ptr;
7565
170k
    xmlChar cur;
7566
7567
170k
    if ((str == NULL) || (*str == NULL))
7568
0
        return(NULL);
7569
170k
    ptr = *str;
7570
170k
    cur = *ptr;
7571
170k
    if (cur != '&')
7572
0
  return(NULL);
7573
7574
170k
    ptr++;
7575
170k
    name = xmlParseStringName(ctxt, &ptr);
7576
170k
    if (name == NULL) {
7577
12
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7578
12
           "xmlParseStringEntityRef: no name\n");
7579
12
  *str = ptr;
7580
12
  return(NULL);
7581
12
    }
7582
170k
    if (*ptr != ';') {
7583
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7584
0
        xmlFree(name);
7585
0
  *str = ptr;
7586
0
  return(NULL);
7587
0
    }
7588
170k
    ptr++;
7589
7590
170k
    *str = ptr;
7591
170k
    return(name);
7592
170k
}
7593
7594
/**
7595
 * Parse a parameter entity reference. Always consumes '%'.
7596
 *
7597
 * The entity content is handled directly by pushing its content as
7598
 * a new input stream.
7599
 *
7600
 *     [69] PEReference ::= '%' Name ';'
7601
 *
7602
 * [ WFC: No Recursion ]
7603
 * A parsed entity must not contain a recursive
7604
 * reference to itself, either directly or indirectly.
7605
 *
7606
 * [ WFC: Entity Declared ]
7607
 * In a document without any DTD, a document with only an internal DTD
7608
 * subset which contains no parameter entity references, or a document
7609
 * with "standalone='yes'", ...  ... The declaration of a parameter
7610
 * entity must precede any reference to it...
7611
 *
7612
 * [ VC: Entity Declared ]
7613
 * In a document with an external subset or external parameter entities
7614
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7615
 * must precede any reference to it...
7616
 *
7617
 * [ WFC: In DTD ]
7618
 * Parameter-entity references may only appear in the DTD.
7619
 * NOTE: misleading but this is handled.
7620
 *
7621
 * @param ctxt  an XML parser context
7622
 * @param markupDecl  whether the PERef starts a markup declaration
7623
 */
7624
static void
7625
711
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl) {
7626
711
    const xmlChar *name;
7627
711
    xmlEntityPtr entity = NULL;
7628
711
    xmlParserInputPtr input;
7629
7630
711
    if (RAW != '%')
7631
0
        return;
7632
711
    NEXT;
7633
711
    name = xmlParseName(ctxt);
7634
711
    if (name == NULL) {
7635
226
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7636
226
  return;
7637
226
    }
7638
485
    if (RAW != ';') {
7639
202
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7640
202
        return;
7641
202
    }
7642
7643
283
    NEXT;
7644
7645
    /* Must be set before xmlHandleUndeclaredEntity */
7646
283
    ctxt->hasPErefs = 1;
7647
7648
    /*
7649
     * Request the entity from SAX
7650
     */
7651
283
    if ((ctxt->sax != NULL) &&
7652
283
  (ctxt->sax->getParameterEntity != NULL))
7653
283
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7654
7655
283
    if (entity == NULL) {
7656
283
        xmlHandleUndeclaredEntity(ctxt, name);
7657
283
    } else {
7658
  /*
7659
   * Sanity check in case the SAX lookup returned a non-parameter entity
7660
   */
7661
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7662
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7663
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7664
0
      "Internal: %%%s; is not a parameter entity\n",
7665
0
        name, NULL);
7666
0
  } else {
7667
0
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7668
0
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7669
0
     (((ctxt->loadsubset & ~XML_SKIP_IDS) == 0) &&
7670
0
      (ctxt->replaceEntities == 0) &&
7671
0
      (ctxt->validate == 0))))
7672
0
    return;
7673
7674
0
            if (entity->flags & XML_ENT_EXPANDING) {
7675
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7676
0
                xmlHaltParser(ctxt);
7677
0
                return;
7678
0
            }
7679
7680
0
      input = xmlNewEntityInputStream(ctxt, entity);
7681
0
      if (xmlCtxtPushInput(ctxt, input) < 0) {
7682
0
                xmlFreeInputStream(input);
7683
0
    return;
7684
0
            }
7685
7686
0
            entity->flags |= XML_ENT_EXPANDING;
7687
7688
0
            if (markupDecl)
7689
0
                input->flags |= XML_INPUT_MARKUP_DECL;
7690
7691
0
            GROW;
7692
7693
0
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7694
0
                xmlDetectEncoding(ctxt);
7695
7696
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7697
0
                    (IS_BLANK_CH(NXT(5)))) {
7698
0
                    xmlParseTextDecl(ctxt);
7699
0
                }
7700
0
            }
7701
0
  }
7702
0
    }
7703
283
}
7704
7705
/**
7706
 * Parse a parameter entity reference.
7707
 *
7708
 * @deprecated Internal function, don't use.
7709
 *
7710
 * @param ctxt  an XML parser context
7711
 */
7712
void
7713
0
xmlParsePEReference(xmlParserCtxt *ctxt) {
7714
0
    xmlParsePERefInternal(ctxt, 0);
7715
0
}
7716
7717
/**
7718
 * Load the content of an entity.
 *
 * The resource named by entity->URI / entity->ExternalID is opened with
 * xmlLoadResource() and read on a temporary, private input stack. The
 * context's own input stack and encoding are saved beforehand and put
 * back before returning, on success and on failure alike. A leading
 * text declaration, if present, is parsed and dropped; every remaining
 * character is checked with xmlGetUTF8Char() / IS_CHAR; on success a
 * copy of the data is stored in entity->content and entity->length.
7719
 *
7720
 * @param ctxt  an XML parser context
7721
 * @param entity  an unloaded system entity (external parameter entity or
 *                external general parsed entity whose content is NULL)
7722
 * @returns 0 in case of success and -1 in case of failure
7723
 */
7724
static int
7725
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7726
0
    xmlParserInputPtr oldinput, input = NULL;
7727
0
    xmlParserInputPtr *oldinputTab;
7728
0
    xmlChar *oldencoding;
7729
0
    xmlChar *content = NULL;
7730
0
    xmlResourceType rtype;
7731
0
    size_t length, i;
7732
0
    int oldinputNr, oldinputMax;
7733
0
    int ret = -1;
7734
0
    int res;
7735
7736
0
    if ((ctxt == NULL) || (entity == NULL) ||
7737
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7738
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7739
0
  (entity->content != NULL)) {
7740
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7741
0
              "xmlLoadEntityContent parameter error");
7742
0
        return(-1);
7743
0
    }
7744
7745
0
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7746
0
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7747
0
    else
7748
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7749
7750
0
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7751
0
                            (char *) entity->ExternalID, rtype);
7752
0
    if (input == NULL)
7753
0
        return(-1);
7754
    /* Save the current input stack and encoding; restored at "error:". */
7755
0
    oldinput = ctxt->input;
7756
0
    oldinputNr = ctxt->inputNr;
7757
0
    oldinputMax = ctxt->inputMax;
7758
0
    oldinputTab = ctxt->inputTab;
7759
0
    oldencoding = ctxt->encoding;
7760
    /* Install an empty one-slot input stack for the duration of the load. */
7761
0
    ctxt->input = NULL;
7762
0
    ctxt->inputNr = 0;
7763
0
    ctxt->inputMax = 1;
7764
0
    ctxt->encoding = NULL;
7765
0
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7766
0
    if (ctxt->inputTab == NULL) {
7767
0
        xmlErrMemory(ctxt);
7768
0
        xmlFreeInputStream(input);
7769
0
        goto error;
7770
0
    }
7771
7772
0
    xmlBufResetInput(input->buf->buffer, input);
7773
7774
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
7775
0
        xmlFreeInputStream(input);
7776
0
        goto error;
7777
0
    }
7778
7779
0
    xmlDetectEncoding(ctxt);
7780
7781
    /*
7782
     * Parse a possible text declaration first
7783
     */
7784
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7785
0
  xmlParseTextDecl(ctxt);
7786
        /*
7787
         * An XML-1.0 document can't reference an entity not XML-1.0
7788
         */
7789
0
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7790
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7791
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7792
0
                           "Version mismatch between document and entity\n");
7793
0
        }
7794
0
    }
7795
    /* Drop what has been consumed so far and account for its size. */
7796
0
    length = input->cur - input->base;
7797
0
    xmlBufShrink(input->buf->buffer, length);
7798
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7799
    /* Keep reading until the grow call stops returning a positive value. */
7800
0
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7801
0
        ;
7802
7803
0
    xmlBufResetInput(input->buf->buffer, input);
7804
7805
0
    if (res < 0) {
7806
0
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7807
0
        goto error;
7808
0
    }
7809
7810
0
    length = xmlBufUse(input->buf->buffer);
7811
0
    if (length > INT_MAX) {
7812
0
        xmlErrMemory(ctxt);
7813
0
        goto error;
7814
0
    }
7815
7816
0
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
7817
0
    if (content == NULL) {
7818
0
        xmlErrMemory(ctxt);
7819
0
        goto error;
7820
0
    }
7821
    /* Reject the content if any decoded character fails IS_CHAR. */
7822
0
    for (i = 0; i < length; ) {
7823
0
        int clen = length - i;
7824
0
        int c = xmlGetUTF8Char(content + i, &clen);
7825
7826
0
        if ((c < 0) || (!IS_CHAR(c))) {
7827
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7828
0
                              "xmlLoadEntityContent: invalid char value %d\n",
7829
0
                              content[i]);
7830
0
            goto error;
7831
0
        }
7832
0
        i += clen;
7833
0
    }
7834
    /* Success: hand the copy over to the entity (content is NULLed below
     * so the final xmlFree(content) does not release it). */
7835
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7836
0
    entity->content = content;
7837
0
    entity->length = length;
7838
0
    content = NULL;
7839
0
    ret = 0;
7840
    /* Shared exit path, also reached on success: free the temporary input
     * stack and restore the saved one. */
7841
0
error:
7842
0
    while (ctxt->inputNr > 0)
7843
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
7844
0
    xmlFree(ctxt->inputTab);
7845
0
    xmlFree(ctxt->encoding);
7846
7847
0
    ctxt->input = oldinput;
7848
0
    ctxt->inputNr = oldinputNr;
7849
0
    ctxt->inputMax = oldinputMax;
7850
0
    ctxt->inputTab = oldinputTab;
7851
0
    ctxt->encoding = oldencoding;
7852
7853
0
    xmlFree(content);
7854
7855
0
    return(ret);
7856
0
}
7857
7858
/**
7859
 * Parse a parameter entity reference held in a string (not in the
 * parser input) and look the entity up through the SAX interface.
7860
 *
7861
 *     [69] PEReference ::= '%' Name ';'
7862
 *
7863
 * [ WFC: No Recursion ]
7864
 * A parsed entity must not contain a recursive
7865
 * reference to itself, either directly or indirectly.
7866
 *
7867
 * [ WFC: Entity Declared ]
7868
 * In a document without any DTD, a document with only an internal DTD
7869
 * subset which contains no parameter entity references, or a document
7870
 * with "standalone='yes'", ...  ... The declaration of a parameter
7871
 * entity must precede any reference to it...
7872
 *
7873
 * [ VC: Entity Declared ]
7874
 * In a document with an external subset or external parameter entities
7875
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7876
 * must precede any reference to it...
7877
 *
7878
 * [ WFC: In DTD ]
7879
 * Parameter-entity references may only appear in the DTD.
7880
 * NOTE: misleading but this is handled.
7881
 *
7882
 * @param ctxt  an XML parser context
7883
 * @param str  a pointer to an index in the string
7884
 * @returns the entity returned by the SAX getParameterEntity callback,
 *         or NULL if the reference is malformed or no entity was found.
7885
 *         Once the leading '%' has been seen, *str is updated to point
 *         just past the part of the string that was parsed.
7886
 */
7887
static xmlEntityPtr
7888
2.38k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7889
2.38k
    const xmlChar *ptr;
7890
2.38k
    xmlChar cur;
7891
2.38k
    xmlChar *name;
7892
2.38k
    xmlEntityPtr entity = NULL;
7893
7894
2.38k
    if ((str == NULL) || (*str == NULL)) return(NULL);
7895
2.38k
    ptr = *str;
7896
2.38k
    cur = *ptr;
7897
2.38k
    if (cur != '%')
7898
0
        return(NULL);
7899
2.38k
    ptr++;
7900
2.38k
    name = xmlParseStringName(ctxt, &ptr);
7901
2.38k
    if (name == NULL) {
7902
1.54k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7903
1.54k
           "xmlParseStringPEReference: no name\n");
7904
1.54k
  *str = ptr;
7905
1.54k
  return(NULL);
7906
1.54k
    }
7907
839
    cur = *ptr;
7908
839
    if (cur != ';') {
7909
574
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7910
574
  xmlFree(name);
7911
574
  *str = ptr;
7912
574
  return(NULL);
7913
574
    }
7914
265
    ptr++;
7915
7916
    /* Must be set before xmlHandleUndeclaredEntity */
7917
265
    ctxt->hasPErefs = 1;
7918
7919
    /*
7920
     * Request the entity from SAX
7921
     */
7922
265
    if ((ctxt->sax != NULL) &&
7923
265
  (ctxt->sax->getParameterEntity != NULL))
7924
265
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7925
7926
265
    if (entity == NULL) {
7927
265
        xmlHandleUndeclaredEntity(ctxt, name);
7928
265
    } else {
7929
  /*
7930
   * Internal checking in case the entity quest barfed.
   * Only a warning is raised here; the entity is still returned.
7931
   */
7932
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7933
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7934
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7935
0
        "%%%s; is not a parameter entity\n",
7936
0
        name, NULL);
7937
0
  }
7938
0
    }
7939
7940
265
    xmlFree(name);
7941
265
    *str = ptr;
7942
265
    return(entity);
7943
839
}
7944
7945
/**
7946
 * Parse the head of a DOCTYPE declaration: the keyword, the name and the
 * optional external identifier. The results are recorded in
 * ctxt->intSubName, ctxt->extSubURI and ctxt->extSubSystem and reported
 * through the SAX internalSubset callback. The '[' or '>' that follows
 * is checked for but not consumed here.
7947
 *
7948
 * @deprecated Internal function, don't use.
7949
 *
7950
 *     [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7951
 *                          ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7952
 *
7953
 * [ VC: Root Element Type ]
7954
 * The Name in the document type declaration must match the element
7955
 * type of the root element.
7956
 *
7957
 * @param ctxt  an XML parser context
7958
 */
7959
7960
void
7961
109k
xmlParseDocTypeDecl(xmlParserCtxt *ctxt) {
7962
109k
    const xmlChar *name = NULL;
7963
109k
    xmlChar *publicId = NULL;
7964
109k
    xmlChar *URI = NULL;
7965
7966
    /*
7967
     * We know that '<!DOCTYPE' has been detected.
7968
     */
7969
109k
    SKIP(9);
7970
7971
109k
    if (SKIP_BLANKS == 0) {
7972
232
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7973
232
                       "Space required after 'DOCTYPE'\n");
7974
232
    }
7975
7976
    /*
7977
     * Parse the DOCTYPE name.
     * A missing name is reported, but parsing continues with name == NULL.
7978
     */
7979
109k
    name = xmlParseName(ctxt);
7980
109k
    if (name == NULL) {
7981
614
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7982
614
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7983
614
    }
7984
109k
    ctxt->intSubName = name;
7985
7986
109k
    SKIP_BLANKS;
7987
7988
    /*
7989
     * Check for public and system identifier (URI)
7990
     */
7991
109k
    URI = xmlParseExternalID(ctxt, &publicId, 1);
7992
7993
109k
    if ((URI != NULL) || (publicId != NULL)) {
7994
106k
        ctxt->hasExternalSubset = 1;
7995
106k
    }
    /* Note: the public identifier is what gets stored in extSubSystem. */
7996
109k
    ctxt->extSubURI = URI;
7997
109k
    ctxt->extSubSystem = publicId;
7998
7999
109k
    SKIP_BLANKS;
8000
8001
    /*
8002
     * Create and update the internal subset.
8003
     */
8004
109k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8005
109k
  (!ctxt->disableSAX))
8006
91.8k
  ctxt->sax->internalSubset(ctxt->userData, name, publicId, URI);
8007
8008
109k
    if ((RAW != '[') && (RAW != '>')) {
8009
5.09k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8010
5.09k
    }
8011
109k
}
8012
8013
/**
8014
 * Parse the internal subset of a DOCTYPE declaration, starting at the
 * optional '[' and ending after the closing '>'.
 *
 * Returns early, without consuming the final '>', if the parser is
 * stopped, the input ends prematurely, or unexpected content is found.
8015
 *
8016
 *     [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8017
 * @param ctxt  an XML parser context
8018
 */
8019
8020
static void
8021
84.6k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8022
    /*
8023
     * Is there any DTD definition ?
8024
     */
8025
84.6k
    if (RAW == '[') {
        /* Input depth on entry; inputs above it are popped with xmlPopPE()
         * below. */
8026
84.6k
        int oldInputNr = ctxt->inputNr;
8027
8028
84.6k
        NEXT;
8029
  /*
8030
   * Parse the succession of Markup declarations and
8031
   * PEReferences.
8032
   * Subsequence (markupdecl | PEReference | S)*
8033
   */
8034
84.6k
  SKIP_BLANKS;
8035
209k
        while (1) {
8036
209k
            if (PARSER_STOPPED(ctxt)) {
8037
5.36k
                return;
8038
204k
            } else if (ctxt->input->cur >= ctxt->input->end) {
                /* Current input exhausted: an error at the original depth,
                 * otherwise pop back to the enclosing input. */
8039
4.98k
                if (ctxt->inputNr <= oldInputNr) {
8040
4.98k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8041
4.98k
                    return;
8042
4.98k
                }
8043
0
                xmlPopPE(ctxt);
8044
199k
            } else if ((RAW == ']') && (ctxt->inputNr <= oldInputNr)) {
                /* ']' only closes the subset when seen at the original
                 * input depth. */
8045
39.4k
                NEXT;
8046
39.4k
                SKIP_BLANKS;
8047
39.4k
                break;
8048
159k
            } else if ((PARSER_EXTERNAL(ctxt)) &&
8049
159k
                       (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8050
                /*
8051
                 * Conditional sections are allowed in external entities
8052
                 * included by PE References in the internal subset.
8053
                 */
8054
0
                xmlParseConditionalSections(ctxt);
8055
159k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8056
124k
                xmlParseMarkupDecl(ctxt);
8057
124k
            } else if (RAW == '%') {
8058
711
                xmlParsePERefInternal(ctxt, 1);
8059
34.8k
            } else {
                /* Anything else is an error: unwind the inputs pushed
                 * since entry and give up. */
8060
34.8k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8061
8062
34.8k
                while (ctxt->inputNr > oldInputNr)
8063
0
                    xmlPopPE(ctxt);
8064
34.8k
                return;
8065
34.8k
            }
8066
124k
            SKIP_BLANKS;
8067
124k
            SHRINK;
8068
124k
            GROW;
8069
124k
        }
8070
84.6k
    }
8071
8072
    /*
8073
     * We should be at the end of the DOCTYPE declaration.
8074
     */
8075
39.4k
    if (RAW != '>') {
8076
211
        xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8077
211
        return;
8078
211
    }
8079
39.2k
    NEXT;
8080
39.2k
}
8081
8082
#ifdef LIBXML_SAX1_ENABLED
8083
/**
8084
 * Parse one attribute (name, '=' and value) for the SAX1 code path.
8085
 *
8086
 * @deprecated Internal function, don't use.
8087
 *
8088
 *     [41] Attribute ::= Name Eq AttValue
8089
 *
8090
 * [ WFC: No External Entity References ]
8091
 * Attribute values cannot contain direct or indirect entity references
8092
 * to external entities.
8093
 *
8094
 * [ WFC: No < in Attribute Values ]
8095
 * The replacement text of any entity referred to directly or indirectly in
8096
 * an attribute value (other than "&lt;") must not contain a <.
8097
 *
8098
 * [ VC: Attribute Value Type ]
8099
 * The attribute must have been declared; the value must be of the type
8100
 * declared for it.
8101
 *
8102
 *     [25] Eq ::= S? '=' S?
8103
 *
8104
 * With namespace:
8105
 *
8106
 *     [NS 11] Attribute ::= QName Eq AttValue
8107
 *
8108
 * Also the case QName == xmlns:??? is handled independently as a namespace
8109
 * definition.
8110
 *
8111
 * @param ctxt  an XML parser context
8112
 * @param value  a xmlChar ** used to store the value of the attribute
8113
 * @returns the attribute name, and the value in *value. NULL is returned
 *          if no name could be parsed. If the '=' is missing the name is
 *          still returned but *value is left NULL.
8114
 */
8115
8116
const xmlChar *
8117
83.1k
xmlParseAttribute(xmlParserCtxt *ctxt, xmlChar **value) {
8118
83.1k
    const xmlChar *name;
8119
83.1k
    xmlChar *val;
8120
8121
83.1k
    *value = NULL;
8122
83.1k
    GROW;
8123
83.1k
    name = xmlParseName(ctxt);
8124
83.1k
    if (name == NULL) {
8125
5.87k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8126
5.87k
                 "error parsing attribute name\n");
8127
5.87k
        return(NULL);
8128
5.87k
    }
8129
8130
    /*
8131
     * read the value
8132
     */
8133
77.2k
    SKIP_BLANKS;
8134
77.2k
    if (RAW == '=') {
8135
66.7k
        NEXT;
8136
66.7k
  SKIP_BLANKS;
8137
66.7k
  val = xmlParseAttValue(ctxt);
8138
66.7k
    } else {
8139
10.5k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8140
10.5k
         "Specification mandates value for attribute %s\n", name);
8141
10.5k
  return(name);
8142
10.5k
    }
8143
8144
    /*
8145
     * Check that xml:lang conforms to the specification
8146
     * No more registered as an error, just generate a warning now
8147
     * since this was deprecated in XML second edition
     * (only checked when ctxt->pedantic is set)
8148
     */
8149
66.7k
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8150
12.4k
  if (!xmlCheckLanguageID(val)) {
8151
9.22k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8152
9.22k
              "Malformed value for xml:lang : %s\n",
8153
9.22k
        val, NULL);
8154
9.22k
  }
8155
12.4k
    }
8156
8157
    /*
8158
     * Check that xml:space conforms to the specification
     * and record the setting in *(ctxt->space): 0 for "default",
     * 1 for "preserve"; any other value only triggers a warning.
8159
     */
8160
66.7k
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8161
4.16k
  if (xmlStrEqual(val, BAD_CAST "default"))
8162
2.10k
      *(ctxt->space) = 0;
8163
2.06k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8164
1.43k
      *(ctxt->space) = 1;
8165
632
  else {
8166
632
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8167
632
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8168
632
                                 val, NULL);
8169
632
  }
8170
4.16k
    }
8171
8172
66.7k
    *value = val;
8173
66.7k
    return(name);
8174
77.2k
}
8175
8176
/**
8177
 * Parse a start tag. Always consumes '<'.
 *
 * Attributes are collected in ctxt->atts as consecutive name/value
 * pairs followed by two NULL entries, then handed to the SAX
 * startElement callback. The value strings are freed before returning;
 * the name pointers are not.
8178
 *
8179
 * @deprecated Internal function, don't use.
8180
 *
8181
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8182
 *
8183
 * [ WFC: Unique Att Spec ]
8184
 * No attribute name may appear more than once in the same start-tag or
8185
 * empty-element tag.
8186
 *
8187
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8188
 *
8189
 * [ WFC: Unique Att Spec ]
8190
 * No attribute name may appear more than once in the same start-tag or
8191
 * empty-element tag.
8192
 *
8193
 * With namespace:
8194
 *
8195
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8196
 *
8197
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8198
 *
8199
 * @param ctxt  an XML parser context
8200
 * @returns the element name parsed, or NULL if the input is not at '<'
 *          or no element name could be parsed
8201
 */
8202
8203
const xmlChar *
8204
114k
xmlParseStartTag(xmlParserCtxt *ctxt) {
8205
114k
    const xmlChar *name;
8206
114k
    const xmlChar *attname;
8207
114k
    xmlChar *attvalue;
8208
114k
    const xmlChar **atts = ctxt->atts;
8209
114k
    int nbatts = 0;
8210
114k
    int maxatts = ctxt->maxatts;
8211
114k
    int i;
8212
8213
114k
    if (RAW != '<') return(NULL);
8214
114k
    NEXT1;
8215
8216
114k
    name = xmlParseName(ctxt);
8217
114k
    if (name == NULL) {
8218
14.1k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8219
14.1k
       "xmlParseStartTag: invalid element name\n");
8220
14.1k
        return(NULL);
8221
14.1k
    }
8222
8223
    /*
8224
     * Now parse the attributes, it ends up with the ending
8225
     *
8226
     * (S Attribute)* S?
8227
     */
8228
99.9k
    SKIP_BLANKS;
8229
99.9k
    GROW;
8230
8231
130k
    while (((RAW != '>') &&
8232
130k
     ((RAW != '/') || (NXT(1) != '>')) &&
8233
130k
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8234
83.1k
  attname = xmlParseAttribute(ctxt, &attvalue);
8235
83.1k
        if (attname == NULL)
8236
5.87k
      break;
8237
77.2k
        if (attvalue != NULL) {
8238
      /*
8239
       * [ WFC: Unique Att Spec ]
8240
       * No attribute name may appear more than once in the same
8241
       * start-tag or empty-element tag.
8242
       */
8243
89.8k
      for (i = 0; i < nbatts;i += 2) {
8244
27.4k
          if (xmlStrEqual(atts[i], attname)) {
8245
2.34k
        xmlErrAttributeDup(ctxt, NULL, attname);
8246
2.34k
        goto failed;
8247
2.34k
    }
8248
27.4k
      }
8249
      /*
8250
       * Add the pair to atts, keeping room for the two trailing NULLs
8251
       */
8252
62.3k
      if (nbatts + 4 > maxatts) {
8253
25.0k
          const xmlChar **n;
8254
25.0k
                int newSize;
8255
8256
25.0k
                newSize = xmlGrowCapacity(maxatts, sizeof(n[0]) * 2,
8257
25.0k
                                          11, XML_MAX_ATTRS);
8258
25.0k
                if (newSize < 0) {
8259
0
        xmlErrMemory(ctxt);
8260
0
        goto failed;
8261
0
    }
8262
25.0k
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
8263
25.0k
                if (newSize < 2)
8264
20.2k
                    newSize = 2;
8265
25.0k
#endif
8266
25.0k
          n = xmlRealloc(atts, newSize * sizeof(n[0]) * 2);
8267
25.0k
    if (n == NULL) {
8268
4
        xmlErrMemory(ctxt);
8269
4
        goto failed;
8270
4
    }
8271
25.0k
    atts = n;
8272
25.0k
                maxatts = newSize * 2;
8273
25.0k
    ctxt->atts = atts;
8274
25.0k
    ctxt->maxatts = maxatts;
8275
25.0k
      }
8276
8277
62.3k
      atts[nbatts++] = attname;
8278
62.3k
      atts[nbatts++] = attvalue;
8279
62.3k
      atts[nbatts] = NULL;
8280
62.3k
      atts[nbatts + 1] = NULL;
8281
      /* The value is now held by atts; keep "failed:" from freeing it. */
8282
62.3k
            attvalue = NULL;
8283
62.3k
  }
8284
        /* This label sits inside the loop and is also reached on the normal
         * path; attvalue is non-NULL here only if it was not stored in atts. */
8285
77.2k
failed:
8286
8287
77.2k
        if (attvalue != NULL)
8288
2.35k
            xmlFree(attvalue);
8289
8290
77.2k
  GROW
8291
77.2k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8292
46.8k
      break;
8293
30.4k
  if (SKIP_BLANKS == 0) {
8294
12.0k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8295
12.0k
         "attributes construct error\n");
8296
12.0k
  }
8297
30.4k
  SHRINK;
8298
30.4k
        GROW;
8299
30.4k
    }
8300
8301
    /*
8302
     * SAX: Start of Element !
     * The attribute array is passed as NULL when no attribute was stored.
8303
     */
8304
99.9k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8305
99.9k
  (!ctxt->disableSAX)) {
8306
95.0k
  if (nbatts > 0)
8307
45.6k
      ctxt->sax->startElement(ctxt->userData, name, atts);
8308
49.4k
  else
8309
49.4k
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8310
95.0k
    }
8311
8312
99.9k
    if (atts != NULL) {
8313
        /* Free only the content strings */
8314
125k
        for (i = 1;i < nbatts;i+=2)
8315
62.3k
      if (atts[i] != NULL)
8316
62.3k
         xmlFree((xmlChar *) atts[i]);
8317
62.9k
    }
8318
99.9k
    return(name);
8319
99.9k
}
8320
8321
/**
8322
 * Parse an end tag. Always consumes '</'.
 *
 * The name is compared with ctxt->name; a mismatch is reported as a
 * fatal error, but the SAX endElement callback is still invoked with
 * ctxt->name and the name and space stacks are popped.
8323
 *
8324
 *     [42] ETag ::= '</' Name S? '>'
8325
 *
8326
 * With namespace
8327
 *
8328
 *     [NS 9] ETag ::= '</' QName S? '>'
8329
 * @param ctxt  an XML parser context
8330
 * @param line  line of the start tag (used in the mismatch error message)
8331
 */
8332
8333
static void
8334
67.6k
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8335
67.6k
    const xmlChar *name;
8336
8337
67.6k
    GROW;
8338
67.6k
    if ((RAW != '<') || (NXT(1) != '/')) {
8339
960
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8340
960
           "xmlParseEndTag: '</' not found\n");
8341
960
  return;
8342
960
    }
8343
66.7k
    SKIP(2);
8344
8345
66.7k
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8346
8347
    /*
8348
     * We should definitely be at the ending "S? '>'" part
8349
     */
8350
66.7k
    GROW;
8351
66.7k
    SKIP_BLANKS;
8352
66.7k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8353
7.77k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8354
7.77k
    } else
8355
58.9k
  NEXT1;
8356
8357
    /*
8358
     * [ WFC: Element Type Match ]
8359
     * The Name in an element's end-tag must match the element type in the
8360
     * start-tag.
8361
     *
     * Any result other than the sentinel (xmlChar*)1 is treated as a
     * mismatch; NULL is shown as "unparsable" in the message.
8362
     */
8363
66.7k
    if (name != (xmlChar*)1) {
8364
5.28k
        if (name == NULL) name = BAD_CAST "unparsable";
8365
5.28k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8366
5.28k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8367
5.28k
                    ctxt->name, line, name);
8368
5.28k
    }
8369
8370
    /*
8371
     * SAX: End of Tag
8372
     */
8373
66.7k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8374
66.7k
  (!ctxt->disableSAX))
8375
62.3k
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8376
8377
66.7k
    namePop(ctxt);
8378
66.7k
    spacePop(ctxt);
8379
66.7k
}
8380
8381
/**
8382
 * Parse an end tag.
 *
 * Public wrapper around xmlParseEndTag1() which passes 0 as the line of
 * the start tag.
8383
 *
8384
 * @deprecated Internal function, don't use.
8385
 *
8386
 *     [42] ETag ::= '</' Name S? '>'
8387
 *
8388
 * With namespace
8389
 *
8390
 *     [NS 9] ETag ::= '</' QName S? '>'
8391
 * @param ctxt  an XML parser context
8392
 */
8393
8394
void
8395
0
xmlParseEndTag(xmlParserCtxt *ctxt) {
8396
0
    xmlParseEndTag1(ctxt, 0);
8397
0
}
8398
#endif /* LIBXML_SAX1_ENABLED */
8399
8400
/************************************************************************
8401
 *                  *
8402
 *          SAX 2 specific operations       *
8403
 *                  *
8404
 ************************************************************************/
8405
8406
/**
8407
 * Parse an XML Namespace QName and return it as hashed strings.
 *
 * If the input does not form a valid QName but some name-like text is
 * present, that text is consumed with xmlParseNmtoken(), the whole span
 * is looked up in the dictionary and returned as the local name with an
 * empty prefix, and a namespace error (XML_NS_ERR_QNAME) is reported.
8408
 *
8409
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8410
 *     [7]  Prefix  ::= NCName
8411
 *     [8]  LocalPart  ::= NCName
8412
 *
8413
 * @param ctxt  an XML parser context
8414
 * @param prefix  pointer to store the prefix part; it is not written on
 *                the two early-return paths where the result name is NULL
8415
 * @returns the Name parsed; its name field is NULL on failure
8416
 */
8417
8418
static xmlHashedString
8419
395k
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8420
395k
    xmlHashedString l, p;
8421
395k
    int start, isNCName = 0;
8422
8423
395k
    l.name = NULL;
8424
395k
    p.name = NULL;
8425
8426
395k
    GROW;
    /* Remember the start as an offset from BASE_PTR, not as a pointer. */
8427
395k
    start = CUR_PTR - BASE_PTR;
8428
8429
395k
    l = xmlParseNCName(ctxt);
8430
395k
    if (l.name != NULL) {
8431
343k
        isNCName = 1;
8432
343k
        if (CUR == ':') {
            /* What was parsed so far is the prefix; the local part follows. */
8433
202k
            NEXT;
8434
202k
            p = l;
8435
202k
            l = xmlParseNCName(ctxt);
8436
202k
        }
8437
343k
    }
    /* No local part, or a further ':' follows: not a valid QName. */
8438
395k
    if ((l.name == NULL) || (CUR == ':')) {
8439
81.1k
        xmlChar *tmp;
8440
8441
81.1k
        l.name = NULL;
8442
81.1k
        p.name = NULL;
8443
81.1k
        if ((isNCName == 0) && (CUR != ':'))
8444
40.5k
            return(l);
        /* The Nmtoken is parsed only to advance the input; the result
         * itself is discarded. */
8445
40.5k
        tmp = xmlParseNmtoken(ctxt);
8446
40.5k
        if (tmp != NULL)
8447
19.8k
            xmlFree(tmp);
8448
40.5k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8449
40.5k
                                CUR_PTR - (BASE_PTR + start));
8450
40.5k
        if (l.name == NULL) {
8451
1
            xmlErrMemory(ctxt);
8452
1
            return(l);
8453
1
        }
8454
40.5k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8455
40.5k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8456
40.5k
    }
8457
8458
355k
    *prefix = p;
8459
355k
    return(l);
8460
395k
}
8461
8462
/**
8463
 * Parse an XML Namespace QName.
 *
 * Wrapper around xmlParseQNameHashed() that returns only the name
 * strings of the hashed results.
8464
 *
8465
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8466
 *     [7]  Prefix  ::= NCName
8467
 *     [8]  LocalPart  ::= NCName
8468
 *
8469
 * @param ctxt  an XML parser context
8470
 * @param prefix  pointer to store the prefix part; left untouched when
 *                NULL is returned
8471
 * @returns the Name parsed or NULL
8472
 */
8473
8474
static const xmlChar *
8475
6.68k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8476
6.68k
    xmlHashedString n, p;
8477
8478
6.68k
    n = xmlParseQNameHashed(ctxt, &p);
8479
6.68k
    if (n.name == NULL)
8480
3.34k
        return(NULL);
8481
3.34k
    *prefix = p.name;
8482
3.34k
    return(n.name);
8483
6.68k
}
8484
8485
/**
8486
 * Parse an XML name and compare it with the expected prefix and name
8487
 * (specialized for endtag parsing).
 *
 * The input is first compared byte by byte with "prefix:name"; on a
 * match followed by '>' or a blank, the input position is advanced
 * without going through the name parser. Otherwise the QName is parsed
 * normally and the results are compared by pointer.
8488
 *
8489
 * @param ctxt  an XML parser context
8490
 * @param name  the localname
8491
 * @param prefix  the prefix, if any.
8492
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
8493
 * and the name for mismatch
8494
 */
8495
8496
static const xmlChar *
8497
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8498
44.0k
                        xmlChar const *prefix) {
8499
44.0k
    const xmlChar *cmp;
8500
44.0k
    const xmlChar *in;
8501
44.0k
    const xmlChar *ret;
8502
44.0k
    const xmlChar *prefix2;
8503
8504
44.0k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8505
8506
44.0k
    GROW;
8507
44.0k
    in = ctxt->input->cur;
8508
8509
44.0k
    cmp = prefix;
8510
165k
    while (*in != 0 && *in == *cmp) {
8511
121k
  ++in;
8512
121k
  ++cmp;
8513
121k
    }
8514
44.0k
    if ((*cmp == 0) && (*in == ':')) {
8515
38.4k
        in++;
8516
38.4k
  cmp = name;
8517
141k
  while (*in != 0 && *in == *cmp) {
8518
102k
      ++in;
8519
102k
      ++cmp;
8520
102k
  }
8521
38.4k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8522
      /* success: consume the matched bytes and update the column */
8523
37.3k
            ctxt->input->col += in - ctxt->input->cur;
8524
37.3k
      ctxt->input->cur = in;
8525
37.3k
      return((const xmlChar*) 1);
8526
37.3k
  }
8527
38.4k
    }
8528
    /*
8529
     * all strings come from the dictionary, equality can be done directly
8530
     */
8531
6.68k
    ret = xmlParseQName (ctxt, &prefix2);
8532
6.68k
    if (ret == NULL)
8533
3.34k
        return(NULL);
8534
3.34k
    if ((ret == name) && (prefix == prefix2))
8535
421
  return((const xmlChar*) 1);
8536
2.92k
    return ret;
8537
3.34k
}
8538
8539
/**
8540
 * Parse an attribute in the new SAX2 framework.
 *
 * Besides parsing the name and value, this checks xml:lang (only when
 * ctxt->pedantic is set) and xml:space, updating *(ctxt->space) for the
 * values "default" (0) and "preserve" (1).
8541
 *
8542
 * @param ctxt  an XML parser context
8543
 * @param pref  the element prefix
8544
 * @param elem  the element name
8545
 * @param hprefix  resulting attribute prefix
8546
 * @param value  resulting value of the attribute
8547
 * @param len  resulting length of the attribute
8548
 * @param alloc  resulting indicator if the attribute was allocated
8549
 * @returns the attribute name, and the value in *value. The returned
 *          name field is NULL if no name could be parsed; on any other
 *          error the name is returned but *value is left NULL.
8550
 */
8551
8552
static xmlHashedString
8553
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8554
                   const xmlChar * pref, const xmlChar * elem,
8555
                   xmlHashedString * hprefix, xmlChar ** value,
8556
                   int *len, int *alloc)
8557
167k
{
8558
167k
    xmlHashedString hname;
8559
167k
    const xmlChar *prefix, *name;
8560
167k
    xmlChar *val = NULL, *internal_val = NULL;
8561
167k
    int special = 0;
8562
167k
    int isNamespace;
8563
167k
    int flags;
8564
8565
167k
    *value = NULL;
8566
167k
    GROW;
8567
167k
    hname = xmlParseQNameHashed(ctxt, hprefix);
8568
167k
    if (hname.name == NULL) {
8569
6.90k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8570
6.90k
                       "error parsing attribute name\n");
8571
6.90k
        return(hname);
8572
6.90k
    }
8573
160k
    name = hname.name;
8574
160k
    prefix = hprefix->name;
8575
8576
    /*
8577
     * get the type if needed
8578
     */
8579
160k
    if (ctxt->attsSpecial != NULL) {
8580
18.3k
        special = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
8581
18.3k
                                              prefix, name));
8582
18.3k
    }
8583
8584
    /*
8585
     * read the value
8586
     */
8587
160k
    SKIP_BLANKS;
8588
160k
    if (RAW != '=') {
8589
7.60k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8590
7.60k
                          "Specification mandates value for attribute %s\n",
8591
7.60k
                          name);
8592
7.60k
        goto error;
8593
7.60k
    }
8594
8595
8596
153k
    NEXT;
8597
153k
    SKIP_BLANKS;
8598
153k
    flags = 0;
    /* "xmlns" or "xmlns:..." - names are compared by pointer with the
     * context's str_xmlns. */
8599
153k
    isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8600
153k
                   (prefix == ctxt->str_xmlns));
8601
153k
    val = xmlParseAttValueInternal(ctxt, len, &flags, special,
8602
153k
                                   isNamespace);
8603
153k
    if (val == NULL)
8604
2.42k
        goto error;
8605
8606
150k
    *alloc = (flags & XML_ATTVAL_ALLOC) != 0;
8607
8608
150k
#ifdef LIBXML_VALID_ENABLED
8609
150k
    if ((ctxt->validate) &&
8610
150k
        (ctxt->standalone) &&
8611
150k
        (special & XML_SPECIAL_EXTERNAL) &&
8612
150k
        (flags & XML_ATTVAL_NORM_CHANGE)) {
8613
0
        xmlValidityError(ctxt, XML_DTD_NOT_STANDALONE,
8614
0
                         "standalone: normalization of attribute %s on %s "
8615
0
                         "by external subset declaration\n",
8616
0
                         name, elem);
8617
0
    }
8618
150k
#endif
8619
8620
150k
    if (prefix == ctxt->str_xml) {
8621
        /*
8622
         * Check that xml:lang conforms to the specification
8623
         * No more registered as an error, just generate a warning now
8624
         * since this was deprecated in XML second edition
         * The check works on a copy of the first *len bytes of val.
8625
         */
8626
25.1k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8627
4.88k
            internal_val = xmlStrndup(val, *len);
8628
4.88k
            if (internal_val == NULL)
8629
3
                goto mem_error;
8630
4.88k
            if (!xmlCheckLanguageID(internal_val)) {
8631
3.40k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8632
3.40k
                              "Malformed value for xml:lang : %s\n",
8633
3.40k
                              internal_val, NULL);
8634
3.40k
            }
8635
4.88k
        }
8636
8637
        /*
8638
         * Check that xml:space conforms to the specification
8639
         */
8640
25.1k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8641
4.30k
            internal_val = xmlStrndup(val, *len);
8642
4.30k
            if (internal_val == NULL)
8643
3
                goto mem_error;
8644
4.30k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8645
2.88k
                *(ctxt->space) = 0;
8646
1.41k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8647
1.21k
                *(ctxt->space) = 1;
8648
195
            else {
8649
195
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8650
195
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8651
195
                              internal_val, NULL);
8652
195
            }
8653
4.30k
        }
8654
25.1k
        if (internal_val) {
8655
9.18k
            xmlFree(internal_val);
8656
9.18k
        }
8657
25.1k
    }
8658
8659
150k
    *value = val;
8660
150k
    return (hname);
8661
8662
6
mem_error:
8663
6
    xmlErrMemory(ctxt);
    /* val is freed only if it was allocated; *alloc is read only when val
     * is non-NULL, i.e. after it has been assigned above. */
8664
10.0k
error:
8665
10.0k
    if ((val != NULL) && (*alloc != 0))
8666
1
        xmlFree(val);
8667
10.0k
    return(hname);
8668
6
}
8669
8670
/**
8671
 * Inserts a new attribute into the hash table.
 *
 * The table (ctxt->attrHash) is probed linearly from the slot selected
 * by hashValue, wrapping around at the end, until either an entry with
 * the same name and namespace URI or a free bucket (index < 0) is
 * found. Names and URIs are compared by pointer.
8672
 *
8673
 * @param ctxt  parser context
8674
 * @param size  size of the hash table (used as a bit mask via size - 1,
 *              so presumably a power of two - confirm at the call site)
8675
 * @param name  attribute name
8676
 * @param uri  namespace uri
8677
 * @param hashValue  combined hash value of name and uri
8678
 * @param aindex  attribute index (this is a multiple of 5)
8679
 * @returns INT_MAX if no existing attribute was found, the attribute
8680
 * index if an attribute was found, -1 if a memory allocation failed.
 * NOTE(review): no path in this function returns -1.
8681
 */
8682
static int
8683
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8684
61.0k
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8685
61.0k
    xmlAttrHashBucket *table = ctxt->attrHash;
8686
61.0k
    xmlAttrHashBucket *bucket;
8687
61.0k
    unsigned hindex;
8688
8689
61.0k
    hindex = hashValue & (size - 1);
8690
61.0k
    bucket = &table[hindex];
8691
8692
68.0k
    while (bucket->index >= 0) {
8693
21.7k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8694
8695
21.7k
        if (name == atts[0]) {
            /* atts[2] holds the namespace index of the stored attribute;
             * resolve it to a URI before comparing. */
8696
16.5k
            int nsIndex = XML_PTR_TO_INT(atts[2]);
8697
8698
16.5k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8699
16.5k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8700
7.37k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8701
14.7k
                return(bucket->index);
8702
16.5k
        }
8703
        /* Occupied by a different attribute: try the next slot. */
8704
6.96k
        hindex++;
8705
6.96k
        bucket++;
8706
6.96k
        if (hindex >= size) {
8707
1.63k
            hindex = 0;
8708
1.63k
            bucket = table;
8709
1.63k
        }
8710
6.96k
    }
8711
8712
46.3k
    bucket->index = aindex;
8713
8714
46.3k
    return(INT_MAX);
8715
61.0k
}
8716
8717
static int
8718
xmlAttrHashInsertQName(xmlParserCtxtPtr ctxt, unsigned size,
8719
                       const xmlChar *name, const xmlChar *prefix,
8720
4.28k
                       unsigned hashValue, int aindex) {
8721
4.28k
    xmlAttrHashBucket *table = ctxt->attrHash;
8722
4.28k
    xmlAttrHashBucket *bucket;
8723
4.28k
    unsigned hindex;
8724
8725
4.28k
    hindex = hashValue & (size - 1);
8726
4.28k
    bucket = &table[hindex];
8727
8728
6.58k
    while (bucket->index >= 0) {
8729
3.75k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8730
8731
3.75k
        if ((name == atts[0]) && (prefix == atts[1]))
8732
1.44k
            return(bucket->index);
8733
8734
2.30k
        hindex++;
8735
2.30k
        bucket++;
8736
2.30k
        if (hindex >= size) {
8737
594
            hindex = 0;
8738
594
            bucket = table;
8739
594
        }
8740
2.30k
    }
8741
8742
2.83k
    bucket->index = aindex;
8743
8744
2.83k
    return(INT_MAX);
8745
4.28k
}
8746
/**
8747
 * Parse a start tag. Always consumes '<'.
8748
 *
8749
 * This routine is called when running SAX2 parsing
8750
 *
8751
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8752
 *
8753
 * [ WFC: Unique Att Spec ]
8754
 * No attribute name may appear more than once in the same start-tag or
8755
 * empty-element tag.
8756
 *
8757
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8758
 *
8759
 * [ WFC: Unique Att Spec ]
8760
 * No attribute name may appear more than once in the same start-tag or
8761
 * empty-element tag.
8762
 *
8763
 * With namespace:
8764
 *
8765
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8766
 *
8767
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8768
 *
 * Processing order: parse the element QName, parse the attributes
 * (namespace declarations are pushed with xmlParserNsPush, other
 * attributes are stored in ctxt->atts), push namespace declarations
 * coming from default attributes, resolve attribute prefixes, check
 * attribute uniqueness with a hash table, append default attributes,
 * rebuild value pointers, and finally invoke the startElementNs SAX
 * callback.
 *
 * pref and URI are only written when processing reaches the SAX
 * callback stage; nbNsPtr is written on every path that reaches the
 * end of the function (including the "done" error path), but not on
 * the early returns.
 *
8769
 * @param ctxt  an XML parser context
8770
 * @param pref  resulting namespace prefix
8771
 * @param URI  resulting namespace URI
8772
 * @param nbNsPtr  resulting number of namespace declarations
8773
 * @returns the element name parsed, or NULL when the input is not at
 * '<', xmlParserNsStartElement fails, the element name is invalid, or
 * xmlCtxtGrowAttrs fails while default attributes are appended
8774
 */
8775
8776
static const xmlChar *
8777
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8778
221k
                  const xmlChar **URI, int *nbNsPtr) {
8779
221k
    xmlHashedString hlocalname;
8780
221k
    xmlHashedString hprefix;
8781
221k
    xmlHashedString hattname;
8782
221k
    xmlHashedString haprefix;
8783
221k
    const xmlChar *localname;
8784
221k
    const xmlChar *prefix;
8785
221k
    const xmlChar *attname;
8786
221k
    const xmlChar *aprefix;
8787
221k
    const xmlChar *uri;
8788
221k
    xmlChar *attvalue = NULL;
8789
221k
    const xmlChar **atts = ctxt->atts;
8790
221k
    unsigned attrHashSize = 0;
8791
221k
    int maxatts = ctxt->maxatts;
8792
221k
    int nratts, nbatts, nbdef;
8793
221k
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8794
221k
    int alloc = 0;
8795
221k
    int numNsErr = 0;
8796
221k
    int numDupErr = 0;
8797
8798
221k
    if (RAW != '<') return(NULL);
8799
221k
    NEXT1;
8800
8801
221k
    /*
     * nbatts counts used slots in atts (five per attribute), nratts
     * counts attributes parsed from the tag, nbdef counts default
     * attributes appended later.
     */
    nbatts = 0;
8802
221k
    nratts = 0;
8803
221k
    nbdef = 0;
8804
221k
    nbNs = 0;
8805
221k
    nbTotalDef = 0;
8806
221k
    attval = 0;
8807
8808
221k
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8809
0
        xmlErrMemory(ctxt);
8810
0
        return(NULL);
8811
0
    }
8812
8813
221k
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8814
221k
    if (hlocalname.name == NULL) {
8815
30.3k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8816
30.3k
           "StartTag: invalid element name\n");
8817
30.3k
        return(NULL);
8818
30.3k
    }
8819
191k
    localname = hlocalname.name;
8820
191k
    prefix = hprefix.name;
8821
8822
    /*
8823
     * Now parse the attributes, it ends up with the ending
8824
     *
8825
     * (S Attribute)* S?
8826
     */
8827
191k
    SKIP_BLANKS;
8828
191k
    GROW;
8829
8830
    /*
8831
     * The ctxt->atts array will be ultimately passed to the SAX callback
8832
     * containing five xmlChar pointers for each attribute:
8833
     *
8834
     * [0] attribute name
8835
     * [1] attribute prefix
8836
     * [2] namespace URI
8837
     * [3] attribute value
8838
     * [4] end of attribute value
8839
     *
8840
     * To save memory, we reuse this array temporarily and store integers
8841
     * in these pointer variables.
8842
     *
8843
     * [0] attribute name
8844
     * [1] attribute prefix
8845
     * [2] hash value of attribute prefix, and later namespace index
8846
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
8847
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
8848
     *
8849
     * The ctxt->attallocs array contains an additional unsigned int for
8850
     * each attribute, containing the hash value of the attribute name
8851
     * and the alloc flag in bit 31.
8852
     */
8853
8854
262k
    while (((RAW != '>') &&
8855
262k
     ((RAW != '/') || (NXT(1) != '>')) &&
8856
262k
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8857
167k
  /* A negative len after the call is replaced by xmlStrlen below */
  int len = -1;
8858
8859
167k
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
8860
167k
                                          &haprefix, &attvalue, &len,
8861
167k
                                          &alloc);
8862
167k
        if (hattname.name == NULL)
8863
6.90k
      break;
8864
160k
        /* No value was produced: nothing to record for this attribute */
        if (attvalue == NULL)
8865
10.0k
            goto next_attr;
8866
150k
        attname = hattname.name;
8867
150k
        aprefix = haprefix.name;
8868
150k
  if (len < 0) len = xmlStrlen(attvalue);
8869
8870
150k
        /* xmlns="...": default namespace declaration */
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8871
13.8k
            xmlHashedString huri;
8872
13.8k
            xmlURIPtr parsedUri;
8873
8874
13.8k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8875
13.8k
            uri = huri.name;
8876
13.8k
            if (uri == NULL) {
8877
1
                xmlErrMemory(ctxt);
8878
1
                goto next_attr;
8879
1
            }
8880
13.8k
            if (*uri != 0) {
8881
12.7k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8882
9
                    xmlErrMemory(ctxt);
8883
9
                    goto next_attr;
8884
9
                }
8885
12.7k
                if (parsedUri == NULL) {
8886
3.59k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8887
3.59k
                             "xmlns: '%s' is not a valid URI\n",
8888
3.59k
                                       uri, NULL, NULL);
8889
9.14k
                } else {
8890
9.14k
                    if (parsedUri->scheme == NULL) {
8891
7.14k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8892
7.14k
                                  "xmlns: URI %s is not absolute\n",
8893
7.14k
                                  uri, NULL, NULL);
8894
7.14k
                    }
8895
9.14k
                    xmlFreeURI(parsedUri);
8896
9.14k
                }
8897
12.7k
                if (uri == ctxt->str_xml_ns) {
8898
197
                    /*
                     * NOTE(review): attname equals ctxt->str_xmlns in this
                     * branch, so this test looks always true - confirm.
                     */
                    if (attname != ctxt->str_xml) {
8899
197
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8900
197
                     "xml namespace URI cannot be the default namespace\n",
8901
197
                                 NULL, NULL, NULL);
8902
197
                    }
8903
197
                    goto next_attr;
8904
197
                }
8905
12.5k
                if ((len == 29) &&
8906
12.5k
                    (xmlStrEqual(uri,
8907
392
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8908
198
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8909
198
                         "reuse of the xmlns namespace name is forbidden\n",
8910
198
                             NULL, NULL, NULL);
8911
198
                    goto next_attr;
8912
198
                }
8913
12.5k
            }
8914
8915
13.4k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
8916
12.3k
                nbNs++;
8917
136k
        } else if (aprefix == ctxt->str_xmlns) {
8918
75.8k
            /* xmlns:NAME="...": attname is the prefix being declared */
            xmlHashedString huri;
8919
75.8k
            xmlURIPtr parsedUri;
8920
8921
75.8k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8922
75.8k
            uri = huri.name;
8923
75.8k
            if (uri == NULL) {
8924
2
                xmlErrMemory(ctxt);
8925
2
                goto next_attr;
8926
2
            }
8927
8928
75.8k
            if (attname == ctxt->str_xml) {
8929
198
                if (uri != ctxt->str_xml_ns) {
8930
198
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8931
198
                             "xml namespace prefix mapped to wrong URI\n",
8932
198
                             NULL, NULL, NULL);
8933
198
                }
8934
                /*
8935
                 * Do not keep a namespace definition node
8936
                 */
8937
198
                goto next_attr;
8938
198
            }
8939
75.6k
            if (uri == ctxt->str_xml_ns) {
8940
199
                /* attname == ctxt->str_xml already left via goto above */
                if (attname != ctxt->str_xml) {
8941
199
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8942
199
                             "xml namespace URI mapped to wrong prefix\n",
8943
199
                             NULL, NULL, NULL);
8944
199
                }
8945
199
                goto next_attr;
8946
199
            }
8947
75.4k
            if (attname == ctxt->str_xmlns) {
8948
567
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8949
567
                         "redefinition of the xmlns prefix is forbidden\n",
8950
567
                         NULL, NULL, NULL);
8951
567
                goto next_attr;
8952
567
            }
8953
74.8k
            if ((len == 29) &&
8954
74.8k
                (xmlStrEqual(uri,
8955
417
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8956
198
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8957
198
                         "reuse of the xmlns namespace name is forbidden\n",
8958
198
                         NULL, NULL, NULL);
8959
198
                goto next_attr;
8960
198
            }
8961
74.6k
            /* uri was already checked against NULL above */
            if ((uri == NULL) || (uri[0] == 0)) {
8962
3.45k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8963
3.45k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
8964
3.45k
                              attname, NULL, NULL);
8965
3.45k
                goto next_attr;
8966
71.2k
            } else {
8967
71.2k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8968
33
                    xmlErrMemory(ctxt);
8969
33
                    goto next_attr;
8970
33
                }
8971
71.2k
                if (parsedUri == NULL) {
8972
23.6k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8973
23.6k
                         "xmlns:%s: '%s' is not a valid URI\n",
8974
23.6k
                                       attname, uri, NULL);
8975
47.6k
                } else {
8976
47.6k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
8977
10.2k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8978
10.2k
                                  "xmlns:%s: URI %s is not absolute\n",
8979
10.2k
                                  attname, uri, NULL);
8980
10.2k
                    }
8981
47.6k
                    xmlFreeURI(parsedUri);
8982
47.6k
                }
8983
71.2k
            }
8984
8985
71.2k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
8986
68.4k
                nbNs++;
8987
71.2k
        } else {
8988
            /*
8989
             * Populate attributes array, see above for repurposing
8990
             * of xmlChar pointers.
8991
             */
8992
61.1k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8993
30.0k
                int res = xmlCtxtGrowAttrs(ctxt);
8994
8995
30.0k
                /* Refresh the local copies before checking the result */
                maxatts = ctxt->maxatts;
8996
30.0k
                atts = ctxt->atts;
8997
8998
30.0k
                if (res < 0)
8999
11
                    goto next_attr;
9000
30.0k
            }
9001
61.0k
            /* Name hash in bits 0-30, alloc flag in bit 31 */
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9002
61.0k
                                        ((unsigned) alloc << 31);
9003
61.0k
            atts[nbatts++] = attname;
9004
61.0k
            atts[nbatts++] = aprefix;
9005
61.0k
            atts[nbatts++] = XML_INT_TO_PTR(haprefix.hashValue);
9006
61.0k
            if (alloc) {
9007
16.7k
                atts[nbatts++] = attvalue;
9008
16.7k
                attvalue += len;
9009
16.7k
                atts[nbatts++] = attvalue;
9010
44.3k
            } else {
9011
                /*
9012
                 * attvalue points into the input buffer which can be
9013
                 * reallocated. Store differences to input->base instead.
9014
                 * The pointers will be reconstructed later.
9015
                 */
9016
44.3k
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
9017
44.3k
                attvalue += len;
9018
44.3k
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
9019
44.3k
            }
9020
            /*
9021
             * tag if some deallocation is needed
9022
             */
9023
61.0k
            if (alloc != 0) attval = 1;
9024
61.0k
            attvalue = NULL; /* moved into atts */
9025
61.0k
        }
9026
9027
160k
next_attr:
9028
160k
        /* Release an allocated value that was not moved into atts */
        if ((attvalue != NULL) && (alloc != 0)) {
9029
25.6k
            xmlFree(attvalue);
9030
25.6k
            attvalue = NULL;
9031
25.6k
        }
9032
9033
160k
  GROW
9034
160k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9035
79.6k
      break;
9036
81.2k
  if (SKIP_BLANKS == 0) {
9037
10.2k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9038
10.2k
         "attributes construct error\n");
9039
10.2k
      break;
9040
10.2k
  }
9041
70.9k
        GROW;
9042
70.9k
    }
9043
9044
    /*
9045
     * Namespaces from default attributes
9046
     */
9047
191k
    if (ctxt->attsDefault != NULL) {
9048
28.7k
        xmlDefAttrsPtr defaults;
9049
9050
28.7k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9051
28.7k
  if (defaults != NULL) {
9052
69.1k
      for (i = 0; i < defaults->nbAttrs; i++) {
9053
42.6k
                xmlDefAttr *attr = &defaults->attrs[i];
9054
9055
42.6k
          attname = attr->name.name;
9056
42.6k
    aprefix = attr->prefix.name;
9057
9058
42.6k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9059
2.69k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9060
9061
2.69k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9062
2.12k
                        nbNs++;
9063
39.9k
    } else if (aprefix == ctxt->str_xmlns) {
9064
4.80k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9065
9066
4.80k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9067
4.80k
                                      NULL, 1) > 0)
9068
3.98k
                        nbNs++;
9069
35.1k
    } else {
9070
35.1k
                    /*
                     * Other default attributes are only counted in this
                     * pass (the count sizes the hash table below); they
                     * are appended to atts in the second pass.
                     */
                    if (nratts + nbTotalDef >= XML_MAX_ATTRS) {
9071
0
                        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
9072
0
                                    "Maximum number of attributes exceeded");
9073
0
                        break;
9074
0
                    }
9075
35.1k
                    nbTotalDef += 1;
9076
35.1k
                }
9077
42.6k
      }
9078
26.4k
  }
9079
28.7k
    }
9080
9081
    /*
9082
     * Resolve attribute namespaces
9083
     */
9084
252k
    for (i = 0; i < nbatts; i += 5) {
9085
61.0k
        attname = atts[i];
9086
61.0k
        aprefix = atts[i+1];
9087
9088
        /*
9089
  * The default namespace does not apply to attribute names.
9090
  */
9091
61.0k
  if (aprefix == NULL) {
9092
20.4k
            nsIndex = NS_INDEX_EMPTY;
9093
40.6k
        } else if (aprefix == ctxt->str_xml) {
9094
25.1k
            nsIndex = NS_INDEX_XML;
9095
25.1k
        } else {
9096
15.5k
            haprefix.name = aprefix;
9097
15.5k
            /* Slot [2] still holds the prefix hash stored while parsing */
            haprefix.hashValue = (size_t) atts[i+2];
9098
15.5k
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9099
9100
15.5k
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9101
4.96k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9102
4.96k
        "Namespace prefix %s for %s on %s is not defined\n",
9103
4.96k
        aprefix, attname, localname);
9104
4.96k
                nsIndex = NS_INDEX_EMPTY;
9105
4.96k
            }
9106
15.5k
        }
9107
9108
61.0k
        /* From here on slot [2] holds the namespace index */
        atts[i+2] = XML_INT_TO_PTR(nsIndex);
9109
61.0k
    }
9110
9111
    /*
9112
     * Maximum number of attributes including default attributes.
9113
     */
9114
191k
    maxAtts = nratts + nbTotalDef;
9115
9116
    /*
9117
     * Verify that attribute names are unique.
9118
     */
9119
191k
    if (maxAtts > 1) {
9120
24.0k
        /* Doubling from 4 until attrHashSize / 2 >= maxAtts */
        attrHashSize = 4;
9121
34.4k
        while (attrHashSize / 2 < (unsigned) maxAtts)
9122
10.4k
            attrHashSize *= 2;
9123
9124
24.0k
        if (attrHashSize > ctxt->attrHashMax) {
9125
13.1k
            xmlAttrHashBucket *tmp;
9126
9127
13.1k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9128
13.1k
            if (tmp == NULL) {
9129
2
                xmlErrMemory(ctxt);
9130
2
                goto done;
9131
2
            }
9132
9133
13.1k
            ctxt->attrHash = tmp;
9134
13.1k
            ctxt->attrHashMax = attrHashSize;
9135
13.1k
        }
9136
9137
24.0k
        /*
         * Fill with -1 bytes; presumably this leaves every bucket index
         * negative, which xmlAttrHashInsert treats as a free bucket.
         */
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9138
9139
61.9k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9140
37.9k
            const xmlChar *nsuri;
9141
37.9k
            unsigned hashValue, nameHashValue, uriHashValue;
9142
37.9k
            int res;
9143
9144
37.9k
            attname = atts[i];
9145
37.9k
            aprefix = atts[i+1];
9146
37.9k
            nsIndex = XML_PTR_TO_INT(atts[i+2]);
9147
            /* Hash values always have bit 31 set, see dict.c */
9148
37.9k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9149
9150
37.9k
            if (nsIndex == NS_INDEX_EMPTY) {
9151
                /*
9152
                 * Prefix with empty namespace means an undeclared
9153
                 * prefix which was already reported above.
9154
                 */
9155
17.7k
                if (aprefix != NULL)
9156
4.40k
                    continue;
9157
13.3k
                nsuri = NULL;
9158
13.3k
                uriHashValue = URI_HASH_EMPTY;
9159
20.1k
            } else if (nsIndex == NS_INDEX_XML) {
9160
11.0k
                nsuri = ctxt->str_xml_ns;
9161
11.0k
                uriHashValue = URI_HASH_XML;
9162
11.0k
            } else {
9163
9.11k
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9164
9.11k
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9165
9.11k
            }
9166
9167
33.5k
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9168
33.5k
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9169
33.5k
                                    hashValue, i);
9170
33.5k
            /*
             * xmlAttrHashInsert, as written in this file, returns an
             * attribute index or INT_MAX, so this branch is defensive.
             */
            if (res < 0)
9171
0
                continue;
9172
9173
            /*
9174
             * [ WFC: Unique Att Spec ]
9175
             * No attribute name may appear more than once in the same
9176
             * start-tag or empty-element tag.
9177
             * As extended by the Namespace in XML REC.
9178
             */
9179
33.5k
            if (res < INT_MAX) {
9180
6.89k
                /*
                 * Same name and namespace URI as attribute res. An equal
                 * prefix means an identical QName; otherwise two prefixes
                 * resolve to the same URI.
                 */
                if (aprefix == atts[res+1]) {
9181
3.83k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9182
3.83k
                    numDupErr += 1;
9183
3.83k
                } else {
9184
3.06k
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9185
3.06k
                             "Namespaced Attribute %s in '%s' redefined\n",
9186
3.06k
                             attname, nsuri, NULL);
9187
3.06k
                    numNsErr += 1;
9188
3.06k
                }
9189
6.89k
            }
9190
33.5k
        }
9191
24.0k
    }
9192
9193
    /*
9194
     * Default attributes
9195
     */
9196
191k
    if (ctxt->attsDefault != NULL) {
9197
28.7k
        xmlDefAttrsPtr defaults;
9198
9199
28.7k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9200
28.7k
  if (defaults != NULL) {
9201
69.1k
      for (i = 0; i < defaults->nbAttrs; i++) {
9202
42.6k
                xmlDefAttr *attr = &defaults->attrs[i];
9203
42.6k
                const xmlChar *nsuri = NULL;
9204
42.6k
                unsigned hashValue, uriHashValue = 0;
9205
42.6k
                int res;
9206
9207
42.6k
          attname = attr->name.name;
9208
42.6k
    aprefix = attr->prefix.name;
9209
9210
42.6k
    /* Namespace declarations were handled in the first pass */
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9211
2.69k
                    continue;
9212
39.9k
    if (aprefix == ctxt->str_xmlns)
9213
4.80k
                    continue;
9214
9215
35.1k
                if (aprefix == NULL) {
9216
11.8k
                    nsIndex = NS_INDEX_EMPTY;
9217
11.8k
                    nsuri = NULL;
9218
11.8k
                    uriHashValue = URI_HASH_EMPTY;
9219
23.3k
                } else if (aprefix == ctxt->str_xml) {
9220
953
                    nsIndex = NS_INDEX_XML;
9221
953
                    nsuri = ctxt->str_xml_ns;
9222
953
                    uriHashValue = URI_HASH_XML;
9223
22.4k
                } else {
9224
22.4k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9225
22.4k
                    if ((nsIndex == INT_MAX) ||
9226
22.4k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9227
19.0k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9228
19.0k
                                 "Namespace prefix %s for %s on %s is not "
9229
19.0k
                                 "defined\n",
9230
19.0k
                                 aprefix, attname, localname);
9231
19.0k
                        nsIndex = NS_INDEX_EMPTY;
9232
19.0k
                        nsuri = NULL;
9233
19.0k
                        uriHashValue = URI_HASH_EMPTY;
9234
19.0k
                    } else {
9235
3.35k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9236
3.35k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9237
3.35k
                    }
9238
22.4k
                }
9239
9240
                /*
9241
                 * Check whether the attribute exists
9242
                 */
9243
35.1k
                if (maxAtts > 1) {
9244
27.5k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9245
27.5k
                                                   uriHashValue);
9246
27.5k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9247
27.5k
                                            hashValue, nbatts);
9248
27.5k
                    if (res < 0)
9249
0
                        continue;
9250
27.5k
                    if (res < INT_MAX) {
9251
7.88k
                        /* Same QName already present: skip this default */
                        if (aprefix == atts[res+1])
9252
4.19k
                            continue;
9253
3.69k
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9254
3.69k
                                 "Namespaced Attribute %s in '%s' redefined\n",
9255
3.69k
                                 attname, nsuri, NULL);
9256
3.69k
                    }
9257
27.5k
                }
9258
9259
31.0k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9260
9261
31.0k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9262
10.8k
                    res = xmlCtxtGrowAttrs(ctxt);
9263
9264
10.8k
                    maxatts = ctxt->maxatts;
9265
10.8k
                    atts = ctxt->atts;
9266
9267
10.8k
                    if (res < 0) {
9268
4
                        /* Makes the function return NULL after cleanup */
                        localname = NULL;
9269
4
                        goto done;
9270
4
                    }
9271
10.8k
                }
9272
9273
30.9k
                /* Default attributes are stored with real pointers */
                atts[nbatts++] = attname;
9274
30.9k
                atts[nbatts++] = aprefix;
9275
30.9k
                atts[nbatts++] = XML_INT_TO_PTR(nsIndex);
9276
30.9k
                atts[nbatts++] = attr->value.name;
9277
30.9k
                atts[nbatts++] = attr->valueEnd;
9278
9279
30.9k
#ifdef LIBXML_VALID_ENABLED
9280
                /*
9281
                 * This should be moved to valid.c, but we don't keep track
9282
                 * whether an attribute was defaulted.
9283
                 */
9284
30.9k
                if ((ctxt->validate) &&
9285
30.9k
                    (ctxt->standalone == 1) &&
9286
30.9k
                    (attr->external != 0)) {
9287
0
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9288
0
                            "standalone: attribute %s on %s defaulted "
9289
0
                            "from external subset\n",
9290
0
                            attname, localname);
9291
0
                }
9292
30.9k
#endif
9293
30.9k
                nbdef++;
9294
30.9k
      }
9295
26.4k
  }
9296
28.7k
    }
9297
9298
    /*
9299
     * Using a single hash table for nsUri/localName pairs cannot
9300
     * detect duplicate QNames reliably. The following example will
9301
     * only result in two namespace errors.
9302
     *
9303
     * <doc xmlns:a="a" xmlns:b="a">
9304
     *   <elem a:a="" b:a="" b:a=""/>
9305
     * </doc>
9306
     *
9307
     * If we saw more than one namespace error but no duplicate QNames
9308
     * were found, we have to scan for duplicate QNames.
9309
     */
9310
191k
    if ((numDupErr == 0) && (numNsErr > 1)) {
9311
1.04k
        /* Reuse the same table, this time keyed by name and prefix */
        memset(ctxt->attrHash, -1,
9312
1.04k
               attrHashSize * sizeof(ctxt->attrHash[0]));
9313
9314
5.62k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9315
4.57k
            unsigned hashValue, nameHashValue, prefixHashValue;
9316
4.57k
            int res;
9317
9318
4.57k
            aprefix = atts[i+1];
9319
4.57k
            if (aprefix == NULL)
9320
298
                continue;
9321
9322
4.28k
            attname = atts[i];
9323
            /* Hash values always have bit 31 set, see dict.c */
9324
4.28k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9325
4.28k
            prefixHashValue = xmlDictComputeHash(ctxt->dict, aprefix);
9326
9327
4.28k
            hashValue = xmlDictCombineHash(nameHashValue, prefixHashValue);
9328
4.28k
            res = xmlAttrHashInsertQName(ctxt, attrHashSize, attname,
9329
4.28k
                                         aprefix, hashValue, i);
9330
4.28k
            if (res < INT_MAX)
9331
1.44k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9332
4.28k
        }
9333
1.04k
    }
9334
9335
    /*
9336
     * Reconstruct attribute pointers
9337
     */
9338
283k
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9339
        /* namespace URI */
9340
92.0k
        nsIndex = XML_PTR_TO_INT(atts[i+2]);
9341
92.0k
        /*
         * NOTE(review): INT_MAX and INT_MAX - 1 presumably equal the
         * NS_INDEX_EMPTY and NS_INDEX_XML values stored earlier in this
         * function - confirm and prefer the named constants.
         */
        if (nsIndex == INT_MAX)
9342
53.4k
            atts[i+2] = NULL;
9343
38.6k
        else if (nsIndex == INT_MAX - 1)
9344
25.8k
            atts[i+2] = ctxt->str_xml_ns;
9345
12.7k
        else
9346
12.7k
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9347
9348
92.0k
        /*
         * Only parsed attributes (j < nratts) without the alloc bit hold
         * offsets into the input buffer; allocated values and default
         * attributes already hold pointers.
         */
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9349
44.3k
            atts[i+3] = BASE_PTR + XML_PTR_TO_INT(atts[i+3]);  /* value */
9350
44.3k
            atts[i+4] = BASE_PTR + XML_PTR_TO_INT(atts[i+4]);  /* valuend */
9351
44.3k
        }
9352
92.0k
    }
9353
9354
191k
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9355
191k
    if ((prefix != NULL) && (uri == NULL)) {
9356
15.3k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9357
15.3k
           "Namespace prefix %s on %s is not defined\n",
9358
15.3k
     prefix, localname, NULL);
9359
15.3k
    }
9360
191k
    *pref = prefix;
9361
191k
    *URI = uri;
9362
9363
    /*
9364
     * SAX callback
9365
     */
9366
191k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9367
191k
  (!ctxt->disableSAX)) {
9368
136k
  /* The last nbNs prefix/URI pairs of nsTab belong to this element */
  if (nbNs > 0)
9369
42.2k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9370
42.2k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9371
42.2k
        nbatts / 5, nbdef, atts);
9372
94.4k
  else
9373
94.4k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9374
94.4k
                          0, NULL, nbatts / 5, nbdef, atts);
9375
136k
    }
9376
9377
191k
done:
9378
    /*
9379
     * Free allocated attribute values
9380
     */
9381
191k
    if (attval != 0) {
9382
32.4k
  /* Bit 31 of attallocs[j] marks values allocated by the parser */
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9383
20.3k
      if (ctxt->attallocs[j] & 0x80000000)
9384
16.7k
          xmlFree((xmlChar *) atts[i+3]);
9385
12.1k
    }
9386
9387
191k
    *nbNsPtr = nbNs;
9388
191k
    return(localname);
9389
191k
}
9390
9391
/**
9392
 * Parse an end tag. Always consumes '</'.
9393
 *
9394
 *     [42] ETag ::= '</' Name S? '>'
9395
 *
9396
 * With namespace
9397
 *
9398
 *     [NS 9] ETag ::= '</' QName S? '>'
9399
 * @param ctxt  an XML parser context
9400
 * @param tag  the corresponding start tag
9401
 */
9402
9403
static void
9404
136k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9405
136k
    const xmlChar *name;
9406
9407
136k
    GROW;
9408
136k
    if ((RAW != '<') || (NXT(1) != '/')) {
9409
1.06k
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9410
1.06k
  return;
9411
1.06k
    }
9412
135k
    SKIP(2);
9413
9414
135k
    if (tag->prefix == NULL)
9415
91.6k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9416
44.0k
    else
9417
44.0k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9418
9419
    /*
9420
     * We should definitely be at the ending "S? '>'" part
9421
     */
9422
135k
    GROW;
9423
135k
    SKIP_BLANKS;
9424
135k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9425
13.2k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9426
13.2k
    } else
9427
122k
  NEXT1;
9428
9429
    /*
9430
     * [ WFC: Element Type Match ]
9431
     * The Name in an element's end-tag must match the element type in the
9432
     * start-tag.
9433
     *
9434
     */
9435
135k
    if (name != (xmlChar*)1) {
9436
17.1k
        if (name == NULL) name = BAD_CAST "unparsable";
9437
17.1k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9438
17.1k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9439
17.1k
                    ctxt->name, tag->line, name);
9440
17.1k
    }
9441
9442
    /*
9443
     * SAX: End of Tag
9444
     */
9445
135k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9446
135k
  (!ctxt->disableSAX))
9447
84.5k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9448
84.5k
                                tag->URI);
9449
9450
135k
    spacePop(ctxt);
9451
135k
    if (tag->nsNr != 0)
9452
49.2k
  xmlParserNsPop(ctxt, tag->nsNr);
9453
135k
}
9454
9455
/**
9456
 * Parse escaped pure raw content. Always consumes '<!['.
9457
 *
9458
 * @deprecated Internal function, don't use.
9459
 *
9460
 *     [18] CDSect ::= CDStart CData CDEnd
9461
 *
9462
 *     [19] CDStart ::= '<![CDATA['
9463
 *
9464
 *     [20] Data ::= (Char* - (Char* ']]>' Char*))
9465
 *
9466
 *     [21] CDEnd ::= ']]>'
9467
 * @param ctxt  an XML parser context
9468
 */
9469
void
9470
5.64k
xmlParseCDSect(xmlParserCtxt *ctxt) {
9471
5.64k
    xmlChar *buf = NULL;
9472
5.64k
    int len = 0;
9473
5.64k
    int size = XML_PARSER_BUFFER_SIZE;
9474
5.64k
    int r, rl;
9475
5.64k
    int s, sl;
9476
5.64k
    int cur, l;
9477
5.64k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9478
768
                    XML_MAX_HUGE_LENGTH :
9479
5.64k
                    XML_MAX_TEXT_LENGTH;
9480
9481
5.64k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9482
0
        return;
9483
5.64k
    SKIP(3);
9484
9485
5.64k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9486
0
        return;
9487
5.64k
    SKIP(6);
9488
9489
5.64k
    r = xmlCurrentCharRecover(ctxt, &rl);
9490
5.64k
    if (!IS_CHAR(r)) {
9491
711
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9492
711
        goto out;
9493
711
    }
9494
4.93k
    NEXTL(rl);
9495
4.93k
    s = xmlCurrentCharRecover(ctxt, &sl);
9496
4.93k
    if (!IS_CHAR(s)) {
9497
594
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9498
594
        goto out;
9499
594
    }
9500
4.33k
    NEXTL(sl);
9501
4.33k
    cur = xmlCurrentCharRecover(ctxt, &l);
9502
4.33k
    buf = xmlMalloc(size);
9503
4.33k
    if (buf == NULL) {
9504
3
  xmlErrMemory(ctxt);
9505
3
        goto out;
9506
3
    }
9507
100k
    while (IS_CHAR(cur) &&
9508
100k
           ((r != ']') || (s != ']') || (cur != '>'))) {
9509
96.5k
  if (len + 5 >= size) {
9510
466
      xmlChar *tmp;
9511
466
            int newSize;
9512
9513
466
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9514
466
            if (newSize < 0) {
9515
0
                xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9516
0
                               "CData section too big found\n");
9517
0
                goto out;
9518
0
            }
9519
466
      tmp = xmlRealloc(buf, newSize);
9520
466
      if (tmp == NULL) {
9521
1
    xmlErrMemory(ctxt);
9522
1
                goto out;
9523
1
      }
9524
465
      buf = tmp;
9525
465
      size = newSize;
9526
465
  }
9527
96.5k
  COPY_BUF(buf, len, r);
9528
96.5k
  r = s;
9529
96.5k
  rl = sl;
9530
96.5k
  s = cur;
9531
96.5k
  sl = l;
9532
96.5k
  NEXTL(l);
9533
96.5k
  cur = xmlCurrentCharRecover(ctxt, &l);
9534
96.5k
    }
9535
4.33k
    buf[len] = 0;
9536
4.33k
    if (cur != '>') {
9537
2.98k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9538
2.98k
                       "CData section not finished\n%.50s\n", buf);
9539
2.98k
        goto out;
9540
2.98k
    }
9541
1.34k
    NEXTL(l);
9542
9543
    /*
9544
     * OK the buffer is to be consumed as cdata.
9545
     */
9546
1.34k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9547
1.07k
        if ((ctxt->sax->cdataBlock != NULL) &&
9548
1.07k
            ((ctxt->options & XML_PARSE_NOCDATA) == 0)) {
9549
859
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9550
859
        } else if (ctxt->sax->characters != NULL) {
9551
216
            ctxt->sax->characters(ctxt->userData, buf, len);
9552
216
        }
9553
1.07k
    }
9554
9555
5.64k
out:
9556
5.64k
    xmlFree(buf);
9557
5.64k
}
9558
9559
/**
9560
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9561
 * unexpected EOF to the caller.
9562
 *
9563
 * @param ctxt  an XML parser context
9564
 */
9565
9566
static void
9567
57.6k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9568
57.6k
    int oldNameNr = ctxt->nameNr;
9569
57.6k
    int oldSpaceNr = ctxt->spaceNr;
9570
57.6k
    int oldNodeNr = ctxt->nodeNr;
9571
9572
57.6k
    GROW;
9573
2.47M
    while ((ctxt->input->cur < ctxt->input->end) &&
9574
2.47M
     (PARSER_STOPPED(ctxt) == 0)) {
9575
2.46M
  const xmlChar *cur = ctxt->input->cur;
9576
9577
  /*
9578
   * First case : a Processing Instruction.
9579
   */
9580
2.46M
  if ((*cur == '<') && (cur[1] == '?')) {
9581
18.0k
      xmlParsePI(ctxt);
9582
18.0k
  }
9583
9584
  /*
9585
   * Second case : a CDSection
9586
   */
9587
  /* 2.6.0 test was *cur not RAW */
9588
2.44M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9589
5.64k
      xmlParseCDSect(ctxt);
9590
5.64k
  }
9591
9592
  /*
9593
   * Third case :  a comment
9594
   */
9595
2.44M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9596
2.44M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9597
3.11k
      xmlParseComment(ctxt);
9598
3.11k
  }
9599
9600
  /*
9601
   * Fourth case :  a sub-element.
9602
   */
9603
2.43M
  else if (*cur == '<') {
9604
458k
            if (NXT(1) == '/') {
9605
202k
                if (ctxt->nameNr <= oldNameNr)
9606
44.1k
                    break;
9607
158k
          xmlParseElementEnd(ctxt);
9608
256k
            } else {
9609
256k
          xmlParseElementStart(ctxt);
9610
256k
            }
9611
458k
  }
9612
9613
  /*
9614
   * Fifth case : a reference. If if has not been resolved,
9615
   *    parsing returns it's Name, create the node
9616
   */
9617
9618
1.97M
  else if (*cur == '&') {
9619
164k
      xmlParseReference(ctxt);
9620
164k
  }
9621
9622
  /*
9623
   * Last case, text. Note that References are handled directly.
9624
   */
9625
1.81M
  else {
9626
1.81M
      xmlParseCharDataInternal(ctxt, 0);
9627
1.81M
  }
9628
9629
2.42M
  SHRINK;
9630
2.42M
  GROW;
9631
2.42M
    }
9632
9633
57.6k
    if ((ctxt->nameNr > oldNameNr) &&
9634
57.6k
        (ctxt->input->cur >= ctxt->input->end) &&
9635
57.6k
        (ctxt->wellFormed)) {
9636
368
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9637
368
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9638
368
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9639
368
                "Premature end of data in tag %s line %d\n",
9640
368
                name, line, NULL);
9641
368
    }
9642
9643
    /*
9644
     * Clean up in error case
9645
     */
9646
9647
77.5k
    while (ctxt->nodeNr > oldNodeNr)
9648
19.8k
        nodePop(ctxt);
9649
9650
73.6k
    while (ctxt->nameNr > oldNameNr) {
9651
15.9k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9652
9653
15.9k
        if (tag->nsNr != 0)
9654
2.38k
            xmlParserNsPop(ctxt, tag->nsNr);
9655
9656
15.9k
        namePop(ctxt);
9657
15.9k
    }
9658
9659
73.7k
    while (ctxt->spaceNr > oldSpaceNr)
9660
16.0k
        spacePop(ctxt);
9661
57.6k
}
9662
9663
/**
9664
 * Parse XML element content. This is useful if you're only interested
9665
 * in custom SAX callbacks. If you want a node list, use
9666
 * #xmlCtxtParseContent.
9667
 *
9668
 * @param ctxt  an XML parser context
9669
 */
9670
void
9671
0
xmlParseContent(xmlParserCtxt *ctxt) {
9672
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9673
0
        return;
9674
9675
0
    xmlCtxtInitializeLate(ctxt);
9676
9677
0
    xmlParseContentInternal(ctxt);
9678
9679
0
    xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
9680
0
}
9681
9682
/**
9683
 * parse an XML element
9684
 *
9685
 * @deprecated Internal function, don't use.
9686
 *
9687
 *     [39] element ::= EmptyElemTag | STag content ETag
9688
 *
9689
 * [ WFC: Element Type Match ]
9690
 * The Name in an element's end-tag must match the element type in the
9691
 * start-tag.
9692
 *
9693
 * @param ctxt  an XML parser context
9694
 */
9695
9696
void
9697
79.0k
xmlParseElement(xmlParserCtxt *ctxt) {
9698
79.0k
    if (xmlParseElementStart(ctxt) != 0)
9699
25.1k
        return;
9700
9701
53.9k
    xmlParseContentInternal(ctxt);
9702
9703
53.9k
    if (ctxt->input->cur >= ctxt->input->end) {
9704
7.69k
        if (ctxt->wellFormed) {
9705
532
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9706
532
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9707
532
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9708
532
                    "Premature end of data in tag %s line %d\n",
9709
532
                    name, line, NULL);
9710
532
        }
9711
7.69k
        return;
9712
7.69k
    }
9713
9714
46.2k
    xmlParseElementEnd(ctxt);
9715
46.2k
}
9716
9717
/**
9718
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9719
 * opening tag was parsed, 1 if an empty element was parsed.
9720
 *
9721
 * Always consumes '<'.
9722
 *
9723
 * @param ctxt  an XML parser context
9724
 */
9725
static int
9726
335k
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9727
335k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9728
335k
    const xmlChar *name;
9729
335k
    const xmlChar *prefix = NULL;
9730
335k
    const xmlChar *URI = NULL;
9731
335k
    xmlParserNodeInfo node_info;
9732
335k
    int line;
9733
335k
    xmlNodePtr cur;
9734
335k
    int nbNs = 0;
9735
9736
335k
    if (ctxt->nameNr > maxDepth) {
9737
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9738
0
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9739
0
                ctxt->nameNr);
9740
0
  xmlHaltParser(ctxt);
9741
0
  return(-1);
9742
0
    }
9743
9744
    /* Capture start position */
9745
335k
    if (ctxt->record_info) {
9746
0
        node_info.begin_pos = ctxt->input->consumed +
9747
0
                          (CUR_PTR - ctxt->input->base);
9748
0
  node_info.begin_line = ctxt->input->line;
9749
0
    }
9750
9751
335k
    if (ctxt->spaceNr == 0)
9752
0
  spacePush(ctxt, -1);
9753
335k
    else if (*ctxt->space == -2)
9754
38.0k
  spacePush(ctxt, -1);
9755
297k
    else
9756
297k
  spacePush(ctxt, *ctxt->space);
9757
9758
335k
    line = ctxt->input->line;
9759
335k
#ifdef LIBXML_SAX1_ENABLED
9760
335k
    if (ctxt->sax2)
9761
221k
#endif /* LIBXML_SAX1_ENABLED */
9762
221k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9763
114k
#ifdef LIBXML_SAX1_ENABLED
9764
114k
    else
9765
114k
  name = xmlParseStartTag(ctxt);
9766
335k
#endif /* LIBXML_SAX1_ENABLED */
9767
335k
    if (name == NULL) {
9768
44.4k
  spacePop(ctxt);
9769
44.4k
        return(-1);
9770
44.4k
    }
9771
291k
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9772
291k
    cur = ctxt->node;
9773
9774
291k
#ifdef LIBXML_VALID_ENABLED
9775
    /*
9776
     * [ VC: Root Element Type ]
9777
     * The Name in the document type declaration must match the element
9778
     * type of the root element.
9779
     */
9780
291k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9781
291k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9782
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9783
291k
#endif /* LIBXML_VALID_ENABLED */
9784
9785
    /*
9786
     * Check for an Empty Element.
9787
     */
9788
291k
    if ((RAW == '/') && (NXT(1) == '>')) {
9789
36.7k
        SKIP(2);
9790
36.7k
  if (ctxt->sax2) {
9791
18.3k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9792
18.3k
    (!ctxt->disableSAX))
9793
13.4k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9794
18.3k
#ifdef LIBXML_SAX1_ENABLED
9795
18.3k
  } else {
9796
18.3k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9797
18.3k
    (!ctxt->disableSAX))
9798
17.8k
    ctxt->sax->endElement(ctxt->userData, name);
9799
18.3k
#endif /* LIBXML_SAX1_ENABLED */
9800
18.3k
  }
9801
36.7k
  namePop(ctxt);
9802
36.7k
  spacePop(ctxt);
9803
36.7k
  if (nbNs > 0)
9804
7.34k
      xmlParserNsPop(ctxt, nbNs);
9805
36.7k
  if (cur != NULL && ctxt->record_info) {
9806
0
            node_info.node = cur;
9807
0
            node_info.end_pos = ctxt->input->consumed +
9808
0
                                (CUR_PTR - ctxt->input->base);
9809
0
            node_info.end_line = ctxt->input->line;
9810
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9811
0
  }
9812
36.7k
  return(1);
9813
36.7k
    }
9814
254k
    if (RAW == '>') {
9815
228k
        NEXT1;
9816
228k
        if (cur != NULL && ctxt->record_info) {
9817
0
            node_info.node = cur;
9818
0
            node_info.end_pos = 0;
9819
0
            node_info.end_line = 0;
9820
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9821
0
        }
9822
228k
    } else {
9823
26.1k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9824
26.1k
         "Couldn't find end of Start Tag %s line %d\n",
9825
26.1k
                    name, line, NULL);
9826
9827
  /*
9828
   * end of parsing of this node.
9829
   */
9830
26.1k
  nodePop(ctxt);
9831
26.1k
  namePop(ctxt);
9832
26.1k
  spacePop(ctxt);
9833
26.1k
  if (nbNs > 0)
9834
5.44k
      xmlParserNsPop(ctxt, nbNs);
9835
26.1k
  return(-1);
9836
26.1k
    }
9837
9838
228k
    return(0);
9839
254k
}
9840
9841
/**
9842
 * Parse the end of an XML element. Always consumes '</'.
9843
 *
9844
 * @param ctxt  an XML parser context
9845
 */
9846
static void
9847
204k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9848
204k
    xmlNodePtr cur = ctxt->node;
9849
9850
204k
    if (ctxt->nameNr <= 0) {
9851
14
        if ((RAW == '<') && (NXT(1) == '/'))
9852
4
            SKIP(2);
9853
14
        return;
9854
14
    }
9855
9856
    /*
9857
     * parse the end of tag: '</' should be here.
9858
     */
9859
204k
    if (ctxt->sax2) {
9860
136k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9861
136k
  namePop(ctxt);
9862
136k
    }
9863
67.6k
#ifdef LIBXML_SAX1_ENABLED
9864
67.6k
    else
9865
67.6k
  xmlParseEndTag1(ctxt, 0);
9866
204k
#endif /* LIBXML_SAX1_ENABLED */
9867
9868
    /*
9869
     * Capture end position
9870
     */
9871
204k
    if (cur != NULL && ctxt->record_info) {
9872
0
        xmlParserNodeInfoPtr node_info;
9873
9874
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9875
0
        if (node_info != NULL) {
9876
0
            node_info->end_pos = ctxt->input->consumed +
9877
0
                                 (CUR_PTR - ctxt->input->base);
9878
0
            node_info->end_line = ctxt->input->line;
9879
0
        }
9880
0
    }
9881
204k
}
9882
9883
/**
9884
 * parse the XML version value.
9885
 *
9886
 * @deprecated Internal function, don't use.
9887
 *
9888
 *     [26] VersionNum ::= '1.' [0-9]+
9889
 *
9890
 * In practice allow [0-9].[0-9]+ at that level
9891
 *
9892
 * @param ctxt  an XML parser context
9893
 * @returns the string giving the XML version number, or NULL
9894
 */
9895
xmlChar *
9896
101k
xmlParseVersionNum(xmlParserCtxt *ctxt) {
9897
101k
    xmlChar *buf = NULL;
9898
101k
    int len = 0;
9899
101k
    int size = 10;
9900
101k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9901
27.0k
                    XML_MAX_TEXT_LENGTH :
9902
101k
                    XML_MAX_NAME_LENGTH;
9903
101k
    xmlChar cur;
9904
9905
101k
    buf = xmlMalloc(size);
9906
101k
    if (buf == NULL) {
9907
8
  xmlErrMemory(ctxt);
9908
8
  return(NULL);
9909
8
    }
9910
101k
    cur = CUR;
9911
101k
    if (!((cur >= '0') && (cur <= '9'))) {
9912
5.02k
  xmlFree(buf);
9913
5.02k
  return(NULL);
9914
5.02k
    }
9915
96.1k
    buf[len++] = cur;
9916
96.1k
    NEXT;
9917
96.1k
    cur=CUR;
9918
96.1k
    if (cur != '.') {
9919
723
  xmlFree(buf);
9920
723
  return(NULL);
9921
723
    }
9922
95.4k
    buf[len++] = cur;
9923
95.4k
    NEXT;
9924
95.4k
    cur=CUR;
9925
193k
    while ((cur >= '0') && (cur <= '9')) {
9926
98.3k
  if (len + 1 >= size) {
9927
222
      xmlChar *tmp;
9928
222
            int newSize;
9929
9930
222
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9931
222
            if (newSize < 0) {
9932
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "VersionNum");
9933
0
                xmlFree(buf);
9934
0
                return(NULL);
9935
0
            }
9936
222
      tmp = xmlRealloc(buf, newSize);
9937
222
      if (tmp == NULL) {
9938
1
    xmlErrMemory(ctxt);
9939
1
          xmlFree(buf);
9940
1
    return(NULL);
9941
1
      }
9942
221
      buf = tmp;
9943
221
            size = newSize;
9944
221
  }
9945
98.3k
  buf[len++] = cur;
9946
98.3k
  NEXT;
9947
98.3k
  cur=CUR;
9948
98.3k
    }
9949
95.4k
    buf[len] = 0;
9950
95.4k
    return(buf);
9951
95.4k
}
9952
9953
/**
9954
 * parse the XML version.
9955
 *
9956
 * @deprecated Internal function, don't use.
9957
 *
9958
 *     [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9959
 *
9960
 *     [25] Eq ::= S? '=' S?
9961
 *
9962
 * @param ctxt  an XML parser context
9963
 * @returns the version string, e.g. "1.0"
9964
 */
9965
9966
xmlChar *
9967
110k
xmlParseVersionInfo(xmlParserCtxt *ctxt) {
9968
110k
    xmlChar *version = NULL;
9969
9970
110k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9971
102k
  SKIP(7);
9972
102k
  SKIP_BLANKS;
9973
102k
  if (RAW != '=') {
9974
417
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9975
417
      return(NULL);
9976
417
        }
9977
102k
  NEXT;
9978
102k
  SKIP_BLANKS;
9979
102k
  if (RAW == '"') {
9980
100k
      NEXT;
9981
100k
      version = xmlParseVersionNum(ctxt);
9982
100k
      if (RAW != '"') {
9983
6.09k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9984
6.09k
      } else
9985
94.7k
          NEXT;
9986
100k
  } else if (RAW == '\''){
9987
391
      NEXT;
9988
391
      version = xmlParseVersionNum(ctxt);
9989
391
      if (RAW != '\'') {
9990
195
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9991
195
      } else
9992
196
          NEXT;
9993
1.16k
  } else {
9994
1.16k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9995
1.16k
  }
9996
102k
    }
9997
109k
    return(version);
9998
110k
}
9999
10000
/**
10001
 * parse the XML encoding name
10002
 *
10003
 * @deprecated Internal function, don't use.
10004
 *
10005
 *     [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10006
 *
10007
 * @param ctxt  an XML parser context
10008
 * @returns the encoding name value or NULL
10009
 */
10010
xmlChar *
10011
19.0k
xmlParseEncName(xmlParserCtxt *ctxt) {
10012
19.0k
    xmlChar *buf = NULL;
10013
19.0k
    int len = 0;
10014
19.0k
    int size = 10;
10015
19.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10016
3.70k
                    XML_MAX_TEXT_LENGTH :
10017
19.0k
                    XML_MAX_NAME_LENGTH;
10018
19.0k
    xmlChar cur;
10019
10020
19.0k
    cur = CUR;
10021
19.0k
    if (((cur >= 'a') && (cur <= 'z')) ||
10022
19.0k
        ((cur >= 'A') && (cur <= 'Z'))) {
10023
10.5k
  buf = xmlMalloc(size);
10024
10.5k
  if (buf == NULL) {
10025
1
      xmlErrMemory(ctxt);
10026
1
      return(NULL);
10027
1
  }
10028
10029
10.5k
  buf[len++] = cur;
10030
10.5k
  NEXT;
10031
10.5k
  cur = CUR;
10032
70.0k
  while (((cur >= 'a') && (cur <= 'z')) ||
10033
70.0k
         ((cur >= 'A') && (cur <= 'Z')) ||
10034
70.0k
         ((cur >= '0') && (cur <= '9')) ||
10035
70.0k
         (cur == '.') || (cur == '_') ||
10036
70.0k
         (cur == '-')) {
10037
59.4k
      if (len + 1 >= size) {
10038
2.93k
          xmlChar *tmp;
10039
2.93k
                int newSize;
10040
10041
2.93k
                newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10042
2.93k
                if (newSize < 0) {
10043
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10044
0
                    xmlFree(buf);
10045
0
                    return(NULL);
10046
0
                }
10047
2.93k
    tmp = xmlRealloc(buf, newSize);
10048
2.93k
    if (tmp == NULL) {
10049
1
        xmlErrMemory(ctxt);
10050
1
        xmlFree(buf);
10051
1
        return(NULL);
10052
1
    }
10053
2.93k
    buf = tmp;
10054
2.93k
                size = newSize;
10055
2.93k
      }
10056
59.4k
      buf[len++] = cur;
10057
59.4k
      NEXT;
10058
59.4k
      cur = CUR;
10059
59.4k
        }
10060
10.5k
  buf[len] = 0;
10061
10.5k
    } else {
10062
8.52k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10063
8.52k
    }
10064
19.0k
    return(buf);
10065
19.0k
}
10066
10067
/**
10068
 * parse the XML encoding declaration
10069
 *
10070
 * @deprecated Internal function, don't use.
10071
 *
10072
 *     [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | 
10073
 *                           "'" EncName "'")
10074
 *
10075
 * this setups the conversion filters.
10076
 *
10077
 * @param ctxt  an XML parser context
10078
 * @returns the encoding value or NULL
10079
 */
10080
10081
const xmlChar *
10082
105k
xmlParseEncodingDecl(xmlParserCtxt *ctxt) {
10083
105k
    xmlChar *encoding = NULL;
10084
10085
105k
    SKIP_BLANKS;
10086
105k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10087
86.2k
        return(NULL);
10088
10089
19.5k
    SKIP(8);
10090
19.5k
    SKIP_BLANKS;
10091
19.5k
    if (RAW != '=') {
10092
197
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10093
197
        return(NULL);
10094
197
    }
10095
19.3k
    NEXT;
10096
19.3k
    SKIP_BLANKS;
10097
19.3k
    if (RAW == '"') {
10098
18.6k
        NEXT;
10099
18.6k
        encoding = xmlParseEncName(ctxt);
10100
18.6k
        if (RAW != '"') {
10101
4.90k
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10102
4.90k
            xmlFree(encoding);
10103
4.90k
            return(NULL);
10104
4.90k
        } else
10105
13.7k
            NEXT;
10106
18.6k
    } else if (RAW == '\''){
10107
391
        NEXT;
10108
391
        encoding = xmlParseEncName(ctxt);
10109
391
        if (RAW != '\'') {
10110
197
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10111
197
            xmlFree(encoding);
10112
197
            return(NULL);
10113
197
        } else
10114
194
            NEXT;
10115
391
    } else {
10116
258
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10117
258
    }
10118
10119
14.2k
    if (encoding == NULL)
10120
4.79k
        return(NULL);
10121
10122
9.43k
    xmlSetDeclaredEncoding(ctxt, encoding);
10123
10124
9.43k
    return(ctxt->encoding);
10125
14.2k
}
10126
10127
/**
10128
 * parse the XML standalone declaration
10129
 *
10130
 * @deprecated Internal function, don't use.
10131
 *
10132
 *     [32] SDDecl ::= S 'standalone' Eq
10133
 *                     (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10134
 *
10135
 * [ VC: Standalone Document Declaration ]
10136
 * TODO The standalone document declaration must have the value "no"
10137
 * if any external markup declarations contain declarations of:
10138
 *  - attributes with default values, if elements to which these
10139
 *    attributes apply appear in the document without specifications
10140
 *    of values for these attributes, or
10141
 *  - entities (other than amp, lt, gt, apos, quot), if references
10142
 *    to those entities appear in the document, or
10143
 *  - attributes with values subject to normalization, where the
10144
 *    attribute appears in the document with a value which will change
10145
 *    as a result of normalization, or
10146
 *  - element types with element content, if white space occurs directly
10147
 *    within any instance of those types.
10148
 *
10149
 * @param ctxt  an XML parser context
10150
 * @returns
10151
 *   1 if standalone="yes"
10152
 *   0 if standalone="no"
10153
 *  -2 if standalone attribute is missing or invalid
10154
 *    (A standalone value of -2 means that the XML declaration was found,
10155
 *     but no value was specified for the standalone attribute).
10156
 */
10157
10158
int
10159
104k
xmlParseSDDecl(xmlParserCtxt *ctxt) {
10160
104k
    int standalone = -2;
10161
10162
104k
    SKIP_BLANKS;
10163
104k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10164
86.0k
  SKIP(10);
10165
86.0k
        SKIP_BLANKS;
10166
86.0k
  if (RAW != '=') {
10167
198
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10168
198
      return(standalone);
10169
198
        }
10170
85.8k
  NEXT;
10171
85.8k
  SKIP_BLANKS;
10172
85.8k
        if (RAW == '\''){
10173
1.69k
      NEXT;
10174
1.69k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10175
388
          standalone = 0;
10176
388
                SKIP(2);
10177
1.30k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10178
1.30k
                 (NXT(2) == 's')) {
10179
390
          standalone = 1;
10180
390
    SKIP(3);
10181
912
            } else {
10182
912
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10183
912
      }
10184
1.69k
      if (RAW != '\'') {
10185
1.48k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10186
1.48k
      } else
10187
202
          NEXT;
10188
84.1k
  } else if (RAW == '"'){
10189
83.9k
      NEXT;
10190
83.9k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10191
534
          standalone = 0;
10192
534
    SKIP(2);
10193
83.4k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10194
83.4k
                 (NXT(2) == 's')) {
10195
82.6k
          standalone = 1;
10196
82.6k
                SKIP(3);
10197
82.6k
            } else {
10198
783
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10199
783
      }
10200
83.9k
      if (RAW != '"') {
10201
1.17k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10202
1.17k
      } else
10203
82.7k
          NEXT;
10204
83.9k
  } else {
10205
195
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10206
195
        }
10207
85.8k
    }
10208
104k
    return(standalone);
10209
104k
}
10210
10211
/**
10212
 * parse an XML declaration header
10213
 *
10214
 * @deprecated Internal function, don't use.
10215
 *
10216
 *     [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10217
 * @param ctxt  an XML parser context
10218
 */
10219
10220
void
10221
110k
xmlParseXMLDecl(xmlParserCtxt *ctxt) {
10222
110k
    xmlChar *version;
10223
10224
    /*
10225
     * This value for standalone indicates that the document has an
10226
     * XML declaration but it does not have a standalone attribute.
10227
     * It will be overwritten later if a standalone attribute is found.
10228
     */
10229
10230
110k
    ctxt->standalone = -2;
10231
10232
    /*
10233
     * We know that '<?xml' is here.
10234
     */
10235
110k
    SKIP(5);
10236
10237
110k
    if (!IS_BLANK_CH(RAW)) {
10238
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10239
0
                 "Blank needed after '<?xml'\n");
10240
0
    }
10241
110k
    SKIP_BLANKS;
10242
10243
    /*
10244
     * We must have the VersionInfo here.
10245
     */
10246
110k
    version = xmlParseVersionInfo(ctxt);
10247
110k
    if (version == NULL) {
10248
14.5k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10249
95.4k
    } else {
10250
95.4k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10251
      /*
10252
       * Changed here for XML-1.0 5th edition
10253
       */
10254
1.37k
      if (ctxt->options & XML_PARSE_OLD10) {
10255
337
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10256
337
                "Unsupported version '%s'\n",
10257
337
                version);
10258
1.03k
      } else {
10259
1.03k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10260
501
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10261
501
                      "Unsupported version '%s'\n",
10262
501
          version, NULL);
10263
535
    } else {
10264
535
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10265
535
              "Unsupported version '%s'\n",
10266
535
              version);
10267
535
    }
10268
1.03k
      }
10269
1.37k
  }
10270
95.4k
  if (ctxt->version != NULL)
10271
0
      xmlFree(ctxt->version);
10272
95.4k
  ctxt->version = version;
10273
95.4k
    }
10274
10275
    /*
10276
     * We may have the encoding declaration
10277
     */
10278
110k
    if (!IS_BLANK_CH(RAW)) {
10279
18.5k
        if ((RAW == '?') && (NXT(1) == '>')) {
10280
4.24k
      SKIP(2);
10281
4.24k
      return;
10282
4.24k
  }
10283
14.3k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10284
14.3k
    }
10285
105k
    xmlParseEncodingDecl(ctxt);
10286
10287
    /*
10288
     * We may have the standalone status.
10289
     */
10290
105k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10291
2.31k
        if ((RAW == '?') && (NXT(1) == '>')) {
10292
1.45k
      SKIP(2);
10293
1.45k
      return;
10294
1.45k
  }
10295
861
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10296
861
    }
10297
10298
    /*
10299
     * We can grow the input buffer freely at that point
10300
     */
10301
104k
    GROW;
10302
10303
104k
    SKIP_BLANKS;
10304
104k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10305
10306
104k
    SKIP_BLANKS;
10307
104k
    if ((RAW == '?') && (NXT(1) == '>')) {
10308
82.8k
        SKIP(2);
10309
82.8k
    } else if (RAW == '>') {
10310
        /* Deprecated old WD ... */
10311
194
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10312
194
  NEXT;
10313
21.2k
    } else {
10314
21.2k
        int c;
10315
10316
21.2k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10317
354k
        while ((PARSER_STOPPED(ctxt) == 0) &&
10318
354k
               ((c = CUR) != 0)) {
10319
342k
            NEXT;
10320
342k
            if (c == '>')
10321
9.39k
                break;
10322
342k
        }
10323
21.2k
    }
10324
104k
}
10325
10326
/**
10327
 * @since 2.14.0
10328
 *
10329
 * @param ctxt  parser context
10330
 * @returns the version from the XML declaration.
10331
 */
10332
const xmlChar *
10333
0
xmlCtxtGetVersion(xmlParserCtxt *ctxt) {
10334
0
    if (ctxt == NULL)
10335
0
        return(NULL);
10336
10337
0
    return(ctxt->version);
10338
0
}
10339
10340
/**
10341
 * @since 2.14.0
10342
 *
10343
 * @param ctxt  parser context
10344
 * @returns the value from the standalone document declaration.
10345
 */
10346
int
10347
0
xmlCtxtGetStandalone(xmlParserCtxt *ctxt) {
10348
0
    if (ctxt == NULL)
10349
0
        return(0);
10350
10351
0
    return(ctxt->standalone);
10352
0
}
10353
10354
/**
10355
 * parse an XML Misc* optional field.
10356
 *
10357
 * @deprecated Internal function, don't use.
10358
 *
10359
 *     [27] Misc ::= Comment | PI |  S
10360
 * @param ctxt  an XML parser context
10361
 */
10362
10363
void
10364
362k
xmlParseMisc(xmlParserCtxt *ctxt) {
10365
408k
    while (PARSER_STOPPED(ctxt) == 0) {
10366
398k
        SKIP_BLANKS;
10367
398k
        GROW;
10368
398k
        if ((RAW == '<') && (NXT(1) == '?')) {
10369
40.8k
      xmlParsePI(ctxt);
10370
357k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10371
5.65k
      xmlParseComment(ctxt);
10372
351k
        } else {
10373
351k
            break;
10374
351k
        }
10375
398k
    }
10376
362k
}
10377
10378
static void
10379
173k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10380
173k
    xmlDocPtr doc;
10381
10382
    /*
10383
     * SAX: end of the document processing.
10384
     */
10385
173k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10386
173k
        ctxt->sax->endDocument(ctxt->userData);
10387
10388
    /*
10389
     * Remove locally kept entity definitions if the tree was not built
10390
     */
10391
173k
    doc = ctxt->myDoc;
10392
173k
    if ((doc != NULL) &&
10393
173k
        (xmlStrEqual(doc->version, SAX_COMPAT_MODE))) {
10394
2.26k
        xmlFreeDoc(doc);
10395
2.26k
        ctxt->myDoc = NULL;
10396
2.26k
    }
10397
173k
}
10398
10399
/**
10400
 * Parse an XML document and invoke the SAX handlers. This is useful
10401
 * if you're only interested in custom SAX callbacks. If you want a
10402
 * document tree, use #xmlCtxtParseDocument.
10403
 *
10404
 * @param ctxt  an XML parser context
10405
 * @returns 0, -1 in case of error.
10406
 */
10407
10408
int
10409
175k
xmlParseDocument(xmlParserCtxt *ctxt) {
10410
175k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10411
0
        return(-1);
10412
10413
175k
    GROW;
10414
10415
    /*
10416
     * SAX: detecting the level.
10417
     */
10418
175k
    xmlCtxtInitializeLate(ctxt);
10419
10420
175k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10421
175k
        ctxt->sax->setDocumentLocator(ctxt->userData,
10422
175k
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10423
175k
    }
10424
10425
175k
    xmlDetectEncoding(ctxt);
10426
10427
175k
    if (CUR == 0) {
10428
2.51k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10429
2.51k
  return(-1);
10430
2.51k
    }
10431
10432
173k
    GROW;
10433
173k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10434
10435
  /*
10436
   * Note that we will switch encoding on the fly.
10437
   */
10438
110k
  xmlParseXMLDecl(ctxt);
10439
110k
  SKIP_BLANKS;
10440
110k
    } else {
10441
63.4k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10442
63.4k
        if (ctxt->version == NULL) {
10443
13
            xmlErrMemory(ctxt);
10444
13
            return(-1);
10445
13
        }
10446
63.4k
    }
10447
173k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10448
151k
        ctxt->sax->startDocument(ctxt->userData);
10449
173k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10450
173k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10451
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10452
0
    }
10453
10454
    /*
10455
     * The Misc part of the Prolog
10456
     */
10457
173k
    xmlParseMisc(ctxt);
10458
10459
    /*
10460
     * Then possibly doc type declaration(s) and more Misc
10461
     * (doctypedecl Misc*)?
10462
     */
10463
173k
    GROW;
10464
173k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10465
10466
109k
  ctxt->inSubset = 1;
10467
109k
  xmlParseDocTypeDecl(ctxt);
10468
109k
  if (RAW == '[') {
10469
84.6k
      xmlParseInternalSubset(ctxt);
10470
84.6k
  } else if (RAW == '>') {
10471
19.8k
            NEXT;
10472
19.8k
        }
10473
10474
  /*
10475
   * Create and update the external subset.
10476
   */
10477
109k
  ctxt->inSubset = 2;
10478
109k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10479
109k
      (!ctxt->disableSAX))
10480
79.3k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10481
79.3k
                                ctxt->extSubSystem, ctxt->extSubURI);
10482
109k
  ctxt->inSubset = 0;
10483
10484
109k
        xmlCleanSpecialAttr(ctxt);
10485
10486
109k
  xmlParseMisc(ctxt);
10487
109k
    }
10488
10489
    /*
10490
     * Time to start parsing the tree itself
10491
     */
10492
173k
    GROW;
10493
173k
    if (RAW != '<') {
10494
94.3k
        if (ctxt->wellFormed)
10495
15.6k
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10496
15.6k
                           "Start tag expected, '<' not found\n");
10497
94.3k
    } else {
10498
79.0k
  xmlParseElement(ctxt);
10499
10500
  /*
10501
   * The Misc part at the end
10502
   */
10503
79.0k
  xmlParseMisc(ctxt);
10504
10505
79.0k
        xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
10506
79.0k
    }
10507
10508
173k
    ctxt->instate = XML_PARSER_EOF;
10509
173k
    xmlFinishDocument(ctxt);
10510
10511
173k
    if (! ctxt->wellFormed) {
10512
136k
  ctxt->valid = 0;
10513
136k
  return(-1);
10514
136k
    }
10515
10516
36.6k
    return(0);
10517
173k
}
10518
10519
/**
10520
 * parse a general parsed entity
10521
 * An external general parsed entity is well-formed if it matches the
10522
 * production labeled extParsedEnt.
10523
 *
10524
 * @deprecated Internal function, don't use.
10525
 *
10526
 *     [78] extParsedEnt ::= TextDecl? content
10527
 *
10528
 * @param ctxt  an XML parser context
10529
 * @returns 0, -1 in case of error. the parser context is augmented
10530
 *                as a result of the parsing.
10531
 */
10532
10533
int
10534
0
xmlParseExtParsedEnt(xmlParserCtxt *ctxt) {
10535
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10536
0
        return(-1);
10537
10538
0
    xmlCtxtInitializeLate(ctxt);
10539
10540
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10541
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10542
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10543
0
    }
10544
10545
0
    xmlDetectEncoding(ctxt);
10546
10547
0
    if (CUR == 0) {
10548
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10549
0
    }
10550
10551
    /*
10552
     * Check for the XMLDecl in the Prolog.
10553
     */
10554
0
    GROW;
10555
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10556
10557
  /*
10558
   * Note that we will switch encoding on the fly.
10559
   */
10560
0
  xmlParseXMLDecl(ctxt);
10561
0
  SKIP_BLANKS;
10562
0
    } else {
10563
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10564
0
    }
10565
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10566
0
        ctxt->sax->startDocument(ctxt->userData);
10567
10568
    /*
10569
     * Doing validity checking on chunk doesn't make sense
10570
     */
10571
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10572
0
    ctxt->validate = 0;
10573
0
    ctxt->depth = 0;
10574
10575
0
    xmlParseContentInternal(ctxt);
10576
10577
0
    if (ctxt->input->cur < ctxt->input->end)
10578
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10579
10580
    /*
10581
     * SAX: end of the document processing.
10582
     */
10583
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10584
0
        ctxt->sax->endDocument(ctxt->userData);
10585
10586
0
    if (! ctxt->wellFormed) return(-1);
10587
0
    return(0);
10588
0
}
10589
10590
#ifdef LIBXML_PUSH_ENABLED
10591
/************************************************************************
10592
 *                  *
10593
 *    Progressive parsing interfaces        *
10594
 *                  *
10595
 ************************************************************************/
10596
10597
/**
10598
 * Check whether the input buffer contains a character.
10599
 *
10600
 * @param ctxt  an XML parser context
10601
 * @param c  character
10602
 */
10603
static int
10604
0
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10605
0
    const xmlChar *cur;
10606
10607
0
    if (ctxt->checkIndex == 0) {
10608
0
        cur = ctxt->input->cur + 1;
10609
0
    } else {
10610
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10611
0
    }
10612
10613
0
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10614
0
        size_t index = ctxt->input->end - ctxt->input->cur;
10615
10616
0
        if (index > LONG_MAX) {
10617
0
            ctxt->checkIndex = 0;
10618
0
            return(1);
10619
0
        }
10620
0
        ctxt->checkIndex = index;
10621
0
        return(0);
10622
0
    } else {
10623
0
        ctxt->checkIndex = 0;
10624
0
        return(1);
10625
0
    }
10626
0
}
10627
10628
/**
10629
 * Check whether the input buffer contains a string.
10630
 *
10631
 * @param ctxt  an XML parser context
10632
 * @param startDelta  delta to apply at the start
10633
 * @param str  string
10634
 * @param strLen  length of string
10635
 */
10636
static const xmlChar *
10637
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10638
0
                     const char *str, size_t strLen) {
10639
0
    const xmlChar *cur, *term;
10640
10641
0
    if (ctxt->checkIndex == 0) {
10642
0
        cur = ctxt->input->cur + startDelta;
10643
0
    } else {
10644
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10645
0
    }
10646
10647
0
    term = BAD_CAST strstr((const char *) cur, str);
10648
0
    if (term == NULL) {
10649
0
        const xmlChar *end = ctxt->input->end;
10650
0
        size_t index;
10651
10652
        /* Rescan (strLen - 1) characters. */
10653
0
        if ((size_t) (end - cur) < strLen)
10654
0
            end = cur;
10655
0
        else
10656
0
            end -= strLen - 1;
10657
0
        index = end - ctxt->input->cur;
10658
0
        if (index > LONG_MAX) {
10659
0
            ctxt->checkIndex = 0;
10660
0
            return(ctxt->input->end - strLen);
10661
0
        }
10662
0
        ctxt->checkIndex = index;
10663
0
    } else {
10664
0
        ctxt->checkIndex = 0;
10665
0
    }
10666
10667
0
    return(term);
10668
0
}
10669
10670
/**
10671
 * Check whether the input buffer contains terminated char data.
10672
 *
10673
 * @param ctxt  an XML parser context
10674
 */
10675
static int
10676
0
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10677
0
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10678
0
    const xmlChar *end = ctxt->input->end;
10679
0
    size_t index;
10680
10681
0
    while (cur < end) {
10682
0
        if ((*cur == '<') || (*cur == '&')) {
10683
0
            ctxt->checkIndex = 0;
10684
0
            return(1);
10685
0
        }
10686
0
        cur++;
10687
0
    }
10688
10689
0
    index = cur - ctxt->input->cur;
10690
0
    if (index > LONG_MAX) {
10691
0
        ctxt->checkIndex = 0;
10692
0
        return(1);
10693
0
    }
10694
0
    ctxt->checkIndex = index;
10695
0
    return(0);
10696
0
}
10697
10698
/**
10699
 * Check whether there's enough data in the input buffer to finish parsing
10700
 * a start tag. This has to take quotes into account.
10701
 *
10702
 * @param ctxt  an XML parser context
10703
 */
10704
static int
10705
0
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10706
0
    const xmlChar *cur;
10707
0
    const xmlChar *end = ctxt->input->end;
10708
0
    int state = ctxt->endCheckState;
10709
0
    size_t index;
10710
10711
0
    if (ctxt->checkIndex == 0)
10712
0
        cur = ctxt->input->cur + 1;
10713
0
    else
10714
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10715
10716
0
    while (cur < end) {
10717
0
        if (state) {
10718
0
            if (*cur == state)
10719
0
                state = 0;
10720
0
        } else if (*cur == '\'' || *cur == '"') {
10721
0
            state = *cur;
10722
0
        } else if (*cur == '>') {
10723
0
            ctxt->checkIndex = 0;
10724
0
            ctxt->endCheckState = 0;
10725
0
            return(1);
10726
0
        }
10727
0
        cur++;
10728
0
    }
10729
10730
0
    index = cur - ctxt->input->cur;
10731
0
    if (index > LONG_MAX) {
10732
0
        ctxt->checkIndex = 0;
10733
0
        ctxt->endCheckState = 0;
10734
0
        return(1);
10735
0
    }
10736
0
    ctxt->checkIndex = index;
10737
0
    ctxt->endCheckState = state;
10738
0
    return(0);
10739
0
}
10740
10741
/**
10742
 * Check whether there's enough data in the input buffer to finish parsing
10743
 * the internal subset.
10744
 *
10745
 * @param ctxt  an XML parser context
10746
 */
10747
static int
10748
0
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10749
    /*
10750
     * Sorry, but progressive parsing of the internal subset is not
10751
     * supported. We first check that the full content of the internal
10752
     * subset is available and parsing is launched only at that point.
10753
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10754
     * not in a ']]>' sequence which are conditional sections.
10755
     */
10756
0
    const xmlChar *cur, *start;
10757
0
    const xmlChar *end = ctxt->input->end;
10758
0
    int state = ctxt->endCheckState;
10759
0
    size_t index;
10760
10761
0
    if (ctxt->checkIndex == 0) {
10762
0
        cur = ctxt->input->cur + 1;
10763
0
    } else {
10764
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10765
0
    }
10766
0
    start = cur;
10767
10768
0
    while (cur < end) {
10769
0
        if (state == '-') {
10770
0
            if ((*cur == '-') &&
10771
0
                (cur[1] == '-') &&
10772
0
                (cur[2] == '>')) {
10773
0
                state = 0;
10774
0
                cur += 3;
10775
0
                start = cur;
10776
0
                continue;
10777
0
            }
10778
0
        }
10779
0
        else if (state == ']') {
10780
0
            if (*cur == '>') {
10781
0
                ctxt->checkIndex = 0;
10782
0
                ctxt->endCheckState = 0;
10783
0
                return(1);
10784
0
            }
10785
0
            if (IS_BLANK_CH(*cur)) {
10786
0
                state = ' ';
10787
0
            } else if (*cur != ']') {
10788
0
                state = 0;
10789
0
                start = cur;
10790
0
                continue;
10791
0
            }
10792
0
        }
10793
0
        else if (state == ' ') {
10794
0
            if (*cur == '>') {
10795
0
                ctxt->checkIndex = 0;
10796
0
                ctxt->endCheckState = 0;
10797
0
                return(1);
10798
0
            }
10799
0
            if (!IS_BLANK_CH(*cur)) {
10800
0
                state = 0;
10801
0
                start = cur;
10802
0
                continue;
10803
0
            }
10804
0
        }
10805
0
        else if (state != 0) {
10806
0
            if (*cur == state) {
10807
0
                state = 0;
10808
0
                start = cur + 1;
10809
0
            }
10810
0
        }
10811
0
        else if (*cur == '<') {
10812
0
            if ((cur[1] == '!') &&
10813
0
                (cur[2] == '-') &&
10814
0
                (cur[3] == '-')) {
10815
0
                state = '-';
10816
0
                cur += 4;
10817
                /* Don't treat <!--> as comment */
10818
0
                start = cur;
10819
0
                continue;
10820
0
            }
10821
0
        }
10822
0
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10823
0
            state = *cur;
10824
0
        }
10825
10826
0
        cur++;
10827
0
    }
10828
10829
    /*
10830
     * Rescan the three last characters to detect "<!--" and "-->"
10831
     * split across chunks.
10832
     */
10833
0
    if ((state == 0) || (state == '-')) {
10834
0
        if (cur - start < 3)
10835
0
            cur = start;
10836
0
        else
10837
0
            cur -= 3;
10838
0
    }
10839
0
    index = cur - ctxt->input->cur;
10840
0
    if (index > LONG_MAX) {
10841
0
        ctxt->checkIndex = 0;
10842
0
        ctxt->endCheckState = 0;
10843
0
        return(1);
10844
0
    }
10845
0
    ctxt->checkIndex = index;
10846
0
    ctxt->endCheckState = state;
10847
0
    return(0);
10848
0
}
10849
10850
/**
10851
 * Try to progress on parsing
10852
 *
10853
 * @param ctxt  an XML parser context
10854
 * @param terminate  last chunk indicator
10855
 * @returns zero if no parsing was possible
10856
 */
10857
static int
10858
0
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10859
0
    int ret = 0;
10860
0
    size_t avail;
10861
0
    xmlChar cur, next;
10862
10863
0
    if (ctxt->input == NULL)
10864
0
        return(0);
10865
10866
0
    if ((ctxt->input != NULL) &&
10867
0
        (ctxt->input->cur - ctxt->input->base > 4096)) {
10868
0
        xmlParserShrink(ctxt);
10869
0
    }
10870
10871
0
    while (ctxt->disableSAX == 0) {
10872
0
        avail = ctxt->input->end - ctxt->input->cur;
10873
0
        if (avail < 1)
10874
0
      goto done;
10875
0
        switch (ctxt->instate) {
10876
0
            case XML_PARSER_EOF:
10877
          /*
10878
     * Document parsing is done !
10879
     */
10880
0
          goto done;
10881
0
            case XML_PARSER_START:
10882
                /*
10883
                 * Very first chars read from the document flow.
10884
                 */
10885
0
                if ((!terminate) && (avail < 4))
10886
0
                    goto done;
10887
10888
                /*
10889
                 * We need more bytes to detect EBCDIC code pages.
10890
                 * See xmlDetectEBCDIC.
10891
                 */
10892
0
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
10893
0
                    (!terminate) && (avail < 200))
10894
0
                    goto done;
10895
10896
0
                xmlDetectEncoding(ctxt);
10897
0
                ctxt->instate = XML_PARSER_XML_DECL;
10898
0
    break;
10899
10900
0
            case XML_PARSER_XML_DECL:
10901
0
    if ((!terminate) && (avail < 2))
10902
0
        goto done;
10903
0
    cur = ctxt->input->cur[0];
10904
0
    next = ctxt->input->cur[1];
10905
0
          if ((cur == '<') && (next == '?')) {
10906
        /* PI or XML decl */
10907
0
        if ((!terminate) &&
10908
0
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
10909
0
      goto done;
10910
0
        if ((ctxt->input->cur[2] == 'x') &&
10911
0
      (ctxt->input->cur[3] == 'm') &&
10912
0
      (ctxt->input->cur[4] == 'l') &&
10913
0
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
10914
0
      ret += 5;
10915
0
      xmlParseXMLDecl(ctxt);
10916
0
        } else {
10917
0
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10918
0
                        if (ctxt->version == NULL) {
10919
0
                            xmlErrMemory(ctxt);
10920
0
                            break;
10921
0
                        }
10922
0
        }
10923
0
    } else {
10924
0
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10925
0
        if (ctxt->version == NULL) {
10926
0
            xmlErrMemory(ctxt);
10927
0
      break;
10928
0
        }
10929
0
    }
10930
0
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10931
0
                    ctxt->sax->setDocumentLocator(ctxt->userData,
10932
0
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
10933
0
                }
10934
0
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10935
0
                    (!ctxt->disableSAX))
10936
0
                    ctxt->sax->startDocument(ctxt->userData);
10937
0
                ctxt->instate = XML_PARSER_MISC;
10938
0
    break;
10939
0
            case XML_PARSER_START_TAG: {
10940
0
          const xmlChar *name;
10941
0
    const xmlChar *prefix = NULL;
10942
0
    const xmlChar *URI = NULL;
10943
0
                int line = ctxt->input->line;
10944
0
    int nbNs = 0;
10945
10946
0
    if ((!terminate) && (avail < 2))
10947
0
        goto done;
10948
0
    cur = ctxt->input->cur[0];
10949
0
          if (cur != '<') {
10950
0
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10951
0
                                   "Start tag expected, '<' not found");
10952
0
                    ctxt->instate = XML_PARSER_EOF;
10953
0
                    xmlFinishDocument(ctxt);
10954
0
        goto done;
10955
0
    }
10956
0
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
10957
0
                    goto done;
10958
0
    if (ctxt->spaceNr == 0)
10959
0
        spacePush(ctxt, -1);
10960
0
    else if (*ctxt->space == -2)
10961
0
        spacePush(ctxt, -1);
10962
0
    else
10963
0
        spacePush(ctxt, *ctxt->space);
10964
0
#ifdef LIBXML_SAX1_ENABLED
10965
0
    if (ctxt->sax2)
10966
0
#endif /* LIBXML_SAX1_ENABLED */
10967
0
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
10968
0
#ifdef LIBXML_SAX1_ENABLED
10969
0
    else
10970
0
        name = xmlParseStartTag(ctxt);
10971
0
#endif /* LIBXML_SAX1_ENABLED */
10972
0
    if (name == NULL) {
10973
0
        spacePop(ctxt);
10974
0
                    ctxt->instate = XML_PARSER_EOF;
10975
0
                    xmlFinishDocument(ctxt);
10976
0
        goto done;
10977
0
    }
10978
0
#ifdef LIBXML_VALID_ENABLED
10979
    /*
10980
     * [ VC: Root Element Type ]
10981
     * The Name in the document type declaration must match
10982
     * the element type of the root element.
10983
     */
10984
0
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10985
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10986
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10987
0
#endif /* LIBXML_VALID_ENABLED */
10988
10989
    /*
10990
     * Check for an Empty Element.
10991
     */
10992
0
    if ((RAW == '/') && (NXT(1) == '>')) {
10993
0
        SKIP(2);
10994
10995
0
        if (ctxt->sax2) {
10996
0
      if ((ctxt->sax != NULL) &&
10997
0
          (ctxt->sax->endElementNs != NULL) &&
10998
0
          (!ctxt->disableSAX))
10999
0
          ctxt->sax->endElementNs(ctxt->userData, name,
11000
0
                                  prefix, URI);
11001
0
      if (nbNs > 0)
11002
0
          xmlParserNsPop(ctxt, nbNs);
11003
0
#ifdef LIBXML_SAX1_ENABLED
11004
0
        } else {
11005
0
      if ((ctxt->sax != NULL) &&
11006
0
          (ctxt->sax->endElement != NULL) &&
11007
0
          (!ctxt->disableSAX))
11008
0
          ctxt->sax->endElement(ctxt->userData, name);
11009
0
#endif /* LIBXML_SAX1_ENABLED */
11010
0
        }
11011
0
        spacePop(ctxt);
11012
0
    } else if (RAW == '>') {
11013
0
        NEXT;
11014
0
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11015
0
    } else {
11016
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11017
0
           "Couldn't find end of Start Tag %s\n",
11018
0
           name);
11019
0
        nodePop(ctxt);
11020
0
        spacePop(ctxt);
11021
0
                    if (nbNs > 0)
11022
0
                        xmlParserNsPop(ctxt, nbNs);
11023
0
    }
11024
11025
0
                if (ctxt->nameNr == 0)
11026
0
                    ctxt->instate = XML_PARSER_EPILOG;
11027
0
                else
11028
0
                    ctxt->instate = XML_PARSER_CONTENT;
11029
0
                break;
11030
0
      }
11031
0
            case XML_PARSER_CONTENT: {
11032
0
    cur = ctxt->input->cur[0];
11033
11034
0
    if (cur == '<') {
11035
0
                    if ((!terminate) && (avail < 2))
11036
0
                        goto done;
11037
0
        next = ctxt->input->cur[1];
11038
11039
0
                    if (next == '/') {
11040
0
                        ctxt->instate = XML_PARSER_END_TAG;
11041
0
                        break;
11042
0
                    } else if (next == '?') {
11043
0
                        if ((!terminate) &&
11044
0
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11045
0
                            goto done;
11046
0
                        xmlParsePI(ctxt);
11047
0
                        ctxt->instate = XML_PARSER_CONTENT;
11048
0
                        break;
11049
0
                    } else if (next == '!') {
11050
0
                        if ((!terminate) && (avail < 3))
11051
0
                            goto done;
11052
0
                        next = ctxt->input->cur[2];
11053
11054
0
                        if (next == '-') {
11055
0
                            if ((!terminate) && (avail < 4))
11056
0
                                goto done;
11057
0
                            if (ctxt->input->cur[3] == '-') {
11058
0
                                if ((!terminate) &&
11059
0
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11060
0
                                    goto done;
11061
0
                                xmlParseComment(ctxt);
11062
0
                                ctxt->instate = XML_PARSER_CONTENT;
11063
0
                                break;
11064
0
                            }
11065
0
                        } else if (next == '[') {
11066
0
                            if ((!terminate) && (avail < 9))
11067
0
                                goto done;
11068
0
                            if ((ctxt->input->cur[2] == '[') &&
11069
0
                                (ctxt->input->cur[3] == 'C') &&
11070
0
                                (ctxt->input->cur[4] == 'D') &&
11071
0
                                (ctxt->input->cur[5] == 'A') &&
11072
0
                                (ctxt->input->cur[6] == 'T') &&
11073
0
                                (ctxt->input->cur[7] == 'A') &&
11074
0
                                (ctxt->input->cur[8] == '[')) {
11075
0
                                if ((!terminate) &&
11076
0
                                    (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11077
0
                                    goto done;
11078
0
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11079
0
                                xmlParseCDSect(ctxt);
11080
0
                                ctxt->instate = XML_PARSER_CONTENT;
11081
0
                                break;
11082
0
                            }
11083
0
                        }
11084
0
                    }
11085
0
    } else if (cur == '&') {
11086
0
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11087
0
      goto done;
11088
0
        xmlParseReference(ctxt);
11089
0
                    break;
11090
0
    } else {
11091
        /* TODO Avoid the extra copy, handle directly !!! */
11092
        /*
11093
         * Goal of the following test is:
11094
         *  - minimize calls to the SAX 'character' callback
11095
         *    when they are mergeable
11096
         *  - handle a problem for isBlank when we only parse
11097
         *    a sequence of blank chars and the next one is
11098
         *    not available to check against '<' presence.
11099
         *  - tries to homogenize the differences in SAX
11100
         *    callbacks between the push and pull versions
11101
         *    of the parser.
11102
         */
11103
0
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11104
0
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11105
0
          goto done;
11106
0
                    }
11107
0
                    ctxt->checkIndex = 0;
11108
0
        xmlParseCharDataInternal(ctxt, !terminate);
11109
0
                    break;
11110
0
    }
11111
11112
0
                ctxt->instate = XML_PARSER_START_TAG;
11113
0
    break;
11114
0
      }
11115
0
            case XML_PARSER_END_TAG:
11116
0
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11117
0
        goto done;
11118
0
    if (ctxt->sax2) {
11119
0
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11120
0
        nameNsPop(ctxt);
11121
0
    }
11122
0
#ifdef LIBXML_SAX1_ENABLED
11123
0
      else
11124
0
        xmlParseEndTag1(ctxt, 0);
11125
0
#endif /* LIBXML_SAX1_ENABLED */
11126
0
    if (ctxt->nameNr == 0) {
11127
0
        ctxt->instate = XML_PARSER_EPILOG;
11128
0
    } else {
11129
0
        ctxt->instate = XML_PARSER_CONTENT;
11130
0
    }
11131
0
    break;
11132
0
            case XML_PARSER_MISC:
11133
0
            case XML_PARSER_PROLOG:
11134
0
            case XML_PARSER_EPILOG:
11135
0
    SKIP_BLANKS;
11136
0
                avail = ctxt->input->end - ctxt->input->cur;
11137
0
    if (avail < 1)
11138
0
        goto done;
11139
0
    if (ctxt->input->cur[0] == '<') {
11140
0
                    if ((!terminate) && (avail < 2))
11141
0
                        goto done;
11142
0
                    next = ctxt->input->cur[1];
11143
0
                    if (next == '?') {
11144
0
                        if ((!terminate) &&
11145
0
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11146
0
                            goto done;
11147
0
                        xmlParsePI(ctxt);
11148
0
                        break;
11149
0
                    } else if (next == '!') {
11150
0
                        if ((!terminate) && (avail < 3))
11151
0
                            goto done;
11152
11153
0
                        if (ctxt->input->cur[2] == '-') {
11154
0
                            if ((!terminate) && (avail < 4))
11155
0
                                goto done;
11156
0
                            if (ctxt->input->cur[3] == '-') {
11157
0
                                if ((!terminate) &&
11158
0
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11159
0
                                    goto done;
11160
0
                                xmlParseComment(ctxt);
11161
0
                                break;
11162
0
                            }
11163
0
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11164
0
                            if ((!terminate) && (avail < 9))
11165
0
                                goto done;
11166
0
                            if ((ctxt->input->cur[2] == 'D') &&
11167
0
                                (ctxt->input->cur[3] == 'O') &&
11168
0
                                (ctxt->input->cur[4] == 'C') &&
11169
0
                                (ctxt->input->cur[5] == 'T') &&
11170
0
                                (ctxt->input->cur[6] == 'Y') &&
11171
0
                                (ctxt->input->cur[7] == 'P') &&
11172
0
                                (ctxt->input->cur[8] == 'E')) {
11173
0
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11174
0
                                    goto done;
11175
0
                                ctxt->inSubset = 1;
11176
0
                                xmlParseDocTypeDecl(ctxt);
11177
0
                                if (RAW == '[') {
11178
0
                                    ctxt->instate = XML_PARSER_DTD;
11179
0
                                } else {
11180
0
                                    if (RAW == '>')
11181
0
                                        NEXT;
11182
                                    /*
11183
                                     * Create and update the external subset.
11184
                                     */
11185
0
                                    ctxt->inSubset = 2;
11186
0
                                    if ((ctxt->sax != NULL) &&
11187
0
                                        (!ctxt->disableSAX) &&
11188
0
                                        (ctxt->sax->externalSubset != NULL))
11189
0
                                        ctxt->sax->externalSubset(
11190
0
                                                ctxt->userData,
11191
0
                                                ctxt->intSubName,
11192
0
                                                ctxt->extSubSystem,
11193
0
                                                ctxt->extSubURI);
11194
0
                                    ctxt->inSubset = 0;
11195
0
                                    xmlCleanSpecialAttr(ctxt);
11196
0
                                    ctxt->instate = XML_PARSER_PROLOG;
11197
0
                                }
11198
0
                                break;
11199
0
                            }
11200
0
                        }
11201
0
                    }
11202
0
                }
11203
11204
0
                if (ctxt->instate == XML_PARSER_EPILOG) {
11205
0
                    if (ctxt->errNo == XML_ERR_OK)
11206
0
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11207
0
        ctxt->instate = XML_PARSER_EOF;
11208
0
                    xmlFinishDocument(ctxt);
11209
0
                } else {
11210
0
        ctxt->instate = XML_PARSER_START_TAG;
11211
0
    }
11212
0
    break;
11213
0
            case XML_PARSER_DTD: {
11214
0
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11215
0
                    goto done;
11216
0
    xmlParseInternalSubset(ctxt);
11217
0
    ctxt->inSubset = 2;
11218
0
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11219
0
        (ctxt->sax->externalSubset != NULL))
11220
0
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11221
0
          ctxt->extSubSystem, ctxt->extSubURI);
11222
0
    ctxt->inSubset = 0;
11223
0
    xmlCleanSpecialAttr(ctxt);
11224
0
    ctxt->instate = XML_PARSER_PROLOG;
11225
0
                break;
11226
0
      }
11227
0
            default:
11228
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11229
0
      "PP: internal error\n");
11230
0
    ctxt->instate = XML_PARSER_EOF;
11231
0
    break;
11232
0
  }
11233
0
    }
11234
0
done:
11235
0
    return(ret);
11236
0
}
11237
11238
/**
11239
 * Parse a chunk of memory in push parser mode.
11240
 *
11241
 * Assumes that the parser context was initialized with
11242
 * #xmlCreatePushParserCtxt.
11243
 *
11244
 * The last chunk, which will often be empty, must be marked with
11245
 * the `terminate` flag. With the default SAX callbacks, the resulting
11246
 * document will be available in ctxt->myDoc. This pointer will not
11247
 * be freed when calling #xmlFreeParserCtxt and must be freed by the
11248
 * caller. If the document isn't well-formed, it will still be returned
11249
 * in ctxt->myDoc.
11250
 *
11251
 * As an exception, #xmlCtxtResetPush will free the document in
11252
 * ctxt->myDoc. So ctxt->myDoc should be set to NULL after extracting
11253
 * the document.
11254
 *
11255
 * Since 2.14.0, #xmlCtxtGetDocument can be used to retrieve the
11256
 * result document.
11257
 *
11258
 * @param ctxt  an XML parser context
11259
 * @param chunk  chunk of memory
11260
 * @param size  size of chunk in bytes
11261
 * @param terminate  last chunk indicator
11262
 * @returns an xmlParserErrors code (0 on success).
11263
 */
11264
int
11265
xmlParseChunk(xmlParserCtxt *ctxt, const char *chunk, int size,
11266
0
              int terminate) {
11267
0
    size_t curBase;
11268
0
    size_t maxLength;
11269
0
    size_t pos;
11270
0
    int end_in_lf = 0;
11271
0
    int res;
11272
11273
0
    if ((ctxt == NULL) || (size < 0))
11274
0
        return(XML_ERR_ARGUMENT);
11275
0
    if ((chunk == NULL) && (size > 0))
11276
0
        return(XML_ERR_ARGUMENT);
11277
0
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11278
0
        return(XML_ERR_ARGUMENT);
11279
0
    if (ctxt->disableSAX != 0)
11280
0
        return(ctxt->errNo);
11281
11282
0
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11283
0
    if (ctxt->instate == XML_PARSER_START)
11284
0
        xmlCtxtInitializeLate(ctxt);
11285
0
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11286
0
        (chunk[size - 1] == '\r')) {
11287
0
  end_in_lf = 1;
11288
0
  size--;
11289
0
    }
11290
11291
    /*
11292
     * Also push an empty chunk to make sure that the raw buffer
11293
     * will be flushed if there is an encoder.
11294
     */
11295
0
    pos = ctxt->input->cur - ctxt->input->base;
11296
0
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11297
0
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11298
0
    if (res < 0) {
11299
0
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11300
0
        xmlHaltParser(ctxt);
11301
0
        return(ctxt->errNo);
11302
0
    }
11303
11304
0
    xmlParseTryOrFinish(ctxt, terminate);
11305
11306
0
    curBase = ctxt->input->cur - ctxt->input->base;
11307
0
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11308
0
                XML_MAX_HUGE_LENGTH :
11309
0
                XML_MAX_LOOKUP_LIMIT;
11310
0
    if (curBase > maxLength) {
11311
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11312
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11313
0
        xmlHaltParser(ctxt);
11314
0
    }
11315
11316
0
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11317
0
        return(ctxt->errNo);
11318
11319
0
    if (end_in_lf == 1) {
11320
0
  pos = ctxt->input->cur - ctxt->input->base;
11321
0
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11322
0
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11323
0
        if (res < 0) {
11324
0
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11325
0
            xmlHaltParser(ctxt);
11326
0
            return(ctxt->errNo);
11327
0
        }
11328
0
    }
11329
0
    if (terminate) {
11330
  /*
11331
   * Check for termination
11332
   */
11333
0
        if ((ctxt->instate != XML_PARSER_EOF) &&
11334
0
            (ctxt->instate != XML_PARSER_EPILOG)) {
11335
0
            if (ctxt->nameNr > 0) {
11336
0
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11337
0
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11338
0
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11339
0
                        "Premature end of data in tag %s line %d\n",
11340
0
                        name, line, NULL);
11341
0
            } else if (ctxt->instate == XML_PARSER_START) {
11342
0
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11343
0
            } else {
11344
0
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11345
0
                               "Start tag expected, '<' not found\n");
11346
0
            }
11347
0
        } else {
11348
0
            xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
11349
0
        }
11350
0
  if (ctxt->instate != XML_PARSER_EOF) {
11351
0
            ctxt->instate = XML_PARSER_EOF;
11352
0
            xmlFinishDocument(ctxt);
11353
0
  }
11354
0
    }
11355
0
    if (ctxt->wellFormed == 0)
11356
0
  return((xmlParserErrors) ctxt->errNo);
11357
0
    else
11358
0
        return(0);
11359
0
}
11360
11361
/************************************************************************
11362
 *                  *
11363
 *    I/O front end functions to the parser     *
11364
 *                  *
11365
 ************************************************************************/
11366
11367
/**
11368
 * Create a parser context for using the XML parser in push mode.
11369
 * See #xmlParseChunk.
11370
 *
11371
 * Passing an initial chunk is useless and deprecated.
11372
 *
11373
 * The push parser doesn't support recovery mode or the
11374
 * XML_PARSE_NOBLANKS option.
11375
 *
11376
 * `filename` is used as base URI to fetch external entities and for
11377
 * error reports.
11378
 *
11379
 * @param sax  a SAX handler (optional)
11380
 * @param user_data  user data for SAX callbacks (optional)
11381
 * @param chunk  initial chunk (optional, deprecated)
11382
 * @param size  size of initial chunk in bytes
11383
 * @param filename  file name or URI (optional)
11384
 * @returns the new parser context or NULL if a memory allocation
11385
 * failed.
11386
 */
11387
11388
xmlParserCtxt *
11389
xmlCreatePushParserCtxt(xmlSAXHandler *sax, void *user_data,
11390
0
                        const char *chunk, int size, const char *filename) {
11391
0
    xmlParserCtxtPtr ctxt;
11392
0
    xmlParserInputPtr input;
11393
11394
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11395
0
    if (ctxt == NULL)
11396
0
  return(NULL);
11397
11398
0
    ctxt->options &= ~XML_PARSE_NODICT;
11399
0
    ctxt->dictNames = 1;
11400
11401
0
    input = xmlNewPushInput(filename, chunk, size);
11402
0
    if (input == NULL) {
11403
0
  xmlFreeParserCtxt(ctxt);
11404
0
  return(NULL);
11405
0
    }
11406
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11407
0
        xmlFreeInputStream(input);
11408
0
        xmlFreeParserCtxt(ctxt);
11409
0
        return(NULL);
11410
0
    }
11411
11412
0
    return(ctxt);
11413
0
}
11414
#endif /* LIBXML_PUSH_ENABLED */
11415
11416
/**
11417
 * Blocks further parser processing
11418
 *
11419
 * @param ctxt  an XML parser context
11420
 */
11421
void
11422
0
xmlStopParser(xmlParserCtxt *ctxt) {
11423
0
    if (ctxt == NULL)
11424
0
        return;
11425
0
    xmlHaltParser(ctxt);
11426
    /*
11427
     * TODO: Update ctxt->lastError and ctxt->wellFormed?
11428
     */
11429
0
    if (ctxt->errNo != XML_ERR_NO_MEMORY)
11430
0
        ctxt->errNo = XML_ERR_USER_STOP;
11431
0
}
11432
11433
/**
11434
 * Create a parser context for using the XML parser with an existing
11435
 * I/O stream
11436
 *
11437
 * @param sax  a SAX handler (optional)
11438
 * @param user_data  user data for SAX callbacks (optional)
11439
 * @param ioread  an I/O read function
11440
 * @param ioclose  an I/O close function (optional)
11441
 * @param ioctx  an I/O handler
11442
 * @param enc  the charset encoding if known (deprecated)
11443
 * @returns the new parser context or NULL
11444
 */
11445
xmlParserCtxt *
11446
xmlCreateIOParserCtxt(xmlSAXHandler *sax, void *user_data,
11447
                      xmlInputReadCallback ioread,
11448
                      xmlInputCloseCallback ioclose,
11449
0
                      void *ioctx, xmlCharEncoding enc) {
11450
0
    xmlParserCtxtPtr ctxt;
11451
0
    xmlParserInputPtr input;
11452
0
    const char *encoding;
11453
11454
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11455
0
    if (ctxt == NULL)
11456
0
  return(NULL);
11457
11458
0
    encoding = xmlGetCharEncodingName(enc);
11459
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11460
0
                                  encoding, 0);
11461
0
    if (input == NULL) {
11462
0
  xmlFreeParserCtxt(ctxt);
11463
0
        return (NULL);
11464
0
    }
11465
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11466
0
        xmlFreeInputStream(input);
11467
0
        xmlFreeParserCtxt(ctxt);
11468
0
        return(NULL);
11469
0
    }
11470
11471
0
    return(ctxt);
11472
0
}
11473
11474
#ifdef LIBXML_VALID_ENABLED
11475
/************************************************************************
11476
 *                  *
11477
 *    Front ends when parsing a DTD       *
11478
 *                  *
11479
 ************************************************************************/
11480
11481
/**
11482
 * Parse a DTD.
11483
 *
11484
 * Option XML_PARSE_DTDLOAD should be enabled in the parser context
11485
 * to make external entities work.
11486
 *
11487
 * @since 2.14.0
11488
 *
11489
 * @param ctxt  a parser context
11490
 * @param input  a parser input
11491
 * @param publicId  public ID of the DTD (optional)
11492
 * @param systemId  system ID of the DTD (optional)
11493
 * @returns the resulting xmlDtd or NULL in case of error.
11494
 * `input` will be freed by the function in any case.
11495
 */
11496
xmlDtd *
11497
xmlCtxtParseDtd(xmlParserCtxt *ctxt, xmlParserInput *input,
11498
876
                const xmlChar *publicId, const xmlChar *systemId) {
11499
876
    xmlDtdPtr ret = NULL;
11500
11501
876
    if ((ctxt == NULL) || (input == NULL)) {
11502
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
11503
0
        xmlFreeInputStream(input);
11504
0
        return(NULL);
11505
0
    }
11506
11507
876
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11508
2
        xmlFreeInputStream(input);
11509
2
        return(NULL);
11510
2
    }
11511
11512
874
    if (publicId == NULL)
11513
634
        publicId = BAD_CAST "none";
11514
874
    if (systemId == NULL)
11515
0
        systemId = BAD_CAST "none";
11516
11517
874
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11518
874
    if (ctxt->myDoc == NULL) {
11519
2
        xmlErrMemory(ctxt);
11520
2
        goto error;
11521
2
    }
11522
872
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11523
872
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11524
872
                                       publicId, systemId);
11525
872
    if (ctxt->myDoc->extSubset == NULL) {
11526
4
        xmlErrMemory(ctxt);
11527
4
        xmlFreeDoc(ctxt->myDoc);
11528
4
        goto error;
11529
4
    }
11530
11531
868
    xmlParseExternalSubset(ctxt, publicId, systemId);
11532
11533
868
    if (ctxt->wellFormed) {
11534
0
        ret = ctxt->myDoc->extSubset;
11535
0
        ctxt->myDoc->extSubset = NULL;
11536
0
        if (ret != NULL) {
11537
0
            xmlNodePtr tmp;
11538
11539
0
            ret->doc = NULL;
11540
0
            tmp = ret->children;
11541
0
            while (tmp != NULL) {
11542
0
                tmp->doc = NULL;
11543
0
                tmp = tmp->next;
11544
0
            }
11545
0
        }
11546
868
    } else {
11547
868
        ret = NULL;
11548
868
    }
11549
868
    xmlFreeDoc(ctxt->myDoc);
11550
868
    ctxt->myDoc = NULL;
11551
11552
874
error:
11553
874
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
11554
11555
874
    return(ret);
11556
868
}
11557
11558
/**
11559
 * Load and parse a DTD
11560
 *
11561
 * @deprecated Use #xmlCtxtParseDtd.
11562
 *
11563
 * @param sax  the SAX handler block or NULL
11564
 * @param input  an Input Buffer
11565
 * @param enc  the charset encoding if known
11566
 * @returns the resulting xmlDtd or NULL in case of error.
11567
 * `input` will be freed by the function in any case.
11568
 */
11569
11570
xmlDtd *
11571
xmlIOParseDTD(xmlSAXHandler *sax, xmlParserInputBuffer *input,
11572
0
        xmlCharEncoding enc) {
11573
0
    xmlDtdPtr ret = NULL;
11574
0
    xmlParserCtxtPtr ctxt;
11575
0
    xmlParserInputPtr pinput = NULL;
11576
11577
0
    if (input == NULL)
11578
0
  return(NULL);
11579
11580
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11581
0
    if (ctxt == NULL) {
11582
0
        xmlFreeParserInputBuffer(input);
11583
0
  return(NULL);
11584
0
    }
11585
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11586
11587
    /*
11588
     * generate a parser input from the I/O handler
11589
     */
11590
11591
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11592
0
    if (pinput == NULL) {
11593
0
        xmlFreeParserInputBuffer(input);
11594
0
  xmlFreeParserCtxt(ctxt);
11595
0
  return(NULL);
11596
0
    }
11597
11598
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11599
0
        xmlSwitchEncoding(ctxt, enc);
11600
0
    }
11601
11602
0
    ret = xmlCtxtParseDtd(ctxt, pinput, NULL, NULL);
11603
11604
0
    xmlFreeParserCtxt(ctxt);
11605
0
    return(ret);
11606
0
}
11607
11608
/**
11609
 * Load and parse an external subset.
11610
 *
11611
 * @deprecated Use #xmlCtxtParseDtd.
11612
 *
11613
 * @param sax  the SAX handler block
11614
 * @param publicId  public identifier of the DTD (optional)
11615
 * @param systemId  system identifier (URL) of the DTD
11616
 * @returns the resulting xmlDtd or NULL in case of error.
11617
 */
11618
11619
xmlDtd *
11620
xmlSAXParseDTD(xmlSAXHandler *sax, const xmlChar *publicId,
11621
13.4k
               const xmlChar *systemId) {
11622
13.4k
    xmlDtdPtr ret = NULL;
11623
13.4k
    xmlParserCtxtPtr ctxt;
11624
13.4k
    xmlParserInputPtr input = NULL;
11625
13.4k
    xmlChar* systemIdCanonic;
11626
11627
13.4k
    if ((publicId == NULL) && (systemId == NULL)) return(NULL);
11628
11629
13.4k
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11630
13.4k
    if (ctxt == NULL) {
11631
71
  return(NULL);
11632
71
    }
11633
13.4k
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11634
11635
    /*
11636
     * Canonicalise the system ID
11637
     */
11638
13.4k
    systemIdCanonic = xmlCanonicPath(systemId);
11639
13.4k
    if ((systemId != NULL) && (systemIdCanonic == NULL)) {
11640
14
  xmlFreeParserCtxt(ctxt);
11641
14
  return(NULL);
11642
14
    }
11643
11644
    /*
11645
     * Ask the Entity resolver to load the damn thing
11646
     */
11647
11648
13.3k
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11649
13.3k
  input = ctxt->sax->resolveEntity(ctxt->userData, publicId,
11650
13.3k
                                   systemIdCanonic);
11651
13.3k
    if (input == NULL) {
11652
12.5k
  xmlFreeParserCtxt(ctxt);
11653
12.5k
  if (systemIdCanonic != NULL)
11654
12.2k
      xmlFree(systemIdCanonic);
11655
12.5k
  return(NULL);
11656
12.5k
    }
11657
11658
876
    if (input->filename == NULL)
11659
0
  input->filename = (char *) systemIdCanonic;
11660
876
    else
11661
876
  xmlFree(systemIdCanonic);
11662
11663
876
    ret = xmlCtxtParseDtd(ctxt, input, publicId, systemId);
11664
11665
876
    xmlFreeParserCtxt(ctxt);
11666
876
    return(ret);
11667
13.3k
}
11668
11669
11670
/**
11671
 * Load and parse an external subset.
11672
 *
11673
 * @param publicId  public identifier of the DTD (optional)
11674
 * @param systemId  system identifier (URL) of the DTD
11675
 * @returns the resulting xmlDtd or NULL in case of error.
11676
 */
11677
11678
xmlDtd *
11679
13.4k
xmlParseDTD(const xmlChar *publicId, const xmlChar *systemId) {
11680
13.4k
    return(xmlSAXParseDTD(NULL, publicId, systemId));
11681
13.4k
}
11682
#endif /* LIBXML_VALID_ENABLED */
11683
11684
/************************************************************************
11685
 *                  *
11686
 *    Front ends when parsing an Entity     *
11687
 *                  *
11688
 ************************************************************************/
11689
11690
static xmlNodePtr
11691
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11692
3.79k
                            int hasTextDecl, int buildTree) {
11693
3.79k
    xmlNodePtr root = NULL;
11694
3.79k
    xmlNodePtr list = NULL;
11695
3.79k
    xmlChar *rootName = BAD_CAST "#root";
11696
3.79k
    int result;
11697
11698
3.79k
    if (buildTree) {
11699
3.79k
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11700
3.79k
        if (root == NULL) {
11701
4
            xmlErrMemory(ctxt);
11702
4
            goto error;
11703
4
        }
11704
3.79k
    }
11705
11706
3.78k
    if (xmlCtxtPushInput(ctxt, input) < 0)
11707
2
        goto error;
11708
11709
3.78k
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11710
3.78k
    spacePush(ctxt, -1);
11711
11712
3.78k
    if (buildTree)
11713
3.78k
        nodePush(ctxt, root);
11714
11715
3.78k
    if (hasTextDecl) {
11716
403
        xmlDetectEncoding(ctxt);
11717
11718
        /*
11719
         * Parse a possible text declaration first
11720
         */
11721
403
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11722
403
            (IS_BLANK_CH(NXT(5)))) {
11723
0
            xmlParseTextDecl(ctxt);
11724
            /*
11725
             * An XML-1.0 document can't reference an entity not XML-1.0
11726
             */
11727
0
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11728
0
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11729
0
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11730
0
                               "Version mismatch between document and "
11731
0
                               "entity\n");
11732
0
            }
11733
0
        }
11734
403
    }
11735
11736
3.78k
    xmlParseContentInternal(ctxt);
11737
11738
3.78k
    if (ctxt->input->cur < ctxt->input->end)
11739
864
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11740
11741
3.78k
    if ((ctxt->wellFormed) ||
11742
3.78k
        ((ctxt->recovery) && (!xmlCtxtIsCatastrophicError(ctxt)))) {
11743
2.37k
        if (root != NULL) {
11744
2.37k
            xmlNodePtr cur;
11745
11746
            /*
11747
             * Unlink newly created node list.
11748
             */
11749
2.37k
            list = root->children;
11750
2.37k
            root->children = NULL;
11751
2.37k
            root->last = NULL;
11752
5.39k
            for (cur = list; cur != NULL; cur = cur->next)
11753
3.02k
                cur->parent = NULL;
11754
2.37k
        }
11755
2.37k
    }
11756
11757
    /*
11758
     * Read the rest of the stream in case of errors. We want
11759
     * to account for the whole entity size.
11760
     */
11761
29.2k
    do {
11762
29.2k
        ctxt->input->cur = ctxt->input->end;
11763
29.2k
        xmlParserShrink(ctxt);
11764
29.2k
        result = xmlParserGrow(ctxt);
11765
29.2k
    } while (result > 0);
11766
11767
3.78k
    if (buildTree)
11768
3.78k
        nodePop(ctxt);
11769
11770
3.78k
    namePop(ctxt);
11771
3.78k
    spacePop(ctxt);
11772
11773
3.78k
    xmlCtxtPopInput(ctxt);
11774
11775
3.79k
error:
11776
3.79k
    xmlFreeNode(root);
11777
11778
3.79k
    return(list);
11779
3.78k
}
11780
11781
static void
11782
4.84k
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
11783
4.84k
    xmlParserInputPtr input;
11784
4.84k
    xmlNodePtr list;
11785
4.84k
    unsigned long consumed;
11786
4.84k
    int isExternal;
11787
4.84k
    int buildTree;
11788
4.84k
    int oldMinNsIndex;
11789
4.84k
    int oldNodelen, oldNodemem;
11790
11791
4.84k
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
11792
4.84k
    buildTree = (ctxt->node != NULL);
11793
11794
    /*
11795
     * Recursion check
11796
     */
11797
4.84k
    if (ent->flags & XML_ENT_EXPANDING) {
11798
677
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
11799
677
        xmlHaltParser(ctxt);
11800
677
        goto error;
11801
677
    }
11802
11803
    /*
11804
     * Load entity
11805
     */
11806
4.16k
    input = xmlNewEntityInputStream(ctxt, ent);
11807
4.16k
    if (input == NULL)
11808
371
        goto error;
11809
11810
    /*
11811
     * When building a tree, we need to limit the scope of namespace
11812
     * declarations, so that entities don't reference xmlNs structs
11813
     * from the parent of a reference.
11814
     */
11815
3.79k
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
11816
3.79k
    if (buildTree)
11817
3.79k
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
11818
11819
3.79k
    oldNodelen = ctxt->nodelen;
11820
3.79k
    oldNodemem = ctxt->nodemem;
11821
3.79k
    ctxt->nodelen = 0;
11822
3.79k
    ctxt->nodemem = 0;
11823
11824
    /*
11825
     * Parse content
11826
     *
11827
     * This initiates a recursive call chain:
11828
     *
11829
     * - xmlCtxtParseContentInternal
11830
     * - xmlParseContentInternal
11831
     * - xmlParseReference
11832
     * - xmlCtxtParseEntity
11833
     *
11834
     * The nesting depth is limited by the maximum number of inputs,
11835
     * see xmlCtxtPushInput.
11836
     *
11837
     * It's possible to make this non-recursive (minNsIndex must be
11838
     * stored in the input struct) at the expense of code readability.
11839
     */
11840
11841
3.79k
    ent->flags |= XML_ENT_EXPANDING;
11842
11843
3.79k
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
11844
11845
3.79k
    ent->flags &= ~XML_ENT_EXPANDING;
11846
11847
3.79k
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
11848
3.79k
    ctxt->nodelen = oldNodelen;
11849
3.79k
    ctxt->nodemem = oldNodemem;
11850
11851
    /*
11852
     * Entity size accounting
11853
     */
11854
3.79k
    consumed = input->consumed;
11855
3.79k
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
11856
11857
3.79k
    if ((ent->flags & XML_ENT_CHECKED) == 0)
11858
2.20k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
11859
11860
3.79k
    if ((ent->flags & XML_ENT_PARSED) == 0) {
11861
2.20k
        if (isExternal)
11862
403
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
11863
11864
2.20k
        ent->children = list;
11865
11866
5.23k
        while (list != NULL) {
11867
3.02k
            list->parent = (xmlNodePtr) ent;
11868
11869
            /*
11870
             * Downstream code like the nginx xslt module can set
11871
             * ctxt->myDoc->extSubset to a separate DTD, so the entity
11872
             * might have a different or a NULL document.
11873
             */
11874
3.02k
            if (list->doc != ent->doc)
11875
0
                xmlSetTreeDoc(list, ent->doc);
11876
11877
3.02k
            if (list->next == NULL)
11878
899
                ent->last = list;
11879
3.02k
            list = list->next;
11880
3.02k
        }
11881
2.20k
    } else {
11882
1.58k
        xmlFreeNodeList(list);
11883
1.58k
    }
11884
11885
3.79k
    xmlFreeInputStream(input);
11886
11887
4.84k
error:
11888
4.84k
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
11889
4.84k
}
11890
11891
/**
11892
 * Parse an external general entity within an existing parsing context
11893
 * An external general parsed entity is well-formed if it matches the
11894
 * production labeled extParsedEnt.
11895
 *
11896
 *     [78] extParsedEnt ::= TextDecl? content
11897
 *
11898
 * @param ctxt  the existing parsing context
11899
 * @param URL  the URL for the entity to load
11900
 * @param ID  the System ID for the entity to load
11901
 * @param listOut  the return value for the set of parsed nodes
11902
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11903
 *    the parser error code otherwise
11904
 */
11905
11906
int
11907
xmlParseCtxtExternalEntity(xmlParserCtxt *ctxt, const xmlChar *URL,
11908
0
                           const xmlChar *ID, xmlNode **listOut) {
11909
0
    xmlParserInputPtr input;
11910
0
    xmlNodePtr list;
11911
11912
0
    if (listOut != NULL)
11913
0
        *listOut = NULL;
11914
11915
0
    if (ctxt == NULL)
11916
0
        return(XML_ERR_ARGUMENT);
11917
11918
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
11919
0
                            XML_RESOURCE_GENERAL_ENTITY);
11920
0
    if (input == NULL)
11921
0
        return(ctxt->errNo);
11922
11923
0
    xmlCtxtInitializeLate(ctxt);
11924
11925
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
11926
0
    if (listOut != NULL)
11927
0
        *listOut = list;
11928
0
    else
11929
0
        xmlFreeNodeList(list);
11930
11931
0
    xmlFreeInputStream(input);
11932
0
    return(ctxt->errNo);
11933
0
}
11934
11935
#ifdef LIBXML_SAX1_ENABLED
11936
/**
11937
 * Parse an external general entity
11938
 * An external general parsed entity is well-formed if it matches the
11939
 * production labeled extParsedEnt.
11940
 *
11941
 * @deprecated Use #xmlParseCtxtExternalEntity.
11942
 *
11943
 *     [78] extParsedEnt ::= TextDecl? content
11944
 *
11945
 * @param doc  the document the chunk pertains to
11946
 * @param sax  the SAX handler block (possibly NULL)
11947
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11948
 * @param depth  Used for loop detection, use 0
11949
 * @param URL  the URL for the entity to load
11950
 * @param ID  the System ID for the entity to load
11951
 * @param list  the return value for the set of parsed nodes
11952
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11953
 *    the parser error code otherwise
11954
 */
11955
11956
int
11957
xmlParseExternalEntity(xmlDoc *doc, xmlSAXHandler *sax, void *user_data,
11958
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNode **list) {
11959
0
    xmlParserCtxtPtr ctxt;
11960
0
    int ret;
11961
11962
0
    if (list != NULL)
11963
0
        *list = NULL;
11964
11965
0
    if (doc == NULL)
11966
0
        return(XML_ERR_ARGUMENT);
11967
11968
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11969
0
    if (ctxt == NULL)
11970
0
        return(XML_ERR_NO_MEMORY);
11971
11972
0
    ctxt->depth = depth;
11973
0
    ctxt->myDoc = doc;
11974
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
11975
11976
0
    xmlFreeParserCtxt(ctxt);
11977
0
    return(ret);
11978
0
}
11979
11980
/**
11981
 * Parse a well-balanced chunk of an XML document
11982
 * called by the parser
11983
 * The allowed sequence for the Well Balanced Chunk is the one defined by
11984
 * the content production in the XML grammar:
11985
 *
11986
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
11987
 *                       Comment)*
11988
 *
11989
 * @param doc  the document the chunk pertains to (must not be NULL)
11990
 * @param sax  the SAX handler block (possibly NULL)
11991
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11992
 * @param depth  Used for loop detection, use 0
11993
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
11994
 * @param lst  the return value for the set of parsed nodes
11995
 * @returns 0 if the chunk is well balanced, -1 in case of args problem and
11996
 *    the parser error code otherwise
11997
 */
11998
11999
int
12000
xmlParseBalancedChunkMemory(xmlDoc *doc, xmlSAXHandler *sax,
12001
0
     void *user_data, int depth, const xmlChar *string, xmlNode **lst) {
12002
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12003
0
                                                depth, string, lst, 0 );
12004
0
}
12005
#endif /* LIBXML_SAX1_ENABLED */
12006
12007
/**
12008
 * Parse a well-balanced chunk of XML matching the 'content' production.
12009
 *
12010
 * Namespaces in scope of `node` and entities of `node`'s document are
12011
 * recognized. When validating, the DTD of `node`'s document is used.
12012
 *
12013
 * Always consumes `input` even in error case.
12014
 *
12015
 * @since 2.14.0
12016
 *
12017
 * @param ctxt  parser context
12018
 * @param input  parser input
12019
 * @param node  target node or document
12020
 * @param hasTextDecl  whether to parse text declaration
12021
 * @returns a node list or NULL in case of error.
12022
 */
12023
xmlNode *
12024
xmlCtxtParseContent(xmlParserCtxt *ctxt, xmlParserInput *input,
12025
0
                    xmlNode *node, int hasTextDecl) {
12026
0
    xmlDocPtr doc;
12027
0
    xmlNodePtr cur, list = NULL;
12028
0
    int nsnr = 0;
12029
0
    xmlDictPtr oldDict;
12030
0
    int oldOptions, oldDictNames, oldLoadSubset;
12031
12032
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12033
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12034
0
        goto exit;
12035
0
    }
12036
12037
0
    doc = node->doc;
12038
0
    if (doc == NULL) {
12039
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12040
0
        goto exit;
12041
0
    }
12042
12043
0
    switch (node->type) {
12044
0
        case XML_ELEMENT_NODE:
12045
0
        case XML_DOCUMENT_NODE:
12046
0
        case XML_HTML_DOCUMENT_NODE:
12047
0
            break;
12048
12049
0
        case XML_ATTRIBUTE_NODE:
12050
0
        case XML_TEXT_NODE:
12051
0
        case XML_CDATA_SECTION_NODE:
12052
0
        case XML_ENTITY_REF_NODE:
12053
0
        case XML_PI_NODE:
12054
0
        case XML_COMMENT_NODE:
12055
0
            for (cur = node->parent; cur != NULL; cur = node->parent) {
12056
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12057
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12058
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12059
0
                    node = cur;
12060
0
                    break;
12061
0
                }
12062
0
            }
12063
0
            break;
12064
12065
0
        default:
12066
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12067
0
            goto exit;
12068
0
    }
12069
12070
0
    xmlCtxtReset(ctxt);
12071
12072
0
    oldDict = ctxt->dict;
12073
0
    oldOptions = ctxt->options;
12074
0
    oldDictNames = ctxt->dictNames;
12075
0
    oldLoadSubset = ctxt->loadsubset;
12076
12077
    /*
12078
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12079
     */
12080
0
    if (doc->dict != NULL) {
12081
0
        ctxt->dict = doc->dict;
12082
0
    } else {
12083
0
        ctxt->options |= XML_PARSE_NODICT;
12084
0
        ctxt->dictNames = 0;
12085
0
    }
12086
12087
    /*
12088
     * Disable IDs
12089
     */
12090
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12091
0
    ctxt->options |= XML_PARSE_SKIP_IDS;
12092
12093
0
    ctxt->myDoc = doc;
12094
12095
0
#ifdef LIBXML_HTML_ENABLED
12096
0
    if (ctxt->html) {
12097
        /*
12098
         * When parsing in context, it makes no sense to add implied
12099
         * elements like html/body/etc...
12100
         */
12101
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12102
12103
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12104
0
    } else
12105
0
#endif
12106
0
    {
12107
0
        xmlCtxtInitializeLate(ctxt);
12108
12109
        /*
12110
         * initialize the SAX2 namespaces stack
12111
         */
12112
0
        cur = node;
12113
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12114
0
            xmlNsPtr ns = cur->nsDef;
12115
0
            xmlHashedString hprefix, huri;
12116
12117
0
            while (ns != NULL) {
12118
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12119
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12120
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12121
0
                    nsnr++;
12122
0
                ns = ns->next;
12123
0
            }
12124
0
            cur = cur->parent;
12125
0
        }
12126
12127
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12128
12129
0
        if (nsnr > 0)
12130
0
            xmlParserNsPop(ctxt, nsnr);
12131
0
    }
12132
12133
0
    ctxt->dict = oldDict;
12134
0
    ctxt->options = oldOptions;
12135
0
    ctxt->dictNames = oldDictNames;
12136
0
    ctxt->loadsubset = oldLoadSubset;
12137
0
    ctxt->myDoc = NULL;
12138
0
    ctxt->node = NULL;
12139
12140
0
exit:
12141
0
    xmlFreeInputStream(input);
12142
0
    return(list);
12143
0
}
12144
12145
/**
12146
 * Parse a well-balanced chunk of an XML document
12147
 * within the context (DTD, namespaces, etc ...) of the given node.
12148
 *
12149
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12150
 * the content production in the XML grammar:
12151
 *
12152
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12153
 *                       Comment)*
12154
 *
12155
 * This function assumes the encoding of `node`'s document which is
12156
 * typically not what you want. A better alternative is
12157
 * #xmlCtxtParseContent.
12158
 *
12159
 * @param node  the context node
12160
 * @param data  the input string
12161
 * @param datalen  the input string length in bytes
12162
 * @param options  a combination of xmlParserOption
12163
 * @param listOut  the return value for the set of parsed nodes
12164
 * @returns XML_ERR_OK if the chunk is well balanced, and the parser
12165
 * error code otherwise
12166
 */
12167
xmlParserErrors
12168
xmlParseInNodeContext(xmlNode *node, const char *data, int datalen,
12169
0
                      int options, xmlNode **listOut) {
12170
0
    xmlParserCtxtPtr ctxt;
12171
0
    xmlParserInputPtr input;
12172
0
    xmlDocPtr doc;
12173
0
    xmlNodePtr list;
12174
0
    xmlParserErrors ret;
12175
12176
0
    if (listOut == NULL)
12177
0
        return(XML_ERR_INTERNAL_ERROR);
12178
0
    *listOut = NULL;
12179
12180
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12181
0
        return(XML_ERR_INTERNAL_ERROR);
12182
12183
0
    doc = node->doc;
12184
0
    if (doc == NULL)
12185
0
        return(XML_ERR_INTERNAL_ERROR);
12186
12187
0
#ifdef LIBXML_HTML_ENABLED
12188
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12189
0
        ctxt = htmlNewParserCtxt();
12190
0
    }
12191
0
    else
12192
0
#endif
12193
0
        ctxt = xmlNewParserCtxt();
12194
12195
0
    if (ctxt == NULL)
12196
0
        return(XML_ERR_NO_MEMORY);
12197
12198
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12199
0
                                      (const char *) doc->encoding,
12200
0
                                      XML_INPUT_BUF_STATIC);
12201
0
    if (input == NULL) {
12202
0
        xmlFreeParserCtxt(ctxt);
12203
0
        return(XML_ERR_NO_MEMORY);
12204
0
    }
12205
12206
0
    xmlCtxtUseOptions(ctxt, options);
12207
12208
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12209
12210
0
    if (list == NULL) {
12211
0
        ret = ctxt->errNo;
12212
0
        if (ret == XML_ERR_ARGUMENT)
12213
0
            ret = XML_ERR_INTERNAL_ERROR;
12214
0
    } else {
12215
0
        ret = XML_ERR_OK;
12216
0
        *listOut = list;
12217
0
    }
12218
12219
0
    xmlFreeParserCtxt(ctxt);
12220
12221
0
    return(ret);
12222
0
}
12223
12224
#ifdef LIBXML_SAX1_ENABLED
12225
/**
12226
 * Parse a well-balanced chunk of an XML document
12227
 *
12228
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12229
 * the content production in the XML grammar:
12230
 *
12231
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12232
 *                       Comment)*
12233
 *
12234
 * In case recover is set to 1, the nodelist will not be empty even if
12235
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12236
 * some extent.
12237
 *
12238
 * @param doc  the document the chunk pertains to (must not be NULL)
12239
 * @param sax  the SAX handler block (possibly NULL)
12240
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
12241
 * @param depth  Used for loop detection, use 0
12242
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
12243
 * @param listOut  the return value for the set of parsed nodes
12244
 * @param recover  return nodes even if the data is broken (use 0)
12245
 * @returns 0 if the chunk is well balanced, or thehe parser error code
12246
 * otherwise.
12247
 */
12248
int
12249
xmlParseBalancedChunkMemoryRecover(xmlDoc *doc, xmlSAXHandler *sax,
12250
     void *user_data, int depth, const xmlChar *string, xmlNode **listOut,
12251
0
     int recover) {
12252
0
    xmlParserCtxtPtr ctxt;
12253
0
    xmlParserInputPtr input;
12254
0
    xmlNodePtr list;
12255
0
    int ret;
12256
12257
0
    if (listOut != NULL)
12258
0
        *listOut = NULL;
12259
12260
0
    if (string == NULL)
12261
0
        return(XML_ERR_ARGUMENT);
12262
12263
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12264
0
    if (ctxt == NULL)
12265
0
        return(XML_ERR_NO_MEMORY);
12266
12267
0
    xmlCtxtInitializeLate(ctxt);
12268
12269
0
    ctxt->depth = depth;
12270
0
    ctxt->myDoc = doc;
12271
0
    if (recover) {
12272
0
        ctxt->options |= XML_PARSE_RECOVER;
12273
0
        ctxt->recovery = 1;
12274
0
    }
12275
12276
0
    input = xmlNewStringInputStream(ctxt, string);
12277
0
    if (input == NULL) {
12278
0
        ret = ctxt->errNo;
12279
0
        goto error;
12280
0
    }
12281
12282
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12283
0
    if (listOut != NULL)
12284
0
        *listOut = list;
12285
0
    else
12286
0
        xmlFreeNodeList(list);
12287
12288
0
    if (!ctxt->wellFormed)
12289
0
        ret = ctxt->errNo;
12290
0
    else
12291
0
        ret = XML_ERR_OK;
12292
12293
0
error:
12294
0
    xmlFreeInputStream(input);
12295
0
    xmlFreeParserCtxt(ctxt);
12296
0
    return(ret);
12297
0
}
12298
12299
/**
12300
 * parse an XML external entity out of context and build a tree.
12301
 * It use the given SAX function block to handle the parsing callback.
12302
 * If sax is NULL, fallback to the default DOM tree building routines.
12303
 *
12304
 * @deprecated Don't use.
12305
 *
12306
 *     [78] extParsedEnt ::= TextDecl? content
12307
 *
12308
 * This correspond to a "Well Balanced" chunk
12309
 *
12310
 * @param sax  the SAX handler block
12311
 * @param filename  the filename
12312
 * @returns the resulting document tree
12313
 */
12314
12315
xmlDoc *
12316
0
xmlSAXParseEntity(xmlSAXHandler *sax, const char *filename) {
12317
0
    xmlDocPtr ret;
12318
0
    xmlParserCtxtPtr ctxt;
12319
12320
0
    ctxt = xmlCreateFileParserCtxt(filename);
12321
0
    if (ctxt == NULL) {
12322
0
  return(NULL);
12323
0
    }
12324
0
    if (sax != NULL) {
12325
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12326
0
            *ctxt->sax = *sax;
12327
0
        } else {
12328
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12329
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12330
0
        }
12331
0
        ctxt->userData = NULL;
12332
0
    }
12333
12334
0
    xmlParseExtParsedEnt(ctxt);
12335
12336
0
    if (ctxt->wellFormed) {
12337
0
  ret = ctxt->myDoc;
12338
0
    } else {
12339
0
        ret = NULL;
12340
0
        xmlFreeDoc(ctxt->myDoc);
12341
0
    }
12342
12343
0
    xmlFreeParserCtxt(ctxt);
12344
12345
0
    return(ret);
12346
0
}
12347
12348
/**
12349
 * parse an XML external entity out of context and build a tree.
12350
 *
12351
 *     [78] extParsedEnt ::= TextDecl? content
12352
 *
12353
 * This correspond to a "Well Balanced" chunk
12354
 *
12355
 * @param filename  the filename
12356
 * @returns the resulting document tree
12357
 */
12358
12359
xmlDoc *
12360
0
xmlParseEntity(const char *filename) {
12361
0
    return(xmlSAXParseEntity(NULL, filename));
12362
0
}
12363
#endif /* LIBXML_SAX1_ENABLED */
12364
12365
/**
12366
 * Create a parser context for an external entity
12367
 * Automatic support for ZLIB/Compress compressed document is provided
12368
 * by default if found at compile-time.
12369
 *
12370
 * @deprecated Don't use.
12371
 *
12372
 * @param URL  the entity URL
12373
 * @param ID  the entity PUBLIC ID
12374
 * @param base  a possible base for the target URI
12375
 * @returns the new parser context or NULL
12376
 */
12377
xmlParserCtxt *
12378
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12379
0
                    const xmlChar *base) {
12380
0
    xmlParserCtxtPtr ctxt;
12381
0
    xmlParserInputPtr input;
12382
0
    xmlChar *uri = NULL;
12383
12384
0
    ctxt = xmlNewParserCtxt();
12385
0
    if (ctxt == NULL)
12386
0
  return(NULL);
12387
12388
0
    if (base != NULL) {
12389
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12390
0
            goto error;
12391
0
        if (uri != NULL)
12392
0
            URL = uri;
12393
0
    }
12394
12395
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12396
0
                            XML_RESOURCE_UNKNOWN);
12397
0
    if (input == NULL)
12398
0
        goto error;
12399
12400
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12401
0
        xmlFreeInputStream(input);
12402
0
        goto error;
12403
0
    }
12404
12405
0
    xmlFree(uri);
12406
0
    return(ctxt);
12407
12408
0
error:
12409
0
    xmlFree(uri);
12410
0
    xmlFreeParserCtxt(ctxt);
12411
0
    return(NULL);
12412
0
}
12413
12414
/************************************************************************
12415
 *                  *
12416
 *    Front ends when parsing from a file     *
12417
 *                  *
12418
 ************************************************************************/
12419
12420
/**
12421
 * Create a parser context for a file or URL content.
12422
 * Automatic support for ZLIB/Compress compressed document is provided
12423
 * by default if found at compile-time and for file accesses
12424
 *
12425
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12426
 *
12427
 * @param filename  the filename or URL
12428
 * @param options  a combination of xmlParserOption
12429
 * @returns the new parser context or NULL
12430
 */
12431
xmlParserCtxt *
12432
xmlCreateURLParserCtxt(const char *filename, int options)
12433
0
{
12434
0
    xmlParserCtxtPtr ctxt;
12435
0
    xmlParserInputPtr input;
12436
12437
0
    ctxt = xmlNewParserCtxt();
12438
0
    if (ctxt == NULL)
12439
0
  return(NULL);
12440
12441
0
    options |= XML_PARSE_UNZIP;
12442
12443
0
    xmlCtxtUseOptions(ctxt, options);
12444
12445
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12446
0
    if (input == NULL) {
12447
0
  xmlFreeParserCtxt(ctxt);
12448
0
  return(NULL);
12449
0
    }
12450
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12451
0
        xmlFreeInputStream(input);
12452
0
        xmlFreeParserCtxt(ctxt);
12453
0
        return(NULL);
12454
0
    }
12455
12456
0
    return(ctxt);
12457
0
}
12458
12459
/**
12460
 * Create a parser context for a file content.
12461
 * Automatic support for ZLIB/Compress compressed document is provided
12462
 * by default if found at compile-time.
12463
 *
12464
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12465
 *
12466
 * @param filename  the filename
12467
 * @returns the new parser context or NULL
12468
 */
12469
xmlParserCtxt *
12470
xmlCreateFileParserCtxt(const char *filename)
12471
0
{
12472
0
    return(xmlCreateURLParserCtxt(filename, 0));
12473
0
}
12474
12475
#ifdef LIBXML_SAX1_ENABLED
12476
/**
12477
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12478
 * compressed document is provided by default if found at compile-time.
12479
 * It use the given SAX function block to handle the parsing callback.
12480
 * If sax is NULL, fallback to the default DOM tree building routines.
12481
 *
12482
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12483
 *
12484
 * User data (void *) is stored within the parser context in the
12485
 * context's _private member, so it is available nearly everywhere in libxml
12486
 *
12487
 * @param sax  the SAX handler block
12488
 * @param filename  the filename
12489
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12490
 *             documents
12491
 * @param data  the userdata
12492
 * @returns the resulting document tree
12493
 */
12494
12495
xmlDoc *
12496
xmlSAXParseFileWithData(xmlSAXHandler *sax, const char *filename,
12497
0
                        int recovery, void *data) {
12498
0
    xmlDocPtr ret = NULL;
12499
0
    xmlParserCtxtPtr ctxt;
12500
0
    xmlParserInputPtr input;
12501
12502
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12503
0
    if (ctxt == NULL)
12504
0
  return(NULL);
12505
12506
0
    if (data != NULL)
12507
0
  ctxt->_private = data;
12508
12509
0
    if (recovery) {
12510
0
        ctxt->options |= XML_PARSE_RECOVER;
12511
0
        ctxt->recovery = 1;
12512
0
    }
12513
12514
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12515
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12516
0
    else
12517
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12518
12519
0
    if (input != NULL)
12520
0
        ret = xmlCtxtParseDocument(ctxt, input);
12521
12522
0
    xmlFreeParserCtxt(ctxt);
12523
0
    return(ret);
12524
0
}
12525
12526
/**
12527
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12528
 * compressed document is provided by default if found at compile-time.
12529
 * It use the given SAX function block to handle the parsing callback.
12530
 * If sax is NULL, fallback to the default DOM tree building routines.
12531
 *
12532
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12533
 *
12534
 * @param sax  the SAX handler block
12535
 * @param filename  the filename
12536
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12537
 *             documents
12538
 * @returns the resulting document tree
12539
 */
12540
12541
xmlDoc *
12542
xmlSAXParseFile(xmlSAXHandler *sax, const char *filename,
12543
0
                          int recovery) {
12544
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12545
0
}
12546
12547
/**
12548
 * parse an XML in-memory document and build a tree.
12549
 * In the case the document is not Well Formed, a attempt to build a
12550
 * tree is tried anyway
12551
 *
12552
 * @deprecated Use #xmlReadDoc with XML_PARSE_RECOVER.
12553
 *
12554
 * @param cur  a pointer to an array of xmlChar
12555
 * @returns the resulting document tree or NULL in case of failure
12556
 */
12557
12558
xmlDoc *
12559
0
xmlRecoverDoc(const xmlChar *cur) {
12560
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12561
0
}
12562
12563
/**
12564
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12565
 * compressed document is provided by default if found at compile-time.
12566
 *
12567
 * @deprecated Use #xmlReadFile.
12568
 *
12569
 * @param filename  the filename
12570
 * @returns the resulting document tree if the file was wellformed,
12571
 * NULL otherwise.
12572
 */
12573
12574
xmlDoc *
12575
0
xmlParseFile(const char *filename) {
12576
0
    return(xmlSAXParseFile(NULL, filename, 0));
12577
0
}
12578
12579
/**
12580
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12581
 * compressed document is provided by default if found at compile-time.
12582
 * In the case the document is not Well Formed, it attempts to build
12583
 * a tree anyway
12584
 *
12585
 * @deprecated Use #xmlReadFile with XML_PARSE_RECOVER.
12586
 *
12587
 * @param filename  the filename
12588
 * @returns the resulting document tree or NULL in case of failure
12589
 */
12590
12591
xmlDoc *
12592
0
xmlRecoverFile(const char *filename) {
12593
0
    return(xmlSAXParseFile(NULL, filename, 1));
12594
0
}
12595
12596
12597
/**
12598
 * Setup the parser context to parse a new buffer; Clears any prior
12599
 * contents from the parser context. The buffer parameter must not be
12600
 * NULL, but the filename parameter can be
12601
 *
12602
 * @deprecated Don't use.
12603
 *
12604
 * @param ctxt  an XML parser context
12605
 * @param buffer  a xmlChar * buffer
12606
 * @param filename  a file name
12607
 */
12608
void
12609
xmlSetupParserForBuffer(xmlParserCtxt *ctxt, const xmlChar* buffer,
12610
                             const char* filename)
12611
0
{
12612
0
    xmlParserInputPtr input;
12613
12614
0
    if ((ctxt == NULL) || (buffer == NULL))
12615
0
        return;
12616
12617
0
    xmlCtxtReset(ctxt);
12618
12619
0
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12620
0
                                      NULL, 0);
12621
0
    if (input == NULL)
12622
0
        return;
12623
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
12624
0
        xmlFreeInputStream(input);
12625
0
}
12626
12627
/**
12628
 * parse an XML file and call the given SAX handler routines.
12629
 * Automatic support for ZLIB/Compress compressed document is provided
12630
 *
12631
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12632
 *
12633
 * @param sax  a SAX handler
12634
 * @param user_data  The user data returned on SAX callbacks
12635
 * @param filename  a file name
12636
 * @returns 0 in case of success or a error number otherwise
12637
 */
12638
int
12639
xmlSAXUserParseFile(xmlSAXHandler *sax, void *user_data,
12640
0
                    const char *filename) {
12641
0
    int ret = 0;
12642
0
    xmlParserCtxtPtr ctxt;
12643
12644
0
    ctxt = xmlCreateFileParserCtxt(filename);
12645
0
    if (ctxt == NULL) return -1;
12646
0
    if (sax != NULL) {
12647
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12648
0
            *ctxt->sax = *sax;
12649
0
        } else {
12650
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12651
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12652
0
        }
12653
0
  ctxt->userData = user_data;
12654
0
    }
12655
12656
0
    xmlParseDocument(ctxt);
12657
12658
0
    if (ctxt->wellFormed)
12659
0
  ret = 0;
12660
0
    else {
12661
0
        if (ctxt->errNo != 0)
12662
0
      ret = ctxt->errNo;
12663
0
  else
12664
0
      ret = -1;
12665
0
    }
12666
0
    if (ctxt->myDoc != NULL) {
12667
0
        xmlFreeDoc(ctxt->myDoc);
12668
0
  ctxt->myDoc = NULL;
12669
0
    }
12670
0
    xmlFreeParserCtxt(ctxt);
12671
12672
0
    return ret;
12673
0
}
12674
#endif /* LIBXML_SAX1_ENABLED */
12675
12676
/************************************************************************
12677
 *                  *
12678
 *    Front ends when parsing from memory     *
12679
 *                  *
12680
 ************************************************************************/
12681
12682
/**
12683
 * Create a parser context for an XML in-memory document. The input buffer
12684
 * must not contain a terminating null byte.
12685
 *
12686
 * @param buffer  a pointer to a char array
12687
 * @param size  the size of the array
12688
 * @returns the new parser context or NULL
12689
 */
12690
xmlParserCtxt *
12691
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12692
0
    xmlParserCtxtPtr ctxt;
12693
0
    xmlParserInputPtr input;
12694
12695
0
    if (size < 0)
12696
0
  return(NULL);
12697
12698
0
    ctxt = xmlNewParserCtxt();
12699
0
    if (ctxt == NULL)
12700
0
  return(NULL);
12701
12702
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
12703
0
    if (input == NULL) {
12704
0
  xmlFreeParserCtxt(ctxt);
12705
0
  return(NULL);
12706
0
    }
12707
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12708
0
        xmlFreeInputStream(input);
12709
0
        xmlFreeParserCtxt(ctxt);
12710
0
        return(NULL);
12711
0
    }
12712
12713
0
    return(ctxt);
12714
0
}
12715
12716
#ifdef LIBXML_SAX1_ENABLED
12717
/**
12718
 * parse an XML in-memory block and use the given SAX function block
12719
 * to handle the parsing callback. If sax is NULL, fallback to the default
12720
 * DOM tree building routines.
12721
 *
12722
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12723
 *
12724
 * User data (void *) is stored within the parser context in the
12725
 * context's _private member, so it is available nearly everywhere in libxml
12726
 *
12727
 * @param sax  the SAX handler block
12728
 * @param buffer  an pointer to a char array
12729
 * @param size  the size of the array
12730
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12731
 *             documents
12732
 * @param data  the userdata
12733
 * @returns the resulting document tree
12734
 */
12735
12736
xmlDoc *
12737
xmlSAXParseMemoryWithData(xmlSAXHandler *sax, const char *buffer,
12738
0
                          int size, int recovery, void *data) {
12739
0
    xmlDocPtr ret = NULL;
12740
0
    xmlParserCtxtPtr ctxt;
12741
0
    xmlParserInputPtr input;
12742
12743
0
    if (size < 0)
12744
0
        return(NULL);
12745
12746
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12747
0
    if (ctxt == NULL)
12748
0
        return(NULL);
12749
12750
0
    if (data != NULL)
12751
0
  ctxt->_private=data;
12752
12753
0
    if (recovery) {
12754
0
        ctxt->options |= XML_PARSE_RECOVER;
12755
0
        ctxt->recovery = 1;
12756
0
    }
12757
12758
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
12759
0
                                      XML_INPUT_BUF_STATIC);
12760
12761
0
    if (input != NULL)
12762
0
        ret = xmlCtxtParseDocument(ctxt, input);
12763
12764
0
    xmlFreeParserCtxt(ctxt);
12765
0
    return(ret);
12766
0
}
12767
12768
/**
12769
 * parse an XML in-memory block and use the given SAX function block
12770
 * to handle the parsing callback. If sax is NULL, fallback to the default
12771
 * DOM tree building routines.
12772
 *
12773
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12774
 *
12775
 * @param sax  the SAX handler block
12776
 * @param buffer  an pointer to a char array
12777
 * @param size  the size of the array
12778
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12779
 *             documents
12780
 * @returns the resulting document tree
12781
 */
12782
xmlDoc *
12783
xmlSAXParseMemory(xmlSAXHandler *sax, const char *buffer,
12784
0
            int size, int recovery) {
12785
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12786
0
}
12787
12788
/**
12789
 * parse an XML in-memory block and build a tree.
12790
 *
12791
 * @deprecated Use #xmlReadMemory.
12792
 *
12793
 * @param buffer  an pointer to a char array
12794
 * @param size  the size of the array
12795
 * @returns the resulting document tree
12796
 */
12797
12798
0
xmlDoc *xmlParseMemory(const char *buffer, int size) {
12799
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
12800
0
}
12801
12802
/**
12803
 * parse an XML in-memory block and build a tree.
12804
 * In the case the document is not Well Formed, an attempt to
12805
 * build a tree is tried anyway
12806
 *
12807
 * @deprecated Use #xmlReadMemory with XML_PARSE_RECOVER.
12808
 *
12809
 * @param buffer  an pointer to a char array
12810
 * @param size  the size of the array
12811
 * @returns the resulting document tree or NULL in case of error
12812
 */
12813
12814
0
xmlDoc *xmlRecoverMemory(const char *buffer, int size) {
12815
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
12816
0
}
12817
12818
/**
12819
 * parse an XML in-memory buffer and call the given SAX handler routines.
12820
 *
12821
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12822
 *
12823
 * @param sax  a SAX handler
12824
 * @param user_data  The user data returned on SAX callbacks
12825
 * @param buffer  an in-memory XML document input
12826
 * @param size  the length of the XML document in bytes
12827
 * @returns 0 in case of success or a error number otherwise
12828
 */
12829
int xmlSAXUserParseMemory(xmlSAXHandler *sax, void *user_data,
12830
0
        const char *buffer, int size) {
12831
0
    int ret = 0;
12832
0
    xmlParserCtxtPtr ctxt;
12833
12834
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12835
0
    if (ctxt == NULL) return -1;
12836
0
    if (sax != NULL) {
12837
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12838
0
            *ctxt->sax = *sax;
12839
0
        } else {
12840
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12841
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12842
0
        }
12843
0
  ctxt->userData = user_data;
12844
0
    }
12845
12846
0
    xmlParseDocument(ctxt);
12847
12848
0
    if (ctxt->wellFormed)
12849
0
  ret = 0;
12850
0
    else {
12851
0
        if (ctxt->errNo != 0)
12852
0
      ret = ctxt->errNo;
12853
0
  else
12854
0
      ret = -1;
12855
0
    }
12856
0
    if (ctxt->myDoc != NULL) {
12857
0
        xmlFreeDoc(ctxt->myDoc);
12858
0
  ctxt->myDoc = NULL;
12859
0
    }
12860
0
    xmlFreeParserCtxt(ctxt);
12861
12862
0
    return ret;
12863
0
}
12864
#endif /* LIBXML_SAX1_ENABLED */
12865
12866
/**
12867
 * Creates a parser context for an XML in-memory document.
12868
 *
12869
 * @param str  a pointer to an array of xmlChar
12870
 * @returns the new parser context or NULL
12871
 */
12872
xmlParserCtxt *
12873
0
xmlCreateDocParserCtxt(const xmlChar *str) {
12874
0
    xmlParserCtxtPtr ctxt;
12875
0
    xmlParserInputPtr input;
12876
12877
0
    ctxt = xmlNewParserCtxt();
12878
0
    if (ctxt == NULL)
12879
0
  return(NULL);
12880
12881
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
12882
0
    if (input == NULL) {
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  return(NULL);
12885
0
    }
12886
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12887
0
        xmlFreeInputStream(input);
12888
0
        xmlFreeParserCtxt(ctxt);
12889
0
        return(NULL);
12890
0
    }
12891
12892
0
    return(ctxt);
12893
0
}
12894
12895
#ifdef LIBXML_SAX1_ENABLED
12896
/**
12897
 * parse an XML in-memory document and build a tree.
12898
 * It use the given SAX function block to handle the parsing callback.
12899
 * If sax is NULL, fallback to the default DOM tree building routines.
12900
 *
12901
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadDoc.
12902
 *
12903
 * @param sax  the SAX handler block
12904
 * @param cur  a pointer to an array of xmlChar
12905
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12906
 *             documents
12907
 * @returns the resulting document tree
12908
 */
12909
12910
xmlDoc *
12911
0
xmlSAXParseDoc(xmlSAXHandler *sax, const xmlChar *cur, int recovery) {
12912
0
    xmlDocPtr ret;
12913
0
    xmlParserCtxtPtr ctxt;
12914
0
    xmlSAXHandlerPtr oldsax = NULL;
12915
12916
0
    if (cur == NULL) return(NULL);
12917
12918
12919
0
    ctxt = xmlCreateDocParserCtxt(cur);
12920
0
    if (ctxt == NULL) return(NULL);
12921
0
    if (sax != NULL) {
12922
0
        oldsax = ctxt->sax;
12923
0
        ctxt->sax = sax;
12924
0
        ctxt->userData = NULL;
12925
0
    }
12926
12927
0
    xmlParseDocument(ctxt);
12928
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12929
0
    else {
12930
0
       ret = NULL;
12931
0
       xmlFreeDoc(ctxt->myDoc);
12932
0
       ctxt->myDoc = NULL;
12933
0
    }
12934
0
    if (sax != NULL)
12935
0
  ctxt->sax = oldsax;
12936
0
    xmlFreeParserCtxt(ctxt);
12937
12938
0
    return(ret);
12939
0
}
12940
12941
/**
12942
 * parse an XML in-memory document and build a tree.
12943
 *
12944
 * @deprecated Use #xmlReadDoc.
12945
 *
12946
 * @param cur  a pointer to an array of xmlChar
12947
 * @returns the resulting document tree
12948
 */
12949
12950
xmlDoc *
12951
0
xmlParseDoc(const xmlChar *cur) {
12952
0
    return(xmlSAXParseDoc(NULL, cur, 0));
12953
0
}
12954
#endif /* LIBXML_SAX1_ENABLED */
12955
12956
/************************************************************************
12957
 *                  *
12958
 *  New set (2.6.0) of simpler and more flexible APIs   *
12959
 *                  *
12960
 ************************************************************************/
12961
12962
/**
12963
 * Reset a parser context
12964
 *
12965
 * @param ctxt  an XML parser context
12966
 */
12967
void
12968
xmlCtxtReset(xmlParserCtxt *ctxt)
12969
0
{
12970
0
    xmlParserInputPtr input;
12971
12972
0
    if (ctxt == NULL)
12973
0
        return;
12974
12975
0
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
12976
0
        xmlFreeInputStream(input);
12977
0
    }
12978
0
    ctxt->inputNr = 0;
12979
0
    ctxt->input = NULL;
12980
12981
0
    ctxt->spaceNr = 0;
12982
0
    if (ctxt->spaceTab != NULL) {
12983
0
  ctxt->spaceTab[0] = -1;
12984
0
  ctxt->space = &ctxt->spaceTab[0];
12985
0
    } else {
12986
0
        ctxt->space = NULL;
12987
0
    }
12988
12989
12990
0
    ctxt->nodeNr = 0;
12991
0
    ctxt->node = NULL;
12992
12993
0
    ctxt->nameNr = 0;
12994
0
    ctxt->name = NULL;
12995
12996
0
    ctxt->nsNr = 0;
12997
0
    xmlParserNsReset(ctxt->nsdb);
12998
12999
0
    if (ctxt->version != NULL) {
13000
0
        xmlFree(ctxt->version);
13001
0
        ctxt->version = NULL;
13002
0
    }
13003
0
    if (ctxt->encoding != NULL) {
13004
0
        xmlFree(ctxt->encoding);
13005
0
        ctxt->encoding = NULL;
13006
0
    }
13007
0
    if (ctxt->extSubURI != NULL) {
13008
0
        xmlFree(ctxt->extSubURI);
13009
0
        ctxt->extSubURI = NULL;
13010
0
    }
13011
0
    if (ctxt->extSubSystem != NULL) {
13012
0
        xmlFree(ctxt->extSubSystem);
13013
0
        ctxt->extSubSystem = NULL;
13014
0
    }
13015
0
    if (ctxt->directory != NULL) {
13016
0
        xmlFree(ctxt->directory);
13017
0
        ctxt->directory = NULL;
13018
0
    }
13019
13020
0
    if (ctxt->myDoc != NULL)
13021
0
        xmlFreeDoc(ctxt->myDoc);
13022
0
    ctxt->myDoc = NULL;
13023
13024
0
    ctxt->standalone = -1;
13025
0
    ctxt->hasExternalSubset = 0;
13026
0
    ctxt->hasPErefs = 0;
13027
0
    ctxt->html = ctxt->html ? 1 : 0;
13028
0
    ctxt->instate = XML_PARSER_START;
13029
13030
0
    ctxt->wellFormed = 1;
13031
0
    ctxt->nsWellFormed = 1;
13032
0
    ctxt->disableSAX = 0;
13033
0
    ctxt->valid = 1;
13034
0
    ctxt->record_info = 0;
13035
0
    ctxt->checkIndex = 0;
13036
0
    ctxt->endCheckState = 0;
13037
0
    ctxt->inSubset = 0;
13038
0
    ctxt->errNo = XML_ERR_OK;
13039
0
    ctxt->depth = 0;
13040
0
    ctxt->catalogs = NULL;
13041
0
    ctxt->sizeentities = 0;
13042
0
    ctxt->sizeentcopy = 0;
13043
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13044
13045
0
    if (ctxt->attsDefault != NULL) {
13046
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13047
0
        ctxt->attsDefault = NULL;
13048
0
    }
13049
0
    if (ctxt->attsSpecial != NULL) {
13050
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13051
0
        ctxt->attsSpecial = NULL;
13052
0
    }
13053
13054
0
#ifdef LIBXML_CATALOG_ENABLED
13055
0
    if (ctxt->catalogs != NULL)
13056
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13057
0
#endif
13058
0
    ctxt->nbErrors = 0;
13059
0
    ctxt->nbWarnings = 0;
13060
0
    if (ctxt->lastError.code != XML_ERR_OK)
13061
0
        xmlResetError(&ctxt->lastError);
13062
0
}
13063
13064
/**
13065
 * Reset a push parser context
13066
 *
13067
 * @param ctxt  an XML parser context
13068
 * @param chunk  a pointer to an array of chars
13069
 * @param size  number of chars in the array
13070
 * @param filename  an optional file name or URI
13071
 * @param encoding  the document encoding, or NULL
13072
 * @returns 0 in case of success and 1 in case of error
13073
 */
13074
int
13075
xmlCtxtResetPush(xmlParserCtxt *ctxt, const char *chunk,
13076
                 int size, const char *filename, const char *encoding)
13077
0
{
13078
0
    xmlParserInputPtr input;
13079
13080
0
    if (ctxt == NULL)
13081
0
        return(1);
13082
13083
0
    xmlCtxtReset(ctxt);
13084
13085
0
    input = xmlNewPushInput(filename, chunk, size);
13086
0
    if (input == NULL)
13087
0
        return(1);
13088
13089
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13090
0
        xmlFreeInputStream(input);
13091
0
        return(1);
13092
0
    }
13093
13094
0
    if (encoding != NULL)
13095
0
        xmlSwitchEncodingName(ctxt, encoding);
13096
13097
0
    return(0);
13098
0
}
13099
13100
static int
13101
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13102
192k
{
13103
192k
    int allMask;
13104
13105
192k
    if (ctxt == NULL)
13106
0
        return(-1);
13107
13108
    /*
13109
     * XInclude options aren't handled by the parser.
13110
     *
13111
     * XML_PARSE_XINCLUDE
13112
     * XML_PARSE_NOXINCNODE
13113
     * XML_PARSE_NOBASEFIX
13114
     */
13115
192k
    allMask = XML_PARSE_RECOVER |
13116
192k
              XML_PARSE_NOENT |
13117
192k
              XML_PARSE_DTDLOAD |
13118
192k
              XML_PARSE_DTDATTR |
13119
192k
              XML_PARSE_DTDVALID |
13120
192k
              XML_PARSE_NOERROR |
13121
192k
              XML_PARSE_NOWARNING |
13122
192k
              XML_PARSE_PEDANTIC |
13123
192k
              XML_PARSE_NOBLANKS |
13124
192k
#ifdef LIBXML_SAX1_ENABLED
13125
192k
              XML_PARSE_SAX1 |
13126
192k
#endif
13127
192k
              XML_PARSE_NONET |
13128
192k
              XML_PARSE_NODICT |
13129
192k
              XML_PARSE_NSCLEAN |
13130
192k
              XML_PARSE_NOCDATA |
13131
192k
              XML_PARSE_COMPACT |
13132
192k
              XML_PARSE_OLD10 |
13133
192k
              XML_PARSE_HUGE |
13134
192k
              XML_PARSE_OLDSAX |
13135
192k
              XML_PARSE_IGNORE_ENC |
13136
192k
              XML_PARSE_BIG_LINES |
13137
192k
              XML_PARSE_NO_XXE |
13138
192k
              XML_PARSE_UNZIP |
13139
192k
              XML_PARSE_NO_SYS_CATALOG |
13140
192k
              XML_PARSE_CATALOG_PI;
13141
13142
192k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13143
13144
    /*
13145
     * For some options, struct members are historically the source
13146
     * of truth. The values are initalized from global variables and
13147
     * old code could also modify them directly. Several older API
13148
     * functions that don't take an options argument rely on these
13149
     * deprecated mechanisms.
13150
     *
13151
     * Once public access to struct members and the globals are
13152
     * disabled, we can use the options bitmask as source of
13153
     * truth, making all these struct members obsolete.
13154
     *
13155
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13156
     * loading of the external subset.
13157
     */
13158
192k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13159
192k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13160
192k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13161
192k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13162
192k
    ctxt->loadsubset |= (options & XML_PARSE_SKIP_IDS) ? XML_SKIP_IDS : 0;
13163
192k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13164
192k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13165
192k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13166
192k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13167
13168
192k
    return(options & ~allMask);
13169
192k
}
13170
13171
/**
13172
 * Applies the options to the parser context. Unset options are
13173
 * cleared.
13174
 *
13175
 * @since 2.13.0
13176
 *
13177
 * With older versions, you can use #xmlCtxtUseOptions.
13178
 *
13179
 * @param ctxt  an XML parser context
13180
 * @param options  a bitmask of xmlParserOption values
13181
 * @returns 0 in case of success, the set of unknown or unimplemented options
13182
 *         in case of error.
13183
 */
13184
int
13185
xmlCtxtSetOptions(xmlParserCtxt *ctxt, int options)
13186
13.4k
{
13187
13.4k
#ifdef LIBXML_HTML_ENABLED
13188
13.4k
    if ((ctxt != NULL) && (ctxt->html))
13189
0
        return(htmlCtxtSetOptions(ctxt, options));
13190
13.4k
#endif
13191
13192
13.4k
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13193
13.4k
}
13194
13195
/**
13196
 * Get the current options of the parser context.
13197
 *
13198
 * @since 2.14.0
13199
 *
13200
 * @param ctxt  an XML parser context
13201
 * @returns the current options set in the parser context, or -1 if ctxt is NULL.
13202
 */
13203
int
13204
xmlCtxtGetOptions(xmlParserCtxt *ctxt)
13205
0
{
13206
0
    if (ctxt == NULL)
13207
0
        return(-1);
13208
13209
0
    return(ctxt->options);
13210
0
}
13211
13212
/**
13213
 * Applies the options to the parser context. The following options
13214
 * are never cleared and can only be enabled:
13215
 *
13216
 * - XML_PARSE_NOERROR
13217
 * - XML_PARSE_NOWARNING
13218
 * - XML_PARSE_NONET
13219
 * - XML_PARSE_NSCLEAN
13220
 * - XML_PARSE_NOCDATA
13221
 * - XML_PARSE_COMPACT
13222
 * - XML_PARSE_OLD10
13223
 * - XML_PARSE_HUGE
13224
 * - XML_PARSE_OLDSAX
13225
 * - XML_PARSE_IGNORE_ENC
13226
 * - XML_PARSE_BIG_LINES
13227
 *
13228
 * @deprecated Use #xmlCtxtSetOptions.
13229
 *
13230
 * @param ctxt  an XML parser context
13231
 * @param options  a combination of xmlParserOption
13232
 * @returns 0 in case of success, the set of unknown or unimplemented options
13233
 *         in case of error.
13234
 */
13235
int
13236
xmlCtxtUseOptions(xmlParserCtxt *ctxt, int options)
13237
178k
{
13238
178k
    int keepMask;
13239
13240
178k
#ifdef LIBXML_HTML_ENABLED
13241
178k
    if ((ctxt != NULL) && (ctxt->html))
13242
0
        return(htmlCtxtUseOptions(ctxt, options));
13243
178k
#endif
13244
13245
    /*
13246
     * For historic reasons, some options can only be enabled.
13247
     */
13248
178k
    keepMask = XML_PARSE_NOERROR |
13249
178k
               XML_PARSE_NOWARNING |
13250
178k
               XML_PARSE_NONET |
13251
178k
               XML_PARSE_NSCLEAN |
13252
178k
               XML_PARSE_NOCDATA |
13253
178k
               XML_PARSE_COMPACT |
13254
178k
               XML_PARSE_OLD10 |
13255
178k
               XML_PARSE_HUGE |
13256
178k
               XML_PARSE_OLDSAX |
13257
178k
               XML_PARSE_IGNORE_ENC |
13258
178k
               XML_PARSE_BIG_LINES;
13259
13260
178k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13261
178k
}
13262
13263
/**
13264
 * To protect against exponential entity expansion ("billion laughs"), the
13265
 * size of serialized output is (roughly) limited to the input size
13266
 * multiplied by this factor. The default value is 5.
13267
 *
13268
 * When working with documents making heavy use of entity expansion, it can
13269
 * be necessary to increase the value. For security reasons, this should only
13270
 * be considered when processing trusted input.
13271
 *
13272
 * @param ctxt  an XML parser context
13273
 * @param maxAmpl  maximum amplification factor
13274
 */
13275
void
13276
xmlCtxtSetMaxAmplification(xmlParserCtxt *ctxt, unsigned maxAmpl)
13277
0
{
13278
0
    if (ctxt == NULL)
13279
0
        return;
13280
0
    ctxt->maxAmpl = maxAmpl;
13281
0
}
13282
13283
/**
13284
 * Parse an XML document and return the resulting document tree.
13285
 * Takes ownership of the input object.
13286
 *
13287
 * @since 2.13.0
13288
 *
13289
 * @param ctxt  an XML parser context
13290
 * @param input  parser input
13291
 * @returns the resulting document tree or NULL
13292
 */
13293
xmlDoc *
13294
xmlCtxtParseDocument(xmlParserCtxt *ctxt, xmlParserInput *input)
13295
175k
{
13296
175k
    xmlDocPtr ret = NULL;
13297
13298
175k
    if ((ctxt == NULL) || (input == NULL)) {
13299
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
13300
0
        xmlFreeInputStream(input);
13301
0
        return(NULL);
13302
0
    }
13303
13304
    /* assert(ctxt->inputNr == 0); */
13305
175k
    while (ctxt->inputNr > 0)
13306
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13307
13308
175k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13309
9
        xmlFreeInputStream(input);
13310
9
        return(NULL);
13311
9
    }
13312
13313
175k
    xmlParseDocument(ctxt);
13314
13315
175k
    ret = xmlCtxtGetDocument(ctxt);
13316
13317
    /* assert(ctxt->inputNr == 1); */
13318
351k
    while (ctxt->inputNr > 0)
13319
175k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13320
13321
175k
    return(ret);
13322
175k
}
13323
13324
/**
13325
 * Convenience function to parse an XML document from a
13326
 * zero-terminated string.
13327
 *
13328
 * See #xmlCtxtReadDoc for details.
13329
 *
13330
 * @param cur  a pointer to a zero terminated string
13331
 * @param URL  base URL (optional)
13332
 * @param encoding  the document encoding (optional)
13333
 * @param options  a combination of xmlParserOption
13334
 * @returns the resulting document tree
13335
 */
13336
xmlDoc *
13337
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13338
           int options)
13339
179k
{
13340
179k
    xmlParserCtxtPtr ctxt;
13341
179k
    xmlParserInputPtr input;
13342
179k
    xmlDocPtr doc = NULL;
13343
13344
179k
    ctxt = xmlNewParserCtxt();
13345
179k
    if (ctxt == NULL)
13346
129
        return(NULL);
13347
13348
178k
    xmlCtxtUseOptions(ctxt, options);
13349
13350
178k
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13351
178k
                                      XML_INPUT_BUF_STATIC);
13352
13353
178k
    if (input != NULL)
13354
175k
        doc = xmlCtxtParseDocument(ctxt, input);
13355
13356
178k
    xmlFreeParserCtxt(ctxt);
13357
178k
    return(doc);
13358
179k
}
13359
13360
/**
13361
 * Convenience function to parse an XML file from the filesystem,
13362
 * the network or a global user-define resource loader.
13363
 *
13364
 * This function always enables the XML_PARSE_UNZIP option for
13365
 * backward compatibility. If a "-" filename is passed, it will
13366
 * read from stdin. Both of these features are potentially
13367
 * insecure and might be removed from later versions.
13368
 *
13369
 * See #xmlCtxtReadFile for details.
13370
 *
13371
 * @param filename  a file or URL
13372
 * @param encoding  the document encoding (optional)
13373
 * @param options  a combination of xmlParserOption
13374
 * @returns the resulting document tree
13375
 */
13376
xmlDoc *
13377
xmlReadFile(const char *filename, const char *encoding, int options)
13378
0
{
13379
0
    xmlParserCtxtPtr ctxt;
13380
0
    xmlParserInputPtr input;
13381
0
    xmlDocPtr doc = NULL;
13382
13383
0
    ctxt = xmlNewParserCtxt();
13384
0
    if (ctxt == NULL)
13385
0
        return(NULL);
13386
13387
0
    options |= XML_PARSE_UNZIP;
13388
13389
0
    xmlCtxtUseOptions(ctxt, options);
13390
13391
    /*
13392
     * Backward compatibility for users of command line utilities like
13393
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13394
     * should be removed at some point.
13395
     */
13396
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13397
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13398
0
                                      encoding, 0);
13399
0
    else
13400
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13401
13402
0
    if (input != NULL)
13403
0
        doc = xmlCtxtParseDocument(ctxt, input);
13404
13405
0
    xmlFreeParserCtxt(ctxt);
13406
0
    return(doc);
13407
0
}
13408
13409
/**
13410
 * Parse an XML in-memory document and build a tree. The input buffer must
13411
 * not contain a terminating null byte.
13412
 *
13413
 * See #xmlCtxtReadMemory for details.
13414
 *
13415
 * @param buffer  a pointer to a char array
13416
 * @param size  the size of the array
13417
 * @param url  base URL (optional)
13418
 * @param encoding  the document encoding (optional)
13419
 * @param options  a combination of xmlParserOption
13420
 * @returns the resulting document tree
13421
 */
13422
xmlDoc *
13423
xmlReadMemory(const char *buffer, int size, const char *url,
13424
              const char *encoding, int options)
13425
0
{
13426
0
    xmlParserCtxtPtr ctxt;
13427
0
    xmlParserInputPtr input;
13428
0
    xmlDocPtr doc = NULL;
13429
13430
0
    if (size < 0)
13431
0
  return(NULL);
13432
13433
0
    ctxt = xmlNewParserCtxt();
13434
0
    if (ctxt == NULL)
13435
0
        return(NULL);
13436
13437
0
    xmlCtxtUseOptions(ctxt, options);
13438
13439
0
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13440
0
                                      XML_INPUT_BUF_STATIC);
13441
13442
0
    if (input != NULL)
13443
0
        doc = xmlCtxtParseDocument(ctxt, input);
13444
13445
0
    xmlFreeParserCtxt(ctxt);
13446
0
    return(doc);
13447
0
}
13448
13449
/**
13450
 * Parse an XML from a file descriptor and build a tree.
13451
 *
13452
 * See #xmlCtxtReadFd for details.
13453
 *
13454
 * NOTE that the file descriptor will not be closed when the
13455
 * context is freed or reset.
13456
 *
13457
 * @param fd  an open file descriptor
13458
 * @param URL  base URL (optional)
13459
 * @param encoding  the document encoding (optional)
13460
 * @param options  a combination of xmlParserOption
13461
 * @returns the resulting document tree
13462
 */
13463
xmlDoc *
13464
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13465
0
{
13466
0
    xmlParserCtxtPtr ctxt;
13467
0
    xmlParserInputPtr input;
13468
0
    xmlDocPtr doc = NULL;
13469
13470
0
    ctxt = xmlNewParserCtxt();
13471
0
    if (ctxt == NULL)
13472
0
        return(NULL);
13473
13474
0
    xmlCtxtUseOptions(ctxt, options);
13475
13476
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13477
13478
0
    if (input != NULL)
13479
0
        doc = xmlCtxtParseDocument(ctxt, input);
13480
13481
0
    xmlFreeParserCtxt(ctxt);
13482
0
    return(doc);
13483
0
}
13484
13485
/**
13486
 * Parse an XML document from I/O functions and context and build a tree.
13487
 *
13488
 * See #xmlCtxtReadIO for details.
13489
 *
13490
 * @param ioread  an I/O read function
13491
 * @param ioclose  an I/O close function (optional)
13492
 * @param ioctx  an I/O handler
13493
 * @param URL  base URL (optional)
13494
 * @param encoding  the document encoding (optional)
13495
 * @param options  a combination of xmlParserOption
13496
 * @returns the resulting document tree
13497
 */
13498
xmlDoc *
13499
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13500
          void *ioctx, const char *URL, const char *encoding, int options)
13501
0
{
13502
0
    xmlParserCtxtPtr ctxt;
13503
0
    xmlParserInputPtr input;
13504
0
    xmlDocPtr doc = NULL;
13505
13506
0
    ctxt = xmlNewParserCtxt();
13507
0
    if (ctxt == NULL)
13508
0
        return(NULL);
13509
13510
0
    xmlCtxtUseOptions(ctxt, options);
13511
13512
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13513
0
                                  encoding, 0);
13514
13515
0
    if (input != NULL)
13516
0
        doc = xmlCtxtParseDocument(ctxt, input);
13517
13518
0
    xmlFreeParserCtxt(ctxt);
13519
0
    return(doc);
13520
0
}
13521
13522
/**
13523
 * Parse an XML in-memory document and build a tree.
13524
 *
13525
 * `URL` is used as base to resolve external entities and for error
13526
 * reporting.
13527
 *
13528
 * See #xmlCtxtUseOptions for details.
13529
 *
13530
 * @param ctxt  an XML parser context
13531
 * @param str  a pointer to a zero terminated string
13532
 * @param URL  base URL (optional)
13533
 * @param encoding  the document encoding (optional)
13534
 * @param options  a combination of xmlParserOption
13535
 * @returns the resulting document tree
13536
 */
13537
xmlDoc *
13538
xmlCtxtReadDoc(xmlParserCtxt *ctxt, const xmlChar *str,
13539
               const char *URL, const char *encoding, int options)
13540
0
{
13541
0
    xmlParserInputPtr input;
13542
13543
0
    if (ctxt == NULL)
13544
0
        return(NULL);
13545
13546
0
    xmlCtxtReset(ctxt);
13547
0
    xmlCtxtUseOptions(ctxt, options);
13548
13549
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
13550
0
                                      XML_INPUT_BUF_STATIC);
13551
0
    if (input == NULL)
13552
0
        return(NULL);
13553
13554
0
    return(xmlCtxtParseDocument(ctxt, input));
13555
0
}
13556
13557
/**
13558
 * Parse an XML file from the filesystem, the network or a user-defined
13559
 * resource loader.
13560
 *
13561
 * This function always enables the XML_PARSE_UNZIP option for
13562
 * backward compatibility. This feature is potentially insecure
13563
 * and might be removed from later versions.
13564
 *
13565
 * @param ctxt  an XML parser context
13566
 * @param filename  a file or URL
13567
 * @param encoding  the document encoding (optional)
13568
 * @param options  a combination of xmlParserOption
13569
 * @returns the resulting document tree
13570
 */
13571
xmlDoc *
13572
xmlCtxtReadFile(xmlParserCtxt *ctxt, const char *filename,
13573
                const char *encoding, int options)
13574
0
{
13575
0
    xmlParserInputPtr input;
13576
13577
0
    if (ctxt == NULL)
13578
0
        return(NULL);
13579
13580
0
    options |= XML_PARSE_UNZIP;
13581
13582
0
    xmlCtxtReset(ctxt);
13583
0
    xmlCtxtUseOptions(ctxt, options);
13584
13585
0
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13586
0
    if (input == NULL)
13587
0
        return(NULL);
13588
13589
0
    return(xmlCtxtParseDocument(ctxt, input));
13590
0
}
13591
13592
/**
13593
 * Parse an XML in-memory document and build a tree. The input buffer must
13594
 * not contain a terminating null byte.
13595
 *
13596
 * `URL` is used as base to resolve external entities and for error
13597
 * reporting.
13598
 *
13599
 * See #xmlCtxtUseOptions for details.
13600
 *
13601
 * @param ctxt  an XML parser context
13602
 * @param buffer  a pointer to a char array
13603
 * @param size  the size of the array
13604
 * @param URL  base URL (optional)
13605
 * @param encoding  the document encoding (optional)
13606
 * @param options  a combination of xmlParserOption
13607
 * @returns the resulting document tree
13608
 */
13609
xmlDoc *
13610
xmlCtxtReadMemory(xmlParserCtxt *ctxt, const char *buffer, int size,
13611
                  const char *URL, const char *encoding, int options)
13612
0
{
13613
0
    xmlParserInputPtr input;
13614
13615
0
    if ((ctxt == NULL) || (size < 0))
13616
0
        return(NULL);
13617
13618
0
    xmlCtxtReset(ctxt);
13619
0
    xmlCtxtUseOptions(ctxt, options);
13620
13621
0
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
13622
0
                                      XML_INPUT_BUF_STATIC);
13623
0
    if (input == NULL)
13624
0
        return(NULL);
13625
13626
0
    return(xmlCtxtParseDocument(ctxt, input));
13627
0
}
13628
13629
/**
13630
 * Parse an XML document from a file descriptor and build a tree.
13631
 *
13632
 * NOTE that the file descriptor will not be closed when the
13633
 * context is freed or reset.
13634
 *
13635
 * `URL` is used as base to resolve external entities and for error
13636
 * reporting.
13637
 *
13638
 * See #xmlCtxtUseOptions for details.
13639
 *
13640
 * @param ctxt  an XML parser context
13641
 * @param fd  an open file descriptor
13642
 * @param URL  base URL (optional)
13643
 * @param encoding  the document encoding (optional)
13644
 * @param options  a combination of xmlParserOption
13645
 * @returns the resulting document tree
13646
 */
13647
xmlDoc *
13648
xmlCtxtReadFd(xmlParserCtxt *ctxt, int fd,
13649
              const char *URL, const char *encoding, int options)
13650
0
{
13651
0
    xmlParserInputPtr input;
13652
13653
0
    if (ctxt == NULL)
13654
0
        return(NULL);
13655
13656
0
    xmlCtxtReset(ctxt);
13657
0
    xmlCtxtUseOptions(ctxt, options);
13658
13659
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13660
0
    if (input == NULL)
13661
0
        return(NULL);
13662
13663
0
    return(xmlCtxtParseDocument(ctxt, input));
13664
0
}
13665
13666
/**
13667
 * parse an XML document from I/O functions and source and build a tree.
13668
 * This reuses the existing `ctxt` parser context
13669
 *
13670
 * `URL` is used as base to resolve external entities and for error
13671
 * reporting.
13672
 *
13673
 * See #xmlCtxtUseOptions for details.
13674
 *
13675
 * @param ctxt  an XML parser context
13676
 * @param ioread  an I/O read function
13677
 * @param ioclose  an I/O close function
13678
 * @param ioctx  an I/O handler
13679
 * @param URL  the base URL to use for the document
13680
 * @param encoding  the document encoding, or NULL
13681
 * @param options  a combination of xmlParserOption
13682
 * @returns the resulting document tree
13683
 */
13684
xmlDoc *
13685
xmlCtxtReadIO(xmlParserCtxt *ctxt, xmlInputReadCallback ioread,
13686
              xmlInputCloseCallback ioclose, void *ioctx,
13687
        const char *URL,
13688
              const char *encoding, int options)
13689
0
{
13690
0
    xmlParserInputPtr input;
13691
13692
0
    if (ctxt == NULL)
13693
0
        return(NULL);
13694
13695
0
    xmlCtxtReset(ctxt);
13696
0
    xmlCtxtUseOptions(ctxt, options);
13697
13698
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13699
0
                                  encoding, 0);
13700
0
    if (input == NULL)
13701
0
        return(NULL);
13702
13703
0
    return(xmlCtxtParseDocument(ctxt, input));
13704
0
}
13705