Coverage Report

Created: 2025-08-29 06:56

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * Author: Daniel Veillard
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#include <libxml/HTMLparser.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
70
#include "private/buf.h"
71
#include "private/dict.h"
72
#include "private/entities.h"
73
#include "private/error.h"
74
#include "private/html.h"
75
#include "private/io.h"
76
#include "private/memory.h"
77
#include "private/parser.h"
78
#include "private/tree.h"
79
80
107k
#define NS_INDEX_EMPTY  INT_MAX
81
52.7k
#define NS_INDEX_XML    (INT_MAX - 1)
82
41.7k
#define URI_HASH_EMPTY  0xD943A04E
83
12.5k
#define URI_HASH_XML    0xF0451F02
84
85
#ifndef STDIN_FILENO
86
0
  #define STDIN_FILENO 0
87
#endif
88
89
#ifndef SIZE_MAX
90
  #define SIZE_MAX ((size_t) -1)
91
#endif
92
93
89.3k
#define XML_MAX_ATTRS 100000000 /* 100 million */
94
95
258k
#define XML_SPECIAL_EXTERNAL    (1 << 20)
96
245k
#define XML_SPECIAL_TYPE_MASK   (XML_SPECIAL_EXTERNAL - 1)
97
98
273k
#define XML_ATTVAL_ALLOC        (1 << 0)
99
9.39k
#define XML_ATTVAL_NORM_CHANGE  (1 << 1)
100
101
struct _xmlStartTag {
102
    const xmlChar *prefix;
103
    const xmlChar *URI;
104
    int line;
105
    int nsNr;
106
};
107
108
/*
 * Extra per-namespace bookkeeping. The fields are consumed outside this
 * excerpt; the notes below are inferred from the names - TODO confirm.
 */
typedef struct {
    void *saxData;              /* opaque pointer, presumably SAX user data */
    unsigned prefixHashValue;   /* presumably the hash of the prefix */
    unsigned uriHashValue;      /* presumably the hash of the URI */
    unsigned elementId;         /* presumably the id of the owning element */
    int oldIndex;               /* presumably the index of a shadowed entry */
} xmlParserNsExtra;
115
116
/*
 * One slot of the namespace hash table referenced by _xmlParserNsData.
 * Field meanings are inferred from the names - TODO confirm.
 */
typedef struct {
    unsigned hashValue; /* presumably the hash stored in this slot */
    int index;          /* presumably an index into the namespace table */
} xmlParserNsBucket;
120
121
struct _xmlParserNsData {
122
    xmlParserNsExtra *extra;
123
124
    unsigned hashSize;
125
    unsigned hashElems;
126
    xmlParserNsBucket *hash;
127
128
    unsigned elementId;
129
    int defaultNsIndex;
130
    int minNsIndex;
131
};
132
133
static int
134
xmlParseElementStart(xmlParserCtxtPtr ctxt);
135
136
static void
137
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
138
139
static xmlEntityPtr
140
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
141
142
static const xmlChar *
143
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
144
145
/************************************************************************
146
 *                  *
147
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
148
 *                  *
149
 ************************************************************************/
150
151
#define XML_PARSER_BIG_ENTITY 1000
152
#define XML_PARSER_LOT_ENTITY 5000
153
154
/*
155
 * Constants for protection against abusive entity expansion
156
 * ("billion laughs").
157
 */
158
159
/*
160
 * A certain amount of entity expansion which is always allowed.
161
 */
162
61.2k
#define XML_PARSER_ALLOWED_EXPANSION 1000000
163
164
/*
165
 * Fixed cost for each entity reference. This crudely models processing time
166
 * as well to protect, for example, against exponential expansion of empty
167
 * or very short entities.
168
 */
169
61.6k
#define XML_ENT_FIXED_COST 20
170
171
417k
#define XML_PARSER_BIG_BUFFER_SIZE 300
172
251k
#define XML_PARSER_BUFFER_SIZE 100
173
163k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
174
175
/**
176
 * XML_PARSER_CHUNK_SIZE
177
 *
178
 * When calling GROW that's the minimal amount of data
179
 * the parser expects to have received. It is not a hard
180
 * limit but an optimization when reading strings like Names
181
 * It is not strictly needed as long as the input's available characters
182
 * are followed by 0, which should be provided by the I/O level
183
 */
184
#define XML_PARSER_CHUNK_SIZE 100
185
186
/**
187
 * Constant string describing the version of the library used at
188
 * run-time.
189
 */
190
const char *const
191
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
192
193
/*
194
 * List of XML prefixed PI allowed by W3C specs
195
 */
196
197
static const char* const xmlW3CPIs[] = {
198
    "xml-stylesheet",
199
    "xml-model",
200
    NULL
201
};
202
203
204
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
205
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206
                                              const xmlChar **str);
207
208
static void
209
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
210
211
static int
212
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
213
214
static void
215
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl);
216
217
/************************************************************************
218
 *                  *
219
 *    Some factorized error routines        *
220
 *                  *
221
 ************************************************************************/
222
223
static void
224
393
xmlErrMemory(xmlParserCtxtPtr ctxt) {
225
393
    xmlCtxtErrMemory(ctxt);
226
393
}
227
228
/**
229
 * Handle a redefinition of attribute error
230
 *
231
 * @param ctxt  an XML parser context
232
 * @param prefix  the attribute prefix
233
 * @param localname  the attribute localname
234
 */
235
static void
236
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
237
                   const xmlChar * localname)
238
7.65k
{
239
7.65k
    if (prefix == NULL)
240
4.37k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
4.37k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
242
4.37k
                   "Attribute %s redefined\n", localname);
243
3.28k
    else
244
3.28k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
245
3.28k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
246
3.28k
                   "Attribute %s:%s redefined\n", prefix, localname);
247
7.65k
}
248
249
/**
250
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
251
 *
252
 * @param ctxt  an XML parser context
253
 * @param error  the error number
254
 * @param msg  the error message
255
 */
256
static void LIBXML_ATTR_FORMAT(3,0)
257
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
258
               const char *msg)
259
268k
{
260
268k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
261
268k
               NULL, NULL, NULL, 0, "%s", msg);
262
268k
}
263
264
/**
265
 * Handle a warning.
266
 *
267
 * @param ctxt  an XML parser context
268
 * @param error  the error number
269
 * @param msg  the error message
270
 * @param str1  extra data
271
 * @param str2  extra data
272
 */
273
void LIBXML_ATTR_FORMAT(3,0)
274
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
275
              const char *msg, const xmlChar *str1, const xmlChar *str2)
276
100k
{
277
100k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
278
100k
               str1, str2, NULL, 0, msg, str1, str2);
279
100k
}
280
281
#ifdef LIBXML_VALID_ENABLED
282
/**
283
 * Handle a validity error.
284
 *
285
 * @param ctxt  an XML parser context
286
 * @param error  the error number
287
 * @param msg  the error message
288
 * @param str1  extra data
289
 * @param str2  extra data
290
 */
291
static void LIBXML_ATTR_FORMAT(3,0)
292
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
293
              const char *msg, const xmlChar *str1, const xmlChar *str2)
294
444
{
295
444
    ctxt->valid = 0;
296
297
444
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
298
444
               str1, str2, NULL, 0, msg, str1, str2);
299
444
}
300
#endif
301
302
/**
303
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
304
 *
305
 * @param ctxt  an XML parser context
306
 * @param error  the error number
307
 * @param msg  the error message
308
 * @param val  an integer value
309
 */
310
static void LIBXML_ATTR_FORMAT(3,0)
311
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
312
                  const char *msg, int val)
313
1.02M
{
314
1.02M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
315
1.02M
               NULL, NULL, NULL, val, msg, val);
316
1.02M
}
317
318
/**
319
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
320
 *
321
 * @param ctxt  an XML parser context
322
 * @param error  the error number
323
 * @param msg  the error message
324
 * @param str1  an string info
325
 * @param val  an integer value
326
 * @param str2  an string info
327
 */
328
static void LIBXML_ATTR_FORMAT(3,0)
329
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
330
                  const char *msg, const xmlChar *str1, int val,
331
      const xmlChar *str2)
332
47.4k
{
333
47.4k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
334
47.4k
               str1, str2, NULL, val, msg, str1, val, str2);
335
47.4k
}
336
337
/**
338
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
339
 *
340
 * @param ctxt  an XML parser context
341
 * @param error  the error number
342
 * @param msg  the error message
343
 * @param val  a string value
344
 */
345
static void LIBXML_ATTR_FORMAT(3,0)
346
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
347
                  const char *msg, const xmlChar * val)
348
39.7k
{
349
39.7k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
350
39.7k
               val, NULL, NULL, 0, msg, val);
351
39.7k
}
352
353
/**
354
 * Handle a non fatal parser error
355
 *
356
 * @param ctxt  an XML parser context
357
 * @param error  the error number
358
 * @param msg  the error message
359
 * @param val  a string value
360
 */
361
static void LIBXML_ATTR_FORMAT(3,0)
362
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363
                  const char *msg, const xmlChar * val)
364
559
{
365
559
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366
559
               val, NULL, NULL, 0, msg, val);
367
559
}
368
369
/**
370
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
371
 *
372
 * @param ctxt  an XML parser context
373
 * @param error  the error number
374
 * @param msg  the message
375
 * @param info1  extra information string
376
 * @param info2  extra information string
377
 * @param info3  extra information string
378
 */
379
static void LIBXML_ATTR_FORMAT(3,0)
380
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381
         const char *msg,
382
         const xmlChar * info1, const xmlChar * info2,
383
         const xmlChar * info3)
384
108k
{
385
108k
    ctxt->nsWellFormed = 0;
386
387
108k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388
108k
               info1, info2, info3, 0, msg, info1, info2, info3);
389
108k
}
390
391
/**
392
 * Handle a namespace warning error
393
 *
394
 * @param ctxt  an XML parser context
395
 * @param error  the error number
396
 * @param msg  the message
397
 * @param info1  extra information string
398
 * @param info2  extra information string
399
 * @param info3  extra information string
400
 */
401
static void LIBXML_ATTR_FORMAT(3,0)
402
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403
         const char *msg,
404
         const xmlChar * info1, const xmlChar * info2,
405
         const xmlChar * info3)
406
26.6k
{
407
26.6k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408
26.6k
               info1, info2, info3, 0, msg, info1, info2, info3);
409
26.6k
}
410
411
/**
412
 * Check for non-linear entity expansion behaviour.
413
 *
414
 * In some cases like xmlExpandEntityInAttValue, this function is called
415
 * for each, possibly nested entity and its unexpanded content length.
416
 *
417
 * In other cases like #xmlParseReference, it's only called for each
418
 * top-level entity with its unexpanded content length plus the sum of
419
 * the unexpanded content lengths (plus fixed cost) of all nested
420
 * entities.
421
 *
422
 * Summing the unexpanded lengths also adds the length of the reference.
423
 * This is by design. Taking the length of the entity name into account
424
 * discourages attacks that try to waste CPU time with abusively long
425
 * entity names. See test/recurse/lol6.xml for example. Each call also
426
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
427
 * short entities.
428
 *
429
 * @param ctxt  parser context
430
 * @param extra  sum of unexpanded entity sizes
431
 * @returns 1 on error, 0 on success.
432
 */
433
static int
434
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
435
61.9k
{
436
61.9k
    unsigned long consumed;
437
61.9k
    unsigned long *expandedSize;
438
61.9k
    xmlParserInputPtr input = ctxt->input;
439
61.9k
    xmlEntityPtr entity = input->entity;
440
441
61.9k
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
442
772
        return(0);
443
444
    /*
445
     * Compute total consumed bytes so far, including input streams of
446
     * external entities.
447
     */
448
61.2k
    consumed = input->consumed;
449
61.2k
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
450
61.2k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
451
452
61.2k
    if (entity)
453
2.77k
        expandedSize = &entity->expandedSize;
454
58.4k
    else
455
58.4k
        expandedSize = &ctxt->sizeentcopy;
456
457
    /*
458
     * Add extra cost and some fixed cost.
459
     */
460
61.2k
    xmlSaturatedAdd(expandedSize, extra);
461
61.2k
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
462
463
    /*
464
     * It's important to always use saturation arithmetic when tracking
465
     * entity sizes to make the size checks reliable. If "sizeentcopy"
466
     * overflows, we have to abort.
467
     */
468
61.2k
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
469
61.2k
        ((*expandedSize >= ULONG_MAX) ||
470
8
         (*expandedSize / ctxt->maxAmpl > consumed))) {
471
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
472
0
                       "Maximum entity amplification factor exceeded, see "
473
0
                       "xmlCtxtSetMaxAmplification.\n");
474
0
        return(1);
475
0
    }
476
477
61.2k
    return(0);
478
61.2k
}
479
480
/************************************************************************
481
 *                  *
482
 *    Library wide options          *
483
 *                  *
484
 ************************************************************************/
485
486
/**
487
 * Examines if the library has been compiled with a given feature.
488
 *
489
 * @param feature  the feature to be examined
490
 * @returns zero (0) if the feature does not exist or an unknown
491
 * feature is requested, non-zero otherwise.
492
 */
493
int
494
xmlHasFeature(xmlFeature feature)
495
0
{
496
0
    switch (feature) {
497
0
  case XML_WITH_THREAD:
498
0
#ifdef LIBXML_THREAD_ENABLED
499
0
      return(1);
500
#else
501
      return(0);
502
#endif
503
0
        case XML_WITH_TREE:
504
0
            return(1);
505
0
        case XML_WITH_OUTPUT:
506
0
#ifdef LIBXML_OUTPUT_ENABLED
507
0
            return(1);
508
#else
509
            return(0);
510
#endif
511
0
        case XML_WITH_PUSH:
512
0
#ifdef LIBXML_PUSH_ENABLED
513
0
            return(1);
514
#else
515
            return(0);
516
#endif
517
0
        case XML_WITH_READER:
518
0
#ifdef LIBXML_READER_ENABLED
519
0
            return(1);
520
#else
521
            return(0);
522
#endif
523
0
        case XML_WITH_PATTERN:
524
0
#ifdef LIBXML_PATTERN_ENABLED
525
0
            return(1);
526
#else
527
            return(0);
528
#endif
529
0
        case XML_WITH_WRITER:
530
0
#ifdef LIBXML_WRITER_ENABLED
531
0
            return(1);
532
#else
533
            return(0);
534
#endif
535
0
        case XML_WITH_SAX1:
536
0
#ifdef LIBXML_SAX1_ENABLED
537
0
            return(1);
538
#else
539
            return(0);
540
#endif
541
0
        case XML_WITH_HTTP:
542
0
            return(0);
543
0
        case XML_WITH_VALID:
544
0
#ifdef LIBXML_VALID_ENABLED
545
0
            return(1);
546
#else
547
            return(0);
548
#endif
549
0
        case XML_WITH_HTML:
550
0
#ifdef LIBXML_HTML_ENABLED
551
0
            return(1);
552
#else
553
            return(0);
554
#endif
555
0
        case XML_WITH_LEGACY:
556
0
            return(0);
557
0
        case XML_WITH_C14N:
558
0
#ifdef LIBXML_C14N_ENABLED
559
0
            return(1);
560
#else
561
            return(0);
562
#endif
563
0
        case XML_WITH_CATALOG:
564
0
#ifdef LIBXML_CATALOG_ENABLED
565
0
            return(1);
566
#else
567
            return(0);
568
#endif
569
0
        case XML_WITH_XPATH:
570
0
#ifdef LIBXML_XPATH_ENABLED
571
0
            return(1);
572
#else
573
            return(0);
574
#endif
575
0
        case XML_WITH_XPTR:
576
0
#ifdef LIBXML_XPTR_ENABLED
577
0
            return(1);
578
#else
579
            return(0);
580
#endif
581
0
        case XML_WITH_XINCLUDE:
582
0
#ifdef LIBXML_XINCLUDE_ENABLED
583
0
            return(1);
584
#else
585
            return(0);
586
#endif
587
0
        case XML_WITH_ICONV:
588
0
#ifdef LIBXML_ICONV_ENABLED
589
0
            return(1);
590
#else
591
            return(0);
592
#endif
593
0
        case XML_WITH_ISO8859X:
594
0
#ifdef LIBXML_ISO8859X_ENABLED
595
0
            return(1);
596
#else
597
            return(0);
598
#endif
599
0
        case XML_WITH_UNICODE:
600
0
            return(0);
601
0
        case XML_WITH_REGEXP:
602
0
#ifdef LIBXML_REGEXP_ENABLED
603
0
            return(1);
604
#else
605
            return(0);
606
#endif
607
0
        case XML_WITH_AUTOMATA:
608
0
#ifdef LIBXML_REGEXP_ENABLED
609
0
            return(1);
610
#else
611
            return(0);
612
#endif
613
0
        case XML_WITH_EXPR:
614
0
            return(0);
615
0
        case XML_WITH_RELAXNG:
616
0
#ifdef LIBXML_RELAXNG_ENABLED
617
0
            return(1);
618
#else
619
            return(0);
620
#endif
621
0
        case XML_WITH_SCHEMAS:
622
0
#ifdef LIBXML_SCHEMAS_ENABLED
623
0
            return(1);
624
#else
625
            return(0);
626
#endif
627
0
        case XML_WITH_SCHEMATRON:
628
#ifdef LIBXML_SCHEMATRON_ENABLED
629
            return(1);
630
#else
631
0
            return(0);
632
0
#endif
633
0
        case XML_WITH_MODULES:
634
0
#ifdef LIBXML_MODULES_ENABLED
635
0
            return(1);
636
#else
637
            return(0);
638
#endif
639
0
        case XML_WITH_DEBUG:
640
#ifdef LIBXML_DEBUG_ENABLED
641
            return(1);
642
#else
643
0
            return(0);
644
0
#endif
645
0
        case XML_WITH_DEBUG_MEM:
646
0
            return(0);
647
0
        case XML_WITH_ZLIB:
648
0
#ifdef LIBXML_ZLIB_ENABLED
649
0
            return(1);
650
#else
651
            return(0);
652
#endif
653
0
        case XML_WITH_LZMA:
654
0
#ifdef LIBXML_LZMA_ENABLED
655
0
            return(1);
656
#else
657
            return(0);
658
#endif
659
0
        case XML_WITH_ICU:
660
#ifdef LIBXML_ICU_ENABLED
661
            return(1);
662
#else
663
0
            return(0);
664
0
#endif
665
0
        default:
666
0
      break;
667
0
     }
668
0
     return(0);
669
0
}
670
671
/************************************************************************
672
 *                  *
673
 *      Simple string buffer        *
674
 *                  *
675
 ************************************************************************/
676
677
typedef struct {
678
    xmlChar *mem;
679
    unsigned size;
680
    unsigned cap; /* size < cap */
681
    unsigned max; /* size <= max */
682
    xmlParserErrors code;
683
} xmlSBuf;
684
685
static void
686
276k
xmlSBufInit(xmlSBuf *buf, unsigned max) {
687
276k
    buf->mem = NULL;
688
276k
    buf->size = 0;
689
276k
    buf->cap = 0;
690
276k
    buf->max = max;
691
276k
    buf->code = XML_ERR_OK;
692
276k
}
693
694
static int
695
154k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
696
154k
    xmlChar *mem;
697
154k
    unsigned cap;
698
699
154k
    if (len >= UINT_MAX / 2 - buf->size) {
700
0
        if (buf->code == XML_ERR_OK)
701
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
702
0
        return(-1);
703
0
    }
704
705
154k
    cap = (buf->size + len) * 2;
706
154k
    if (cap < 240)
707
140k
        cap = 240;
708
709
154k
    mem = xmlRealloc(buf->mem, cap);
710
154k
    if (mem == NULL) {
711
65
        buf->code = XML_ERR_NO_MEMORY;
712
65
        return(-1);
713
65
    }
714
715
154k
    buf->mem = mem;
716
154k
    buf->cap = cap;
717
718
154k
    return(0);
719
154k
}
720
721
static void
722
1.17M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
723
1.17M
    if (buf->max - buf->size < len) {
724
0
        if (buf->code == XML_ERR_OK)
725
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
726
0
        return;
727
0
    }
728
729
1.17M
    if (buf->cap - buf->size <= len) {
730
149k
        if (xmlSBufGrow(buf, len) < 0)
731
60
            return;
732
149k
    }
733
734
1.17M
    if (len > 0)
735
1.17M
        memcpy(buf->mem + buf->size, str, len);
736
1.17M
    buf->size += len;
737
1.17M
}
738
739
static void
740
390k
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
741
390k
    xmlSBufAddString(buf, (const xmlChar *) str, len);
742
390k
}
743
744
static void
745
101k
xmlSBufAddChar(xmlSBuf *buf, int c) {
746
101k
    xmlChar *end;
747
748
101k
    if (buf->max - buf->size < 4) {
749
0
        if (buf->code == XML_ERR_OK)
750
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
751
0
        return;
752
0
    }
753
754
101k
    if (buf->cap - buf->size <= 4) {
755
4.95k
        if (xmlSBufGrow(buf, 4) < 0)
756
5
            return;
757
4.95k
    }
758
759
101k
    end = buf->mem + buf->size;
760
761
101k
    if (c < 0x80) {
762
64.8k
        *end = (xmlChar) c;
763
64.8k
        buf->size += 1;
764
64.8k
    } else {
765
37.1k
        buf->size += xmlCopyCharMultiByte(end, c);
766
37.1k
    }
767
101k
}
768
769
static void
770
278k
xmlSBufAddReplChar(xmlSBuf *buf) {
771
278k
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
772
278k
}
773
774
static void
775
68
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
776
68
    if (buf->code == XML_ERR_NO_MEMORY)
777
68
        xmlCtxtErrMemory(ctxt);
778
0
    else
779
0
        xmlFatalErr(ctxt, buf->code, errMsg);
780
68
}
781
782
static xmlChar *
783
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
784
158k
              const char *errMsg) {
785
158k
    if (buf->mem == NULL) {
786
15.7k
        buf->mem = xmlMalloc(1);
787
15.7k
        if (buf->mem == NULL) {
788
3
            buf->code = XML_ERR_NO_MEMORY;
789
15.7k
        } else {
790
15.7k
            buf->mem[0] = 0;
791
15.7k
        }
792
143k
    } else {
793
143k
        buf->mem[buf->size] = 0;
794
143k
    }
795
796
158k
    if (buf->code == XML_ERR_OK) {
797
158k
        if (sizeOut != NULL)
798
36.2k
            *sizeOut = buf->size;
799
158k
        return(buf->mem);
800
158k
    }
801
802
59
    xmlSBufReportError(buf, ctxt, errMsg);
803
804
59
    xmlFree(buf->mem);
805
806
59
    if (sizeOut != NULL)
807
18
        *sizeOut = 0;
808
59
    return(NULL);
809
158k
}
810
811
static void
812
112k
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
813
112k
    if (buf->code != XML_ERR_OK)
814
9
        xmlSBufReportError(buf, ctxt, errMsg);
815
816
112k
    xmlFree(buf->mem);
817
112k
}
818
819
static int
820
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
821
228k
                    const char *errMsg) {
822
228k
    int c = str[0];
823
228k
    int c1 = str[1];
824
825
228k
    if ((c1 & 0xC0) != 0x80)
826
132k
        goto encoding_error;
827
828
95.1k
    if (c < 0xE0) {
829
        /* 2-byte sequence */
830
83.2k
        if (c < 0xC2)
831
72.2k
            goto encoding_error;
832
833
11.0k
        return(2);
834
83.2k
    } else {
835
11.9k
        int c2 = str[2];
836
837
11.9k
        if ((c2 & 0xC0) != 0x80)
838
4.23k
            goto encoding_error;
839
840
7.67k
        if (c < 0xF0) {
841
            /* 3-byte sequence */
842
4.01k
            if (c == 0xE0) {
843
                /* overlong */
844
939
                if (c1 < 0xA0)
845
736
                    goto encoding_error;
846
3.07k
            } else if (c == 0xED) {
847
                /* surrogate */
848
571
                if (c1 >= 0xA0)
849
197
                    goto encoding_error;
850
2.50k
            } else if (c == 0xEF) {
851
                /* U+FFFE and U+FFFF are invalid Chars */
852
1.83k
                if ((c1 == 0xBF) && (c2 >= 0xBE))
853
280
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
854
1.83k
            }
855
856
3.08k
            return(3);
857
4.01k
        } else {
858
            /* 4-byte sequence */
859
3.65k
            if ((str[3] & 0xC0) != 0x80)
860
1.07k
                goto encoding_error;
861
2.58k
            if (c == 0xF0) {
862
                /* overlong */
863
963
                if (c1 < 0x90)
864
769
                    goto encoding_error;
865
1.61k
            } else if (c >= 0xF4) {
866
                /* greater than 0x10FFFF */
867
1.33k
                if ((c > 0xF4) || (c1 >= 0x90))
868
1.09k
                    goto encoding_error;
869
1.33k
            }
870
871
720
            return(4);
872
2.58k
        }
873
7.67k
    }
874
875
213k
encoding_error:
876
    /* Only report the first error */
877
213k
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
878
3.32k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
879
3.32k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
880
3.32k
    }
881
882
213k
    return(0);
883
95.1k
}
884
885
/************************************************************************
886
 *                  *
887
 *    SAX2 defaulted attributes handling      *
888
 *                  *
889
 ************************************************************************/
890
891
/**
892
 * Final initialization of the parser context before starting to parse.
893
 *
894
 * This accounts for users modifying struct members of parser context
895
 * directly.
896
 *
897
 * @param ctxt  an XML parser context
898
 */
899
static void
900
170k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
901
170k
    xmlSAXHandlerPtr sax;
902
903
    /* Avoid unused variable warning if features are disabled. */
904
170k
    (void) sax;
905
906
    /*
907
     * Changing the SAX struct directly is still widespread practice
908
     * in internal and external code.
909
     */
910
170k
    if (ctxt == NULL) return;
911
170k
    sax = ctxt->sax;
912
170k
#ifdef LIBXML_SAX1_ENABLED
913
    /*
914
     * Only enable SAX2 if there SAX2 element handlers, except when there
915
     * are no element handlers at all.
916
     */
917
170k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
918
170k
        (sax) &&
919
170k
        (sax->initialized == XML_SAX2_MAGIC) &&
920
170k
        ((sax->startElementNs != NULL) ||
921
121k
         (sax->endElementNs != NULL) ||
922
121k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
923
121k
        ctxt->sax2 = 1;
924
#else
925
    ctxt->sax2 = 1;
926
#endif /* LIBXML_SAX1_ENABLED */
927
928
    /*
929
     * Some users replace the dictionary directly in the context struct.
930
     * We really need an API function to do that cleanly.
931
     */
932
170k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
933
170k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
934
170k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
935
170k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
936
170k
    (ctxt->str_xml_ns == NULL)) {
937
51
        xmlErrMemory(ctxt);
938
51
    }
939
940
170k
    xmlDictSetLimit(ctxt->dict,
941
170k
                    (ctxt->options & XML_PARSE_HUGE) ?
942
41.3k
                        0 :
943
170k
                        XML_MAX_DICTIONARY_LIMIT);
944
945
170k
#ifdef LIBXML_VALID_ENABLED
946
170k
    if (ctxt->validate)
947
49.7k
        ctxt->vctxt.flags |= XML_VCTXT_VALIDATE;
948
120k
    else
949
120k
        ctxt->vctxt.flags &= ~XML_VCTXT_VALIDATE;
950
170k
#endif /* LIBXML_VALID_ENABLED */
951
170k
}
952
953
typedef struct {
954
    xmlHashedString prefix;
955
    xmlHashedString name;
956
    xmlHashedString value;
957
    const xmlChar *valueEnd;
958
    int external;
959
    int expandedSize;
960
} xmlDefAttr;
961
962
typedef struct _xmlDefAttrs xmlDefAttrs;
963
typedef xmlDefAttrs *xmlDefAttrsPtr;
964
struct _xmlDefAttrs {
965
    int nbAttrs;  /* number of defaulted attributes on that element */
966
    int maxAttrs;       /* the size of the array */
967
#if __STDC_VERSION__ >= 199901L
968
    /* Using a C99 flexible array member avoids UBSan errors. */
969
    xmlDefAttr attrs[] ATTRIBUTE_COUNTED_BY(maxAttrs);
970
#else
971
    xmlDefAttr attrs[1];
972
#endif
973
};
974
975
/**
976
 * Normalize the space in non CDATA attribute values:
977
 * If the attribute type is not CDATA, then the XML processor MUST further
978
 * process the normalized attribute value by discarding any leading and
979
 * trailing space (\#x20) characters, and by replacing sequences of space
980
 * (\#x20) characters by a single space (\#x20) character.
981
 * Note that the size of dst need to be at least src, and if one doesn't need
982
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
983
 * passing src as dst is just fine.
984
 *
985
 * @param src  the source string
986
 * @param dst  the target string
987
 * @returns a pointer to the normalized value (dst) or NULL if no conversion
988
 *         is needed.
989
 */
990
static xmlChar *
991
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
992
21.5k
{
993
21.5k
    if ((src == NULL) || (dst == NULL))
994
0
        return(NULL);
995
996
24.5k
    while (*src == 0x20) src++;
997
112k
    while (*src != 0) {
998
91.3k
  if (*src == 0x20) {
999
5.56k
      while (*src == 0x20) src++;
1000
2.54k
      if (*src != 0)
1001
2.03k
    *dst++ = 0x20;
1002
88.7k
  } else {
1003
88.7k
      *dst++ = *src++;
1004
88.7k
  }
1005
91.3k
    }
1006
21.5k
    *dst = 0;
1007
21.5k
    if (dst == src)
1008
20.4k
       return(NULL);
1009
1.10k
    return(dst);
1010
21.5k
}
1011
1012
/**
1013
 * Add a defaulted attribute for an element
1014
 *
1015
 * @param ctxt  an XML parser context
1016
 * @param fullname  the element fullname
1017
 * @param fullattr  the attribute fullname
1018
 * @param value  the attribute value
1019
 */
1020
static void
1021
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1022
               const xmlChar *fullname,
1023
               const xmlChar *fullattr,
1024
22.1k
               const xmlChar *value) {
1025
22.1k
    xmlDefAttrsPtr defaults;
1026
22.1k
    xmlDefAttr *attr;
1027
22.1k
    int len, expandedSize;
1028
22.1k
    xmlHashedString name;
1029
22.1k
    xmlHashedString prefix;
1030
22.1k
    xmlHashedString hvalue;
1031
22.1k
    const xmlChar *localname;
1032
1033
    /*
1034
     * Allows to detect attribute redefinitions
1035
     */
1036
22.1k
    if (ctxt->attsSpecial != NULL) {
1037
9.16k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1038
954
      return;
1039
9.16k
    }
1040
1041
21.1k
    if (ctxt->attsDefault == NULL) {
1042
12.9k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1043
12.9k
  if (ctxt->attsDefault == NULL)
1044
5
      goto mem_error;
1045
12.9k
    }
1046
1047
    /*
1048
     * split the element name into prefix:localname , the string found
1049
     * are within the DTD and then not associated to namespace names.
1050
     */
1051
21.1k
    localname = xmlSplitQName3(fullname, &len);
1052
21.1k
    if (localname == NULL) {
1053
19.7k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1054
19.7k
  prefix.name = NULL;
1055
19.7k
    } else {
1056
1.42k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1057
1.42k
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1058
1.42k
        if (prefix.name == NULL)
1059
1
            goto mem_error;
1060
1.42k
    }
1061
21.1k
    if (name.name == NULL)
1062
1
        goto mem_error;
1063
1064
    /*
1065
     * make sure there is some storage
1066
     */
1067
21.1k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1068
21.1k
    if ((defaults == NULL) ||
1069
21.1k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1070
15.4k
        xmlDefAttrsPtr temp;
1071
15.4k
        int newSize;
1072
1073
15.4k
        if (defaults == NULL) {
1074
15.2k
            newSize = 4;
1075
15.2k
        } else {
1076
196
            if ((defaults->maxAttrs >= XML_MAX_ATTRS) ||
1077
196
                ((size_t) defaults->maxAttrs >
1078
196
                     SIZE_MAX / 2 / sizeof(temp[0]) - sizeof(*defaults)))
1079
0
                goto mem_error;
1080
1081
196
            if (defaults->maxAttrs > XML_MAX_ATTRS / 2)
1082
0
                newSize = XML_MAX_ATTRS;
1083
196
            else
1084
196
                newSize = defaults->maxAttrs * 2;
1085
196
        }
1086
15.4k
        temp = xmlRealloc(defaults,
1087
15.4k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1088
15.4k
  if (temp == NULL)
1089
1
      goto mem_error;
1090
15.4k
        if (defaults == NULL)
1091
15.2k
            temp->nbAttrs = 0;
1092
15.4k
  temp->maxAttrs = newSize;
1093
15.4k
        defaults = temp;
1094
15.4k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1095
15.4k
                          defaults, NULL) < 0) {
1096
0
      xmlFree(defaults);
1097
0
      goto mem_error;
1098
0
  }
1099
15.4k
    }
1100
1101
    /*
1102
     * Split the attribute name into prefix:localname , the string found
1103
     * are within the DTD and hen not associated to namespace names.
1104
     */
1105
21.1k
    localname = xmlSplitQName3(fullattr, &len);
1106
21.1k
    if (localname == NULL) {
1107
10.0k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1108
10.0k
  prefix.name = NULL;
1109
11.1k
    } else {
1110
11.1k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1111
11.1k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1112
11.1k
        if (prefix.name == NULL)
1113
1
            goto mem_error;
1114
11.1k
    }
1115
21.1k
    if (name.name == NULL)
1116
1
        goto mem_error;
1117
1118
    /* intern the string and precompute the end */
1119
21.1k
    len = strlen((const char *) value);
1120
21.1k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1121
21.1k
    if (hvalue.name == NULL)
1122
1
        goto mem_error;
1123
1124
21.1k
    expandedSize = strlen((const char *) name.name);
1125
21.1k
    if (prefix.name != NULL)
1126
11.1k
        expandedSize += strlen((const char *) prefix.name);
1127
21.1k
    expandedSize += len;
1128
1129
21.1k
    attr = &defaults->attrs[defaults->nbAttrs++];
1130
21.1k
    attr->name = name;
1131
21.1k
    attr->prefix = prefix;
1132
21.1k
    attr->value = hvalue;
1133
21.1k
    attr->valueEnd = hvalue.name + len;
1134
21.1k
    attr->external = PARSER_EXTERNAL(ctxt);
1135
21.1k
    attr->expandedSize = expandedSize;
1136
1137
21.1k
    return;
1138
1139
11
mem_error:
1140
11
    xmlErrMemory(ctxt);
1141
11
}
1142
1143
/**
1144
 * Register this attribute type
1145
 *
1146
 * @param ctxt  an XML parser context
1147
 * @param fullname  the element fullname
1148
 * @param fullattr  the attribute fullname
1149
 * @param type  the attribute type
1150
 */
1151
static void
1152
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1153
      const xmlChar *fullname,
1154
      const xmlChar *fullattr,
1155
      int type)
1156
23.5k
{
1157
23.5k
    if (ctxt->attsSpecial == NULL) {
1158
14.1k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1159
14.1k
  if (ctxt->attsSpecial == NULL)
1160
6
      goto mem_error;
1161
14.1k
    }
1162
1163
23.5k
    if (PARSER_EXTERNAL(ctxt))
1164
0
        type |= XML_SPECIAL_EXTERNAL;
1165
1166
23.5k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1167
23.5k
                    XML_INT_TO_PTR(type)) < 0)
1168
0
        goto mem_error;
1169
23.5k
    return;
1170
1171
23.5k
mem_error:
1172
6
    xmlErrMemory(ctxt);
1173
6
}
1174
1175
/**
1176
 * Removes CDATA attributes from the special attribute table
1177
 */
1178
static void
1179
xmlCleanSpecialAttrCallback(void *payload, void *data,
1180
                            const xmlChar *fullname, const xmlChar *fullattr,
1181
22.5k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1182
22.5k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1183
1184
22.5k
    if (XML_PTR_TO_INT(payload) == XML_ATTRIBUTE_CDATA) {
1185
7.11k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1186
7.11k
    }
1187
22.5k
}
1188
1189
/**
1190
 * Trim the list of attributes defined to remove all those of type
1191
 * CDATA as they are not special. This call should be done when finishing
1192
 * to parse the DTD and before starting to parse the document root.
1193
 *
1194
 * @param ctxt  an XML parser context
1195
 */
1196
static void
1197
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1198
105k
{
1199
105k
    if (ctxt->attsSpecial == NULL)
1200
91.3k
        return;
1201
1202
14.1k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1203
1204
14.1k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1205
5.28k
        xmlHashFree(ctxt->attsSpecial, NULL);
1206
5.28k
        ctxt->attsSpecial = NULL;
1207
5.28k
    }
1208
14.1k
}
1209
1210
/**
1211
 * Checks that the value conforms to the LanguageID production:
1212
 *
1213
 * @deprecated Internal function, do not use.
1214
 *
1215
 * NOTE: this is somewhat deprecated, those productions were removed from
1216
 * the XML Second edition.
1217
 *
1218
 *     [33] LanguageID ::= Langcode ('-' Subcode)*
1219
 *     [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1220
 *     [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1221
 *     [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1222
 *     [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1223
 *     [38] Subcode ::= ([a-z] | [A-Z])+
1224
 *
1225
 * The current REC reference the successors of RFC 1766, currently 5646
1226
 *
1227
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1228
 *
1229
 *     langtag       = language
1230
 *                     ["-" script]
1231
 *                     ["-" region]
1232
 *                     *("-" variant)
1233
 *                     *("-" extension)
1234
 *                     ["-" privateuse]
1235
 *     language      = 2*3ALPHA            ; shortest ISO 639 code
1236
 *                     ["-" extlang]       ; sometimes followed by
1237
 *                                         ; extended language subtags
1238
 *                   / 4ALPHA              ; or reserved for future use
1239
 *                   / 5*8ALPHA            ; or registered language subtag
1240
 *
1241
 *     extlang       = 3ALPHA              ; selected ISO 639 codes
1242
 *                     *2("-" 3ALPHA)      ; permanently reserved
1243
 *
1244
 *     script        = 4ALPHA              ; ISO 15924 code
1245
 *
1246
 *     region        = 2ALPHA              ; ISO 3166-1 code
1247
 *                   / 3DIGIT              ; UN M.49 code
1248
 *
1249
 *     variant       = 5*8alphanum         ; registered variants
1250
 *                   / (DIGIT 3alphanum)
1251
 *
1252
 *     extension     = singleton 1*("-" (2*8alphanum))
1253
 *
1254
 *                                         ; Single alphanumerics
1255
 *                                         ; "x" reserved for private use
1256
 *     singleton     = DIGIT               ; 0 - 9
1257
 *                   / %x41-57             ; A - W
1258
 *                   / %x59-5A             ; Y - Z
1259
 *                   / %x61-77             ; a - w
1260
 *                   / %x79-7A             ; y - z
1261
 *
1262
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1263
 * The parser below doesn't try to cope with extension or privateuse
1264
 * that could be added but that's not interoperable anyway
1265
 *
1266
 * @param lang  pointer to the string value
1267
 * @returns 1 if correct 0 otherwise
1268
 **/
1269
int
1270
xmlCheckLanguageID(const xmlChar * lang)
1271
15.7k
{
1272
15.7k
    const xmlChar *cur = lang, *nxt;
1273
1274
15.7k
    if (cur == NULL)
1275
231
        return (0);
1276
15.5k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1277
15.5k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1278
15.5k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1279
15.5k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1280
        /*
1281
         * Still allow IANA code and user code which were coming
1282
         * from the previous version of the XML-1.0 specification
1283
         * it's deprecated but we should not fail
1284
         */
1285
1.14k
        cur += 2;
1286
1.92k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1287
1.92k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1288
785
            cur++;
1289
1.14k
        return(cur[0] == 0);
1290
1.14k
    }
1291
14.4k
    nxt = cur;
1292
49.4k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1293
49.4k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1294
34.9k
           nxt++;
1295
14.4k
    if (nxt - cur >= 4) {
1296
        /*
1297
         * Reserved
1298
         */
1299
2.97k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1300
820
            return(0);
1301
2.15k
        return(1);
1302
2.97k
    }
1303
11.4k
    if (nxt - cur < 2)
1304
3.33k
        return(0);
1305
    /* we got an ISO 639 code */
1306
8.11k
    if (nxt[0] == 0)
1307
374
        return(1);
1308
7.73k
    if (nxt[0] != '-')
1309
1.51k
        return(0);
1310
1311
6.22k
    nxt++;
1312
6.22k
    cur = nxt;
1313
    /* now we can have extlang or script or region or variant */
1314
6.22k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1315
965
        goto region_m49;
1316
1317
23.0k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1318
23.0k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1319
17.7k
           nxt++;
1320
5.26k
    if (nxt - cur == 4)
1321
1.42k
        goto script;
1322
3.83k
    if (nxt - cur == 2)
1323
1.00k
        goto region;
1324
2.82k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1325
591
        goto variant;
1326
2.23k
    if (nxt - cur != 3)
1327
594
        return(0);
1328
    /* we parsed an extlang */
1329
1.64k
    if (nxt[0] == 0)
1330
196
        return(1);
1331
1.44k
    if (nxt[0] != '-')
1332
196
        return(0);
1333
1334
1.25k
    nxt++;
1335
1.25k
    cur = nxt;
1336
    /* now we can have script or region or variant */
1337
1.25k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1338
194
        goto region_m49;
1339
1340
5.75k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1341
5.75k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1342
4.69k
           nxt++;
1343
1.05k
    if (nxt - cur == 2)
1344
198
        goto region;
1345
860
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1346
202
        goto variant;
1347
658
    if (nxt - cur != 4)
1348
464
        return(0);
1349
    /* we parsed a script */
1350
1.62k
script:
1351
1.62k
    if (nxt[0] == 0)
1352
398
        return(1);
1353
1.22k
    if (nxt[0] != '-')
1354
194
        return(0);
1355
1356
1.03k
    nxt++;
1357
1.03k
    cur = nxt;
1358
    /* now we can have region or variant */
1359
1.03k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1360
194
        goto region_m49;
1361
1362
4.46k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1363
4.46k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1364
3.62k
           nxt++;
1365
1366
836
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1367
207
        goto variant;
1368
629
    if (nxt - cur != 2)
1369
432
        return(0);
1370
    /* we parsed a region */
1371
1.59k
region:
1372
1.59k
    if (nxt[0] == 0)
1373
560
        return(1);
1374
1.03k
    if (nxt[0] != '-')
1375
364
        return(0);
1376
1377
670
    nxt++;
1378
670
    cur = nxt;
1379
    /* now we can just have a variant */
1380
4.92k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1381
4.92k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1382
4.25k
           nxt++;
1383
1384
670
    if ((nxt - cur < 5) || (nxt - cur > 8))
1385
469
        return(0);
1386
1387
    /* we parsed a variant */
1388
1.20k
variant:
1389
1.20k
    if (nxt[0] == 0)
1390
640
        return(1);
1391
561
    if (nxt[0] != '-')
1392
367
        return(0);
1393
    /* extensions and private use subtags not checked */
1394
194
    return (1);
1395
1396
1.35k
region_m49:
1397
1.35k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1398
1.35k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1399
194
        nxt += 3;
1400
194
        goto region;
1401
194
    }
1402
1.15k
    return(0);
1403
1.35k
}
1404
1405
/************************************************************************
1406
 *                  *
1407
 *    Parser stacks related functions and macros    *
1408
 *                  *
1409
 ************************************************************************/
1410
1411
static xmlChar *
1412
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1413
1414
/**
1415
 * Create a new namespace database.
1416
 *
1417
 * @returns the new obejct.
1418
 */
1419
xmlParserNsData *
1420
182k
xmlParserNsCreate(void) {
1421
182k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1422
1423
182k
    if (nsdb == NULL)
1424
23
        return(NULL);
1425
182k
    memset(nsdb, 0, sizeof(*nsdb));
1426
182k
    nsdb->defaultNsIndex = INT_MAX;
1427
1428
182k
    return(nsdb);
1429
182k
}
1430
1431
/**
1432
 * Free a namespace database.
1433
 *
1434
 * @param nsdb  namespace database
1435
 */
1436
void
1437
182k
xmlParserNsFree(xmlParserNsData *nsdb) {
1438
182k
    if (nsdb == NULL)
1439
0
        return;
1440
1441
182k
    xmlFree(nsdb->extra);
1442
182k
    xmlFree(nsdb->hash);
1443
182k
    xmlFree(nsdb);
1444
182k
}
1445
1446
/**
1447
 * Reset a namespace database.
1448
 *
1449
 * @param nsdb  namespace database
1450
 */
1451
static void
1452
0
xmlParserNsReset(xmlParserNsData *nsdb) {
1453
0
    if (nsdb == NULL)
1454
0
        return;
1455
1456
0
    nsdb->hashElems = 0;
1457
0
    nsdb->elementId = 0;
1458
0
    nsdb->defaultNsIndex = INT_MAX;
1459
1460
0
    if (nsdb->hash)
1461
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1462
0
}
1463
1464
/**
1465
 * Signal that a new element has started.
1466
 *
1467
 * @param nsdb  namespace database
1468
 * @returns 0 on success, -1 if the element counter overflowed.
1469
 */
1470
static int
1471
218k
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1472
218k
    if (nsdb->elementId == UINT_MAX)
1473
0
        return(-1);
1474
218k
    nsdb->elementId++;
1475
1476
218k
    return(0);
1477
218k
}
1478
1479
/**
1480
 * Lookup namespace with given prefix. If `bucketPtr` is non-NULL, it will
1481
 * be set to the matching bucket, or the first empty bucket if no match
1482
 * was found.
1483
 *
1484
 * @param ctxt  parser context
1485
 * @param prefix  namespace prefix
1486
 * @param bucketPtr  optional bucket (return value)
1487
 * @returns the namespace index on success, INT_MAX if no namespace was
1488
 * found.
1489
 */
1490
static int
1491
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1492
431k
                  xmlParserNsBucket **bucketPtr) {
1493
431k
    xmlParserNsBucket *bucket, *tombstone;
1494
431k
    unsigned index, hashValue;
1495
1496
431k
    if (prefix->name == NULL)
1497
145k
        return(ctxt->nsdb->defaultNsIndex);
1498
1499
285k
    if (ctxt->nsdb->hashSize == 0)
1500
43.9k
        return(INT_MAX);
1501
1502
241k
    hashValue = prefix->hashValue;
1503
241k
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1504
241k
    bucket = &ctxt->nsdb->hash[index];
1505
241k
    tombstone = NULL;
1506
1507
343k
    while (bucket->hashValue) {
1508
307k
        if (bucket->index == INT_MAX) {
1509
37.5k
            if (tombstone == NULL)
1510
30.2k
                tombstone = bucket;
1511
269k
        } else if (bucket->hashValue == hashValue) {
1512
205k
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1513
205k
                if (bucketPtr != NULL)
1514
98.2k
                    *bucketPtr = bucket;
1515
205k
                return(bucket->index);
1516
205k
            }
1517
205k
        }
1518
1519
101k
        index++;
1520
101k
        bucket++;
1521
101k
        if (index == ctxt->nsdb->hashSize) {
1522
36.6k
            index = 0;
1523
36.6k
            bucket = ctxt->nsdb->hash;
1524
36.6k
        }
1525
101k
    }
1526
1527
36.2k
    if (bucketPtr != NULL)
1528
30.5k
        *bucketPtr = tombstone ? tombstone : bucket;
1529
36.2k
    return(INT_MAX);
1530
241k
}
1531
1532
/**
1533
 * Lookup namespace URI with given prefix.
1534
 *
1535
 * @param ctxt  parser context
1536
 * @param prefix  namespace prefix
1537
 * @returns the namespace URI on success, NULL if no namespace was found.
1538
 */
1539
static const xmlChar *
1540
182k
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1541
182k
    const xmlChar *ret;
1542
182k
    int nsIndex;
1543
1544
182k
    if (prefix->name == ctxt->str_xml)
1545
2.88k
        return(ctxt->str_xml_ns);
1546
1547
    /*
1548
     * minNsIndex is used when building an entity tree. We must
1549
     * ignore namespaces declared outside the entity.
1550
     */
1551
179k
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1552
179k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1553
125k
        return(NULL);
1554
1555
54.1k
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1556
54.1k
    if (ret[0] == 0)
1557
603
        ret = NULL;
1558
54.1k
    return(ret);
1559
179k
}
1560
1561
/**
1562
 * Lookup extra data for the given prefix. This returns data stored
1563
 * with xmlParserNsUdpateSax.
1564
 *
1565
 * @param ctxt  parser context
1566
 * @param prefix  namespace prefix
1567
 * @returns the data on success, NULL if no namespace was found.
1568
 */
1569
void *
1570
40.3k
xmlParserNsLookupSax(xmlParserCtxt *ctxt, const xmlChar *prefix) {
1571
40.3k
    xmlHashedString hprefix;
1572
40.3k
    int nsIndex;
1573
1574
40.3k
    if (prefix == ctxt->str_xml)
1575
19.3k
        return(NULL);
1576
1577
20.9k
    hprefix.name = prefix;
1578
20.9k
    if (prefix != NULL)
1579
17.6k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1580
3.33k
    else
1581
3.33k
        hprefix.hashValue = 0;
1582
20.9k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1583
20.9k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1584
0
        return(NULL);
1585
1586
20.9k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1587
20.9k
}
1588
1589
/**
1590
 * Sets or updates extra data for the given prefix. This value will be
1591
 * returned by xmlParserNsLookupSax as long as the namespace with the
1592
 * given prefix is in scope.
1593
 *
1594
 * @param ctxt  parser context
1595
 * @param prefix  namespace prefix
1596
 * @param saxData  extra data for SAX handler
1597
 * @returns the data on success, NULL if no namespace was found.
1598
 */
1599
int
1600
xmlParserNsUpdateSax(xmlParserCtxt *ctxt, const xmlChar *prefix,
1601
53.5k
                     void *saxData) {
1602
53.5k
    xmlHashedString hprefix;
1603
53.5k
    int nsIndex;
1604
1605
53.5k
    if (prefix == ctxt->str_xml)
1606
0
        return(-1);
1607
1608
53.5k
    hprefix.name = prefix;
1609
53.5k
    if (prefix != NULL)
1610
42.8k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1611
10.7k
    else
1612
10.7k
        hprefix.hashValue = 0;
1613
53.5k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1614
53.5k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1615
0
        return(-1);
1616
1617
53.5k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1618
53.5k
    return(0);
1619
53.5k
}
1620
1621
/**
1622
 * Grows the namespace tables.
1623
 *
1624
 * @param ctxt  parser context
1625
 * @returns 0 on success, -1 if a memory allocation failed.
1626
 */
1627
static int
1628
37.7k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1629
37.7k
    const xmlChar **table;
1630
37.7k
    xmlParserNsExtra *extra;
1631
37.7k
    int newSize;
1632
1633
37.7k
    newSize = xmlGrowCapacity(ctxt->nsMax,
1634
37.7k
                              sizeof(table[0]) + sizeof(extra[0]),
1635
37.7k
                              16, XML_MAX_ITEMS);
1636
37.7k
    if (newSize < 0)
1637
0
        goto error;
1638
1639
37.7k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1640
37.7k
    if (table == NULL)
1641
6
        goto error;
1642
37.7k
    ctxt->nsTab = table;
1643
1644
37.7k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1645
37.7k
    if (extra == NULL)
1646
6
        goto error;
1647
37.7k
    ctxt->nsdb->extra = extra;
1648
1649
37.7k
    ctxt->nsMax = newSize;
1650
37.7k
    return(0);
1651
1652
12
error:
1653
12
    xmlErrMemory(ctxt);
1654
12
    return(-1);
1655
37.7k
}
1656
1657
/**
1658
 * Push a new namespace on the table.
1659
 *
1660
 * @param ctxt  parser context
1661
 * @param prefix  prefix with hash value
1662
 * @param uri  uri with hash value
1663
 * @param saxData  extra data for SAX handler
1664
 * @param defAttr  whether the namespace comes from a default attribute
1665
 * @returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1666
 * -1 if a memory allocation failed.
1667
 */
1668
static int
1669
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1670
89.6k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1671
89.6k
    xmlParserNsBucket *bucket = NULL;
1672
89.6k
    xmlParserNsExtra *extra;
1673
89.6k
    const xmlChar **ns;
1674
89.6k
    unsigned hashValue, nsIndex, oldIndex;
1675
1676
89.6k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1677
197
        return(0);
1678
1679
89.4k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1680
12
        xmlErrMemory(ctxt);
1681
12
        return(-1);
1682
12
    }
1683
1684
    /*
1685
     * Default namespace and 'xml' namespace
1686
     */
1687
89.4k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1688
16.9k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1689
1690
16.9k
        if (oldIndex != INT_MAX) {
1691
11.0k
            extra = &ctxt->nsdb->extra[oldIndex];
1692
1693
11.0k
            if (extra->elementId == ctxt->nsdb->elementId) {
1694
678
                if (defAttr == 0)
1695
214
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1696
678
                return(0);
1697
678
            }
1698
1699
10.3k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1700
10.3k
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1701
844
                return(0);
1702
10.3k
        }
1703
1704
15.4k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1705
15.4k
        goto populate_entry;
1706
16.9k
    }
1707
1708
    /*
1709
     * Hash table lookup
1710
     */
1711
72.4k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1712
72.4k
    if (oldIndex != INT_MAX) {
1713
29.0k
        extra = &ctxt->nsdb->extra[oldIndex];
1714
1715
        /*
1716
         * Check for duplicate definitions on the same element.
1717
         */
1718
29.0k
        if (extra->elementId == ctxt->nsdb->elementId) {
1719
406
            if (defAttr == 0)
1720
200
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1721
406
            return(0);
1722
406
        }
1723
1724
28.6k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1725
28.6k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1726
1.59k
            return(0);
1727
1728
27.0k
        bucket->index = ctxt->nsNr;
1729
27.0k
        goto populate_entry;
1730
28.6k
    }
1731
1732
    /*
1733
     * Insert new bucket
1734
     */
1735
1736
43.4k
    hashValue = prefix->hashValue;
1737
1738
    /*
1739
     * Grow hash table, 50% fill factor
1740
     */
1741
43.4k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1742
15.7k
        xmlParserNsBucket *newHash;
1743
15.7k
        unsigned newSize, i, index;
1744
1745
15.7k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1746
0
            xmlErrMemory(ctxt);
1747
0
            return(-1);
1748
0
        }
1749
15.7k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1750
15.7k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1751
15.7k
        if (newHash == NULL) {
1752
3
            xmlErrMemory(ctxt);
1753
3
            return(-1);
1754
3
        }
1755
15.7k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1756
1757
61.5k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1758
45.8k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1759
45.8k
            unsigned newIndex;
1760
1761
45.8k
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1762
41.2k
                continue;
1763
4.61k
            newIndex = hv & (newSize - 1);
1764
1765
6.38k
            while (newHash[newIndex].hashValue != 0) {
1766
1.76k
                newIndex++;
1767
1.76k
                if (newIndex == newSize)
1768
930
                    newIndex = 0;
1769
1.76k
            }
1770
1771
4.61k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1772
4.61k
        }
1773
1774
15.7k
        xmlFree(ctxt->nsdb->hash);
1775
15.7k
        ctxt->nsdb->hash = newHash;
1776
15.7k
        ctxt->nsdb->hashSize = newSize;
1777
1778
        /*
1779
         * Relookup
1780
         */
1781
15.7k
        index = hashValue & (newSize - 1);
1782
1783
17.4k
        while (newHash[index].hashValue != 0) {
1784
1.74k
            index++;
1785
1.74k
            if (index == newSize)
1786
195
                index = 0;
1787
1.74k
        }
1788
1789
15.7k
        bucket = &newHash[index];
1790
15.7k
    }
1791
1792
43.4k
    bucket->hashValue = hashValue;
1793
43.4k
    bucket->index = ctxt->nsNr;
1794
43.4k
    ctxt->nsdb->hashElems++;
1795
43.4k
    oldIndex = INT_MAX;
1796
1797
85.8k
populate_entry:
1798
85.8k
    nsIndex = ctxt->nsNr;
1799
1800
85.8k
    ns = &ctxt->nsTab[nsIndex * 2];
1801
85.8k
    ns[0] = prefix ? prefix->name : NULL;
1802
85.8k
    ns[1] = uri->name;
1803
1804
85.8k
    extra = &ctxt->nsdb->extra[nsIndex];
1805
85.8k
    extra->saxData = saxData;
1806
85.8k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1807
85.8k
    extra->uriHashValue = uri->hashValue;
1808
85.8k
    extra->elementId = ctxt->nsdb->elementId;
1809
85.8k
    extra->oldIndex = oldIndex;
1810
1811
85.8k
    ctxt->nsNr++;
1812
1813
85.8k
    return(1);
1814
43.4k
}
1815
1816
/**
1817
 * Pops the top `nr` namespaces and restores the hash table.
1818
 *
1819
 * @param ctxt  an XML parser context
1820
 * @param nr  the number to pop
1821
 * @returns the number of namespaces popped.
1822
 */
1823
static int
1824
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1825
63.2k
{
1826
63.2k
    int i;
1827
1828
    /* assert(nr <= ctxt->nsNr); */
1829
1830
147k
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1831
84.4k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1832
84.4k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1833
1834
84.4k
        if (prefix == NULL) {
1835
15.2k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1836
69.1k
        } else {
1837
69.1k
            xmlHashedString hprefix;
1838
69.1k
            xmlParserNsBucket *bucket = NULL;
1839
1840
69.1k
            hprefix.name = prefix;
1841
69.1k
            hprefix.hashValue = extra->prefixHashValue;
1842
69.1k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1843
            /* assert(bucket && bucket->hashValue); */
1844
69.1k
            bucket->index = extra->oldIndex;
1845
69.1k
        }
1846
84.4k
    }
1847
1848
63.2k
    ctxt->nsNr -= nr;
1849
63.2k
    return(nr);
1850
63.2k
}
1851
1852
static int
1853
35.9k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt) {
1854
35.9k
    const xmlChar **atts;
1855
35.9k
    unsigned *attallocs;
1856
35.9k
    int newSize;
1857
1858
35.9k
    newSize = xmlGrowCapacity(ctxt->maxatts / 5,
1859
35.9k
                              sizeof(atts[0]) * 5 + sizeof(attallocs[0]),
1860
35.9k
                              10, XML_MAX_ATTRS);
1861
35.9k
    if (newSize < 0) {
1862
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
1863
0
                    "Maximum number of attributes exceeded");
1864
0
        return(-1);
1865
0
    }
1866
1867
35.9k
    atts = xmlRealloc(ctxt->atts, newSize * sizeof(atts[0]) * 5);
1868
35.9k
    if (atts == NULL)
1869
5
        goto mem_error;
1870
35.9k
    ctxt->atts = atts;
1871
1872
35.9k
    attallocs = xmlRealloc(ctxt->attallocs,
1873
35.9k
                           newSize * sizeof(attallocs[0]));
1874
35.9k
    if (attallocs == NULL)
1875
11
        goto mem_error;
1876
35.9k
    ctxt->attallocs = attallocs;
1877
1878
35.9k
    ctxt->maxatts = newSize * 5;
1879
1880
35.9k
    return(0);
1881
1882
16
mem_error:
1883
16
    xmlErrMemory(ctxt);
1884
16
    return(-1);
1885
35.9k
}
1886
1887
/**
1888
 * Pushes a new parser input on top of the input stack
1889
 *
1890
 * @param ctxt  an XML parser context
1891
 * @param value  the parser input
1892
 * @returns -1 in case of error, the index in the stack otherwise
1893
 */
1894
int
1895
xmlCtxtPushInput(xmlParserCtxt *ctxt, xmlParserInput *value)
1896
174k
{
1897
174k
    char *directory = NULL;
1898
174k
    int maxDepth;
1899
1900
174k
    if ((ctxt == NULL) || (value == NULL))
1901
0
        return(-1);
1902
1903
174k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
1904
1905
174k
    if (ctxt->inputNr >= ctxt->inputMax) {
1906
2.94k
        xmlParserInputPtr *tmp;
1907
2.94k
        int newSize;
1908
1909
2.94k
        newSize = xmlGrowCapacity(ctxt->inputMax, sizeof(tmp[0]),
1910
2.94k
                                  5, maxDepth);
1911
2.94k
        if (newSize < 0) {
1912
0
            xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
1913
0
                           "Maximum entity nesting depth exceeded");
1914
0
            return(-1);
1915
0
        }
1916
2.94k
        tmp = xmlRealloc(ctxt->inputTab, newSize * sizeof(tmp[0]));
1917
2.94k
        if (tmp == NULL) {
1918
2
            xmlErrMemory(ctxt);
1919
2
            return(-1);
1920
2
        }
1921
2.94k
        ctxt->inputTab = tmp;
1922
2.94k
        ctxt->inputMax = newSize;
1923
2.94k
    }
1924
1925
174k
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1926
133k
        directory = xmlParserGetDirectory(value->filename);
1927
133k
        if (directory == NULL) {
1928
16
            xmlErrMemory(ctxt);
1929
16
            return(-1);
1930
16
        }
1931
133k
    }
1932
1933
174k
    if (ctxt->input_id >= INT_MAX) {
1934
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1935
0
        return(-1);
1936
0
    }
1937
1938
174k
    ctxt->inputTab[ctxt->inputNr] = value;
1939
174k
    ctxt->input = value;
1940
1941
174k
    if (ctxt->inputNr == 0) {
1942
170k
        xmlFree(ctxt->directory);
1943
170k
        ctxt->directory = directory;
1944
170k
    }
1945
1946
    /*
1947
     * The input ID is unused internally, but there are entity
1948
     * loaders in downstream code that detect the main document
1949
     * by checking for "input_id == 1".
1950
     */
1951
174k
    value->id = ctxt->input_id++;
1952
1953
174k
    return(ctxt->inputNr++);
1954
174k
}
1955
1956
/**
1957
 * Pops the top parser input from the input stack
1958
 *
1959
 * @param ctxt  an XML parser context
1960
 * @returns the input just removed
1961
 */
1962
xmlParserInput *
1963
xmlCtxtPopInput(xmlParserCtxt *ctxt)
1964
536k
{
1965
536k
    xmlParserInputPtr ret;
1966
1967
536k
    if (ctxt == NULL)
1968
0
        return(NULL);
1969
536k
    if (ctxt->inputNr <= 0)
1970
364k
        return (NULL);
1971
172k
    ctxt->inputNr--;
1972
172k
    if (ctxt->inputNr > 0)
1973
3.94k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1974
168k
    else
1975
168k
        ctxt->input = NULL;
1976
172k
    ret = ctxt->inputTab[ctxt->inputNr];
1977
172k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1978
172k
    return (ret);
1979
536k
}
1980
1981
/**
1982
 * Pushes a new element node on top of the node stack
1983
 *
1984
 * @deprecated Internal function, do not use.
1985
 *
1986
 * @param ctxt  an XML parser context
1987
 * @param value  the element node
1988
 * @returns -1 in case of error, the index in the stack otherwise
1989
 */
1990
int
1991
nodePush(xmlParserCtxt *ctxt, xmlNode *value)
1992
216k
{
1993
216k
    if (ctxt == NULL)
1994
0
        return(0);
1995
1996
216k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1997
67.8k
        int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
1998
67.8k
        xmlNodePtr *tmp;
1999
67.8k
        int newSize;
2000
2001
67.8k
        newSize = xmlGrowCapacity(ctxt->nodeMax, sizeof(tmp[0]),
2002
67.8k
                                  10, maxDepth);
2003
67.8k
        if (newSize < 0) {
2004
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2005
0
                    "Excessive depth in document: %d,"
2006
0
                    " use XML_PARSE_HUGE option\n",
2007
0
                    ctxt->nodeNr);
2008
0
            return(-1);
2009
0
        }
2010
2011
67.8k
  tmp = xmlRealloc(ctxt->nodeTab, newSize * sizeof(tmp[0]));
2012
67.8k
        if (tmp == NULL) {
2013
13
            xmlErrMemory(ctxt);
2014
13
            return (-1);
2015
13
        }
2016
67.8k
        ctxt->nodeTab = tmp;
2017
67.8k
  ctxt->nodeMax = newSize;
2018
67.8k
    }
2019
2020
216k
    ctxt->nodeTab[ctxt->nodeNr] = value;
2021
216k
    ctxt->node = value;
2022
216k
    return (ctxt->nodeNr++);
2023
216k
}
2024
2025
/**
2026
 * Pops the top element node from the node stack
2027
 *
2028
 * @deprecated Internal function, do not use.
2029
 *
2030
 * @param ctxt  an XML parser context
2031
 * @returns the node just removed
2032
 */
2033
xmlNode *
2034
nodePop(xmlParserCtxt *ctxt)
2035
209k
{
2036
209k
    xmlNodePtr ret;
2037
2038
209k
    if (ctxt == NULL) return(NULL);
2039
209k
    if (ctxt->nodeNr <= 0)
2040
5.15k
        return (NULL);
2041
204k
    ctxt->nodeNr--;
2042
204k
    if (ctxt->nodeNr > 0)
2043
155k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2044
48.5k
    else
2045
48.5k
        ctxt->node = NULL;
2046
204k
    ret = ctxt->nodeTab[ctxt->nodeNr];
2047
204k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2048
204k
    return (ret);
2049
209k
}
2050
2051
/**
2052
 * Pushes a new element name/prefix/URL on top of the name stack
2053
 *
2054
 * @param ctxt  an XML parser context
2055
 * @param value  the element name
2056
 * @param prefix  the element prefix
2057
 * @param URI  the element namespace name
2058
 * @param line  the current line number for error messages
2059
 * @param nsNr  the number of namespaces pushed on the namespace table
2060
 * @returns -1 in case of error, the index in the stack otherwise
2061
 */
2062
static int
2063
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2064
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2065
274k
{
2066
274k
    xmlStartTag *tag;
2067
2068
274k
    if (ctxt->nameNr >= ctxt->nameMax) {
2069
79.8k
        const xmlChar **tmp;
2070
79.8k
        xmlStartTag *tmp2;
2071
79.8k
        int newSize;
2072
2073
79.8k
        newSize = xmlGrowCapacity(ctxt->nameMax,
2074
79.8k
                                  sizeof(tmp[0]) + sizeof(tmp2[0]),
2075
79.8k
                                  10, XML_MAX_ITEMS);
2076
79.8k
        if (newSize < 0)
2077
0
            goto mem_error;
2078
2079
79.8k
        tmp = xmlRealloc(ctxt->nameTab, newSize * sizeof(tmp[0]));
2080
79.8k
        if (tmp == NULL)
2081
13
      goto mem_error;
2082
79.8k
  ctxt->nameTab = tmp;
2083
2084
79.8k
        tmp2 = xmlRealloc(ctxt->pushTab, newSize * sizeof(tmp2[0]));
2085
79.8k
        if (tmp2 == NULL)
2086
18
      goto mem_error;
2087
79.8k
  ctxt->pushTab = tmp2;
2088
2089
79.8k
        ctxt->nameMax = newSize;
2090
194k
    } else if (ctxt->pushTab == NULL) {
2091
65.8k
        ctxt->pushTab = xmlMalloc(ctxt->nameMax * sizeof(ctxt->pushTab[0]));
2092
65.8k
        if (ctxt->pushTab == NULL)
2093
22
            goto mem_error;
2094
65.8k
    }
2095
274k
    ctxt->nameTab[ctxt->nameNr] = value;
2096
274k
    ctxt->name = value;
2097
274k
    tag = &ctxt->pushTab[ctxt->nameNr];
2098
274k
    tag->prefix = prefix;
2099
274k
    tag->URI = URI;
2100
274k
    tag->line = line;
2101
274k
    tag->nsNr = nsNr;
2102
274k
    return (ctxt->nameNr++);
2103
53
mem_error:
2104
53
    xmlErrMemory(ctxt);
2105
53
    return (-1);
2106
274k
}
2107
#ifdef LIBXML_PUSH_ENABLED
/**
 * Pops the top element/prefix/URI name from the name stack
 *
 * The vacated `nameTab` slot is reset to NULL and `ctxt->name` is
 * pointed at the new top of the stack, or set to NULL when the stack
 * becomes empty.
 *
 * @param ctxt  an XML parser context
 * @returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2131
2132
/**
2133
 * Pops the top element name from the name stack
2134
 *
2135
 * @deprecated Internal function, do not use.
2136
 *
2137
 * @param ctxt  an XML parser context
2138
 * @returns the name just removed
2139
 */
2140
static const xmlChar *
2141
namePop(xmlParserCtxtPtr ctxt)
2142
266k
{
2143
266k
    const xmlChar *ret;
2144
2145
266k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2146
10
        return (NULL);
2147
266k
    ctxt->nameNr--;
2148
266k
    if (ctxt->nameNr > 0)
2149
208k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2150
57.6k
    else
2151
57.6k
        ctxt->name = NULL;
2152
266k
    ret = ctxt->nameTab[ctxt->nameNr];
2153
266k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2154
266k
    return (ret);
2155
266k
}
2156
2157
324k
/*
 * Pushes `val` on the stack of xml:space values and makes
 * `ctxt->space` point at the new top entry. The stack grows on demand.
 *
 * Returns the index of the pushed entry, or -1 if the stack could not
 * be grown (a memory error is reported on the context).
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown = NULL;
        int capacity;

        capacity = xmlGrowCapacity(ctxt->spaceMax, sizeof(grown[0]),
                                   10, XML_MAX_ITEMS);
        /* A capacity failure and an allocation failure end up alike. */
        if (capacity >= 0)
            grown = xmlRealloc(ctxt->spaceTab, capacity * sizeof(grown[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        ctxt->spaceTab = grown;
        ctxt->spaceMax = capacity;
    }

    slot = ctxt->spaceNr++;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    return(slot);
}
2182
2183
314k
/*
 * Pops the top entry of the xml:space stack and returns its value, or
 * 0 if the stack is empty. The vacated slot is overwritten with -1.
 * `ctxt->space` is pointed at the new top entry; when the stack
 * becomes empty it points at slot 0 rather than being set to NULL.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2195
2196
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them implicitly operate on a variable named `ctxt` that must be
 * in scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR returns the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   BASE_PTR returns the base pointer of the current input buffer.
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR (both expand to the same expression)
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   CMP4 .. CMP10  compare the first 4 .. 10 bytes at `s` against the
 *           given characters, stopping at the first mismatch.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser. `n` is evaluated
 *           twice.
 *   SKIPL(n) Skip n xmlChar one by one, updating line and column when a
 *           newline is crossed.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character via xmlNextChar().
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * SHRINK, GROW, NEXT1 and COPY_BUF are not wrapped in do { } while (0):
 * do not use them as the unbraced body of an `if` that has an `else`.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

#define SHRINK \
    if (!PARSER_PROGRESSIVE(ctxt)) \
	xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);					\
    }

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2302
2303
static int
2304
3.97M
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2305
3.97M
    int c = xmlCurrentChar(ctxt, len);
2306
2307
3.97M
    if (c == XML_INVALID_CHAR)
2308
233k
        c = 0xFFFD; /* replacement character */
2309
2310
3.97M
    return(c);
2311
3.97M
}
2312
2313
/**
2314
 * Skip whitespace in the input stream.
2315
 *
2316
 * @deprecated Internal function, do not use.
2317
 *
2318
 * @param ctxt  the XML parser context
2319
 * @returns the number of space chars skipped
2320
 */
2321
int
2322
3.62M
xmlSkipBlankChars(xmlParserCtxt *ctxt) {
2323
3.62M
    const xmlChar *cur;
2324
3.62M
    int res = 0;
2325
2326
3.62M
    cur = ctxt->input->cur;
2327
3.62M
    while (IS_BLANK_CH(*cur)) {
2328
1.69M
        if (*cur == '\n') {
2329
399k
            ctxt->input->line++; ctxt->input->col = 1;
2330
1.29M
        } else {
2331
1.29M
            ctxt->input->col++;
2332
1.29M
        }
2333
1.69M
        cur++;
2334
1.69M
        if (res < INT_MAX)
2335
1.69M
            res++;
2336
1.69M
        if (*cur == 0) {
2337
47.6k
            ctxt->input->cur = cur;
2338
47.6k
            xmlParserGrow(ctxt);
2339
47.6k
            cur = ctxt->input->cur;
2340
47.6k
        }
2341
1.69M
    }
2342
3.62M
    ctxt->input->cur = cur;
2343
2344
3.62M
    if (res > 4)
2345
1.51k
        GROW;
2346
2347
3.62M
    return(res);
2348
3.62M
}
2349
2350
static void
2351
0
xmlPopPE(xmlParserCtxtPtr ctxt) {
2352
0
    unsigned long consumed;
2353
0
    xmlEntityPtr ent;
2354
2355
0
    ent = ctxt->input->entity;
2356
2357
0
    ent->flags &= ~XML_ENT_EXPANDING;
2358
2359
0
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2360
0
        int result;
2361
2362
        /*
2363
         * Read the rest of the stream in case of errors. We want
2364
         * to account for the whole entity size.
2365
         */
2366
0
        do {
2367
0
            ctxt->input->cur = ctxt->input->end;
2368
0
            xmlParserShrink(ctxt);
2369
0
            result = xmlParserGrow(ctxt);
2370
0
        } while (result > 0);
2371
2372
0
        consumed = ctxt->input->consumed;
2373
0
        xmlSaturatedAddSizeT(&consumed,
2374
0
                             ctxt->input->end - ctxt->input->base);
2375
2376
0
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2377
2378
        /*
2379
         * Add to sizeentities when parsing an external entity
2380
         * for the first time.
2381
         */
2382
0
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2383
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2384
0
        }
2385
2386
0
        ent->flags |= XML_ENT_CHECKED;
2387
0
    }
2388
2389
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
2390
2391
0
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2392
2393
0
    GROW;
2394
0
}
2395
2396
/**
2397
 * Skip whitespace in the input stream, also handling parameter
2398
 * entities.
2399
 *
2400
 * @param ctxt  the XML parser context
2401
 * @returns the number of space chars skipped
2402
 */
2403
static int
2404
381k
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2405
381k
    int res = 0;
2406
381k
    int inParam;
2407
381k
    int expandParam;
2408
2409
381k
    inParam = PARSER_IN_PE(ctxt);
2410
381k
    expandParam = PARSER_EXTERNAL(ctxt);
2411
2412
381k
    if (!inParam && !expandParam)
2413
381k
        return(xmlSkipBlankChars(ctxt));
2414
2415
    /*
2416
     * It's Okay to use CUR/NEXT here since all the blanks are on
2417
     * the ASCII range.
2418
     */
2419
0
    while (PARSER_STOPPED(ctxt) == 0) {
2420
0
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2421
0
            NEXT;
2422
0
        } else if (CUR == '%') {
2423
0
            if ((expandParam == 0) ||
2424
0
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2425
0
                break;
2426
2427
            /*
2428
             * Expand parameter entity. We continue to consume
2429
             * whitespace at the start of the entity and possible
2430
             * even consume the whole entity and pop it. We might
2431
             * even pop multiple PEs in this loop.
2432
             */
2433
0
            xmlParsePERefInternal(ctxt, 0);
2434
2435
0
            inParam = PARSER_IN_PE(ctxt);
2436
0
            expandParam = PARSER_EXTERNAL(ctxt);
2437
0
        } else if (CUR == 0) {
2438
0
            if (inParam == 0)
2439
0
                break;
2440
2441
            /*
2442
             * Don't pop parameter entities that start a markup
2443
             * declaration to detect Well-formedness constraint:
2444
             * PE Between Declarations.
2445
             */
2446
0
            if (ctxt->input->flags & XML_INPUT_MARKUP_DECL)
2447
0
                break;
2448
2449
0
            xmlPopPE(ctxt);
2450
2451
0
            inParam = PARSER_IN_PE(ctxt);
2452
0
            expandParam = PARSER_EXTERNAL(ctxt);
2453
0
        } else {
2454
0
            break;
2455
0
        }
2456
2457
        /*
2458
         * Also increase the counter when entering or exiting a PERef.
2459
         * The spec says: "When a parameter-entity reference is recognized
2460
         * in the DTD and included, its replacement text MUST be enlarged
2461
         * by the attachment of one leading and one following space (#x20)
2462
         * character."
2463
         */
2464
0
        if (res < INT_MAX)
2465
0
            res++;
2466
0
    }
2467
2468
0
    return(res);
2469
381k
}
2470
2471
/************************************************************************
2472
 *                  *
2473
 *    Commodity functions to handle entities      *
2474
 *                  *
2475
 ************************************************************************/
2476
2477
/**
2478
 * @deprecated Internal function, don't use.
2479
 *
2480
 * @param ctxt  an XML parser context
2481
 * @returns the current xmlChar in the parser context
2482
 */
2483
xmlChar
2484
0
xmlPopInput(xmlParserCtxt *ctxt) {
2485
0
    xmlParserInputPtr input;
2486
2487
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2488
0
    input = xmlCtxtPopInput(ctxt);
2489
0
    xmlFreeInputStream(input);
2490
0
    if (*ctxt->input->cur == 0)
2491
0
        xmlParserGrow(ctxt);
2492
0
    return(CUR);
2493
0
}
2494
2495
/**
2496
 * Push an input stream onto the stack.
2497
 *
2498
 * @deprecated Internal function, don't use.
2499
 *
2500
 * @param ctxt  an XML parser context
2501
 * @param input  an XML parser input fragment (entity, XML fragment ...).
2502
 * @returns -1 in case of error or the index in the input stack
2503
 */
2504
int
2505
0
xmlPushInput(xmlParserCtxt *ctxt, xmlParserInput *input) {
2506
0
    int ret;
2507
2508
0
    if ((ctxt == NULL) || (input == NULL))
2509
0
        return(-1);
2510
2511
0
    ret = xmlCtxtPushInput(ctxt, input);
2512
0
    if (ret >= 0)
2513
0
        GROW;
2514
0
    return(ret);
2515
0
}
2516
2517
/**
2518
 * Parse a numeric character reference. Always consumes '&'.
2519
 *
2520
 * @deprecated Internal function, don't use.
2521
 *
2522
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2523
 *                      '&#x' [0-9a-fA-F]+ ';'
2524
 *
2525
 * [ WFC: Legal Character ]
2526
 * Characters referred to using character references must match the
2527
 * production for Char.
2528
 *
2529
 * @param ctxt  an XML parser context
2530
 * @returns the value parsed (as an int), 0 in case of error
2531
 */
2532
int
2533
119k
xmlParseCharRef(xmlParserCtxt *ctxt) {
2534
119k
    int val = 0;
2535
119k
    int count = 0;
2536
2537
    /*
2538
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2539
     */
2540
119k
    if ((RAW == '&') && (NXT(1) == '#') &&
2541
119k
        (NXT(2) == 'x')) {
2542
52.4k
  SKIP(3);
2543
52.4k
  GROW;
2544
252k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2545
201k
      if (count++ > 20) {
2546
430
    count = 0;
2547
430
    GROW;
2548
430
      }
2549
201k
      if ((RAW >= '0') && (RAW <= '9'))
2550
64.5k
          val = val * 16 + (CUR - '0');
2551
136k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2552
621
          val = val * 16 + (CUR - 'a') + 10;
2553
136k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2554
134k
          val = val * 16 + (CUR - 'A') + 10;
2555
1.42k
      else {
2556
1.42k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2557
1.42k
    val = 0;
2558
1.42k
    break;
2559
1.42k
      }
2560
199k
      if (val > 0x110000)
2561
5.00k
          val = 0x110000;
2562
2563
199k
      NEXT;
2564
199k
      count++;
2565
199k
  }
2566
52.4k
  if (RAW == ';') {
2567
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2568
51.0k
      ctxt->input->col++;
2569
51.0k
      ctxt->input->cur++;
2570
51.0k
  }
2571
66.6k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2572
66.6k
  SKIP(2);
2573
66.6k
  GROW;
2574
211k
  while (RAW != ';') { /* loop blocked by count */
2575
145k
      if (count++ > 20) {
2576
436
    count = 0;
2577
436
    GROW;
2578
436
      }
2579
145k
      if ((RAW >= '0') && (RAW <= '9'))
2580
144k
          val = val * 10 + (CUR - '0');
2581
1.26k
      else {
2582
1.26k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2583
1.26k
    val = 0;
2584
1.26k
    break;
2585
1.26k
      }
2586
144k
      if (val > 0x110000)
2587
2.17k
          val = 0x110000;
2588
2589
144k
      NEXT;
2590
144k
      count++;
2591
144k
  }
2592
66.6k
  if (RAW == ';') {
2593
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2594
65.4k
      ctxt->input->col++;
2595
65.4k
      ctxt->input->cur++;
2596
65.4k
  }
2597
66.6k
    } else {
2598
0
        if (RAW == '&')
2599
0
            SKIP(1);
2600
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2601
0
    }
2602
2603
    /*
2604
     * [ WFC: Legal Character ]
2605
     * Characters referred to using character references must match the
2606
     * production for Char.
2607
     */
2608
119k
    if (val >= 0x110000) {
2609
377
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2610
377
                "xmlParseCharRef: character reference out of bounds\n",
2611
377
          val);
2612
377
        val = 0xFFFD;
2613
118k
    } else if (!IS_CHAR(val)) {
2614
3.25k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2615
3.25k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2616
3.25k
                    val);
2617
3.25k
    }
2618
119k
    return(val);
2619
119k
}
2620
2621
/**
2622
 * Parse Reference declarations, variant parsing from a string rather
2623
 * than an an input flow.
2624
 *
2625
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2626
 *                      '&#x' [0-9a-fA-F]+ ';'
2627
 *
2628
 * [ WFC: Legal Character ]
2629
 * Characters referred to using character references must match the
2630
 * production for Char.
2631
 *
2632
 * @param ctxt  an XML parser context
2633
 * @param str  a pointer to an index in the string
2634
 * @returns the value parsed (as an int), 0 in case of error, str will be
2635
 *         updated to the current value of the index
2636
 */
2637
static int
2638
29.2k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2639
29.2k
    const xmlChar *ptr;
2640
29.2k
    xmlChar cur;
2641
29.2k
    int val = 0;
2642
2643
29.2k
    if ((str == NULL) || (*str == NULL)) return(0);
2644
29.2k
    ptr = *str;
2645
29.2k
    cur = *ptr;
2646
29.2k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2647
6.57k
  ptr += 3;
2648
6.57k
  cur = *ptr;
2649
29.8k
  while (cur != ';') { /* Non input consuming loop */
2650
23.6k
      if ((cur >= '0') && (cur <= '9'))
2651
1.94k
          val = val * 16 + (cur - '0');
2652
21.7k
      else if ((cur >= 'a') && (cur <= 'f'))
2653
263
          val = val * 16 + (cur - 'a') + 10;
2654
21.4k
      else if ((cur >= 'A') && (cur <= 'F'))
2655
21.0k
          val = val * 16 + (cur - 'A') + 10;
2656
430
      else {
2657
430
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2658
430
    val = 0;
2659
430
    break;
2660
430
      }
2661
23.2k
      if (val > 0x110000)
2662
216
          val = 0x110000;
2663
2664
23.2k
      ptr++;
2665
23.2k
      cur = *ptr;
2666
23.2k
  }
2667
6.57k
  if (cur == ';')
2668
6.14k
      ptr++;
2669
22.6k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2670
22.6k
  ptr += 2;
2671
22.6k
  cur = *ptr;
2672
72.7k
  while (cur != ';') { /* Non input consuming loops */
2673
50.7k
      if ((cur >= '0') && (cur <= '9'))
2674
50.0k
          val = val * 10 + (cur - '0');
2675
731
      else {
2676
731
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2677
731
    val = 0;
2678
731
    break;
2679
731
      }
2680
50.0k
      if (val > 0x110000)
2681
377
          val = 0x110000;
2682
2683
50.0k
      ptr++;
2684
50.0k
      cur = *ptr;
2685
50.0k
  }
2686
22.6k
  if (cur == ';')
2687
21.9k
      ptr++;
2688
22.6k
    } else {
2689
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2690
0
  return(0);
2691
0
    }
2692
29.2k
    *str = ptr;
2693
2694
    /*
2695
     * [ WFC: Legal Character ]
2696
     * Characters referred to using character references must match the
2697
     * production for Char.
2698
     */
2699
29.2k
    if (val >= 0x110000) {
2700
203
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2701
203
                "xmlParseStringCharRef: character reference out of bounds\n",
2702
203
                val);
2703
29.0k
    } else if (IS_CHAR(val)) {
2704
27.2k
        return(val);
2705
27.2k
    } else {
2706
1.76k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2707
1.76k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2708
1.76k
        val);
2709
1.76k
    }
2710
1.96k
    return(0);
2711
29.2k
}
2712
2713
/**
2714
 *     [69] PEReference ::= '%' Name ';'
2715
 *
2716
 * @deprecated Internal function, do not use.
2717
 *
2718
 * [ WFC: No Recursion ]
2719
 * A parsed entity must not contain a recursive
2720
 * reference to itself, either directly or indirectly.
2721
 *
2722
 * [ WFC: Entity Declared ]
2723
 * In a document without any DTD, a document with only an internal DTD
2724
 * subset which contains no parameter entity references, or a document
2725
 * with "standalone='yes'", ...  ... The declaration of a parameter
2726
 * entity must precede any reference to it...
2727
 *
2728
 * [ VC: Entity Declared ]
2729
 * In a document with an external subset or external parameter entities
2730
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2731
 * must precede any reference to it...
2732
 *
2733
 * [ WFC: In DTD ]
2734
 * Parameter-entity references may only appear in the DTD.
2735
 * NOTE: misleading but this is handled.
2736
 *
2737
 * A PEReference may have been detected in the current input stream
2738
 * the handling is done accordingly to
2739
 *      http://www.w3.org/TR/REC-xml#entproc
2740
 * i.e.
2741
 *   - Included in literal in entity values
2742
 *   - Included as Parameter Entity reference within DTDs
2743
 * @param ctxt  the parser context
2744
 */
2745
void
2746
0
xmlParserHandlePEReference(xmlParserCtxt *ctxt) {
2747
0
    xmlParsePERefInternal(ctxt, 0);
2748
0
}
2749
2750
/**
2751
 * @deprecated Internal function, don't use.
2752
 *
2753
 * @param ctxt  the parser context
2754
 * @param str  the input string
2755
 * @param len  the string length
2756
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2757
 * @param end  an end marker xmlChar, 0 if none
2758
 * @param end2  an end marker xmlChar, 0 if none
2759
 * @param end3  an end marker xmlChar, 0 if none
2760
 * @returns A newly allocated string with the substitution done. The caller
2761
 *      must deallocate it !
2762
 */
2763
xmlChar *
2764
xmlStringLenDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str, int len,
2765
                           int what ATTRIBUTE_UNUSED,
2766
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2767
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2768
0
        return(NULL);
2769
2770
0
    if ((str[len] != 0) ||
2771
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2772
0
        return(NULL);
2773
2774
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2775
0
}
2776
2777
/**
2778
 * @deprecated Internal function, don't use.
2779
 *
2780
 * @param ctxt  the parser context
2781
 * @param str  the input string
2782
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2783
 * @param end  an end marker xmlChar, 0 if none
2784
 * @param end2  an end marker xmlChar, 0 if none
2785
 * @param end3  an end marker xmlChar, 0 if none
2786
 * @returns A newly allocated string with the substitution done. The caller
2787
 *      must deallocate it !
2788
 */
2789
xmlChar *
2790
xmlStringDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str,
2791
                        int what ATTRIBUTE_UNUSED,
2792
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2793
0
    if ((ctxt == NULL) || (str == NULL))
2794
0
        return(NULL);
2795
2796
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2797
0
        return(NULL);
2798
2799
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2800
0
}
2801
2802
/************************************************************************
2803
 *                  *
2804
 *    Commodity functions, cleanup needed ?     *
2805
 *                  *
2806
 ************************************************************************/
2807
2808
/**
2809
 * Is this a sequence of blank chars that one can ignore ?
2810
 *
2811
 * @param ctxt  an XML parser context
2812
 * @param str  a xmlChar *
2813
 * @param len  the size of `str`
2814
 * @param blank_chars  we know the chars are blanks
2815
 * @returns 1 if ignorable 0 otherwise.
2816
 */
2817
2818
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2819
99.1k
                     int blank_chars) {
2820
99.1k
    int i;
2821
99.1k
    xmlNodePtr lastChild;
2822
2823
    /*
2824
     * Check for xml:space value.
2825
     */
2826
99.1k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2827
99.1k
        (*(ctxt->space) == -2))
2828
43.6k
  return(0);
2829
2830
    /*
2831
     * Check that the string is made of blanks
2832
     */
2833
55.4k
    if (blank_chars == 0) {
2834
43.8k
  for (i = 0;i < len;i++)
2835
42.6k
      if (!(IS_BLANK_CH(str[i]))) return(0);
2836
39.2k
    }
2837
2838
    /*
2839
     * Look if the element is mixed content in the DTD if available
2840
     */
2841
17.4k
    if (ctxt->node == NULL) return(0);
2842
17.4k
    if (ctxt->myDoc != NULL) {
2843
17.4k
        xmlElementPtr elemDecl = NULL;
2844
17.4k
        xmlDocPtr doc = ctxt->myDoc;
2845
17.4k
        const xmlChar *prefix = NULL;
2846
2847
17.4k
        if (ctxt->node->ns)
2848
3.82k
            prefix = ctxt->node->ns->prefix;
2849
17.4k
        if (doc->intSubset != NULL)
2850
14.0k
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2851
14.0k
                                      prefix);
2852
17.4k
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2853
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2854
0
                                      prefix);
2855
17.4k
        if (elemDecl != NULL) {
2856
4.28k
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2857
1.92k
                return(1);
2858
2.36k
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2859
2.36k
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2860
410
                return(0);
2861
2.36k
        }
2862
17.4k
    }
2863
2864
    /*
2865
     * Otherwise, heuristic :-\
2866
     *
2867
     * When push parsing, we could be at the end of a chunk.
2868
     * This makes the look-ahead and consequently the NOBLANKS
2869
     * option unreliable.
2870
     */
2871
15.1k
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2872
14.5k
    if ((ctxt->node->children == NULL) &&
2873
14.5k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2874
2875
14.2k
    lastChild = xmlGetLastChild(ctxt->node);
2876
14.2k
    if (lastChild == NULL) {
2877
7.26k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2878
7.26k
            (ctxt->node->content != NULL)) return(0);
2879
7.26k
    } else if (xmlNodeIsText(lastChild))
2880
293
        return(0);
2881
6.64k
    else if ((ctxt->node->children != NULL) &&
2882
6.64k
             (xmlNodeIsText(ctxt->node->children)))
2883
259
        return(0);
2884
13.6k
    return(1);
2885
14.2k
}
2886
2887
/************************************************************************
2888
 *                  *
2889
 *    Extra stuff for namespace support     *
2890
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2891
 *                  *
2892
 ************************************************************************/
2893
2894
/**
2895
 * Parse an UTF8 encoded XML qualified name string
2896
 *
2897
 * @deprecated Don't use.
2898
 *
2899
 * @param ctxt  an XML parser context
2900
 * @param name  an XML parser context
2901
 * @param prefixOut  a xmlChar **
2902
 * @returns the local part, and prefix is updated
2903
 *   to get the Prefix if any.
2904
 */
2905
2906
xmlChar *
2907
0
xmlSplitQName(xmlParserCtxt *ctxt, const xmlChar *name, xmlChar **prefixOut) {
2908
0
    xmlChar *ret;
2909
0
    const xmlChar *localname;
2910
2911
0
    localname = xmlSplitQName4(name, prefixOut);
2912
0
    if (localname == NULL) {
2913
0
        xmlCtxtErrMemory(ctxt);
2914
0
        return(NULL);
2915
0
    }
2916
2917
0
    ret = xmlStrdup(localname);
2918
0
    if (ret == NULL) {
2919
0
        xmlCtxtErrMemory(ctxt);
2920
0
        xmlFree(*prefixOut);
2921
0
    }
2922
2923
0
    return(ret);
2924
0
}
2925
2926
/************************************************************************
2927
 *                  *
2928
 *      The parser itself       *
2929
 *  Relates to http://www.w3.org/TR/REC-xml       *
2930
 *                  *
2931
 ************************************************************************/
2932
2933
/************************************************************************
2934
 *                  *
2935
 *  Routines to parse Name, NCName and NmToken      *
2936
 *                  *
2937
 ************************************************************************/
2938
2939
/*
 * The two following functions are related to the change of accepted
 * characters for Name and NmToken in Revision 5 of XML-1.0.
 * They correspond to the modified production [4] and the new production [4a]
 * introduced in that revision. Also note that the macros used for the
 * productions Letter, Digit, CombiningChar and Extender are not needed
 * anymore.
 * We still keep compatibility with the pre-revision-5 parsing semantics if
 * the XML_PARSE_OLD10 option is given to the parser.
 */
2949
2950
static int
xmlIsNameStartCharNew(int c) {
    /*
     * NameStartChar check following productions [4], [4a] and [5] of
     * Update 5 of XML-1.0.
     *
     * Returns 1 if `c` may start a name, 0 otherwise.
     */

    /* accelerators: the characters most often found right after a name */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    /* ASCII part (also rejects negative values) */
    if (c < 0x80) {
        if ((c >= 'a') && (c <= 'z'))
            return(1);
        if ((c >= 'A') && (c <= 'Z'))
            return(1);
        return((c == '_') || (c == ':'));
    }

    /* non-ASCII ranges */
    return(((c >= 0xC0) && (c <= 0xD6)) ||
           ((c >= 0xD8) && (c <= 0xF6)) ||
           ((c >= 0xF8) && (c <= 0x2FF)) ||
           ((c >= 0x370) && (c <= 0x37D)) ||
           ((c >= 0x37F) && (c <= 0x1FFF)) ||
           ((c >= 0x200C) && (c <= 0x200D)) ||
           ((c >= 0x2070) && (c <= 0x218F)) ||
           ((c >= 0x2C00) && (c <= 0x2FEF)) ||
           ((c >= 0x3001) && (c <= 0xD7FF)) ||
           ((c >= 0xF900) && (c <= 0xFDCF)) ||
           ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
           ((c >= 0x10000) && (c <= 0xEFFFF)));
}
2975
2976
static int
xmlIsNameCharNew(int c) {
    /*
     * NameChar check following productions [4], [4a] and [5] of
     * Update 5 of XML-1.0. Entries marked "!start" are allowed inside
     * a name but not as its first character.
     *
     * Returns 1 if `c` may appear in a name, 0 otherwise.
     */

    /* accelerators: the characters most often found right after a name */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    /* ASCII part (also rejects negative values) */
    if (c < 0x80) {
        if ((c >= 'a') && (c <= 'z'))
            return(1);
        if ((c >= 'A') && (c <= 'Z'))
            return(1);
        if ((c >= '0') && (c <= '9')) /* !start */
            return(1);
        if ((c == '-') || (c == '.')) /* !start */
            return(1);
        return((c == '_') || (c == ':'));
    }

    /* non-ASCII code points and ranges */
    return((c == 0xB7) || /* !start */
           ((c >= 0xC0) && (c <= 0xD6)) ||
           ((c >= 0xD8) && (c <= 0xF6)) ||
           ((c >= 0xF8) && (c <= 0x2FF)) ||
           ((c >= 0x300) && (c <= 0x36F)) || /* !start */
           ((c >= 0x370) && (c <= 0x37D)) ||
           ((c >= 0x37F) && (c <= 0x1FFF)) ||
           ((c >= 0x200C) && (c <= 0x200D)) ||
           ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
           ((c >= 0x2070) && (c <= 0x218F)) ||
           ((c >= 0x2C00) && (c <= 0x2FEF)) ||
           ((c >= 0x3001) && (c <= 0xD7FF)) ||
           ((c >= 0xF900) && (c <= 0xFDCF)) ||
           ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
           ((c >= 0x10000) && (c <= 0xEFFFF)));
}
3005
3006
static int
xmlIsNameStartCharOld(int c) {
    /*
     * Name start check based on the IS_LETTER macro, used when
     * the old (pre Update 5) rules are requested.
     *
     * Returns 1 if `c` may start a name, 0 otherwise.
     */

    /* accelerators */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if ((c == '_') || (c == ':'))
        return(1);

    return(IS_LETTER(c) ? 1 : 0);
}
3013
3014
static int
xmlIsNameCharOld(int c) {
    /*
     * Name character check based on the IS_LETTER, IS_DIGIT,
     * IS_COMBINING and IS_EXTENDER macros, used when the old
     * (pre Update 5) rules are requested.
     *
     * Returns 1 if `c` may appear in a name, 0 otherwise.
     */

    /* accelerators */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    /* punctuation allowed in names */
    if ((c == '.') || (c == '-') || (c == '_') || (c == ':'))
        return(1);

    if ((IS_LETTER(c)) || (IS_DIGIT(c)))
        return(1);
    if ((IS_COMBINING(c)) || (IS_EXTENDER(c)))
        return(1);

    return(0);
}
3025
3026
static int
xmlIsNameStartChar(int c, int old10) {
    /* Dispatch to the old or new rule set depending on `old10`. */
    return(old10 ? xmlIsNameStartCharOld(c) : xmlIsNameStartCharNew(c));
}
3033
3034
static int
xmlIsNameChar(int c, int old10) {
    /* Dispatch to the old or new rule set depending on `old10`. */
    return(old10 ? xmlIsNameCharOld(c) : xmlIsNameCharNew(c));
}
3041
3042
/*
3043
 * Scan an XML Name, NCName or Nmtoken.
3044
 *
3045
 * Returns a pointer to the end of the name on success. If the
3046
 * name is invalid, returns `ptr`. If the name is longer than
3047
 * `maxSize` bytes, returns NULL.
3048
 *
3049
 * @param ptr  pointer to the start of the name
3050
 * @param maxSize  maximum size in bytes
3051
 * @param flags  XML_SCAN_* flags
3052
 * @returns a pointer to the end of the name or NULL
3053
 */
3054
const xmlChar *
3055
236k
xmlScanName(const xmlChar *ptr, size_t maxSize, int flags) {
3056
236k
    int stop = flags & XML_SCAN_NC ? ':' : 0;
3057
236k
    int old10 = flags & XML_SCAN_OLD10 ? 1 : 0;
3058
3059
909k
    while (1) {
3060
909k
        int c, len;
3061
3062
909k
        c = *ptr;
3063
909k
        if (c < 0x80) {
3064
903k
            if (c == stop)
3065
33.7k
                break;
3066
869k
            len = 1;
3067
869k
        } else {
3068
6.37k
            len = 4;
3069
6.37k
            c = xmlGetUTF8Char(ptr, &len);
3070
6.37k
            if (c < 0)
3071
523
                break;
3072
6.37k
        }
3073
3074
875k
        if (flags & XML_SCAN_NMTOKEN ?
3075
648k
                !xmlIsNameChar(c, old10) :
3076
875k
                !xmlIsNameStartChar(c, old10))
3077
201k
            break;
3078
3079
673k
        if ((size_t) len > maxSize)
3080
0
            return(NULL);
3081
673k
        ptr += len;
3082
673k
        maxSize -= len;
3083
673k
        flags |= XML_SCAN_NMTOKEN;
3084
673k
    }
3085
3086
236k
    return(ptr);
3087
236k
}
3088
3089
static const xmlChar *
3090
103k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3091
103k
    const xmlChar *ret;
3092
103k
    int len = 0, l;
3093
103k
    int c;
3094
103k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3095
44.0k
                    XML_MAX_TEXT_LENGTH :
3096
103k
                    XML_MAX_NAME_LENGTH;
3097
103k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3098
3099
    /*
3100
     * Handler for more complex cases
3101
     */
3102
103k
    c = xmlCurrentChar(ctxt, &l);
3103
103k
    if (!xmlIsNameStartChar(c, old10))
3104
68.0k
        return(NULL);
3105
35.4k
    len += l;
3106
35.4k
    NEXTL(l);
3107
35.4k
    c = xmlCurrentChar(ctxt, &l);
3108
138k
    while (xmlIsNameChar(c, old10)) {
3109
102k
        if (len <= INT_MAX - l)
3110
102k
            len += l;
3111
102k
        NEXTL(l);
3112
102k
        c = xmlCurrentChar(ctxt, &l);
3113
102k
    }
3114
35.4k
    if (len > maxLength) {
3115
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3116
0
        return(NULL);
3117
0
    }
3118
35.4k
    if (ctxt->input->cur - ctxt->input->base < len) {
3119
        /*
3120
         * There were a couple of bugs where PERefs lead to to a change
3121
         * of the buffer. Check the buffer size to avoid passing an invalid
3122
         * pointer to xmlDictLookup.
3123
         */
3124
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3125
0
                    "unexpected change of input buffer");
3126
0
        return (NULL);
3127
0
    }
3128
35.4k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3129
215
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3130
35.1k
    else
3131
35.1k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3132
35.4k
    if (ret == NULL)
3133
1
        xmlErrMemory(ctxt);
3134
35.4k
    return(ret);
3135
35.4k
}
3136
3137
/**
3138
 * Parse an XML name.
3139
 *
3140
 * @deprecated Internal function, don't use.
3141
 *
3142
 *     [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3143
 *                      CombiningChar | Extender
3144
 *
3145
 *     [5] Name ::= (Letter | '_' | ':') (NameChar)*
3146
 *
3147
 *     [6] Names ::= Name (#x20 Name)*
3148
 *
3149
 * @param ctxt  an XML parser context
3150
 * @returns the Name parsed or NULL
3151
 */
3152
3153
const xmlChar *
3154
940k
xmlParseName(xmlParserCtxt *ctxt) {
3155
940k
    const xmlChar *in;
3156
940k
    const xmlChar *ret;
3157
940k
    size_t count = 0;
3158
940k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3159
339k
                       XML_MAX_TEXT_LENGTH :
3160
940k
                       XML_MAX_NAME_LENGTH;
3161
3162
940k
    GROW;
3163
3164
    /*
3165
     * Accelerator for simple ASCII names
3166
     */
3167
940k
    in = ctxt->input->cur;
3168
940k
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3169
940k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3170
940k
  (*in == '_') || (*in == ':')) {
3171
858k
  in++;
3172
3.22M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3173
3.22M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3174
3.22M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3175
3.22M
         (*in == '_') || (*in == '-') ||
3176
3.22M
         (*in == ':') || (*in == '.'))
3177
2.36M
      in++;
3178
858k
  if ((*in > 0) && (*in < 0x80)) {
3179
836k
      count = in - ctxt->input->cur;
3180
836k
            if (count > maxLength) {
3181
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3182
0
                return(NULL);
3183
0
            }
3184
836k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3185
836k
      ctxt->input->cur = in;
3186
836k
      ctxt->input->col += count;
3187
836k
      if (ret == NULL)
3188
5
          xmlErrMemory(ctxt);
3189
836k
      return(ret);
3190
836k
  }
3191
858k
    }
3192
    /* accelerator for special cases */
3193
103k
    return(xmlParseNameComplex(ctxt));
3194
940k
}
3195
3196
static xmlHashedString
3197
97.2k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3198
97.2k
    xmlHashedString ret;
3199
97.2k
    int len = 0, l;
3200
97.2k
    int c;
3201
97.2k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3202
10.9k
                    XML_MAX_TEXT_LENGTH :
3203
97.2k
                    XML_MAX_NAME_LENGTH;
3204
97.2k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3205
97.2k
    size_t startPosition = 0;
3206
3207
97.2k
    ret.name = NULL;
3208
97.2k
    ret.hashValue = 0;
3209
3210
    /*
3211
     * Handler for more complex cases
3212
     */
3213
97.2k
    startPosition = CUR_PTR - BASE_PTR;
3214
97.2k
    c = xmlCurrentChar(ctxt, &l);
3215
97.2k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3216
97.2k
  (!xmlIsNameStartChar(c, old10) || (c == ':'))) {
3217
77.2k
  return(ret);
3218
77.2k
    }
3219
3220
86.0k
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3221
86.0k
     (xmlIsNameChar(c, old10) && (c != ':'))) {
3222
66.1k
        if (len <= INT_MAX - l)
3223
66.1k
      len += l;
3224
66.1k
  NEXTL(l);
3225
66.1k
  c = xmlCurrentChar(ctxt, &l);
3226
66.1k
    }
3227
19.9k
    if (len > maxLength) {
3228
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3229
0
        return(ret);
3230
0
    }
3231
19.9k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3232
19.9k
    if (ret.name == NULL)
3233
1
        xmlErrMemory(ctxt);
3234
19.9k
    return(ret);
3235
19.9k
}
3236
3237
/**
3238
 * Parse an XML name.
3239
 *
3240
 *     [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3241
 *                          CombiningChar | Extender
3242
 *
3243
 *     [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3244
 *
3245
 * @param ctxt  an XML parser context
3246
 * @returns the Name parsed or NULL
3247
 */
3248
3249
static xmlHashedString
3250
576k
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3251
576k
    const xmlChar *in, *e;
3252
576k
    xmlHashedString ret;
3253
576k
    size_t count = 0;
3254
576k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3255
113k
                       XML_MAX_TEXT_LENGTH :
3256
576k
                       XML_MAX_NAME_LENGTH;
3257
3258
576k
    ret.name = NULL;
3259
3260
    /*
3261
     * Accelerator for simple ASCII names
3262
     */
3263
576k
    in = ctxt->input->cur;
3264
576k
    e = ctxt->input->end;
3265
576k
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3266
576k
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3267
576k
   (*in == '_')) && (in < e)) {
3268
491k
  in++;
3269
1.72M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3270
1.72M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3271
1.72M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3272
1.72M
          (*in == '_') || (*in == '-') ||
3273
1.72M
          (*in == '.')) && (in < e))
3274
1.23M
      in++;
3275
491k
  if (in >= e)
3276
930
      goto complex;
3277
490k
  if ((*in > 0) && (*in < 0x80)) {
3278
478k
      count = in - ctxt->input->cur;
3279
478k
            if (count > maxLength) {
3280
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3281
0
                return(ret);
3282
0
            }
3283
478k
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3284
478k
      ctxt->input->cur = in;
3285
478k
      ctxt->input->col += count;
3286
478k
      if (ret.name == NULL) {
3287
2
          xmlErrMemory(ctxt);
3288
2
      }
3289
478k
      return(ret);
3290
478k
  }
3291
490k
    }
3292
97.2k
complex:
3293
97.2k
    return(xmlParseNCNameComplex(ctxt));
3294
576k
}
3295
3296
/**
3297
 * Parse an XML name and compares for match
3298
 * (specialized for endtag parsing)
3299
 *
3300
 * @param ctxt  an XML parser context
3301
 * @param other  the name to compare with
3302
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
3303
 * and the name for mismatch
3304
 */
3305
3306
static const xmlChar *
3307
144k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3308
144k
    register const xmlChar *cmp = other;
3309
144k
    register const xmlChar *in;
3310
144k
    const xmlChar *ret;
3311
3312
144k
    GROW;
3313
3314
144k
    in = ctxt->input->cur;
3315
687k
    while (*in != 0 && *in == *cmp) {
3316
542k
  ++in;
3317
542k
  ++cmp;
3318
542k
    }
3319
144k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3320
  /* success */
3321
127k
  ctxt->input->col += in - ctxt->input->cur;
3322
127k
  ctxt->input->cur = in;
3323
127k
  return (const xmlChar*) 1;
3324
127k
    }
3325
    /* failure (or end of input buffer), check with full function */
3326
16.8k
    ret = xmlParseName (ctxt);
3327
    /* strings coming from the dictionary direct compare possible */
3328
16.8k
    if (ret == other) {
3329
877
  return (const xmlChar*) 1;
3330
877
    }
3331
15.9k
    return ret;
3332
16.8k
}
3333
3334
/**
3335
 * Parse an XML name.
3336
 *
3337
 * @param ctxt  an XML parser context
3338
 * @param str  a pointer to the string pointer (IN/OUT)
3339
 * @returns the Name parsed or NULL. The `str` pointer
3340
 * is updated to the current location in the string.
3341
 */
3342
3343
static xmlChar *
3344
193k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3345
193k
    xmlChar *ret;
3346
193k
    const xmlChar *cur = *str;
3347
193k
    int flags = 0;
3348
193k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3349
47.2k
                    XML_MAX_TEXT_LENGTH :
3350
193k
                    XML_MAX_NAME_LENGTH;
3351
3352
193k
    if (ctxt->options & XML_PARSE_OLD10)
3353
51.4k
        flags |= XML_SCAN_OLD10;
3354
3355
193k
    cur = xmlScanName(*str, maxLength, flags);
3356
193k
    if (cur == NULL) {
3357
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3358
0
        return(NULL);
3359
0
    }
3360
193k
    if (cur == *str)
3361
1.71k
        return(NULL);
3362
3363
191k
    ret = xmlStrndup(*str, cur - *str);
3364
191k
    if (ret == NULL)
3365
20
        xmlErrMemory(ctxt);
3366
191k
    *str = cur;
3367
191k
    return(ret);
3368
193k
}
3369
3370
/**
3371
 * Parse an XML Nmtoken.
3372
 *
3373
 * @deprecated Internal function, don't use.
3374
 *
3375
 *     [7] Nmtoken ::= (NameChar)+
3376
 *
3377
 *     [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3378
 *
3379
 * @param ctxt  an XML parser context
3380
 * @returns the Nmtoken parsed or NULL
3381
 */
3382
3383
xmlChar *
3384
39.8k
xmlParseNmtoken(xmlParserCtxt *ctxt) {
3385
39.8k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3386
39.8k
    xmlChar *ret;
3387
39.8k
    int len = 0, l;
3388
39.8k
    int c;
3389
39.8k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3390
6.77k
                    XML_MAX_TEXT_LENGTH :
3391
39.8k
                    XML_MAX_NAME_LENGTH;
3392
39.8k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3393
3394
39.8k
    c = xmlCurrentChar(ctxt, &l);
3395
3396
131k
    while (xmlIsNameChar(c, old10)) {
3397
92.3k
  COPY_BUF(buf, len, c);
3398
92.3k
  NEXTL(l);
3399
92.3k
  c = xmlCurrentChar(ctxt, &l);
3400
92.3k
  if (len >= XML_MAX_NAMELEN) {
3401
      /*
3402
       * Okay someone managed to make a huge token, so he's ready to pay
3403
       * for the processing speed.
3404
       */
3405
610
      xmlChar *buffer;
3406
610
      int max = len * 2;
3407
3408
610
      buffer = xmlMalloc(max);
3409
610
      if (buffer == NULL) {
3410
1
          xmlErrMemory(ctxt);
3411
1
    return(NULL);
3412
1
      }
3413
609
      memcpy(buffer, buf, len);
3414
22.2k
      while (xmlIsNameChar(c, old10)) {
3415
21.6k
    if (len + 10 > max) {
3416
425
        xmlChar *tmp;
3417
425
                    int newSize;
3418
3419
425
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3420
425
                    if (newSize < 0) {
3421
0
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3422
0
                        xmlFree(buffer);
3423
0
                        return(NULL);
3424
0
                    }
3425
425
        tmp = xmlRealloc(buffer, newSize);
3426
425
        if (tmp == NULL) {
3427
1
      xmlErrMemory(ctxt);
3428
1
      xmlFree(buffer);
3429
1
      return(NULL);
3430
1
        }
3431
424
        buffer = tmp;
3432
424
                    max = newSize;
3433
424
    }
3434
21.6k
    COPY_BUF(buffer, len, c);
3435
21.6k
    NEXTL(l);
3436
21.6k
    c = xmlCurrentChar(ctxt, &l);
3437
21.6k
      }
3438
608
      buffer[len] = 0;
3439
608
      return(buffer);
3440
609
  }
3441
92.3k
    }
3442
39.2k
    if (len == 0)
3443
19.9k
        return(NULL);
3444
19.2k
    if (len > maxLength) {
3445
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3446
0
        return(NULL);
3447
0
    }
3448
19.2k
    ret = xmlStrndup(buf, len);
3449
19.2k
    if (ret == NULL)
3450
3
        xmlErrMemory(ctxt);
3451
19.2k
    return(ret);
3452
19.2k
}
3453
3454
/**
3455
 * Validate an entity value and expand parameter entities.
3456
 *
3457
 * @param ctxt  parser context
3458
 * @param buf  string buffer
3459
 * @param str  entity value
3460
 * @param length  size of entity value
3461
 * @param depth  nesting depth
3462
 */
3463
static void
3464
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3465
17.9k
                          const xmlChar *str, int length, int depth) {
3466
17.9k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3467
17.9k
    const xmlChar *end, *chunk;
3468
17.9k
    int c, l;
3469
3470
17.9k
    if (str == NULL)
3471
0
        return;
3472
3473
17.9k
    depth += 1;
3474
17.9k
    if (depth > maxDepth) {
3475
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3476
0
                       "Maximum entity nesting depth exceeded");
3477
0
  return;
3478
0
    }
3479
3480
17.9k
    end = str + length;
3481
17.9k
    chunk = str;
3482
3483
905k
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3484
891k
        c = *str;
3485
3486
891k
        if (c >= 0x80) {
3487
35.8k
            l = xmlUTF8MultibyteLen(ctxt, str,
3488
35.8k
                    "invalid character in entity value\n");
3489
35.8k
            if (l == 0) {
3490
35.1k
                if (chunk < str)
3491
5.11k
                    xmlSBufAddString(buf, chunk, str - chunk);
3492
35.1k
                xmlSBufAddReplChar(buf);
3493
35.1k
                str += 1;
3494
35.1k
                chunk = str;
3495
35.1k
            } else {
3496
609
                str += l;
3497
609
            }
3498
856k
        } else if (c == '&') {
3499
84.3k
            if (str[1] == '#') {
3500
11.0k
                if (chunk < str)
3501
5.15k
                    xmlSBufAddString(buf, chunk, str - chunk);
3502
3503
11.0k
                c = xmlParseStringCharRef(ctxt, &str);
3504
11.0k
                if (c == 0)
3505
1.96k
                    return;
3506
3507
9.06k
                xmlSBufAddChar(buf, c);
3508
3509
9.06k
                chunk = str;
3510
73.3k
            } else {
3511
73.3k
                xmlChar *name;
3512
3513
                /*
3514
                 * General entity references are checked for
3515
                 * syntactic validity.
3516
                 */
3517
73.3k
                str++;
3518
73.3k
                name = xmlParseStringName(ctxt, &str);
3519
3520
73.3k
                if ((name == NULL) || (*str++ != ';')) {
3521
731
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3522
731
                            "EntityValue: '&' forbidden except for entities "
3523
731
                            "references\n");
3524
731
                    xmlFree(name);
3525
731
                    return;
3526
731
                }
3527
3528
72.6k
                xmlFree(name);
3529
72.6k
            }
3530
771k
        } else if (c == '%') {
3531
1.85k
            xmlEntityPtr ent;
3532
3533
1.85k
            if (chunk < str)
3534
1.50k
                xmlSBufAddString(buf, chunk, str - chunk);
3535
3536
1.85k
            ent = xmlParseStringPEReference(ctxt, &str);
3537
1.85k
            if (ent == NULL)
3538
1.85k
                return;
3539
3540
0
            if (!PARSER_EXTERNAL(ctxt)) {
3541
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3542
0
                return;
3543
0
            }
3544
3545
0
            if (ent->content == NULL) {
3546
                /*
3547
                 * Note: external parsed entities will not be loaded,
3548
                 * it is not required for a non-validating parser to
3549
                 * complete external PEReferences coming from the
3550
                 * internal subset
3551
                 */
3552
0
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3553
0
                    ((ctxt->replaceEntities) ||
3554
0
                     (ctxt->validate))) {
3555
0
                    xmlLoadEntityContent(ctxt, ent);
3556
0
                } else {
3557
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3558
0
                                  "not validating will not read content for "
3559
0
                                  "PE entity %s\n", ent->name, NULL);
3560
0
                }
3561
0
            }
3562
3563
            /*
3564
             * TODO: Skip if ent->content is still NULL.
3565
             */
3566
3567
0
            if (xmlParserEntityCheck(ctxt, ent->length))
3568
0
                return;
3569
3570
0
            if (ent->flags & XML_ENT_EXPANDING) {
3571
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3572
0
                return;
3573
0
            }
3574
3575
0
            ent->flags |= XML_ENT_EXPANDING;
3576
0
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3577
0
                                      depth);
3578
0
            ent->flags &= ~XML_ENT_EXPANDING;
3579
3580
0
            chunk = str;
3581
769k
        } else {
3582
            /* Normal ASCII char */
3583
769k
            if (!IS_BYTE_CHAR(c)) {
3584
8.16k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3585
8.16k
                        "invalid character in entity value\n");
3586
8.16k
                if (chunk < str)
3587
2.67k
                    xmlSBufAddString(buf, chunk, str - chunk);
3588
8.16k
                xmlSBufAddReplChar(buf);
3589
8.16k
                str += 1;
3590
8.16k
                chunk = str;
3591
761k
            } else {
3592
761k
                str += 1;
3593
761k
            }
3594
769k
        }
3595
891k
    }
3596
3597
13.3k
    if (chunk < str)
3598
9.90k
        xmlSBufAddString(buf, chunk, str - chunk);
3599
13.3k
}
3600
3601
/**
3602
 * Parse a value for ENTITY declarations
3603
 *
3604
 * @deprecated Internal function, don't use.
3605
 *
3606
 *     [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3607
 *                         "'" ([^%&'] | PEReference | Reference)* "'"
3608
 *
3609
 * @param ctxt  an XML parser context
3610
 * @param orig  if non-NULL store a copy of the original entity value
3611
 * @returns the EntityValue parsed with reference substituted or NULL
3612
 */
3613
xmlChar *
3614
18.3k
xmlParseEntityValue(xmlParserCtxt *ctxt, xmlChar **orig) {
3615
18.3k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3616
2.70k
                         XML_MAX_HUGE_LENGTH :
3617
18.3k
                         XML_MAX_TEXT_LENGTH;
3618
18.3k
    xmlSBuf buf;
3619
18.3k
    const xmlChar *start;
3620
18.3k
    int quote, length;
3621
3622
18.3k
    xmlSBufInit(&buf, maxLength);
3623
3624
18.3k
    GROW;
3625
3626
18.3k
    quote = CUR;
3627
18.3k
    if ((quote != '"') && (quote != '\'')) {
3628
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3629
0
  return(NULL);
3630
0
    }
3631
18.3k
    CUR_PTR++;
3632
3633
18.3k
    length = 0;
3634
3635
    /*
3636
     * Copy raw content of the entity into a buffer
3637
     */
3638
1.53M
    while (1) {
3639
1.53M
        int c;
3640
3641
1.53M
        if (PARSER_STOPPED(ctxt))
3642
1
            goto error;
3643
3644
1.53M
        if (CUR_PTR >= ctxt->input->end) {
3645
417
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3646
417
            goto error;
3647
417
        }
3648
3649
1.53M
        c = CUR;
3650
3651
1.53M
        if (c == 0) {
3652
0
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3653
0
                    "invalid character in entity value\n");
3654
0
            goto error;
3655
0
        }
3656
1.53M
        if (c == quote)
3657
17.9k
            break;
3658
1.51M
        NEXTL(1);
3659
1.51M
        length += 1;
3660
3661
        /*
3662
         * TODO: Check growth threshold
3663
         */
3664
1.51M
        if (ctxt->input->end - CUR_PTR < 10)
3665
15.8k
            GROW;
3666
1.51M
    }
3667
3668
17.9k
    start = CUR_PTR - length;
3669
3670
17.9k
    if (orig != NULL) {
3671
17.9k
        *orig = xmlStrndup(start, length);
3672
17.9k
        if (*orig == NULL)
3673
2
            xmlErrMemory(ctxt);
3674
17.9k
    }
3675
3676
17.9k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3677
3678
17.9k
    NEXTL(1);
3679
3680
17.9k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3681
3682
418
error:
3683
418
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3684
418
    return(NULL);
3685
18.3k
}
3686
3687
/**
3688
 * Check an entity reference in an attribute value for validity
3689
 * without expanding it.
3690
 *
3691
 * @param ctxt  parser context
3692
 * @param pent  entity
3693
 * @param depth  nesting depth
3694
 */
3695
static void
3696
3.11k
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3697
3.11k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3698
3.11k
    const xmlChar *str;
3699
3.11k
    unsigned long expandedSize = pent->length;
3700
3.11k
    int c, flags;
3701
3702
3.11k
    depth += 1;
3703
3.11k
    if (depth > maxDepth) {
3704
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3705
0
                       "Maximum entity nesting depth exceeded");
3706
0
  return;
3707
0
    }
3708
3709
3.11k
    if (pent->flags & XML_ENT_EXPANDING) {
3710
483
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3711
483
        return;
3712
483
    }
3713
3714
    /*
3715
     * If we're parsing a default attribute value in DTD content,
3716
     * the entity might reference other entities which weren't
3717
     * defined yet, so the check isn't reliable.
3718
     */
3719
2.62k
    if (ctxt->inSubset == 0)
3720
2.23k
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3721
391
    else
3722
391
        flags = XML_ENT_VALIDATED;
3723
3724
2.62k
    str = pent->content;
3725
2.62k
    if (str == NULL)
3726
0
        goto done;
3727
3728
    /*
3729
     * Note that entity values are already validated. We only check
3730
     * for illegal less-than signs and compute the expanded size
3731
     * of the entity. No special handling for multi-byte characters
3732
     * is needed.
3733
     */
3734
93.8k
    while (!PARSER_STOPPED(ctxt)) {
3735
93.3k
        c = *str;
3736
3737
93.3k
  if (c != '&') {
3738
73.7k
            if (c == 0)
3739
2.14k
                break;
3740
3741
71.6k
            if (c == '<')
3742
286
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3743
286
                        "'<' in entity '%s' is not allowed in attributes "
3744
286
                        "values\n", pent->name);
3745
3746
71.6k
            str += 1;
3747
71.6k
        } else if (str[1] == '#') {
3748
0
            int val;
3749
3750
0
      val = xmlParseStringCharRef(ctxt, &str);
3751
0
      if (val == 0) {
3752
0
                pent->content[0] = 0;
3753
0
                break;
3754
0
            }
3755
19.5k
  } else {
3756
19.5k
            xmlChar *name;
3757
19.5k
            xmlEntityPtr ent;
3758
3759
19.5k
      name = xmlParseStringEntityRef(ctxt, &str);
3760
19.5k
      if (name == NULL) {
3761
2
                pent->content[0] = 0;
3762
2
                break;
3763
2
            }
3764
3765
19.5k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3766
19.5k
            xmlFree(name);
3767
3768
19.5k
            if ((ent != NULL) &&
3769
19.5k
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
3770
483
                if ((ent->flags & flags) != flags) {
3771
483
                    pent->flags |= XML_ENT_EXPANDING;
3772
483
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
3773
483
                    pent->flags &= ~XML_ENT_EXPANDING;
3774
483
                }
3775
3776
483
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
3777
483
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
3778
483
            }
3779
19.5k
        }
3780
93.3k
    }
3781
3782
2.62k
done:
3783
2.62k
    if (ctxt->inSubset == 0)
3784
2.23k
        pent->expandedSize = expandedSize;
3785
3786
2.62k
    pent->flags |= flags;
3787
2.62k
}
3788
3789
/**
3790
 * Expand general entity references in an entity or attribute value.
3791
 * Perform attribute value normalization.
3792
 *
3793
 * @param ctxt  parser context
3794
 * @param buf  string buffer
3795
 * @param str  entity or attribute value
3796
 * @param pent  entity for entity value, NULL for attribute values
3797
 * @param normalize  whether to collapse whitespace
3798
 * @param inSpace  whitespace state
3799
 * @param depth  nesting depth
3800
 * @param check  whether to check for amplification
3801
 * @returns  whether there was a normalization change
3802
 */
3803
static int
3804
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3805
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
3806
19.9k
                          int *inSpace, int depth, int check) {
3807
19.9k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3808
19.9k
    int c, chunkSize;
3809
19.9k
    int normChange = 0;
3810
3811
19.9k
    if (str == NULL)
3812
0
        return(0);
3813
3814
19.9k
    depth += 1;
3815
19.9k
    if (depth > maxDepth) {
3816
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3817
0
                       "Maximum entity nesting depth exceeded");
3818
0
  return(0);
3819
0
    }
3820
3821
19.9k
    if (pent != NULL) {
3822
6.39k
        if (pent->flags & XML_ENT_EXPANDING) {
3823
266
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3824
266
            return(0);
3825
266
        }
3826
3827
6.12k
        if (check) {
3828
3.96k
            if (xmlParserEntityCheck(ctxt, pent->length))
3829
0
                return(0);
3830
3.96k
        }
3831
6.12k
    }
3832
3833
19.6k
    chunkSize = 0;
3834
3835
    /*
3836
     * Note that entity values are already validated. No special
3837
     * handling for multi-byte characters is needed.
3838
     */
3839
1.08M
    while (!PARSER_STOPPED(ctxt)) {
3840
1.08M
        c = *str;
3841
3842
1.08M
  if (c != '&') {
3843
963k
            if (c == 0)
3844
19.1k
                break;
3845
3846
            /*
3847
             * If this function is called without an entity, it is used to
3848
             * expand entities in an attribute content where less-than was
3849
             * already unscaped and is allowed.
3850
             */
3851
943k
            if ((pent != NULL) && (c == '<')) {
3852
222
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3853
222
                        "'<' in entity '%s' is not allowed in attributes "
3854
222
                        "values\n", pent->name);
3855
222
                break;
3856
222
            }
3857
3858
943k
            if (c <= 0x20) {
3859
46.3k
                if ((normalize) && (*inSpace)) {
3860
                    /* Skip char */
3861
0
                    if (chunkSize > 0) {
3862
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3863
0
                        chunkSize = 0;
3864
0
                    }
3865
0
                    normChange = 1;
3866
46.3k
                } else if (c < 0x20) {
3867
7.01k
                    if (chunkSize > 0) {
3868
5.94k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3869
5.94k
                        chunkSize = 0;
3870
5.94k
                    }
3871
3872
7.01k
                    xmlSBufAddCString(buf, " ", 1);
3873
39.2k
                } else {
3874
39.2k
                    chunkSize += 1;
3875
39.2k
                }
3876
3877
46.3k
                *inSpace = 1;
3878
897k
            } else {
3879
897k
                chunkSize += 1;
3880
897k
                *inSpace = 0;
3881
897k
            }
3882
3883
943k
            str += 1;
3884
943k
        } else if (str[1] == '#') {
3885
18.2k
            int val;
3886
3887
18.2k
            if (chunkSize > 0) {
3888
17.6k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3889
17.6k
                chunkSize = 0;
3890
17.6k
            }
3891
3892
18.2k
      val = xmlParseStringCharRef(ctxt, &str);
3893
18.2k
      if (val == 0) {
3894
0
                if (pent != NULL)
3895
0
                    pent->content[0] = 0;
3896
0
                break;
3897
0
            }
3898
3899
18.2k
            if (val == ' ') {
3900
0
                if ((normalize) && (*inSpace))
3901
0
                    normChange = 1;
3902
0
                else
3903
0
                    xmlSBufAddCString(buf, " ", 1);
3904
0
                *inSpace = 1;
3905
18.2k
            } else {
3906
18.2k
                xmlSBufAddChar(buf, val);
3907
18.2k
                *inSpace = 0;
3908
18.2k
            }
3909
98.8k
  } else {
3910
98.8k
            xmlChar *name;
3911
98.8k
            xmlEntityPtr ent;
3912
3913
98.8k
            if (chunkSize > 0) {
3914
61.8k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3915
61.8k
                chunkSize = 0;
3916
61.8k
            }
3917
3918
98.8k
      name = xmlParseStringEntityRef(ctxt, &str);
3919
98.8k
            if (name == NULL) {
3920
8
                if (pent != NULL)
3921
8
                    pent->content[0] = 0;
3922
8
                break;
3923
8
            }
3924
3925
98.8k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3926
98.8k
            xmlFree(name);
3927
3928
98.8k
      if ((ent != NULL) &&
3929
98.8k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3930
96.1k
    if (ent->content == NULL) {
3931
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
3932
0
          "predefined entity has no content\n");
3933
0
                    break;
3934
0
                }
3935
3936
96.1k
                xmlSBufAddString(buf, ent->content, ent->length);
3937
3938
96.1k
                *inSpace = 0;
3939
96.1k
      } else if ((ent != NULL) && (ent->content != NULL)) {
3940
2.42k
                if (pent != NULL)
3941
266
                    pent->flags |= XML_ENT_EXPANDING;
3942
2.42k
    normChange |= xmlExpandEntityInAttValue(ctxt, buf,
3943
2.42k
                        ent->content, ent, normalize, inSpace, depth, check);
3944
2.42k
                if (pent != NULL)
3945
266
                    pent->flags &= ~XML_ENT_EXPANDING;
3946
2.42k
      }
3947
98.8k
        }
3948
1.08M
    }
3949
3950
19.6k
    if (chunkSize > 0)
3951
15.4k
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3952
3953
19.6k
    return(normChange);
3954
19.9k
}
3955
3956
/**
3957
 * Expand general entity references in an entity or attribute value.
3958
 * Perform attribute value normalization.
3959
 *
3960
 * @param ctxt  parser context
3961
 * @param str  entity or attribute value
3962
 * @param normalize  whether to collapse whitespace
3963
 * @returns the expanded attribtue value.
3964
 */
3965
xmlChar *
3966
xmlExpandEntitiesInAttValue(xmlParserCtxt *ctxt, const xmlChar *str,
3967
13.5k
                            int normalize) {
3968
13.5k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3969
4.21k
                         XML_MAX_HUGE_LENGTH :
3970
13.5k
                         XML_MAX_TEXT_LENGTH;
3971
13.5k
    xmlSBuf buf;
3972
13.5k
    int inSpace = 1;
3973
3974
13.5k
    xmlSBufInit(&buf, maxLength);
3975
3976
13.5k
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
3977
13.5k
                              ctxt->inputNr, /* check */ 0);
3978
3979
13.5k
    if ((normalize) && (inSpace) && (buf.size > 0))
3980
0
        buf.size--;
3981
3982
13.5k
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
3983
13.5k
}
3984
3985
/**
3986
 * Parse a value for an attribute.
3987
 *
3988
 * NOTE: if no normalization is needed, the routine will return pointers
3989
 * directly from the data buffer.
3990
 *
3991
 * 3.3.3 Attribute-Value Normalization:
3992
 *
3993
 * Before the value of an attribute is passed to the application or
3994
 * checked for validity, the XML processor must normalize it as follows:
3995
 *
3996
 * - a character reference is processed by appending the referenced
3997
 *   character to the attribute value
3998
 * - an entity reference is processed by recursively processing the
3999
 *   replacement text of the entity
4000
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4001
 *   appending \#x20 to the normalized value, except that only a single
4002
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4003
 *   parsed entity or the literal entity value of an internal parsed entity
4004
 * - other characters are processed by appending them to the normalized value
4005
 *
4006
 * If the declared value is not CDATA, then the XML processor must further
4007
 * process the normalized attribute value by discarding any leading and
4008
 * trailing space (\#x20) characters, and by replacing sequences of space
4009
 * (\#x20) characters by a single space (\#x20) character.
4010
 * All attributes for which no declaration has been read should be treated
4011
 * by a non-validating parser as if declared CDATA.
4012
 *
4013
 * @param ctxt  an XML parser context
4014
 * @param attlen  attribute len result
4015
 * @param outFlags  resulting XML_ATTVAL_* flags
4016
 * @param special  value from attsSpecial
4017
 * @param isNamespace  whether this is a namespace declaration
4018
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4019
 *     caller if it was copied, this can be detected by val[*len] == 0.
4020
 */
4021
static xmlChar *
4022
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *outFlags,
4023
245k
                         int special, int isNamespace) {
4024
245k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4025
89.9k
                         XML_MAX_HUGE_LENGTH :
4026
245k
                         XML_MAX_TEXT_LENGTH;
4027
245k
    xmlSBuf buf;
4028
245k
    xmlChar *ret;
4029
245k
    int c, l, quote, entFlags, chunkSize;
4030
245k
    int inSpace = 1;
4031
245k
    int replaceEntities;
4032
245k
    int normalize = (special & XML_SPECIAL_TYPE_MASK) != 0;
4033
245k
    int attvalFlags = 0;
4034
4035
    /* Always expand namespace URIs */
4036
245k
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4037
4038
245k
    xmlSBufInit(&buf, maxLength);
4039
4040
245k
    GROW;
4041
4042
245k
    quote = CUR;
4043
245k
    if ((quote != '"') && (quote != '\'')) {
4044
5.41k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4045
5.41k
  return(NULL);
4046
5.41k
    }
4047
239k
    NEXTL(1);
4048
4049
239k
    if (ctxt->inSubset == 0)
4050
208k
        entFlags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4051
31.5k
    else
4052
31.5k
        entFlags = XML_ENT_VALIDATED;
4053
4054
239k
    inSpace = 1;
4055
239k
    chunkSize = 0;
4056
4057
3.41M
    while (1) {
4058
3.41M
        if (PARSER_STOPPED(ctxt))
4059
796
            goto error;
4060
4061
3.41M
        if (CUR_PTR >= ctxt->input->end) {
4062
1.30k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4063
1.30k
                           "AttValue: ' expected\n");
4064
1.30k
            goto error;
4065
1.30k
        }
4066
4067
        /*
4068
         * TODO: Check growth threshold
4069
         */
4070
3.41M
        if (ctxt->input->end - CUR_PTR < 10)
4071
57.0k
            GROW;
4072
4073
3.41M
        c = CUR;
4074
4075
3.41M
        if (c >= 0x80) {
4076
192k
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4077
192k
                    "invalid character in attribute value\n");
4078
192k
            if (l == 0) {
4079
178k
                if (chunkSize > 0) {
4080
25.0k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4081
25.0k
                    chunkSize = 0;
4082
25.0k
                }
4083
178k
                xmlSBufAddReplChar(&buf);
4084
178k
                NEXTL(1);
4085
178k
            } else {
4086
14.2k
                chunkSize += l;
4087
14.2k
                NEXTL(l);
4088
14.2k
            }
4089
4090
192k
            inSpace = 0;
4091
3.21M
        } else if (c != '&') {
4092
2.84M
            if (c > 0x20) {
4093
2.66M
                if (c == quote)
4094
237k
                    break;
4095
4096
2.42M
                if (c == '<')
4097
4.11k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4098
4099
2.42M
                chunkSize += 1;
4100
2.42M
                inSpace = 0;
4101
2.42M
            } else if (!IS_BYTE_CHAR(c)) {
4102
57.2k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4103
57.2k
                        "invalid character in attribute value\n");
4104
57.2k
                if (chunkSize > 0) {
4105
4.08k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4106
4.08k
                    chunkSize = 0;
4107
4.08k
                }
4108
57.2k
                xmlSBufAddReplChar(&buf);
4109
57.2k
                inSpace = 0;
4110
127k
            } else {
4111
                /* Whitespace */
4112
127k
                if ((normalize) && (inSpace)) {
4113
                    /* Skip char */
4114
8.72k
                    if (chunkSize > 0) {
4115
3.30k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4116
3.30k
                        chunkSize = 0;
4117
3.30k
                    }
4118
8.72k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4119
118k
                } else if (c < 0x20) {
4120
                    /* Convert to space */
4121
16.5k
                    if (chunkSize > 0) {
4122
13.1k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4123
13.1k
                        chunkSize = 0;
4124
13.1k
                    }
4125
4126
16.5k
                    xmlSBufAddCString(&buf, " ", 1);
4127
102k
                } else {
4128
102k
                    chunkSize += 1;
4129
102k
                }
4130
4131
127k
                inSpace = 1;
4132
4133
127k
                if ((c == 0xD) && (NXT(1) == 0xA))
4134
248
                    CUR_PTR++;
4135
127k
            }
4136
4137
2.60M
            NEXTL(1);
4138
2.60M
        } else if (NXT(1) == '#') {
4139
81.9k
            int val;
4140
4141
81.9k
            if (chunkSize > 0) {
4142
47.5k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4143
47.5k
                chunkSize = 0;
4144
47.5k
            }
4145
4146
81.9k
            val = xmlParseCharRef(ctxt);
4147
81.9k
            if (val == 0)
4148
356
                goto error;
4149
4150
81.5k
            if ((val == '&') && (!replaceEntities)) {
4151
                /*
4152
                 * The reparsing will be done in xmlNodeParseContent()
4153
                 * called from SAX2.c
4154
                 */
4155
5.75k
                xmlSBufAddCString(&buf, "&#38;", 5);
4156
5.75k
                inSpace = 0;
4157
75.8k
            } else if (val == ' ') {
4158
1.14k
                if ((normalize) && (inSpace))
4159
0
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4160
1.14k
                else
4161
1.14k
                    xmlSBufAddCString(&buf, " ", 1);
4162
1.14k
                inSpace = 1;
4163
74.6k
            } else {
4164
74.6k
                xmlSBufAddChar(&buf, val);
4165
74.6k
                inSpace = 0;
4166
74.6k
            }
4167
291k
        } else {
4168
291k
            const xmlChar *name;
4169
291k
            xmlEntityPtr ent;
4170
4171
291k
            if (chunkSize > 0) {
4172
163k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4173
163k
                chunkSize = 0;
4174
163k
            }
4175
4176
291k
            name = xmlParseEntityRefInternal(ctxt);
4177
291k
            if (name == NULL) {
4178
                /*
4179
                 * Probably a literal '&' which wasn't escaped.
4180
                 * TODO: Handle gracefully in recovery mode.
4181
                 */
4182
5.32k
                continue;
4183
5.32k
            }
4184
4185
286k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4186
286k
            if (ent == NULL)
4187
442
                continue;
4188
4189
285k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4190
265k
                if ((ent->content[0] == '&') && (!replaceEntities))
4191
50.2k
                    xmlSBufAddCString(&buf, "&#38;", 5);
4192
215k
                else
4193
215k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4194
265k
                inSpace = 0;
4195
265k
            } else if (replaceEntities) {
4196
3.96k
                if (xmlExpandEntityInAttValue(ctxt, &buf,
4197
3.96k
                        ent->content, ent, normalize, &inSpace, ctxt->inputNr,
4198
3.96k
                        /* check */ 1) > 0)
4199
0
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4200
15.8k
            } else {
4201
15.8k
                if ((ent->flags & entFlags) != entFlags)
4202
2.62k
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4203
4204
15.8k
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4205
0
                    ent->content[0] = 0;
4206
0
                    goto error;
4207
0
                }
4208
4209
                /*
4210
                 * Just output the reference
4211
                 */
4212
15.8k
                xmlSBufAddCString(&buf, "&", 1);
4213
15.8k
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4214
15.8k
                xmlSBufAddCString(&buf, ";", 1);
4215
4216
15.8k
                inSpace = 0;
4217
15.8k
            }
4218
285k
  }
4219
3.41M
    }
4220
4221
237k
    if ((buf.mem == NULL) && (outFlags != NULL)) {
4222
109k
        ret = (xmlChar *) CUR_PTR - chunkSize;
4223
4224
109k
        if (attlen != NULL)
4225
109k
            *attlen = chunkSize;
4226
109k
        if ((normalize) && (inSpace) && (chunkSize > 0)) {
4227
372
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4228
372
            *attlen -= 1;
4229
372
        }
4230
4231
        /* Report potential error */
4232
109k
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4233
127k
    } else {
4234
127k
        if (chunkSize > 0)
4235
75.4k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4236
4237
127k
        if ((normalize) && (inSpace) && (buf.size > 0)) {
4238
297
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4239
297
            buf.size--;
4240
297
        }
4241
4242
127k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4243
127k
        attvalFlags |= XML_ATTVAL_ALLOC;
4244
4245
127k
        if (ret != NULL) {
4246
127k
            if (attlen != NULL)
4247
36.2k
                *attlen = buf.size;
4248
127k
        }
4249
127k
    }
4250
4251
237k
    if (outFlags != NULL)
4252
146k
        *outFlags = attvalFlags;
4253
4254
237k
    NEXTL(1);
4255
4256
237k
    return(ret);
4257
4258
2.45k
error:
4259
2.45k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4260
2.45k
    return(NULL);
4261
239k
}
4262
4263
/**
4264
 * Parse a value for an attribute
4265
 * Note: the parser won't do substitution of entities here, this
4266
 * will be handled later in #xmlStringGetNodeList
4267
 *
4268
 * @deprecated Internal function, don't use.
4269
 *
4270
 *     [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4271
 *                       "'" ([^<&'] | Reference)* "'"
4272
 *
4273
 * 3.3.3 Attribute-Value Normalization:
4274
 *
4275
 * Before the value of an attribute is passed to the application or
4276
 * checked for validity, the XML processor must normalize it as follows:
4277
 *
4278
 * - a character reference is processed by appending the referenced
4279
 *   character to the attribute value
4280
 * - an entity reference is processed by recursively processing the
4281
 *   replacement text of the entity
4282
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4283
 *   appending \#x20 to the normalized value, except that only a single
4284
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4285
 *   parsed entity or the literal entity value of an internal parsed entity
4286
 * - other characters are processed by appending them to the normalized value
4287
 *
4288
 * If the declared value is not CDATA, then the XML processor must further
4289
 * process the normalized attribute value by discarding any leading and
4290
 * trailing space (\#x20) characters, and by replacing sequences of space
4291
 * (\#x20) characters by a single space (\#x20) character.
4292
 * All attributes for which no declaration has been read should be treated
4293
 * by a non-validating parser as if declared CDATA.
4294
 *
4295
 * @param ctxt  an XML parser context
4296
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4297
 * caller.
4298
 */
4299
xmlChar *
4300
96.4k
xmlParseAttValue(xmlParserCtxt *ctxt) {
4301
96.4k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4302
96.4k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4303
96.4k
}
4304
4305
/**
4306
 * Parse an XML Literal
4307
 *
4308
 * @deprecated Internal function, don't use.
4309
 *
4310
 *     [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4311
 *
4312
 * @param ctxt  an XML parser context
4313
 * @returns the SystemLiteral parsed or NULL
4314
 */
4315
4316
xmlChar *
4317
110k
xmlParseSystemLiteral(xmlParserCtxt *ctxt) {
4318
110k
    xmlChar *buf = NULL;
4319
110k
    int len = 0;
4320
110k
    int size = XML_PARSER_BUFFER_SIZE;
4321
110k
    int cur, l;
4322
110k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4323
25.7k
                    XML_MAX_TEXT_LENGTH :
4324
110k
                    XML_MAX_NAME_LENGTH;
4325
110k
    xmlChar stop;
4326
4327
110k
    if (RAW == '"') {
4328
104k
        NEXT;
4329
104k
  stop = '"';
4330
104k
    } else if (RAW == '\'') {
4331
3.25k
        NEXT;
4332
3.25k
  stop = '\'';
4333
3.25k
    } else {
4334
2.07k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4335
2.07k
  return(NULL);
4336
2.07k
    }
4337
4338
108k
    buf = xmlMalloc(size);
4339
108k
    if (buf == NULL) {
4340
11
        xmlErrMemory(ctxt);
4341
11
  return(NULL);
4342
11
    }
4343
108k
    cur = xmlCurrentCharRecover(ctxt, &l);
4344
1.21M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4345
1.10M
  if (len + 5 >= size) {
4346
3.11k
      xmlChar *tmp;
4347
3.11k
            int newSize;
4348
4349
3.11k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4350
3.11k
            if (newSize < 0) {
4351
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4352
0
                xmlFree(buf);
4353
0
                return(NULL);
4354
0
            }
4355
3.11k
      tmp = xmlRealloc(buf, newSize);
4356
3.11k
      if (tmp == NULL) {
4357
2
          xmlFree(buf);
4358
2
    xmlErrMemory(ctxt);
4359
2
    return(NULL);
4360
2
      }
4361
3.11k
      buf = tmp;
4362
3.11k
            size = newSize;
4363
3.11k
  }
4364
1.10M
  COPY_BUF(buf, len, cur);
4365
1.10M
  NEXTL(l);
4366
1.10M
  cur = xmlCurrentCharRecover(ctxt, &l);
4367
1.10M
    }
4368
108k
    buf[len] = 0;
4369
108k
    if (!IS_CHAR(cur)) {
4370
811
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4371
107k
    } else {
4372
107k
  NEXT;
4373
107k
    }
4374
108k
    return(buf);
4375
108k
}
4376
4377
/**
4378
 * Parse an XML public literal
4379
 *
4380
 * @deprecated Internal function, don't use.
4381
 *
4382
 *     [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4383
 *
4384
 * @param ctxt  an XML parser context
4385
 * @returns the PubidLiteral parsed or NULL.
4386
 */
4387
4388
xmlChar *
4389
59.7k
xmlParsePubidLiteral(xmlParserCtxt *ctxt) {
4390
59.7k
    xmlChar *buf = NULL;
4391
59.7k
    int len = 0;
4392
59.7k
    int size = XML_PARSER_BUFFER_SIZE;
4393
59.7k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4394
13.1k
                    XML_MAX_TEXT_LENGTH :
4395
59.7k
                    XML_MAX_NAME_LENGTH;
4396
59.7k
    xmlChar cur;
4397
59.7k
    xmlChar stop;
4398
4399
59.7k
    if (RAW == '"') {
4400
58.3k
        NEXT;
4401
58.3k
  stop = '"';
4402
58.3k
    } else if (RAW == '\'') {
4403
996
        NEXT;
4404
996
  stop = '\'';
4405
996
    } else {
4406
403
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4407
403
  return(NULL);
4408
403
    }
4409
59.2k
    buf = xmlMalloc(size);
4410
59.2k
    if (buf == NULL) {
4411
2
  xmlErrMemory(ctxt);
4412
2
  return(NULL);
4413
2
    }
4414
59.2k
    cur = CUR;
4415
412k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4416
412k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4417
353k
  if (len + 1 >= size) {
4418
228
      xmlChar *tmp;
4419
228
            int newSize;
4420
4421
228
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4422
228
            if (newSize < 0) {
4423
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4424
0
                xmlFree(buf);
4425
0
                return(NULL);
4426
0
            }
4427
228
      tmp = xmlRealloc(buf, newSize);
4428
228
      if (tmp == NULL) {
4429
1
    xmlErrMemory(ctxt);
4430
1
    xmlFree(buf);
4431
1
    return(NULL);
4432
1
      }
4433
227
      buf = tmp;
4434
227
            size = newSize;
4435
227
  }
4436
353k
  buf[len++] = cur;
4437
353k
  NEXT;
4438
353k
  cur = CUR;
4439
353k
    }
4440
59.2k
    buf[len] = 0;
4441
59.2k
    if (cur != stop) {
4442
1.71k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4443
57.5k
    } else {
4444
57.5k
  NEXTL(1);
4445
57.5k
    }
4446
59.2k
    return(buf);
4447
59.2k
}
4448
4449
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4450
4451
/*
 * Lookup table used for the test in the inner loop of the char data
 * testing. An entry holds the byte value itself for the bytes listed
 * below and zero for every other byte: all bytes below 0x20 except
 * 0x9, the characters '&', '<' and ']', and all non-ASCII bytes
 * (0x80 and above).
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, left to implicit zero-initialization */
};
4488
4489
static void
4490
xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size,
4491
472k
              int isBlank) {
4492
472k
    int checkBlanks;
4493
4494
472k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
4495
229k
        return;
4496
4497
243k
    checkBlanks = (!ctxt->keepBlanks) ||
4498
243k
                  (ctxt->sax->ignorableWhitespace != ctxt->sax->characters);
4499
4500
    /*
4501
     * Calling areBlanks with only parts of a text node
4502
     * is fundamentally broken, making the NOBLANKS option
4503
     * essentially unusable.
4504
     */
4505
243k
    if ((checkBlanks) &&
4506
243k
        (areBlanks(ctxt, buf, size, isBlank))) {
4507
15.5k
        if ((ctxt->sax->ignorableWhitespace != NULL) &&
4508
15.5k
            (ctxt->keepBlanks))
4509
0
            ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
4510
227k
    } else {
4511
227k
        if (ctxt->sax->characters != NULL)
4512
227k
            ctxt->sax->characters(ctxt->userData, buf, size);
4513
4514
        /*
4515
         * The old code used to update this value for "complex" data
4516
         * even if checkBlanks was false. This was probably a bug.
4517
         */
4518
227k
        if ((checkBlanks) && (*ctxt->space == -1))
4519
37.1k
            *ctxt->space = -2;
4520
227k
    }
4521
243k
}
4522
4523
/**
4524
 * Parse character data. Always makes progress if the first char isn't
4525
 * '<' or '&'.
4526
 *
4527
 * The right angle bracket (>) may be represented using the string "&gt;",
4528
 * and must, for compatibility, be escaped using "&gt;" or a character
4529
 * reference when it appears in the string "]]>" in content, when that
4530
 * string is not marking the end of a CDATA section.
4531
 *
4532
 *     [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4533
 * @param ctxt  an XML parser context
4534
 * @param partial  buffer may contain partial UTF-8 sequences
4535
 */
4536
static void
4537
1.32M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4538
1.32M
    const xmlChar *in;
4539
1.32M
    int line = ctxt->input->line;
4540
1.32M
    int col = ctxt->input->col;
4541
1.32M
    int ccol;
4542
1.32M
    int terminate = 0;
4543
4544
1.32M
    GROW;
4545
    /*
4546
     * Accelerated common case where input don't need to be
4547
     * modified before passing it to the handler.
4548
     */
4549
1.32M
    in = ctxt->input->cur;
4550
1.32M
    do {
4551
1.42M
get_more_space:
4552
1.59M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4553
1.42M
        if (*in == 0xA) {
4554
95.5k
            do {
4555
95.5k
                ctxt->input->line++; ctxt->input->col = 1;
4556
95.5k
                in++;
4557
95.5k
            } while (*in == 0xA);
4558
94.2k
            goto get_more_space;
4559
94.2k
        }
4560
1.32M
        if (*in == '<') {
4561
129k
            while (in > ctxt->input->cur) {
4562
64.7k
                const xmlChar *tmp = ctxt->input->cur;
4563
64.7k
                size_t nbchar = in - tmp;
4564
4565
64.7k
                if (nbchar > XML_MAX_ITEMS)
4566
0
                    nbchar = XML_MAX_ITEMS;
4567
64.7k
                ctxt->input->cur += nbchar;
4568
4569
64.7k
                xmlCharacters(ctxt, tmp, nbchar, 1);
4570
64.7k
            }
4571
64.7k
            return;
4572
64.7k
        }
4573
4574
1.31M
get_more:
4575
1.31M
        ccol = ctxt->input->col;
4576
3.22M
        while (test_char_data[*in]) {
4577
1.90M
            in++;
4578
1.90M
            ccol++;
4579
1.90M
        }
4580
1.31M
        ctxt->input->col = ccol;
4581
1.31M
        if (*in == 0xA) {
4582
52.0k
            do {
4583
52.0k
                ctxt->input->line++; ctxt->input->col = 1;
4584
52.0k
                in++;
4585
52.0k
            } while (*in == 0xA);
4586
51.1k
            goto get_more;
4587
51.1k
        }
4588
1.26M
        if (*in == ']') {
4589
5.42k
            size_t avail = ctxt->input->end - in;
4590
4591
5.42k
            if (partial && avail < 2) {
4592
0
                terminate = 1;
4593
0
                goto invoke_callback;
4594
0
            }
4595
5.42k
            if (in[1] == ']') {
4596
924
                if (partial && avail < 3) {
4597
0
                    terminate = 1;
4598
0
                    goto invoke_callback;
4599
0
                }
4600
924
                if (in[2] == '>')
4601
265
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4602
924
            }
4603
4604
5.42k
            in++;
4605
5.42k
            ctxt->input->col++;
4606
5.42k
            goto get_more;
4607
5.42k
        }
4608
4609
1.26M
invoke_callback:
4610
1.55M
        while (in > ctxt->input->cur) {
4611
298k
            const xmlChar *tmp = ctxt->input->cur;
4612
298k
            size_t nbchar = in - tmp;
4613
4614
298k
            if (nbchar > XML_MAX_ITEMS)
4615
0
                nbchar = XML_MAX_ITEMS;
4616
298k
            ctxt->input->cur += nbchar;
4617
4618
298k
            xmlCharacters(ctxt, tmp, nbchar, 0);
4619
4620
298k
            line = ctxt->input->line;
4621
298k
            col = ctxt->input->col;
4622
298k
        }
4623
1.26M
        ctxt->input->cur = in;
4624
1.26M
        if (*in == 0xD) {
4625
4.14k
            in++;
4626
4.14k
            if (*in == 0xA) {
4627
594
                ctxt->input->cur = in;
4628
594
                in++;
4629
594
                ctxt->input->line++; ctxt->input->col = 1;
4630
594
                continue; /* while */
4631
594
            }
4632
3.54k
            in--;
4633
3.54k
        }
4634
1.26M
        if (*in == '<') {
4635
152k
            return;
4636
152k
        }
4637
1.10M
        if (*in == '&') {
4638
70.3k
            return;
4639
70.3k
        }
4640
1.03M
        if (terminate) {
4641
0
            return;
4642
0
        }
4643
1.03M
        SHRINK;
4644
1.03M
        GROW;
4645
1.03M
        in = ctxt->input->cur;
4646
1.03M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4647
1.03M
             (*in == 0x09) || (*in == 0x0a));
4648
1.03M
    ctxt->input->line = line;
4649
1.03M
    ctxt->input->col = col;
4650
1.03M
    xmlParseCharDataComplex(ctxt, partial);
4651
1.03M
}
4652
4653
/**
4654
 * Always makes progress if the first char isn't '<' or '&'.
4655
 *
4656
 * parse a CharData section.this is the fallback function
4657
 * of #xmlParseCharData when the parsing requires handling
4658
 * of non-ASCII characters.
4659
 *
4660
 * @param ctxt  an XML parser context
4661
 * @param partial  whether the input can end with truncated UTF-8
4662
 */
4663
static void
4664
1.03M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4665
1.03M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4666
1.03M
    int nbchar = 0;
4667
1.03M
    int cur, l;
4668
4669
1.03M
    cur = xmlCurrentCharRecover(ctxt, &l);
4670
1.45M
    while ((cur != '<') && /* checked */
4671
1.45M
           (cur != '&') &&
4672
1.45M
     (IS_CHAR(cur))) {
4673
417k
        if (cur == ']') {
4674
1.98k
            size_t avail = ctxt->input->end - ctxt->input->cur;
4675
4676
1.98k
            if (partial && avail < 2)
4677
0
                break;
4678
1.98k
            if (NXT(1) == ']') {
4679
748
                if (partial && avail < 3)
4680
0
                    break;
4681
748
                if (NXT(2) == '>')
4682
474
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4683
748
            }
4684
1.98k
        }
4685
4686
417k
  COPY_BUF(buf, nbchar, cur);
4687
  /* move current position before possible calling of ctxt->sax->characters */
4688
417k
  NEXTL(l);
4689
417k
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4690
309
      buf[nbchar] = 0;
4691
4692
309
            xmlCharacters(ctxt, buf, nbchar, 0);
4693
309
      nbchar = 0;
4694
309
            SHRINK;
4695
309
  }
4696
417k
  cur = xmlCurrentCharRecover(ctxt, &l);
4697
417k
    }
4698
1.03M
    if (nbchar != 0) {
4699
109k
        buf[nbchar] = 0;
4700
4701
109k
        xmlCharacters(ctxt, buf, nbchar, 0);
4702
109k
    }
4703
    /*
4704
     * cur == 0 can mean
4705
     *
4706
     * - End of buffer.
4707
     * - An actual 0 character.
4708
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4709
     */
4710
1.03M
    if (ctxt->input->cur < ctxt->input->end) {
4711
1.03M
        if ((cur == 0) && (CUR != 0)) {
4712
526
            if (partial == 0) {
4713
526
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4714
526
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4715
526
                NEXTL(1);
4716
526
            }
4717
1.03M
        } else if ((cur != '<') && (cur != '&') && (cur != ']')) {
4718
            /* Generate the error and skip the offending character */
4719
1.01M
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4720
1.01M
                              "PCDATA invalid Char value %d\n", cur);
4721
1.01M
            NEXTL(l);
4722
1.01M
        }
4723
1.03M
    }
4724
1.03M
}
4725
4726
/**
4727
 * @deprecated Internal function, don't use.
4728
 * @param ctxt  an XML parser context
4729
 * @param cdata  unused
4730
 */
4731
void
4732
0
xmlParseCharData(xmlParserCtxt *ctxt, ATTRIBUTE_UNUSED int cdata) {
4733
0
    xmlParseCharDataInternal(ctxt, 0);
4734
0
}
4735
4736
/**
4737
 * Parse an External ID or a Public ID
4738
 *
4739
 * @deprecated Internal function, don't use.
4740
 *
4741
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4742
 * `'PUBLIC' S PubidLiteral S SystemLiteral`
4743
 *
4744
 *     [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4745
 *                       | 'PUBLIC' S PubidLiteral S SystemLiteral
4746
 *
4747
 *     [83] PublicID ::= 'PUBLIC' S PubidLiteral
4748
 *
4749
 * @param ctxt  an XML parser context
4750
 * @param publicId  a xmlChar** receiving PubidLiteral
4751
 * @param strict  indicate whether we should restrict parsing to only
4752
 *          production [75], see NOTE below
4753
 * @returns the function returns SystemLiteral and in the second
4754
 *                case publicID receives PubidLiteral, is strict is off
4755
 *                it is possible to return NULL and have publicID set.
4756
 */
4757
4758
xmlChar *
4759
118k
xmlParseExternalID(xmlParserCtxt *ctxt, xmlChar **publicId, int strict) {
4760
118k
    xmlChar *URI = NULL;
4761
4762
118k
    *publicId = NULL;
4763
118k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4764
52.2k
        SKIP(6);
4765
52.2k
  if (SKIP_BLANKS == 0) {
4766
194
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4767
194
                     "Space required after 'SYSTEM'\n");
4768
194
  }
4769
52.2k
  URI = xmlParseSystemLiteral(ctxt);
4770
52.2k
  if (URI == NULL) {
4771
201
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4772
201
        }
4773
66.1k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4774
59.7k
        SKIP(6);
4775
59.7k
  if (SKIP_BLANKS == 0) {
4776
274
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4777
274
        "Space required after 'PUBLIC'\n");
4778
274
  }
4779
59.7k
  *publicId = xmlParsePubidLiteral(ctxt);
4780
59.7k
  if (*publicId == NULL) {
4781
406
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4782
406
  }
4783
59.7k
  if (strict) {
4784
      /*
4785
       * We don't handle [83] so "S SystemLiteral" is required.
4786
       */
4787
56.2k
      if (SKIP_BLANKS == 0) {
4788
1.88k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4789
1.88k
      "Space required after the Public Identifier\n");
4790
1.88k
      }
4791
56.2k
  } else {
4792
      /*
4793
       * We handle [83] so we return immediately, if
4794
       * "S SystemLiteral" is not detected. We skip blanks if no
4795
             * system literal was found, but this is harmless since we must
4796
             * be at the end of a NotationDecl.
4797
       */
4798
3.50k
      if (SKIP_BLANKS == 0) return(NULL);
4799
3.17k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4800
3.17k
  }
4801
57.9k
  URI = xmlParseSystemLiteral(ctxt);
4802
57.9k
  if (URI == NULL) {
4803
1.88k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4804
1.88k
        }
4805
57.9k
    }
4806
116k
    return(URI);
4807
118k
}
4808
4809
/**
4810
 * Skip an XML (SGML) comment <!-- .... -->
4811
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4812
 *  must not occur within comments. "
4813
 * This is the slow routine in case the accelerator for ascii didn't work
4814
 *
4815
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4816
 * @param ctxt  an XML parser context
4817
 * @param buf  the already parsed part of the buffer
4818
 * @param len  number of bytes in the buffer
4819
 * @param size  allocated size of the buffer
4820
 */
4821
static void
4822
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4823
6.05k
                       size_t len, size_t size) {
4824
6.05k
    int q, ql;
4825
6.05k
    int r, rl;
4826
6.05k
    int cur, l;
4827
6.05k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4828
309
                    XML_MAX_HUGE_LENGTH :
4829
6.05k
                    XML_MAX_TEXT_LENGTH;
4830
4831
6.05k
    if (buf == NULL) {
4832
3.09k
        len = 0;
4833
3.09k
  size = XML_PARSER_BUFFER_SIZE;
4834
3.09k
  buf = xmlMalloc(size);
4835
3.09k
  if (buf == NULL) {
4836
2
      xmlErrMemory(ctxt);
4837
2
      return;
4838
2
  }
4839
3.09k
    }
4840
6.04k
    q = xmlCurrentCharRecover(ctxt, &ql);
4841
6.04k
    if (q == 0)
4842
254
        goto not_terminated;
4843
5.79k
    if (!IS_CHAR(q)) {
4844
1.17k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4845
1.17k
                          "xmlParseComment: invalid xmlChar value %d\n",
4846
1.17k
                    q);
4847
1.17k
  xmlFree (buf);
4848
1.17k
  return;
4849
1.17k
    }
4850
4.61k
    NEXTL(ql);
4851
4.61k
    r = xmlCurrentCharRecover(ctxt, &rl);
4852
4.61k
    if (r == 0)
4853
194
        goto not_terminated;
4854
4.42k
    if (!IS_CHAR(r)) {
4855
535
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4856
535
                          "xmlParseComment: invalid xmlChar value %d\n",
4857
535
                    r);
4858
535
  xmlFree (buf);
4859
535
  return;
4860
535
    }
4861
3.88k
    NEXTL(rl);
4862
3.88k
    cur = xmlCurrentCharRecover(ctxt, &l);
4863
3.88k
    if (cur == 0)
4864
194
        goto not_terminated;
4865
65.9k
    while (IS_CHAR(cur) && /* checked */
4866
65.9k
           ((cur != '>') ||
4867
64.1k
      (r != '-') || (q != '-'))) {
4868
62.2k
  if ((r == '-') && (q == '-')) {
4869
1.27k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4870
1.27k
  }
4871
62.2k
  if (len + 5 >= size) {
4872
509
      xmlChar *tmp;
4873
509
            int newSize;
4874
4875
509
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4876
509
            if (newSize < 0) {
4877
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4878
0
                             "Comment too big found", NULL);
4879
0
                xmlFree (buf);
4880
0
                return;
4881
0
            }
4882
509
      tmp = xmlRealloc(buf, newSize);
4883
509
      if (tmp == NULL) {
4884
1
    xmlErrMemory(ctxt);
4885
1
    xmlFree(buf);
4886
1
    return;
4887
1
      }
4888
508
      buf = tmp;
4889
508
            size = newSize;
4890
508
  }
4891
62.2k
  COPY_BUF(buf, len, q);
4892
4893
62.2k
  q = r;
4894
62.2k
  ql = rl;
4895
62.2k
  r = cur;
4896
62.2k
  rl = l;
4897
4898
62.2k
  NEXTL(l);
4899
62.2k
  cur = xmlCurrentCharRecover(ctxt, &l);
4900
4901
62.2k
    }
4902
3.69k
    buf[len] = 0;
4903
3.69k
    if (cur == 0) {
4904
331
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4905
331
                       "Comment not terminated \n<!--%.50s\n", buf);
4906
3.36k
    } else if (!IS_CHAR(cur)) {
4907
1.47k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4908
1.47k
                          "xmlParseComment: invalid xmlChar value %d\n",
4909
1.47k
                    cur);
4910
1.89k
    } else {
4911
1.89k
        NEXT;
4912
1.89k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4913
1.89k
      (!ctxt->disableSAX))
4914
812
      ctxt->sax->comment(ctxt->userData, buf);
4915
1.89k
    }
4916
3.69k
    xmlFree(buf);
4917
3.69k
    return;
4918
642
not_terminated:
4919
642
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4920
642
       "Comment not terminated\n", NULL);
4921
642
    xmlFree(buf);
4922
642
}
4923
4924
/**
4925
 * Parse an XML (SGML) comment. Always consumes '<!'.
4926
 *
4927
 * @deprecated Internal function, don't use.
4928
 *
4929
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4930
 *  must not occur within comments. "
4931
 *
4932
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4933
 * @param ctxt  an XML parser context
4934
 */
4935
void
4936
9.80k
xmlParseComment(xmlParserCtxt *ctxt) {
4937
9.80k
    xmlChar *buf = NULL;
4938
9.80k
    size_t size = XML_PARSER_BUFFER_SIZE;
4939
9.80k
    size_t len = 0;
4940
9.80k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4941
2.37k
                       XML_MAX_HUGE_LENGTH :
4942
9.80k
                       XML_MAX_TEXT_LENGTH;
4943
9.80k
    const xmlChar *in;
4944
9.80k
    size_t nbchar = 0;
4945
9.80k
    int ccol;
4946
4947
    /*
4948
     * Check that there is a comment right here.
4949
     */
4950
9.80k
    if ((RAW != '<') || (NXT(1) != '!'))
4951
0
        return;
4952
9.80k
    SKIP(2);
4953
9.80k
    if ((RAW != '-') || (NXT(1) != '-'))
4954
195
        return;
4955
9.61k
    SKIP(2);
4956
9.61k
    GROW;
4957
4958
    /*
4959
     * Accelerated common case where input don't need to be
4960
     * modified before passing it to the handler.
4961
     */
4962
9.61k
    in = ctxt->input->cur;
4963
9.61k
    do {
4964
9.61k
  if (*in == 0xA) {
4965
611
      do {
4966
611
    ctxt->input->line++; ctxt->input->col = 1;
4967
611
    in++;
4968
611
      } while (*in == 0xA);
4969
414
  }
4970
13.3k
get_more:
4971
13.3k
        ccol = ctxt->input->col;
4972
67.7k
  while (((*in > '-') && (*in <= 0x7F)) ||
4973
67.7k
         ((*in >= 0x20) && (*in < '-')) ||
4974
67.7k
         (*in == 0x09)) {
4975
54.3k
        in++;
4976
54.3k
        ccol++;
4977
54.3k
  }
4978
13.3k
  ctxt->input->col = ccol;
4979
13.3k
  if (*in == 0xA) {
4980
1.87k
      do {
4981
1.87k
    ctxt->input->line++; ctxt->input->col = 1;
4982
1.87k
    in++;
4983
1.87k
      } while (*in == 0xA);
4984
1.02k
      goto get_more;
4985
1.02k
  }
4986
12.3k
  nbchar = in - ctxt->input->cur;
4987
  /*
4988
   * save current set of data
4989
   */
4990
12.3k
  if (nbchar > 0) {
4991
6.42k
            if (nbchar > maxLength - len) {
4992
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4993
0
                                  "Comment too big found", NULL);
4994
0
                xmlFree(buf);
4995
0
                return;
4996
0
            }
4997
6.42k
            if (buf == NULL) {
4998
3.94k
                if ((*in == '-') && (in[1] == '-'))
4999
812
                    size = nbchar + 1;
5000
3.12k
                else
5001
3.12k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5002
3.94k
                buf = xmlMalloc(size);
5003
3.94k
                if (buf == NULL) {
5004
1
                    xmlErrMemory(ctxt);
5005
1
                    return;
5006
1
                }
5007
3.93k
                len = 0;
5008
3.93k
            } else if (len + nbchar + 1 >= size) {
5009
393
                xmlChar *new_buf;
5010
393
                size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5011
393
                new_buf = xmlRealloc(buf, size);
5012
393
                if (new_buf == NULL) {
5013
1
                    xmlErrMemory(ctxt);
5014
1
                    xmlFree(buf);
5015
1
                    return;
5016
1
                }
5017
392
                buf = new_buf;
5018
392
            }
5019
6.42k
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5020
6.42k
            len += nbchar;
5021
6.42k
            buf[len] = 0;
5022
6.42k
  }
5023
12.3k
  ctxt->input->cur = in;
5024
12.3k
  if (*in == 0xA) {
5025
0
      in++;
5026
0
      ctxt->input->line++; ctxt->input->col = 1;
5027
0
  }
5028
12.3k
  if (*in == 0xD) {
5029
552
      in++;
5030
552
      if (*in == 0xA) {
5031
206
    ctxt->input->cur = in;
5032
206
    in++;
5033
206
    ctxt->input->line++; ctxt->input->col = 1;
5034
206
    goto get_more;
5035
206
      }
5036
346
      in--;
5037
346
  }
5038
12.1k
  SHRINK;
5039
12.1k
  GROW;
5040
12.1k
  in = ctxt->input->cur;
5041
12.1k
  if (*in == '-') {
5042
6.06k
      if (in[1] == '-') {
5043
4.57k
          if (in[2] == '>') {
5044
3.55k
        SKIP(3);
5045
3.55k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5046
3.55k
            (!ctxt->disableSAX)) {
5047
3.22k
      if (buf != NULL)
5048
699
          ctxt->sax->comment(ctxt->userData, buf);
5049
2.52k
      else
5050
2.52k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5051
3.22k
        }
5052
3.55k
        if (buf != NULL)
5053
984
            xmlFree(buf);
5054
3.55k
        return;
5055
3.55k
    }
5056
1.01k
    if (buf != NULL) {
5057
812
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5058
812
                          "Double hyphen within comment: "
5059
812
                                      "<!--%.50s\n",
5060
812
              buf);
5061
812
    } else
5062
201
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5063
201
                          "Double hyphen within comment\n", NULL);
5064
1.01k
    in++;
5065
1.01k
    ctxt->input->col++;
5066
1.01k
      }
5067
2.50k
      in++;
5068
2.50k
      ctxt->input->col++;
5069
2.50k
      goto get_more;
5070
6.06k
  }
5071
12.1k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5072
6.05k
    xmlParseCommentComplex(ctxt, buf, len, size);
5073
6.05k
}
5074
5075
5076
/**
5077
 * Parse the name of a PI
5078
 *
5079
 * @deprecated Internal function, don't use.
5080
 *
5081
 *     [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5082
 *
5083
 * @param ctxt  an XML parser context
5084
 * @returns the PITarget name or NULL
5085
 */
5086
5087
const xmlChar *
5088
59.6k
xmlParsePITarget(xmlParserCtxt *ctxt) {
5089
59.6k
    const xmlChar *name;
5090
5091
59.6k
    name = xmlParseName(ctxt);
5092
59.6k
    if ((name != NULL) &&
5093
59.6k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5094
59.6k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5095
59.6k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5096
36.5k
  int i;
5097
36.5k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5098
36.5k
      (name[2] == 'l') && (name[3] == 0)) {
5099
35.5k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5100
35.5k
     "XML declaration allowed only at the start of the document\n");
5101
35.5k
      return(name);
5102
35.5k
  } else if (name[3] == 0) {
5103
605
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5104
605
      return(name);
5105
605
  }
5106
1.10k
  for (i = 0;;i++) {
5107
1.10k
      if (xmlW3CPIs[i] == NULL) break;
5108
866
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5109
194
          return(name);
5110
866
  }
5111
239
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5112
239
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5113
239
          NULL, NULL);
5114
239
    }
5115
23.3k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5116
475
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5117
475
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5118
475
    }
5119
23.3k
    return(name);
5120
59.6k
}
5121
5122
#ifdef LIBXML_CATALOG_ENABLED
5123
/**
5124
 * Parse an XML Catalog Processing Instruction.
5125
 *
5126
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5127
 *
5128
 * Occurs only if allowed by the user and if happening in the Misc
5129
 * part of the document before any doctype information
5130
 * This will add the given catalog to the parsing context in order
5131
 * to be used if there is a resolution need further down in the document
5132
 *
5133
 * @param ctxt  an XML parser context
5134
 * @param catalog  the PI value string
5135
 */
5136
5137
static void
5138
0
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5139
0
    xmlChar *URL = NULL;
5140
0
    const xmlChar *tmp, *base;
5141
0
    xmlChar marker;
5142
5143
0
    tmp = catalog;
5144
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5145
0
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5146
0
  goto error;
5147
0
    tmp += 7;
5148
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5149
0
    if (*tmp != '=') {
5150
0
  return;
5151
0
    }
5152
0
    tmp++;
5153
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5154
0
    marker = *tmp;
5155
0
    if ((marker != '\'') && (marker != '"'))
5156
0
  goto error;
5157
0
    tmp++;
5158
0
    base = tmp;
5159
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5160
0
    if (*tmp == 0)
5161
0
  goto error;
5162
0
    URL = xmlStrndup(base, tmp - base);
5163
0
    tmp++;
5164
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5165
0
    if (*tmp != 0)
5166
0
  goto error;
5167
5168
0
    if (URL != NULL) {
5169
        /*
5170
         * Unfortunately, the catalog API doesn't report OOM errors.
5171
         * xmlGetLastError isn't very helpful since we don't know
5172
         * where the last error came from. We'd have to reset it
5173
         * before this call and restore it afterwards.
5174
         */
5175
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5176
0
  xmlFree(URL);
5177
0
    }
5178
0
    return;
5179
5180
0
error:
5181
0
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5182
0
            "Catalog PI syntax error: %s\n",
5183
0
      catalog, NULL);
5184
0
    if (URL != NULL)
5185
0
  xmlFree(URL);
5186
0
}
5187
#endif
5188
5189
/**
5190
 * Parse an XML Processing Instruction.
5191
 *
5192
 * @deprecated Internal function, don't use.
5193
 *
5194
 *     [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5195
 *
5196
 * The processing is transferred to SAX once parsed.
5197
 *
5198
 * @param ctxt  an XML parser context
5199
 */
5200
5201
void
5202
59.6k
xmlParsePI(xmlParserCtxt *ctxt) {
5203
59.6k
    xmlChar *buf = NULL;
5204
59.6k
    size_t len = 0;
5205
59.6k
    size_t size = XML_PARSER_BUFFER_SIZE;
5206
59.6k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5207
10.5k
                       XML_MAX_HUGE_LENGTH :
5208
59.6k
                       XML_MAX_TEXT_LENGTH;
5209
59.6k
    int cur, l;
5210
59.6k
    const xmlChar *target;
5211
5212
59.6k
    if ((RAW == '<') && (NXT(1) == '?')) {
5213
  /*
5214
   * this is a Processing Instruction.
5215
   */
5216
59.6k
  SKIP(2);
5217
5218
  /*
5219
   * Parse the target name and check for special support like
5220
   * namespace.
5221
   */
5222
59.6k
        target = xmlParsePITarget(ctxt);
5223
59.6k
  if (target != NULL) {
5224
42.6k
      if ((RAW == '?') && (NXT(1) == '>')) {
5225
604
    SKIP(2);
5226
5227
    /*
5228
     * SAX: PI detected.
5229
     */
5230
604
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5231
604
        (ctxt->sax->processingInstruction != NULL))
5232
401
        ctxt->sax->processingInstruction(ctxt->userData,
5233
401
                                         target, NULL);
5234
604
    return;
5235
604
      }
5236
42.0k
      buf = xmlMalloc(size);
5237
42.0k
      if (buf == NULL) {
5238
7
    xmlErrMemory(ctxt);
5239
7
    return;
5240
7
      }
5241
42.0k
      if (SKIP_BLANKS == 0) {
5242
5.65k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5243
5.65k
        "ParsePI: PI %s space expected\n", target);
5244
5.65k
      }
5245
42.0k
      cur = xmlCurrentCharRecover(ctxt, &l);
5246
1.12M
      while (IS_CHAR(cur) && /* checked */
5247
1.12M
       ((cur != '?') || (NXT(1) != '>'))) {
5248
1.08M
    if (len + 5 >= size) {
5249
344
        xmlChar *tmp;
5250
344
                    int newSize;
5251
5252
344
                    newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5253
344
                    if (newSize < 0) {
5254
0
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5255
0
                                          "PI %s too big found", target);
5256
0
                        xmlFree(buf);
5257
0
                        return;
5258
0
                    }
5259
344
        tmp = xmlRealloc(buf, newSize);
5260
344
        if (tmp == NULL) {
5261
1
      xmlErrMemory(ctxt);
5262
1
      xmlFree(buf);
5263
1
      return;
5264
1
        }
5265
343
        buf = tmp;
5266
343
                    size = newSize;
5267
343
    }
5268
1.08M
    COPY_BUF(buf, len, cur);
5269
1.08M
    NEXTL(l);
5270
1.08M
    cur = xmlCurrentCharRecover(ctxt, &l);
5271
1.08M
      }
5272
42.0k
      buf[len] = 0;
5273
42.0k
      if (cur != '?') {
5274
5.06k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5275
5.06k
          "ParsePI: PI %s never end ...\n", target);
5276
36.9k
      } else {
5277
36.9k
    SKIP(2);
5278
5279
36.9k
#ifdef LIBXML_CATALOG_ENABLED
5280
36.9k
    if ((ctxt->inSubset == 0) &&
5281
36.9k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5282
391
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5283
5284
391
        if ((ctxt->options & XML_PARSE_CATALOG_PI) &&
5285
391
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5286
196
       (allow == XML_CATA_ALLOW_ALL)))
5287
0
      xmlParseCatalogPI(ctxt, buf);
5288
391
    }
5289
36.9k
#endif
5290
5291
    /*
5292
     * SAX: PI detected.
5293
     */
5294
36.9k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5295
36.9k
        (ctxt->sax->processingInstruction != NULL))
5296
23.5k
        ctxt->sax->processingInstruction(ctxt->userData,
5297
23.5k
                                         target, buf);
5298
36.9k
      }
5299
42.0k
      xmlFree(buf);
5300
42.0k
  } else {
5301
17.0k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5302
17.0k
  }
5303
59.6k
    }
5304
59.6k
}
5305
5306
/**
5307
 * Parse a notation declaration. Always consumes '<!'.
5308
 *
5309
 * @deprecated Internal function, don't use.
5310
 *
5311
 *     [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID)
5312
 *                           S? '>'
5313
 *
5314
 * Hence there is actually 3 choices:
5315
 *
5316
 *     'PUBLIC' S PubidLiteral
5317
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5318
 *     'SYSTEM' S SystemLiteral
5319
 *
5320
 * See the NOTE on #xmlParseExternalID.
5321
 *
5322
 * @param ctxt  an XML parser context
5323
 */
5324
5325
void
5326
7.61k
xmlParseNotationDecl(xmlParserCtxt *ctxt) {
5327
7.61k
    const xmlChar *name;
5328
7.61k
    xmlChar *Pubid;
5329
7.61k
    xmlChar *Systemid;
5330
5331
7.61k
    if ((CUR != '<') || (NXT(1) != '!'))
5332
0
        return;
5333
7.61k
    SKIP(2);
5334
5335
7.61k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5336
6.25k
#ifdef LIBXML_VALID_ENABLED
5337
6.25k
  int oldInputNr = ctxt->inputNr;
5338
6.25k
#endif
5339
5340
6.25k
  SKIP(8);
5341
6.25k
  if (SKIP_BLANKS_PE == 0) {
5342
198
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5343
198
         "Space required after '<!NOTATION'\n");
5344
198
      return;
5345
198
  }
5346
5347
6.06k
        name = xmlParseName(ctxt);
5348
6.06k
  if (name == NULL) {
5349
960
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5350
960
      return;
5351
960
  }
5352
5.10k
  if (xmlStrchr(name, ':') != NULL) {
5353
301
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5354
301
         "colons are forbidden from notation names '%s'\n",
5355
301
         name, NULL, NULL);
5356
301
  }
5357
5.10k
  if (SKIP_BLANKS_PE == 0) {
5358
200
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5359
200
         "Space required after the NOTATION name'\n");
5360
200
      return;
5361
200
  }
5362
5363
  /*
5364
   * Parse the IDs.
5365
   */
5366
4.90k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5367
4.90k
  SKIP_BLANKS_PE;
5368
5369
4.90k
  if (RAW == '>') {
5370
4.21k
#ifdef LIBXML_VALID_ENABLED
5371
4.21k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5372
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5373
0
                           "Notation declaration doesn't start and stop"
5374
0
                                 " in the same entity\n",
5375
0
                                 NULL, NULL);
5376
0
      }
5377
4.21k
#endif
5378
4.21k
      NEXT;
5379
4.21k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5380
4.21k
    (ctxt->sax->notationDecl != NULL))
5381
3.97k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5382
4.21k
  } else {
5383
685
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5384
685
  }
5385
4.90k
  if (Systemid != NULL) xmlFree(Systemid);
5386
4.90k
  if (Pubid != NULL) xmlFree(Pubid);
5387
4.90k
    }
5388
7.61k
}
5389
5390
/**
5391
 * Parse an entity declaration. Always consumes '<!'.
5392
 *
5393
 * @deprecated Internal function, don't use.
5394
 *
5395
 *     [70] EntityDecl ::= GEDecl | PEDecl
5396
 *
5397
 *     [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5398
 *
5399
 *     [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5400
 *
5401
 *     [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5402
 *
5403
 *     [74] PEDef ::= EntityValue | ExternalID
5404
 *
5405
 *     [76] NDataDecl ::= S 'NDATA' S Name
5406
 *
5407
 * [ VC: Notation Declared ]
5408
 * The Name must match the declared name of a notation.
5409
 *
5410
 * @param ctxt  an XML parser context
5411
 */
5412
5413
void
5414
30.9k
xmlParseEntityDecl(xmlParserCtxt *ctxt) {
5415
30.9k
    const xmlChar *name = NULL;
5416
30.9k
    xmlChar *value = NULL;
5417
30.9k
    xmlChar *URI = NULL, *literal = NULL;
5418
30.9k
    const xmlChar *ndata = NULL;
5419
30.9k
    int isParameter = 0;
5420
30.9k
    xmlChar *orig = NULL;
5421
5422
30.9k
    if ((CUR != '<') || (NXT(1) != '!'))
5423
0
        return;
5424
30.9k
    SKIP(2);
5425
5426
    /* GROW; done in the caller */
5427
30.9k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5428
30.1k
#ifdef LIBXML_VALID_ENABLED
5429
30.1k
  int oldInputNr = ctxt->inputNr;
5430
30.1k
#endif
5431
5432
30.1k
  SKIP(6);
5433
30.1k
  if (SKIP_BLANKS_PE == 0) {
5434
196
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5435
196
         "Space required after '<!ENTITY'\n");
5436
196
  }
5437
5438
30.1k
  if (RAW == '%') {
5439
5.08k
      NEXT;
5440
5.08k
      if (SKIP_BLANKS_PE == 0) {
5441
327
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5442
327
             "Space required after '%%'\n");
5443
327
      }
5444
5.08k
      isParameter = 1;
5445
5.08k
  }
5446
5447
30.1k
        name = xmlParseName(ctxt);
5448
30.1k
  if (name == NULL) {
5449
3.79k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5450
3.79k
                     "xmlParseEntityDecl: no name\n");
5451
3.79k
            return;
5452
3.79k
  }
5453
26.3k
  if (xmlStrchr(name, ':') != NULL) {
5454
678
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5455
678
         "colons are forbidden from entities names '%s'\n",
5456
678
         name, NULL, NULL);
5457
678
  }
5458
26.3k
  if (SKIP_BLANKS_PE == 0) {
5459
861
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5460
861
         "Space required after the entity name\n");
5461
861
  }
5462
5463
  /*
5464
   * handle the various case of definitions...
5465
   */
5466
26.3k
  if (isParameter) {
5467
4.45k
      if ((RAW == '"') || (RAW == '\'')) {
5468
2.99k
          value = xmlParseEntityValue(ctxt, &orig);
5469
2.99k
    if (value) {
5470
2.80k
        if ((ctxt->sax != NULL) &&
5471
2.80k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5472
1.51k
      ctxt->sax->entityDecl(ctxt->userData, name,
5473
1.51k
                        XML_INTERNAL_PARAMETER_ENTITY,
5474
1.51k
            NULL, NULL, value);
5475
2.80k
    }
5476
2.99k
      } else {
5477
1.45k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5478
1.45k
    if ((URI == NULL) && (literal == NULL)) {
5479
194
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5480
194
    }
5481
1.45k
    if (URI) {
5482
1.06k
                    if (xmlStrchr(URI, '#')) {
5483
194
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5484
873
                    } else {
5485
873
                        if ((ctxt->sax != NULL) &&
5486
873
                            (!ctxt->disableSAX) &&
5487
873
                            (ctxt->sax->entityDecl != NULL))
5488
678
                            ctxt->sax->entityDecl(ctxt->userData, name,
5489
678
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5490
678
                                        literal, URI, NULL);
5491
873
                    }
5492
1.06k
    }
5493
1.45k
      }
5494
21.9k
  } else {
5495
21.9k
      if ((RAW == '"') || (RAW == '\'')) {
5496
15.3k
          value = xmlParseEntityValue(ctxt, &orig);
5497
15.3k
    if ((ctxt->sax != NULL) &&
5498
15.3k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5499
12.4k
        ctxt->sax->entityDecl(ctxt->userData, name,
5500
12.4k
        XML_INTERNAL_GENERAL_ENTITY,
5501
12.4k
        NULL, NULL, value);
5502
    /*
5503
     * For expat compatibility in SAX mode.
5504
     */
5505
15.3k
    if ((ctxt->myDoc == NULL) ||
5506
15.3k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5507
1.88k
        if (ctxt->myDoc == NULL) {
5508
1.40k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5509
1.40k
      if (ctxt->myDoc == NULL) {
5510
1
          xmlErrMemory(ctxt);
5511
1
          goto done;
5512
1
      }
5513
1.40k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5514
1.40k
        }
5515
1.88k
        if (ctxt->myDoc->intSubset == NULL) {
5516
1.40k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5517
1.40k
              BAD_CAST "fake", NULL, NULL);
5518
1.40k
                        if (ctxt->myDoc->intSubset == NULL) {
5519
1
                            xmlErrMemory(ctxt);
5520
1
                            goto done;
5521
1
                        }
5522
1.40k
                    }
5523
5524
1.88k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5525
1.88k
                    NULL, NULL, value);
5526
1.88k
    }
5527
15.3k
      } else {
5528
6.58k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5529
6.58k
    if ((URI == NULL) && (literal == NULL)) {
5530
2.26k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5531
2.26k
    }
5532
6.58k
    if (URI) {
5533
4.12k
                    if (xmlStrchr(URI, '#')) {
5534
194
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5535
194
                    }
5536
4.12k
    }
5537
6.58k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5538
731
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5539
731
           "Space required before 'NDATA'\n");
5540
731
    }
5541
6.58k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5542
1.16k
        SKIP(5);
5543
1.16k
        if (SKIP_BLANKS_PE == 0) {
5544
197
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5545
197
               "Space required after 'NDATA'\n");
5546
197
        }
5547
1.16k
        ndata = xmlParseName(ctxt);
5548
1.16k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5549
1.16k
            (ctxt->sax->unparsedEntityDecl != NULL))
5550
919
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5551
919
            literal, URI, ndata);
5552
5.41k
    } else {
5553
5.41k
        if ((ctxt->sax != NULL) &&
5554
5.41k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5555
3.76k
      ctxt->sax->entityDecl(ctxt->userData, name,
5556
3.76k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5557
3.76k
            literal, URI, NULL);
5558
        /*
5559
         * For expat compatibility in SAX mode.
5560
         * assuming the entity replacement was asked for
5561
         */
5562
5.41k
        if ((ctxt->replaceEntities != 0) &&
5563
5.41k
      ((ctxt->myDoc == NULL) ||
5564
2.30k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5565
1.09k
      if (ctxt->myDoc == NULL) {
5566
564
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5567
564
          if (ctxt->myDoc == NULL) {
5568
1
              xmlErrMemory(ctxt);
5569
1
        goto done;
5570
1
          }
5571
563
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5572
563
      }
5573
5574
1.09k
      if (ctxt->myDoc->intSubset == NULL) {
5575
563
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5576
563
            BAD_CAST "fake", NULL, NULL);
5577
563
                            if (ctxt->myDoc->intSubset == NULL) {
5578
1
                                xmlErrMemory(ctxt);
5579
1
                                goto done;
5580
1
                            }
5581
563
                        }
5582
1.09k
      xmlSAX2EntityDecl(ctxt, name,
5583
1.09k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5584
1.09k
                  literal, URI, NULL);
5585
1.09k
        }
5586
5.41k
    }
5587
6.58k
      }
5588
21.9k
  }
5589
26.3k
  SKIP_BLANKS_PE;
5590
26.3k
  if (RAW != '>') {
5591
3.96k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5592
3.96k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5593
22.4k
  } else {
5594
22.4k
#ifdef LIBXML_VALID_ENABLED
5595
22.4k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5596
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5597
0
                           "Entity declaration doesn't start and stop in"
5598
0
                                 " the same entity\n",
5599
0
                                 NULL, NULL);
5600
0
      }
5601
22.4k
#endif
5602
22.4k
      NEXT;
5603
22.4k
  }
5604
26.3k
  if (orig != NULL) {
5605
      /*
5606
       * Ugly mechanism to save the raw entity value.
5607
       */
5608
17.9k
      xmlEntityPtr cur = NULL;
5609
5610
17.9k
      if (isParameter) {
5611
2.80k
          if ((ctxt->sax != NULL) &&
5612
2.80k
        (ctxt->sax->getParameterEntity != NULL))
5613
2.80k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5614
15.1k
      } else {
5615
15.1k
          if ((ctxt->sax != NULL) &&
5616
15.1k
        (ctxt->sax->getEntity != NULL))
5617
15.1k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5618
15.1k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5619
963
        cur = xmlSAX2GetEntity(ctxt, name);
5620
963
    }
5621
15.1k
      }
5622
17.9k
            if ((cur != NULL) && (cur->orig == NULL)) {
5623
13.8k
    cur->orig = orig;
5624
13.8k
                orig = NULL;
5625
13.8k
      }
5626
17.9k
  }
5627
5628
26.3k
done:
5629
26.3k
  if (value != NULL) xmlFree(value);
5630
26.3k
  if (URI != NULL) xmlFree(URI);
5631
26.3k
  if (literal != NULL) xmlFree(literal);
5632
26.3k
        if (orig != NULL) xmlFree(orig);
5633
26.3k
    }
5634
30.9k
}
5635
5636
/**
5637
 * Parse an attribute default declaration
5638
 *
5639
 * @deprecated Internal function, don't use.
5640
 *
5641
 *     [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5642
 *
5643
 * [ VC: Required Attribute ]
5644
 * if the default declaration is the keyword \#REQUIRED, then the
5645
 * attribute must be specified for all elements of the type in the
5646
 * attribute-list declaration.
5647
 *
5648
 * [ VC: Attribute Default Legal ]
5649
 * The declared default value must meet the lexical constraints of
5650
 * the declared attribute type c.f. #xmlValidateAttributeDecl
5651
 *
5652
 * [ VC: Fixed Attribute Default ]
5653
 * if an attribute has a default value declared with the \#FIXED
5654
 * keyword, instances of that attribute must match the default value.
5655
 *
5656
 * [ WFC: No < in Attribute Values ]
5657
 * handled in #xmlParseAttValue
5658
 *
5659
 * @param ctxt  an XML parser context
5660
 * @param value  Receive a possible fixed default value for the attribute
5661
 * @returns XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5662
 *          or XML_ATTRIBUTE_FIXED.
5663
 */
5664
5665
int
5666
37.1k
xmlParseDefaultDecl(xmlParserCtxt *ctxt, xmlChar **value) {
5667
37.1k
    int val;
5668
37.1k
    xmlChar *ret;
5669
5670
37.1k
    *value = NULL;
5671
37.1k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5672
1.02k
  SKIP(9);
5673
1.02k
  return(XML_ATTRIBUTE_REQUIRED);
5674
1.02k
    }
5675
36.1k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5676
623
  SKIP(8);
5677
623
  return(XML_ATTRIBUTE_IMPLIED);
5678
623
    }
5679
35.5k
    val = XML_ATTRIBUTE_NONE;
5680
35.5k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5681
1.20k
  SKIP(6);
5682
1.20k
  val = XML_ATTRIBUTE_FIXED;
5683
1.20k
  if (SKIP_BLANKS_PE == 0) {
5684
437
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5685
437
         "Space required after '#FIXED'\n");
5686
437
  }
5687
1.20k
    }
5688
35.5k
    ret = xmlParseAttValue(ctxt);
5689
35.5k
    if (ret == NULL) {
5690
4.18k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5691
4.18k
           "Attribute default value declaration error\n");
5692
4.18k
    } else
5693
31.3k
        *value = ret;
5694
35.5k
    return(val);
5695
36.1k
}
5696
5697
/**
5698
 * Parse an Notation attribute type.
5699
 *
5700
 * @deprecated Internal function, don't use.
5701
 *
5702
 * Note: the leading 'NOTATION' S part has already being parsed...
5703
 *
5704
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5705
 *
5706
 * [ VC: Notation Attributes ]
5707
 * Values of this type must match one of the notation names included
5708
 * in the declaration; all notation names in the declaration must be declared.
5709
 *
5710
 * @param ctxt  an XML parser context
5711
 * @returns the notation attribute tree built while parsing
5712
 */
5713
5714
xmlEnumeration *
5715
783
xmlParseNotationType(xmlParserCtxt *ctxt) {
5716
783
    const xmlChar *name;
5717
783
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5718
5719
783
    if (RAW != '(') {
5720
197
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5721
197
  return(NULL);
5722
197
    }
5723
586
    do {
5724
586
        NEXT;
5725
586
  SKIP_BLANKS_PE;
5726
586
        name = xmlParseName(ctxt);
5727
586
  if (name == NULL) {
5728
195
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5729
195
         "Name expected in NOTATION declaration\n");
5730
195
            xmlFreeEnumeration(ret);
5731
195
      return(NULL);
5732
195
  }
5733
391
        tmp = NULL;
5734
391
#ifdef LIBXML_VALID_ENABLED
5735
391
        if (ctxt->validate) {
5736
194
            tmp = ret;
5737
194
            while (tmp != NULL) {
5738
0
                if (xmlStrEqual(name, tmp->name)) {
5739
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5740
0
              "standalone: attribute notation value token %s duplicated\n",
5741
0
                                     name, NULL);
5742
0
                    if (!xmlDictOwns(ctxt->dict, name))
5743
0
                        xmlFree((xmlChar *) name);
5744
0
                    break;
5745
0
                }
5746
0
                tmp = tmp->next;
5747
0
            }
5748
194
        }
5749
391
#endif /* LIBXML_VALID_ENABLED */
5750
391
  if (tmp == NULL) {
5751
391
      cur = xmlCreateEnumeration(name);
5752
391
      if (cur == NULL) {
5753
1
                xmlErrMemory(ctxt);
5754
1
                xmlFreeEnumeration(ret);
5755
1
                return(NULL);
5756
1
            }
5757
390
      if (last == NULL) ret = last = cur;
5758
0
      else {
5759
0
    last->next = cur;
5760
0
    last = cur;
5761
0
      }
5762
390
  }
5763
390
  SKIP_BLANKS_PE;
5764
390
    } while (RAW == '|');
5765
390
    if (RAW != ')') {
5766
390
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5767
390
        xmlFreeEnumeration(ret);
5768
390
  return(NULL);
5769
390
    }
5770
0
    NEXT;
5771
0
    return(ret);
5772
390
}
5773
5774
/**
5775
 * Parse an Enumeration attribute type.
5776
 *
5777
 * @deprecated Internal function, don't use.
5778
 *
5779
 *     [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5780
 *
5781
 * [ VC: Enumeration ]
5782
 * Values of this type must match one of the Nmtoken tokens in
5783
 * the declaration
5784
 *
5785
 * @param ctxt  an XML parser context
5786
 * @returns the enumeration attribute tree built while parsing
5787
 */
5788
5789
xmlEnumeration *
5790
7.17k
xmlParseEnumerationType(xmlParserCtxt *ctxt) {
5791
7.17k
    xmlChar *name;
5792
7.17k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5793
5794
7.17k
    if (RAW != '(') {
5795
4.86k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5796
4.86k
  return(NULL);
5797
4.86k
    }
5798
3.64k
    do {
5799
3.64k
        NEXT;
5800
3.64k
  SKIP_BLANKS_PE;
5801
3.64k
        name = xmlParseNmtoken(ctxt);
5802
3.64k
  if (name == NULL) {
5803
196
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5804
196
      return(ret);
5805
196
  }
5806
3.45k
        tmp = NULL;
5807
3.45k
#ifdef LIBXML_VALID_ENABLED
5808
3.45k
        if (ctxt->validate) {
5809
2.69k
            tmp = ret;
5810
4.12k
            while (tmp != NULL) {
5811
1.62k
                if (xmlStrEqual(name, tmp->name)) {
5812
204
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5813
204
              "standalone: attribute enumeration value token %s duplicated\n",
5814
204
                                     name, NULL);
5815
204
                    if (!xmlDictOwns(ctxt->dict, name))
5816
204
                        xmlFree(name);
5817
204
                    break;
5818
204
                }
5819
1.42k
                tmp = tmp->next;
5820
1.42k
            }
5821
2.69k
        }
5822
3.45k
#endif /* LIBXML_VALID_ENABLED */
5823
3.45k
  if (tmp == NULL) {
5824
3.24k
      cur = xmlCreateEnumeration(name);
5825
3.24k
      if (!xmlDictOwns(ctxt->dict, name))
5826
3.24k
    xmlFree(name);
5827
3.24k
      if (cur == NULL) {
5828
4
                xmlErrMemory(ctxt);
5829
4
                xmlFreeEnumeration(ret);
5830
4
                return(NULL);
5831
4
            }
5832
3.24k
      if (last == NULL) ret = last = cur;
5833
1.13k
      else {
5834
1.13k
    last->next = cur;
5835
1.13k
    last = cur;
5836
1.13k
      }
5837
3.24k
  }
5838
3.44k
  SKIP_BLANKS_PE;
5839
3.44k
    } while (RAW == '|');
5840
2.10k
    if (RAW != ')') {
5841
427
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5842
427
  return(ret);
5843
427
    }
5844
1.67k
    NEXT;
5845
1.67k
    return(ret);
5846
2.10k
}
5847
5848
/**
5849
 * Parse an Enumerated attribute type.
5850
 *
5851
 * @deprecated Internal function, don't use.
5852
 *
5853
 *     [57] EnumeratedType ::= NotationType | Enumeration
5854
 *
5855
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5856
 *
5857
 * @param ctxt  an XML parser context
5858
 * @param tree  the enumeration tree built while parsing
5859
 * @returns XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5860
 */
5861
5862
int
5863
8.15k
xmlParseEnumeratedType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5864
8.15k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5865
979
  SKIP(8);
5866
979
  if (SKIP_BLANKS_PE == 0) {
5867
196
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5868
196
         "Space required after 'NOTATION'\n");
5869
196
      return(0);
5870
196
  }
5871
783
  *tree = xmlParseNotationType(ctxt);
5872
783
  if (*tree == NULL) return(0);
5873
0
  return(XML_ATTRIBUTE_NOTATION);
5874
783
    }
5875
7.17k
    *tree = xmlParseEnumerationType(ctxt);
5876
7.17k
    if (*tree == NULL) return(0);
5877
2.10k
    return(XML_ATTRIBUTE_ENUMERATION);
5878
7.17k
}
5879
5880
/**
5881
 * Parse the Attribute list def for an element
5882
 *
5883
 * @deprecated Internal function, don't use.
5884
 *
5885
 *     [54] AttType ::= StringType | TokenizedType | EnumeratedType
5886
 *
5887
 *     [55] StringType ::= 'CDATA'
5888
 *
5889
 *     [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5890
 *                            'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5891
 *
5892
 * Validity constraints for attribute values syntax are checked in
5893
 * #xmlValidateAttributeValue
5894
 *
5895
 * [ VC: ID ]
5896
 * Values of type ID must match the Name production. A name must not
5897
 * appear more than once in an XML document as a value of this type;
5898
 * i.e., ID values must uniquely identify the elements which bear them.
5899
 *
5900
 * [ VC: One ID per Element Type ]
5901
 * No element type may have more than one ID attribute specified.
5902
 *
5903
 * [ VC: ID Attribute Default ]
5904
 * An ID attribute must have a declared default of \#IMPLIED or \#REQUIRED.
5905
 *
5906
 * [ VC: IDREF ]
5907
 * Values of type IDREF must match the Name production, and values
5908
 * of type IDREFS must match Names; each IDREF Name must match the value
5909
 * of an ID attribute on some element in the XML document; i.e. IDREF
5910
 * values must match the value of some ID attribute.
5911
 *
5912
 * [ VC: Entity Name ]
5913
 * Values of type ENTITY must match the Name production, values
5914
 * of type ENTITIES must match Names; each Entity Name must match the
5915
 * name of an unparsed entity declared in the DTD.
5916
 *
5917
 * [ VC: Name Token ]
5918
 * Values of type NMTOKEN must match the Nmtoken production; values
5919
 * of type NMTOKENS must match Nmtokens.
5920
 *
5921
 * @param ctxt  an XML parser context
5922
 * @param tree  the enumeration tree built while parsing
5923
 * @returns the attribute type
5924
 */
5925
int
5926
46.7k
xmlParseAttributeType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5927
46.7k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5928
13.3k
  SKIP(5);
5929
13.3k
  return(XML_ATTRIBUTE_CDATA);
5930
33.4k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5931
2.61k
  SKIP(6);
5932
2.61k
  return(XML_ATTRIBUTE_IDREFS);
5933
30.8k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5934
4.13k
  SKIP(5);
5935
4.13k
  return(XML_ATTRIBUTE_IDREF);
5936
26.6k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5937
4.67k
        SKIP(2);
5938
4.67k
  return(XML_ATTRIBUTE_ID);
5939
21.9k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5940
8.71k
  SKIP(6);
5941
8.71k
  return(XML_ATTRIBUTE_ENTITY);
5942
13.2k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5943
1.67k
  SKIP(8);
5944
1.67k
  return(XML_ATTRIBUTE_ENTITIES);
5945
11.6k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5946
595
  SKIP(8);
5947
595
  return(XML_ATTRIBUTE_NMTOKENS);
5948
11.0k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5949
2.85k
  SKIP(7);
5950
2.85k
  return(XML_ATTRIBUTE_NMTOKEN);
5951
2.85k
     }
5952
8.15k
     return(xmlParseEnumeratedType(ctxt, tree));
5953
46.7k
}
5954
5955
/**
5956
 * Parse an attribute list declaration for an element. Always consumes '<!'.
5957
 *
5958
 * @deprecated Internal function, don't use.
5959
 *
5960
 *     [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5961
 *
5962
 *     [53] AttDef ::= S Name S AttType S DefaultDecl
5963
 * @param ctxt  an XML parser context
5964
 */
5965
void
5966
51.9k
xmlParseAttributeListDecl(xmlParserCtxt *ctxt) {
5967
51.9k
    const xmlChar *elemName;
5968
51.9k
    const xmlChar *attrName;
5969
51.9k
    xmlEnumerationPtr tree;
5970
5971
51.9k
    if ((CUR != '<') || (NXT(1) != '!'))
5972
0
        return;
5973
51.9k
    SKIP(2);
5974
5975
51.9k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5976
50.8k
#ifdef LIBXML_VALID_ENABLED
5977
50.8k
  int oldInputNr = ctxt->inputNr;
5978
50.8k
#endif
5979
5980
50.8k
  SKIP(7);
5981
50.8k
  if (SKIP_BLANKS_PE == 0) {
5982
197
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5983
197
                     "Space required after '<!ATTLIST'\n");
5984
197
  }
5985
50.8k
        elemName = xmlParseName(ctxt);
5986
50.8k
  if (elemName == NULL) {
5987
1.82k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5988
1.82k
         "ATTLIST: no name for Element\n");
5989
1.82k
      return;
5990
1.82k
  }
5991
48.9k
  SKIP_BLANKS_PE;
5992
48.9k
  GROW;
5993
81.5k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
5994
49.8k
      int type;
5995
49.8k
      int def;
5996
49.8k
      xmlChar *defaultValue = NULL;
5997
5998
49.8k
      GROW;
5999
49.8k
            tree = NULL;
6000
49.8k
      attrName = xmlParseName(ctxt);
6001
49.8k
      if (attrName == NULL) {
6002
2.41k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6003
2.41k
             "ATTLIST: no name for Attribute\n");
6004
2.41k
    break;
6005
2.41k
      }
6006
47.3k
      GROW;
6007
47.3k
      if (SKIP_BLANKS_PE == 0) {
6008
642
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6009
642
            "Space required after the attribute name\n");
6010
642
    break;
6011
642
      }
6012
6013
46.7k
      type = xmlParseAttributeType(ctxt, &tree);
6014
46.7k
      if (type <= 0) {
6015
6.04k
          break;
6016
6.04k
      }
6017
6018
40.6k
      GROW;
6019
40.6k
      if (SKIP_BLANKS_PE == 0) {
6020
3.53k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6021
3.53k
             "Space required after the attribute type\n");
6022
3.53k
          if (tree != NULL)
6023
432
        xmlFreeEnumeration(tree);
6024
3.53k
    break;
6025
3.53k
      }
6026
6027
37.1k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6028
37.1k
      if (def <= 0) {
6029
0
                if (defaultValue != NULL)
6030
0
        xmlFree(defaultValue);
6031
0
          if (tree != NULL)
6032
0
        xmlFreeEnumeration(tree);
6033
0
          break;
6034
0
      }
6035
37.1k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6036
21.5k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6037
6038
37.1k
      GROW;
6039
37.1k
            if (RAW != '>') {
6040
5.91k
    if (SKIP_BLANKS_PE == 0) {
6041
4.64k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6042
4.64k
      "Space required after the attribute default value\n");
6043
4.64k
        if (defaultValue != NULL)
6044
359
      xmlFree(defaultValue);
6045
4.64k
        if (tree != NULL)
6046
233
      xmlFreeEnumeration(tree);
6047
4.64k
        break;
6048
4.64k
    }
6049
5.91k
      }
6050
32.5k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6051
32.5k
    (ctxt->sax->attributeDecl != NULL))
6052
31.2k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6053
31.2k
                          type, def, defaultValue, tree);
6054
1.29k
      else if (tree != NULL)
6055
204
    xmlFreeEnumeration(tree);
6056
6057
32.5k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6058
32.5k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6059
32.5k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6060
22.1k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6061
22.1k
      }
6062
32.5k
      if (ctxt->sax2) {
6063
23.5k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6064
23.5k
      }
6065
32.5k
      if (defaultValue != NULL)
6066
30.9k
          xmlFree(defaultValue);
6067
32.5k
      GROW;
6068
32.5k
  }
6069
48.9k
  if (RAW == '>') {
6070
33.6k
#ifdef LIBXML_VALID_ENABLED
6071
33.6k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6072
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6073
0
                                 "Attribute list declaration doesn't start and"
6074
0
                                 " stop in the same entity\n",
6075
0
                                 NULL, NULL);
6076
0
      }
6077
33.6k
#endif
6078
33.6k
      NEXT;
6079
33.6k
  }
6080
48.9k
    }
6081
51.9k
}
6082
6083
/**
6084
 * Handle PEs and check that we don't pop the entity that started
6085
 * a balanced group.
6086
 *
6087
 * @param ctxt  parser context
6088
 * @param openInputNr  input nr of the entity with opening '('
6089
 */
6090
static void
6091
91.5k
xmlSkipBlankCharsPEBalanced(xmlParserCtxt *ctxt, int openInputNr) {
6092
91.5k
    SKIP_BLANKS;
6093
91.5k
    GROW;
6094
6095
91.5k
    (void) openInputNr;
6096
6097
91.5k
    if (!PARSER_EXTERNAL(ctxt) && !PARSER_IN_PE(ctxt))
6098
91.5k
        return;
6099
6100
0
    while (!PARSER_STOPPED(ctxt)) {
6101
0
        if (ctxt->input->cur >= ctxt->input->end) {
6102
0
#ifdef LIBXML_VALID_ENABLED
6103
0
            if ((ctxt->validate) && (ctxt->inputNr <= openInputNr)) {
6104
0
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6105
0
                                 "Element content declaration doesn't start "
6106
0
                                 "and stop in the same entity\n",
6107
0
                                 NULL, NULL);
6108
0
            }
6109
0
#endif
6110
0
            if (PARSER_IN_PE(ctxt))
6111
0
                xmlPopPE(ctxt);
6112
0
            else
6113
0
                break;
6114
0
        } else if (RAW == '%') {
6115
0
            xmlParsePERefInternal(ctxt, 0);
6116
0
        } else {
6117
0
            break;
6118
0
        }
6119
6120
0
        SKIP_BLANKS;
6121
0
        GROW;
6122
0
    }
6123
0
}
6124
6125
/**
6126
 * Parse the declaration for a Mixed Element content
6127
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6128
 *
6129
 * @deprecated Internal function, don't use.
6130
 *
6131
 *     [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6132
 *                    '(' S? '#PCDATA' S? ')'
6133
 *
6134
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6135
 *
6136
 * [ VC: No Duplicate Types ]
6137
 * The same name must not appear more than once in a single
6138
 * mixed-content declaration.
6139
 *
6140
 * @param ctxt  an XML parser context
6141
 * @param openInputNr  the input used for the current entity, needed for
6142
 * boundary checks
6143
 * @returns the list of the xmlElementContent describing the element choices
6144
 */
6145
xmlElementContent *
6146
3.37k
xmlParseElementMixedContentDecl(xmlParserCtxt *ctxt, int openInputNr) {
6147
3.37k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6148
3.37k
    const xmlChar *elem = NULL;
6149
6150
3.37k
    GROW;
6151
3.37k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6152
3.37k
  SKIP(7);
6153
3.37k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6154
3.37k
  if (RAW == ')') {
6155
751
#ifdef LIBXML_VALID_ENABLED
6156
751
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6157
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6158
0
                                 "Element content declaration doesn't start "
6159
0
                                 "and stop in the same entity\n",
6160
0
                                 NULL, NULL);
6161
0
      }
6162
751
#endif
6163
751
      NEXT;
6164
751
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6165
751
      if (ret == NULL)
6166
1
                goto mem_error;
6167
750
      if (RAW == '*') {
6168
505
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6169
505
    NEXT;
6170
505
      }
6171
750
      return(ret);
6172
751
  }
6173
2.62k
  if ((RAW == '(') || (RAW == '|')) {
6174
2.10k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6175
2.10k
      if (ret == NULL)
6176
1
                goto mem_error;
6177
2.10k
  }
6178
8.80k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6179
6.38k
      NEXT;
6180
6.38k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6181
6.38k
            if (n == NULL)
6182
1
                goto mem_error;
6183
6.38k
      if (elem == NULL) {
6184
1.90k
    n->c1 = cur;
6185
1.90k
    if (cur != NULL)
6186
1.90k
        cur->parent = n;
6187
1.90k
    ret = cur = n;
6188
4.47k
      } else {
6189
4.47k
          cur->c2 = n;
6190
4.47k
    n->parent = cur;
6191
4.47k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6192
4.47k
                if (n->c1 == NULL)
6193
3
                    goto mem_error;
6194
4.47k
    n->c1->parent = n;
6195
4.47k
    cur = n;
6196
4.47k
      }
6197
6.37k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6198
6.37k
      elem = xmlParseName(ctxt);
6199
6.37k
      if (elem == NULL) {
6200
194
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6201
194
      "xmlParseElementMixedContentDecl : Name expected\n");
6202
194
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6203
194
    return(NULL);
6204
194
      }
6205
6.18k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6206
6.18k
  }
6207
2.42k
  if ((RAW == ')') && (NXT(1) == '*')) {
6208
1.33k
      if (elem != NULL) {
6209
1.33k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6210
1.33k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6211
1.33k
    if (cur->c2 == NULL)
6212
1
                    goto mem_error;
6213
1.32k
    cur->c2->parent = cur;
6214
1.32k
            }
6215
1.32k
            if (ret != NULL)
6216
1.32k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6217
1.32k
#ifdef LIBXML_VALID_ENABLED
6218
1.32k
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6219
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6220
0
                                 "Element content declaration doesn't start "
6221
0
                                 "and stop in the same entity\n",
6222
0
                                 NULL, NULL);
6223
0
      }
6224
1.32k
#endif
6225
1.32k
      SKIP(2);
6226
1.32k
  } else {
6227
1.09k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6228
1.09k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6229
1.09k
      return(NULL);
6230
1.09k
  }
6231
6232
2.42k
    } else {
6233
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6234
0
    }
6235
1.32k
    return(ret);
6236
6237
7
mem_error:
6238
7
    xmlErrMemory(ctxt);
6239
7
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6240
7
    return(NULL);
6241
3.37k
}
6242
6243
/**
6244
 * Parse the declaration for a Mixed Element content
6245
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6246
 *
6247
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6248
 *
6249
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6250
 *
6251
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6252
 *
6253
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6254
 *
6255
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6256
 * TODO Parameter-entity replacement text must be properly nested
6257
 *  with parenthesized groups. That is to say, if either of the
6258
 *  opening or closing parentheses in a choice, seq, or Mixed
6259
 *  construct is contained in the replacement text for a parameter
6260
 *  entity, both must be contained in the same replacement text. For
6261
 *  interoperability, if a parameter-entity reference appears in a
6262
 *  choice, seq, or Mixed construct, its replacement text should not
6263
 *  be empty, and neither the first nor last non-blank character of
6264
 *  the replacement text should be a connector (| or ,).
6265
 *
6266
 * @param ctxt  an XML parser context
6267
 * @param openInputNr  the input used for the current entity, needed for
6268
 * boundary checks
6269
 * @param depth  the level of recursion
6270
 * @returns the tree of xmlElementContent describing the element
6271
 *          hierarchy.
6272
 */
6273
static xmlElementContentPtr
6274
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int openInputNr,
6275
15.2k
                                       int depth) {
6276
15.2k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6277
15.2k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6278
15.2k
    const xmlChar *elem;
6279
15.2k
    xmlChar type = 0;
6280
6281
15.2k
    if (depth > maxDepth) {
6282
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6283
0
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6284
0
                "use XML_PARSE_HUGE\n", depth);
6285
0
  return(NULL);
6286
0
    }
6287
15.2k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6288
15.2k
    if (RAW == '(') {
6289
2.03k
        int newInputNr = ctxt->inputNr;
6290
6291
        /* Recurse on first child */
6292
2.03k
  NEXT;
6293
2.03k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6294
2.03k
                                                           depth + 1);
6295
2.03k
        if (cur == NULL)
6296
1.34k
            return(NULL);
6297
13.2k
    } else {
6298
13.2k
  elem = xmlParseName(ctxt);
6299
13.2k
  if (elem == NULL) {
6300
1.24k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6301
1.24k
      return(NULL);
6302
1.24k
  }
6303
11.9k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6304
11.9k
  if (cur == NULL) {
6305
4
      xmlErrMemory(ctxt);
6306
4
      return(NULL);
6307
4
  }
6308
11.9k
  GROW;
6309
11.9k
  if (RAW == '?') {
6310
1.95k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6311
1.95k
      NEXT;
6312
10.0k
  } else if (RAW == '*') {
6313
1.08k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6314
1.08k
      NEXT;
6315
8.92k
  } else if (RAW == '+') {
6316
988
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6317
988
      NEXT;
6318
7.94k
  } else {
6319
7.94k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6320
7.94k
  }
6321
11.9k
  GROW;
6322
11.9k
    }
6323
28.2k
    while (!PARSER_STOPPED(ctxt)) {
6324
28.1k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6325
28.1k
        if (RAW == ')')
6326
9.04k
            break;
6327
        /*
6328
   * Each loop we parse one separator and one element.
6329
   */
6330
19.0k
        if (RAW == ',') {
6331
7.24k
      if (type == 0) type = CUR;
6332
6333
      /*
6334
       * Detect "Name | Name , Name" error
6335
       */
6336
3.31k
      else if (type != CUR) {
6337
194
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6338
194
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6339
194
                      type);
6340
194
    if ((last != NULL) && (last != ret))
6341
194
        xmlFreeDocElementContent(ctxt->myDoc, last);
6342
194
    if (ret != NULL)
6343
194
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6344
194
    return(NULL);
6345
194
      }
6346
7.05k
      NEXT;
6347
6348
7.05k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6349
7.05k
      if (op == NULL) {
6350
2
                xmlErrMemory(ctxt);
6351
2
    if ((last != NULL) && (last != ret))
6352
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6353
2
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6354
2
    return(NULL);
6355
2
      }
6356
7.05k
      if (last == NULL) {
6357
3.93k
    op->c1 = ret;
6358
3.93k
    if (ret != NULL)
6359
3.93k
        ret->parent = op;
6360
3.93k
    ret = cur = op;
6361
3.93k
      } else {
6362
3.12k
          cur->c2 = op;
6363
3.12k
    if (op != NULL)
6364
3.12k
        op->parent = cur;
6365
3.12k
    op->c1 = last;
6366
3.12k
    if (last != NULL)
6367
3.12k
        last->parent = op;
6368
3.12k
    cur =op;
6369
3.12k
    last = NULL;
6370
3.12k
      }
6371
11.8k
  } else if (RAW == '|') {
6372
9.58k
      if (type == 0) type = CUR;
6373
6374
      /*
6375
       * Detect "Name , Name | Name" error
6376
       */
6377
4.60k
      else if (type != CUR) {
6378
194
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6379
194
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6380
194
          type);
6381
194
    if ((last != NULL) && (last != ret))
6382
194
        xmlFreeDocElementContent(ctxt->myDoc, last);
6383
194
    if (ret != NULL)
6384
194
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6385
194
    return(NULL);
6386
194
      }
6387
9.39k
      NEXT;
6388
6389
9.39k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6390
9.39k
      if (op == NULL) {
6391
2
                xmlErrMemory(ctxt);
6392
2
    if ((last != NULL) && (last != ret))
6393
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6394
2
    if (ret != NULL)
6395
2
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6396
2
    return(NULL);
6397
2
      }
6398
9.39k
      if (last == NULL) {
6399
4.98k
    op->c1 = ret;
6400
4.98k
    if (ret != NULL)
6401
4.98k
        ret->parent = op;
6402
4.98k
    ret = cur = op;
6403
4.98k
      } else {
6404
4.40k
          cur->c2 = op;
6405
4.40k
    if (op != NULL)
6406
4.40k
        op->parent = cur;
6407
4.40k
    op->c1 = last;
6408
4.40k
    if (last != NULL)
6409
4.40k
        last->parent = op;
6410
4.40k
    cur =op;
6411
4.40k
    last = NULL;
6412
4.40k
      }
6413
9.39k
  } else {
6414
2.21k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6415
2.21k
      if ((last != NULL) && (last != ret))
6416
1.10k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6417
2.21k
      if (ret != NULL)
6418
2.21k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6419
2.21k
      return(NULL);
6420
2.21k
  }
6421
16.4k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6422
16.4k
        if (RAW == '(') {
6423
790
            int newInputNr = ctxt->inputNr;
6424
6425
      /* Recurse on second child */
6426
790
      NEXT;
6427
790
      last = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6428
790
                                                          depth + 1);
6429
790
            if (last == NULL) {
6430
553
    if (ret != NULL)
6431
553
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
553
    return(NULL);
6433
553
            }
6434
15.6k
  } else {
6435
15.6k
      elem = xmlParseName(ctxt);
6436
15.6k
      if (elem == NULL) {
6437
301
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6438
301
    if (ret != NULL)
6439
301
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6440
301
    return(NULL);
6441
301
      }
6442
15.3k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6443
15.3k
      if (last == NULL) {
6444
2
                xmlErrMemory(ctxt);
6445
2
    if (ret != NULL)
6446
2
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6447
2
    return(NULL);
6448
2
      }
6449
15.3k
      if (RAW == '?') {
6450
3.09k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6451
3.09k
    NEXT;
6452
12.2k
      } else if (RAW == '*') {
6453
1.28k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6454
1.28k
    NEXT;
6455
10.9k
      } else if (RAW == '+') {
6456
1.12k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6457
1.12k
    NEXT;
6458
9.85k
      } else {
6459
9.85k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6460
9.85k
      }
6461
15.3k
  }
6462
16.4k
    }
6463
9.18k
    if ((cur != NULL) && (last != NULL)) {
6464
6.56k
        cur->c2 = last;
6465
6.56k
  if (last != NULL)
6466
6.56k
      last->parent = cur;
6467
6.56k
    }
6468
9.18k
#ifdef LIBXML_VALID_ENABLED
6469
9.18k
    if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6470
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6471
0
                         "Element content declaration doesn't start "
6472
0
                         "and stop in the same entity\n",
6473
0
                         NULL, NULL);
6474
0
    }
6475
9.18k
#endif
6476
9.18k
    NEXT;
6477
9.18k
    if (RAW == '?') {
6478
1.09k
  if (ret != NULL) {
6479
1.09k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6480
1.09k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6481
388
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6482
707
      else
6483
707
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6484
1.09k
  }
6485
1.09k
  NEXT;
6486
8.08k
    } else if (RAW == '*') {
6487
2.10k
  if (ret != NULL) {
6488
2.10k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6489
2.10k
      cur = ret;
6490
      /*
6491
       * Some normalization:
6492
       * (a | b* | c?)* == (a | b | c)*
6493
       */
6494
4.30k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6495
2.20k
    if ((cur->c1 != NULL) &&
6496
2.20k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6497
2.20k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6498
602
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6499
2.20k
    if ((cur->c2 != NULL) &&
6500
2.20k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6501
2.20k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6502
391
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6503
2.20k
    cur = cur->c2;
6504
2.20k
      }
6505
2.10k
  }
6506
2.10k
  NEXT;
6507
5.98k
    } else if (RAW == '+') {
6508
2.29k
  if (ret != NULL) {
6509
2.29k
      int found = 0;
6510
6511
2.29k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6512
2.29k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6513
388
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6514
1.90k
      else
6515
1.90k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6516
      /*
6517
       * Some normalization:
6518
       * (a | b*)+ == (a | b)*
6519
       * (a | b?)+ == (a | b)*
6520
       */
6521
3.78k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6522
1.49k
    if ((cur->c1 != NULL) &&
6523
1.49k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6524
1.49k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6525
580
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6526
580
        found = 1;
6527
580
    }
6528
1.49k
    if ((cur->c2 != NULL) &&
6529
1.49k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6530
1.49k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6531
428
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6532
428
        found = 1;
6533
428
    }
6534
1.49k
    cur = cur->c2;
6535
1.49k
      }
6536
2.29k
      if (found)
6537
747
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6538
2.29k
  }
6539
2.29k
  NEXT;
6540
2.29k
    }
6541
9.18k
    return(ret);
6542
12.6k
}
6543
6544
/**
6545
 * Parse the declaration for a Mixed Element content
6546
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6547
 *
6548
 * @deprecated Internal function, don't use.
6549
 *
6550
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6551
 *
6552
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6553
 *
6554
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6555
 *
6556
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6557
 *
6558
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6559
 * TODO Parameter-entity replacement text must be properly nested
6560
 *  with parenthesized groups. That is to say, if either of the
6561
 *  opening or closing parentheses in a choice, seq, or Mixed
6562
 *  construct is contained in the replacement text for a parameter
6563
 *  entity, both must be contained in the same replacement text. For
6564
 *  interoperability, if a parameter-entity reference appears in a
6565
 *  choice, seq, or Mixed construct, its replacement text should not
6566
 *  be empty, and neither the first nor last non-blank character of
6567
 *  the replacement text should be a connector (| or ,).
6568
 *
6569
 * @param ctxt  an XML parser context
6570
 * @param inputchk  the input used for the current entity, needed for boundary checks
6571
 * @returns the tree of xmlElementContent describing the element
6572
 *          hierarchy.
6573
 */
6574
xmlElementContent *
6575
0
xmlParseElementChildrenContentDecl(xmlParserCtxt *ctxt, int inputchk) {
6576
    /* stub left for API/ABI compat */
6577
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6578
0
}
6579
6580
/**
6581
 * Parse the declaration for an Element content either Mixed or Children,
6582
 * the cases EMPTY and ANY are handled directly in #xmlParseElementDecl
6583
 *
6584
 * @deprecated Internal function, don't use.
6585
 *
6586
 *     [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6587
 *
6588
 * @param ctxt  an XML parser context
6589
 * @param name  the name of the element being defined.
6590
 * @param result  the Element Content pointer will be stored here if any
6591
 * @returns an xmlElementTypeVal value or -1 on error
6592
 */
6593
6594
int
6595
xmlParseElementContentDecl(xmlParserCtxt *ctxt, const xmlChar *name,
6596
15.7k
                           xmlElementContent **result) {
6597
6598
15.7k
    xmlElementContentPtr tree = NULL;
6599
15.7k
    int openInputNr = ctxt->inputNr;
6600
15.7k
    int res;
6601
6602
15.7k
    *result = NULL;
6603
6604
15.7k
    if (RAW != '(') {
6605
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6606
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6607
0
  return(-1);
6608
0
    }
6609
15.7k
    NEXT;
6610
15.7k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6611
15.7k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6612
3.37k
        tree = xmlParseElementMixedContentDecl(ctxt, openInputNr);
6613
3.37k
  res = XML_ELEMENT_TYPE_MIXED;
6614
12.4k
    } else {
6615
12.4k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, openInputNr, 1);
6616
12.4k
  res = XML_ELEMENT_TYPE_ELEMENT;
6617
12.4k
    }
6618
15.7k
    if (tree == NULL)
6619
5.45k
        return(-1);
6620
10.3k
    SKIP_BLANKS_PE;
6621
10.3k
    *result = tree;
6622
10.3k
    return(res);
6623
15.7k
}
6624
6625
/**
6626
 * Parse an element declaration. Always consumes '<!'.
6627
 *
6628
 * @deprecated Internal function, don't use.
6629
 *
6630
 *     [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6631
 *
6632
 * [ VC: Unique Element Type Declaration ]
6633
 * No element type may be declared more than once
6634
 *
6635
 * @param ctxt  an XML parser context
6636
 * @returns the type of the element, or -1 in case of error
6637
 */
6638
int
6639
23.3k
xmlParseElementDecl(xmlParserCtxt *ctxt) {
6640
23.3k
    const xmlChar *name;
6641
23.3k
    int ret = -1;
6642
23.3k
    xmlElementContentPtr content  = NULL;
6643
6644
23.3k
    if ((CUR != '<') || (NXT(1) != '!'))
6645
0
        return(ret);
6646
23.3k
    SKIP(2);
6647
6648
    /* GROW; done in the caller */
6649
23.3k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6650
22.3k
#ifdef LIBXML_VALID_ENABLED
6651
22.3k
  int oldInputNr = ctxt->inputNr;
6652
22.3k
#endif
6653
6654
22.3k
  SKIP(7);
6655
22.3k
  if (SKIP_BLANKS_PE == 0) {
6656
194
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6657
194
               "Space required after 'ELEMENT'\n");
6658
194
      return(-1);
6659
194
  }
6660
22.1k
        name = xmlParseName(ctxt);
6661
22.1k
  if (name == NULL) {
6662
349
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6663
349
         "xmlParseElementDecl: no name for Element\n");
6664
349
      return(-1);
6665
349
  }
6666
21.8k
  if (SKIP_BLANKS_PE == 0) {
6667
10.5k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6668
10.5k
         "Space required after the element name\n");
6669
10.5k
  }
6670
21.8k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6671
2.53k
      SKIP(5);
6672
      /*
6673
       * Element must always be empty.
6674
       */
6675
2.53k
      ret = XML_ELEMENT_TYPE_EMPTY;
6676
19.2k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6677
19.2k
             (NXT(2) == 'Y')) {
6678
2.11k
      SKIP(3);
6679
      /*
6680
       * Element is a generic container.
6681
       */
6682
2.11k
      ret = XML_ELEMENT_TYPE_ANY;
6683
17.1k
  } else if (RAW == '(') {
6684
15.7k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6685
15.7k
            if (ret <= 0)
6686
5.45k
                return(-1);
6687
15.7k
  } else {
6688
      /*
6689
       * [ WFC: PEs in Internal Subset ] error handling.
6690
       */
6691
1.39k
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6692
1.39k
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6693
1.39k
      return(-1);
6694
1.39k
  }
6695
6696
14.9k
  SKIP_BLANKS_PE;
6697
6698
14.9k
  if (RAW != '>') {
6699
3.10k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6700
3.10k
      if (content != NULL) {
6701
2.71k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6702
2.71k
      }
6703
11.8k
  } else {
6704
11.8k
#ifdef LIBXML_VALID_ENABLED
6705
11.8k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6706
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6707
0
                                 "Element declaration doesn't start and stop in"
6708
0
                                 " the same entity\n",
6709
0
                                 NULL, NULL);
6710
0
      }
6711
11.8k
#endif
6712
6713
11.8k
      NEXT;
6714
11.8k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6715
11.8k
    (ctxt->sax->elementDecl != NULL)) {
6716
10.9k
    if (content != NULL)
6717
6.93k
        content->parent = NULL;
6718
10.9k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6719
10.9k
                           content);
6720
10.9k
    if ((content != NULL) && (content->parent == NULL)) {
6721
        /*
6722
         * this is a trick: if xmlAddElementDecl is called,
6723
         * instead of copying the full tree it is plugged directly
6724
         * if called from the parser. Avoid duplicating the
6725
         * interfaces or change the API/ABI
6726
         */
6727
197
        xmlFreeDocElementContent(ctxt->myDoc, content);
6728
197
    }
6729
10.9k
      } else if (content != NULL) {
6730
686
    xmlFreeDocElementContent(ctxt->myDoc, content);
6731
686
      }
6732
11.8k
  }
6733
14.9k
    }
6734
15.9k
    return(ret);
6735
23.3k
}
6736
6737
/**
6738
 * Parse a conditional section. Always consumes '<!['.
6739
 *
6740
 *     [61] conditionalSect ::= includeSect | ignoreSect
6741
 *     [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6742
 *     [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6743
 *     [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>'
6744
 *                                 Ignore)*
6745
 *     [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6746
 * @param ctxt  an XML parser context
6747
 */
6748
6749
static void
6750
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6751
0
    size_t depth = 0;
6752
0
    int isFreshPE = 0;
6753
0
    int oldInputNr = ctxt->inputNr;
6754
0
    int declInputNr = ctxt->inputNr;
6755
6756
0
    while (!PARSER_STOPPED(ctxt)) {
6757
0
        if (ctxt->input->cur >= ctxt->input->end) {
6758
0
            if (ctxt->inputNr <= oldInputNr) {
6759
0
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6760
0
                return;
6761
0
            }
6762
6763
0
            xmlPopPE(ctxt);
6764
0
            declInputNr = ctxt->inputNr;
6765
0
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6766
0
            SKIP(3);
6767
0
            SKIP_BLANKS_PE;
6768
6769
0
            isFreshPE = 0;
6770
6771
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6772
0
                SKIP(7);
6773
0
                SKIP_BLANKS_PE;
6774
0
                if (RAW != '[') {
6775
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6776
0
                    return;
6777
0
                }
6778
0
#ifdef LIBXML_VALID_ENABLED
6779
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6780
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6781
0
                                     "All markup of the conditional section is"
6782
0
                                     " not in the same entity\n",
6783
0
                                     NULL, NULL);
6784
0
                }
6785
0
#endif
6786
0
                NEXT;
6787
6788
0
                depth++;
6789
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6790
0
                size_t ignoreDepth = 0;
6791
6792
0
                SKIP(6);
6793
0
                SKIP_BLANKS_PE;
6794
0
                if (RAW != '[') {
6795
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6796
0
                    return;
6797
0
                }
6798
0
#ifdef LIBXML_VALID_ENABLED
6799
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6800
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6801
0
                                     "All markup of the conditional section is"
6802
0
                                     " not in the same entity\n",
6803
0
                                     NULL, NULL);
6804
0
                }
6805
0
#endif
6806
0
                NEXT;
6807
6808
0
                while (PARSER_STOPPED(ctxt) == 0) {
6809
0
                    if (RAW == 0) {
6810
0
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6811
0
                        return;
6812
0
                    }
6813
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6814
0
                        SKIP(3);
6815
0
                        ignoreDepth++;
6816
                        /* Check for integer overflow */
6817
0
                        if (ignoreDepth == 0) {
6818
0
                            xmlErrMemory(ctxt);
6819
0
                            return;
6820
0
                        }
6821
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6822
0
                               (NXT(2) == '>')) {
6823
0
                        SKIP(3);
6824
0
                        if (ignoreDepth == 0)
6825
0
                            break;
6826
0
                        ignoreDepth--;
6827
0
                    } else {
6828
0
                        NEXT;
6829
0
                    }
6830
0
                }
6831
6832
0
#ifdef LIBXML_VALID_ENABLED
6833
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6834
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6835
0
                                     "All markup of the conditional section is"
6836
0
                                     " not in the same entity\n",
6837
0
                                     NULL, NULL);
6838
0
                }
6839
0
#endif
6840
0
            } else {
6841
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6842
0
                return;
6843
0
            }
6844
0
        } else if ((depth > 0) &&
6845
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6846
0
            if (isFreshPE) {
6847
0
                xmlFatalErrMsg(ctxt, XML_ERR_CONDSEC_INVALID,
6848
0
                               "Parameter entity must match "
6849
0
                               "extSubsetDecl\n");
6850
0
                return;
6851
0
            }
6852
6853
0
            depth--;
6854
0
#ifdef LIBXML_VALID_ENABLED
6855
0
            if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6856
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
0
                                 "All markup of the conditional section is not"
6858
0
                                 " in the same entity\n",
6859
0
                                 NULL, NULL);
6860
0
            }
6861
0
#endif
6862
0
            SKIP(3);
6863
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6864
0
            isFreshPE = 0;
6865
0
            xmlParseMarkupDecl(ctxt);
6866
0
        } else if (RAW == '%') {
6867
0
            xmlParsePERefInternal(ctxt, 1);
6868
0
            if (ctxt->inputNr > declInputNr) {
6869
0
                isFreshPE = 1;
6870
0
                declInputNr = ctxt->inputNr;
6871
0
            }
6872
0
        } else {
6873
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6874
0
            return;
6875
0
        }
6876
6877
0
        if (depth == 0)
6878
0
            break;
6879
6880
0
        SKIP_BLANKS;
6881
0
        SHRINK;
6882
0
        GROW;
6883
0
    }
6884
0
}
6885
6886
/**
6887
 * Parse markup declarations. Always consumes '<!' or '<?'.
6888
 *
6889
 * @deprecated Internal function, don't use.
6890
 *
6891
 *     [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6892
 *                         NotationDecl | PI | Comment
6893
 *
6894
 * [ VC: Proper Declaration/PE Nesting ]
6895
 * Parameter-entity replacement text must be properly nested with
6896
 * markup declarations. That is to say, if either the first character
6897
 * or the last character of a markup declaration (markupdecl above) is
6898
 * contained in the replacement text for a parameter-entity reference,
6899
 * both must be contained in the same replacement text.
6900
 *
6901
 * [ WFC: PEs in Internal Subset ]
6902
 * In the internal DTD subset, parameter-entity references can occur
6903
 * only where markup declarations can occur, not within markup declarations.
6904
 * (This does not apply to references that occur in external parameter
6905
 * entities or to the external subset.)
6906
 *
6907
 * @param ctxt  an XML parser context
6908
 */
6909
void
6910
117k
xmlParseMarkupDecl(xmlParserCtxt *ctxt) {
6911
117k
    GROW;
6912
117k
    if (CUR == '<') {
6913
117k
        if (NXT(1) == '!') {
6914
116k
      switch (NXT(2)) {
6915
54.5k
          case 'E':
6916
54.5k
        if (NXT(3) == 'L')
6917
23.3k
      xmlParseElementDecl(ctxt);
6918
31.1k
        else if (NXT(3) == 'N')
6919
30.9k
      xmlParseEntityDecl(ctxt);
6920
216
                    else
6921
216
                        SKIP(2);
6922
54.5k
        break;
6923
51.9k
          case 'A':
6924
51.9k
        xmlParseAttributeListDecl(ctxt);
6925
51.9k
        break;
6926
7.61k
          case 'N':
6927
7.61k
        xmlParseNotationDecl(ctxt);
6928
7.61k
        break;
6929
497
          case '-':
6930
497
        xmlParseComment(ctxt);
6931
497
        break;
6932
1.55k
    default:
6933
1.55k
                    xmlFatalErr(ctxt,
6934
1.55k
                                ctxt->inSubset == 2 ?
6935
0
                                    XML_ERR_EXT_SUBSET_NOT_FINISHED :
6936
1.55k
                                    XML_ERR_INT_SUBSET_NOT_FINISHED,
6937
1.55k
                                NULL);
6938
1.55k
                    SKIP(2);
6939
1.55k
        break;
6940
116k
      }
6941
116k
  } else if (NXT(1) == '?') {
6942
1.67k
      xmlParsePI(ctxt);
6943
1.67k
  }
6944
117k
    }
6945
117k
}
6946
6947
/**
6948
 * Parse an XML declaration header for external entities
6949
 *
6950
 * @deprecated Internal function, don't use.
6951
 *
6952
 *     [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6953
 * @param ctxt  an XML parser context
6954
 */
6955
6956
void
6957
0
xmlParseTextDecl(xmlParserCtxt *ctxt) {
6958
0
    xmlChar *version;
6959
6960
    /*
6961
     * We know that '<?xml' is here.
6962
     */
6963
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6964
0
  SKIP(5);
6965
0
    } else {
6966
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6967
0
  return;
6968
0
    }
6969
6970
0
    if (SKIP_BLANKS == 0) {
6971
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6972
0
           "Space needed after '<?xml'\n");
6973
0
    }
6974
6975
    /*
6976
     * We may have the VersionInfo here.
6977
     */
6978
0
    version = xmlParseVersionInfo(ctxt);
6979
0
    if (version == NULL) {
6980
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6981
0
        if (version == NULL) {
6982
0
            xmlErrMemory(ctxt);
6983
0
            return;
6984
0
        }
6985
0
    } else {
6986
0
  if (SKIP_BLANKS == 0) {
6987
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6988
0
               "Space needed here\n");
6989
0
  }
6990
0
    }
6991
0
    ctxt->input->version = version;
6992
6993
    /*
6994
     * We must have the encoding declaration
6995
     */
6996
0
    xmlParseEncodingDecl(ctxt);
6997
6998
0
    SKIP_BLANKS;
6999
0
    if ((RAW == '?') && (NXT(1) == '>')) {
7000
0
        SKIP(2);
7001
0
    } else if (RAW == '>') {
7002
        /* Deprecated old WD ... */
7003
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7004
0
  NEXT;
7005
0
    } else {
7006
0
        int c;
7007
7008
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7009
0
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7010
0
            NEXT;
7011
0
            if (c == '>')
7012
0
                break;
7013
0
        }
7014
0
    }
7015
0
}
7016
7017
/**
7018
 * Parse Markup declarations from an external subset
7019
 *
7020
 * @deprecated Internal function, don't use.
7021
 *
7022
 *     [30] extSubset ::= textDecl? extSubsetDecl
7023
 *
7024
 *     [31] extSubsetDecl ::= (markupdecl | conditionalSect |
7025
 *                             PEReference | S) *
7026
 * @param ctxt  an XML parser context
7027
 * @param publicId  the public identifier
7028
 * @param systemId  the system identifier (URL)
7029
 */
7030
void
7031
xmlParseExternalSubset(xmlParserCtxt *ctxt, const xmlChar *publicId,
7032
3.05k
                       const xmlChar *systemId) {
7033
3.05k
    int oldInputNr;
7034
7035
3.05k
    xmlCtxtInitializeLate(ctxt);
7036
7037
3.05k
    xmlDetectEncoding(ctxt);
7038
7039
3.05k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7040
0
  xmlParseTextDecl(ctxt);
7041
0
    }
7042
3.05k
    if (ctxt->myDoc == NULL) {
7043
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7044
0
  if (ctxt->myDoc == NULL) {
7045
0
      xmlErrMemory(ctxt);
7046
0
      return;
7047
0
  }
7048
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7049
0
    }
7050
3.05k
    if ((ctxt->myDoc->intSubset == NULL) &&
7051
3.05k
        (xmlCreateIntSubset(ctxt->myDoc, NULL, publicId, systemId) == NULL)) {
7052
5
        xmlErrMemory(ctxt);
7053
5
    }
7054
7055
3.05k
    ctxt->inSubset = 2;
7056
3.05k
    oldInputNr = ctxt->inputNr;
7057
7058
3.05k
    SKIP_BLANKS;
7059
3.05k
    while (!PARSER_STOPPED(ctxt)) {
7060
1.40k
        if (ctxt->input->cur >= ctxt->input->end) {
7061
0
            if (ctxt->inputNr <= oldInputNr) {
7062
0
                xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
7063
0
                break;
7064
0
            }
7065
7066
0
            xmlPopPE(ctxt);
7067
1.40k
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7068
0
            xmlParseConditionalSections(ctxt);
7069
1.40k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7070
0
            xmlParseMarkupDecl(ctxt);
7071
1.40k
        } else if (RAW == '%') {
7072
0
            xmlParsePERefInternal(ctxt, 1);
7073
1.40k
        } else {
7074
1.40k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7075
7076
1.40k
            while (ctxt->inputNr > oldInputNr)
7077
0
                xmlPopPE(ctxt);
7078
1.40k
            break;
7079
1.40k
        }
7080
0
        SKIP_BLANKS;
7081
0
        SHRINK;
7082
0
        GROW;
7083
0
    }
7084
3.05k
}
7085
7086
/**
7087
 * Parse and handle entity references in content, depending on the SAX
7088
 * interface, this may end-up in a call to character() if this is a
7089
 * CharRef, a predefined entity, if there is no reference() callback.
7090
 * or if the parser was asked to switch to that mode.
7091
 *
7092
 * @deprecated Internal function, don't use.
7093
 *
7094
 * Always consumes '&'.
7095
 *
7096
 *     [67] Reference ::= EntityRef | CharRef
7097
 * @param ctxt  an XML parser context
7098
 */
7099
void
7100
130k
xmlParseReference(xmlParserCtxt *ctxt) {
7101
130k
    xmlEntityPtr ent = NULL;
7102
130k
    const xmlChar *name;
7103
130k
    xmlChar *val;
7104
7105
130k
    if (RAW != '&')
7106
0
        return;
7107
7108
    /*
7109
     * Simple case of a CharRef
7110
     */
7111
130k
    if (NXT(1) == '#') {
7112
37.2k
  int i = 0;
7113
37.2k
  xmlChar out[16];
7114
37.2k
  int value = xmlParseCharRef(ctxt);
7115
7116
37.2k
  if (value == 0)
7117
2.56k
      return;
7118
7119
        /*
7120
         * Just encode the value in UTF-8
7121
         */
7122
34.6k
        COPY_BUF(out, i, value);
7123
34.6k
        out[i] = 0;
7124
34.6k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7125
34.6k
            (!ctxt->disableSAX))
7126
24.7k
            ctxt->sax->characters(ctxt->userData, out, i);
7127
34.6k
  return;
7128
37.2k
    }
7129
7130
    /*
7131
     * We are seeing an entity reference
7132
     */
7133
93.4k
    name = xmlParseEntityRefInternal(ctxt);
7134
93.4k
    if (name == NULL)
7135
6.27k
        return;
7136
87.1k
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7137
87.1k
    if (ent == NULL) {
7138
        /*
7139
         * Create a reference for undeclared entities.
7140
         */
7141
3.30k
        if ((ctxt->replaceEntities == 0) &&
7142
3.30k
            (ctxt->sax != NULL) &&
7143
3.30k
            (ctxt->disableSAX == 0) &&
7144
3.30k
            (ctxt->sax->reference != NULL)) {
7145
751
            ctxt->sax->reference(ctxt->userData, name);
7146
751
        }
7147
3.30k
        return;
7148
3.30k
    }
7149
83.8k
    if (!ctxt->wellFormed)
7150
48.8k
  return;
7151
7152
    /* special case of predefined entities */
7153
35.0k
    if ((ent->name == NULL) ||
7154
35.0k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7155
28.8k
  val = ent->content;
7156
28.8k
  if (val == NULL) return;
7157
  /*
7158
   * inline the entity.
7159
   */
7160
28.8k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7161
28.8k
      (!ctxt->disableSAX))
7162
28.8k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7163
28.8k
  return;
7164
28.8k
    }
7165
7166
    /*
7167
     * Some users try to parse entities on their own and used to set
7168
     * the renamed "checked" member. Fix the flags to cover this
7169
     * case.
7170
     */
7171
6.19k
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7172
0
        ent->flags |= XML_ENT_PARSED;
7173
7174
    /*
7175
     * The first reference to the entity trigger a parsing phase
7176
     * where the ent->children is filled with the result from
7177
     * the parsing.
7178
     * Note: external parsed entities will not be loaded, it is not
7179
     * required for a non-validating parser, unless the parsing option
7180
     * of validating, or substituting entities were given. Doing so is
7181
     * far more secure as the parser will only process data coming from
7182
     * the document entity by default.
7183
     *
7184
     * FIXME: This doesn't work correctly since entities can be
7185
     * expanded with different namespace declarations in scope.
7186
     * For example:
7187
     *
7188
     * <!DOCTYPE doc [
7189
     *   <!ENTITY ent "<ns:elem/>">
7190
     * ]>
7191
     * <doc>
7192
     *   <decl1 xmlns:ns="urn:ns1">
7193
     *     &ent;
7194
     *   </decl1>
7195
     *   <decl2 xmlns:ns="urn:ns2">
7196
     *     &ent;
7197
     *   </decl2>
7198
     * </doc>
7199
     *
7200
     * Proposed fix:
7201
     *
7202
     * - Ignore current namespace declarations when parsing the
7203
     *   entity. If a prefix can't be resolved, don't report an error
7204
     *   but mark it as unresolved.
7205
     * - Try to resolve these prefixes when expanding the entity.
7206
     *   This will require a specialized version of xmlStaticCopyNode
7207
     *   which can also make use of the namespace hash table to avoid
7208
     *   quadratic behavior.
7209
     *
7210
     * Alternatively, we could simply reparse the entity on each
7211
     * expansion like we already do with custom SAX callbacks.
7212
     * External entity content should be cached in this case.
7213
     */
7214
6.19k
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7215
6.19k
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7216
906
         ((ctxt->replaceEntities) ||
7217
5.88k
          (ctxt->validate)))) {
7218
5.88k
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7219
3.71k
            xmlCtxtParseEntity(ctxt, ent);
7220
3.71k
        } else if (ent->children == NULL) {
7221
            /*
7222
             * Probably running in SAX mode and the callbacks don't
7223
             * build the entity content. Parse the entity again.
7224
             *
7225
             * This will also be triggered in normal tree builder mode
7226
             * if an entity happens to be empty, causing unnecessary
7227
             * reloads. It's hard to come up with a reliable check in
7228
             * which mode we're running.
7229
             */
7230
1.34k
            xmlCtxtParseEntity(ctxt, ent);
7231
1.34k
        }
7232
5.88k
    }
7233
7234
    /*
7235
     * We also check for amplification if entities aren't substituted.
7236
     * They might be expanded later.
7237
     */
7238
6.19k
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7239
0
        return;
7240
7241
6.19k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7242
2.75k
        return;
7243
7244
3.44k
    if (ctxt->replaceEntities == 0) {
7245
  /*
7246
   * Create a reference
7247
   */
7248
1.66k
        if (ctxt->sax->reference != NULL)
7249
1.66k
      ctxt->sax->reference(ctxt->userData, ent->name);
7250
1.78k
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7251
1.28k
        xmlNodePtr copy, cur;
7252
7253
        /*
7254
         * Seems we are generating the DOM content, copy the tree
7255
   */
7256
1.28k
        cur = ent->children;
7257
7258
        /*
7259
         * Handle first text node with SAX to coalesce text efficiently
7260
         */
7261
1.28k
        if ((cur->type == XML_TEXT_NODE) ||
7262
1.28k
            (cur->type == XML_CDATA_SECTION_NODE)) {
7263
666
            int len = xmlStrlen(cur->content);
7264
7265
666
            if ((cur->type == XML_TEXT_NODE) ||
7266
666
                (ctxt->options & XML_PARSE_NOCDATA)) {
7267
380
                if (ctxt->sax->characters != NULL)
7268
380
                    ctxt->sax->characters(ctxt, cur->content, len);
7269
380
            } else {
7270
286
                if (ctxt->sax->cdataBlock != NULL)
7271
286
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7272
286
            }
7273
7274
666
            cur = cur->next;
7275
666
        }
7276
7277
2.38k
        while (cur != NULL) {
7278
1.78k
            xmlNodePtr last;
7279
7280
            /*
7281
             * Handle last text node with SAX to coalesce text efficiently
7282
             */
7283
1.78k
            if ((cur->next == NULL) &&
7284
1.78k
                ((cur->type == XML_TEXT_NODE) ||
7285
906
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7286
691
                int len = xmlStrlen(cur->content);
7287
7288
691
                if ((cur->type == XML_TEXT_NODE) ||
7289
691
                    (ctxt->options & XML_PARSE_NOCDATA)) {
7290
405
                    if (ctxt->sax->characters != NULL)
7291
405
                        ctxt->sax->characters(ctxt, cur->content, len);
7292
405
                } else {
7293
286
                    if (ctxt->sax->cdataBlock != NULL)
7294
286
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7295
286
                }
7296
7297
691
                break;
7298
691
            }
7299
7300
            /*
7301
             * Reset coalesce buffer stats only for non-text nodes.
7302
             */
7303
1.09k
            ctxt->nodemem = 0;
7304
1.09k
            ctxt->nodelen = 0;
7305
7306
1.09k
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7307
7308
1.09k
            if (copy == NULL) {
7309
2
                xmlErrMemory(ctxt);
7310
2
                break;
7311
2
            }
7312
7313
1.09k
            if (ctxt->parseMode == XML_PARSE_READER) {
7314
                /* Needed for reader */
7315
0
                copy->extra = cur->extra;
7316
                /* Maybe needed for reader */
7317
0
                copy->_private = cur->_private;
7318
0
            }
7319
7320
1.09k
            copy->parent = ctxt->node;
7321
1.09k
            last = ctxt->node->last;
7322
1.09k
            if (last == NULL) {
7323
594
                ctxt->node->children = copy;
7324
594
            } else {
7325
499
                last->next = copy;
7326
499
                copy->prev = last;
7327
499
            }
7328
1.09k
            ctxt->node->last = copy;
7329
7330
1.09k
            cur = cur->next;
7331
1.09k
        }
7332
1.28k
    }
7333
3.44k
}
7334
7335
static void
7336
4.24k
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7337
    /*
7338
     * [ WFC: Entity Declared ]
7339
     * In a document without any DTD, a document with only an
7340
     * internal DTD subset which contains no parameter entity
7341
     * references, or a document with "standalone='yes'", the
7342
     * Name given in the entity reference must match that in an
7343
     * entity declaration, except that well-formed documents
7344
     * need not declare any of the following entities: amp, lt,
7345
     * gt, apos, quot.
7346
     * The declaration of a parameter entity must precede any
7347
     * reference to it.
7348
     * Similarly, the declaration of a general entity must
7349
     * precede any reference to it which appears in a default
7350
     * value in an attribute-list declaration. Note that if
7351
     * entities are declared in the external subset or in
7352
     * external parameter entities, a non-validating processor
7353
     * is not obligated to read and process their declarations;
7354
     * for such documents, the rule that an entity must be
7355
     * declared is a well-formedness constraint only if
7356
     * standalone='yes'.
7357
     */
7358
4.24k
    if ((ctxt->standalone == 1) ||
7359
4.24k
        ((ctxt->hasExternalSubset == 0) &&
7360
3.10k
         (ctxt->hasPErefs == 0))) {
7361
2.57k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7362
2.57k
                          "Entity '%s' not defined\n", name);
7363
2.57k
#ifdef LIBXML_VALID_ENABLED
7364
2.57k
    } else if (ctxt->validate) {
7365
        /*
7366
         * [ VC: Entity Declared ]
7367
         * In a document with an external subset or external
7368
         * parameter entities with "standalone='no'", ...
7369
         * ... The declaration of a parameter entity must
7370
         * precede any reference to it...
7371
         */
7372
240
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7373
240
                         "Entity '%s' not defined\n", name, NULL);
7374
240
#endif
7375
1.43k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7376
1.43k
               ((ctxt->replaceEntities) &&
7377
1.07k
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7378
        /*
7379
         * Also raise a non-fatal error
7380
         *
7381
         * - if the external subset is loaded and all entity declarations
7382
         *   should be available, or
7383
         * - entity substition was requested without restricting
7384
         *   external entity access.
7385
         */
7386
559
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7387
559
                     "Entity '%s' not defined\n", name);
7388
872
    } else {
7389
872
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7390
872
                      "Entity '%s' not defined\n", name, NULL);
7391
872
    }
7392
7393
4.24k
    ctxt->valid = 0;
7394
4.24k
}
7395
7396
static xmlEntityPtr
7397
491k
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7398
491k
    xmlEntityPtr ent = NULL;
7399
7400
    /*
7401
     * Predefined entities override any extra definition
7402
     */
7403
491k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7404
350k
        ent = xmlGetPredefinedEntity(name);
7405
350k
        if (ent != NULL)
7406
323k
            return(ent);
7407
350k
    }
7408
7409
    /*
7410
     * Ask first SAX for entity resolution, otherwise try the
7411
     * entities which may have stored in the parser context.
7412
     */
7413
168k
    if (ctxt->sax != NULL) {
7414
168k
  if (ctxt->sax->getEntity != NULL)
7415
168k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7416
168k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7417
168k
      (ctxt->options & XML_PARSE_OLDSAX))
7418
229
      ent = xmlGetPredefinedEntity(name);
7419
168k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7420
168k
      (ctxt->userData==ctxt)) {
7421
1.01k
      ent = xmlSAX2GetEntity(ctxt, name);
7422
1.01k
  }
7423
168k
    }
7424
7425
168k
    if (ent == NULL) {
7426
3.74k
        xmlHandleUndeclaredEntity(ctxt, name);
7427
3.74k
    }
7428
7429
    /*
7430
     * [ WFC: Parsed Entity ]
7431
     * An entity reference must not contain the name of an
7432
     * unparsed entity
7433
     */
7434
164k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7435
207
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7436
207
     "Entity reference to unparsed entity %s\n", name);
7437
207
        ent = NULL;
7438
207
    }
7439
7440
    /*
7441
     * [ WFC: No External Entity References ]
7442
     * Attribute values cannot contain direct or indirect
7443
     * entity references to external entities.
7444
     */
7445
164k
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7446
1.93k
        if (inAttr) {
7447
196
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7448
196
                 "Attribute references external entity '%s'\n", name);
7449
196
            ent = NULL;
7450
196
        }
7451
1.93k
    }
7452
7453
168k
    return(ent);
7454
491k
}
7455
7456
/**
7457
 * Parse an entity reference. Always consumes '&'.
7458
 *
7459
 *     [68] EntityRef ::= '&' Name ';'
7460
 *
7461
 * @param ctxt  an XML parser context
7462
 * @returns the name, or NULL in case of error.
7463
 */
7464
static const xmlChar *
7465
384k
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7466
384k
    const xmlChar *name;
7467
7468
384k
    GROW;
7469
7470
384k
    if (RAW != '&')
7471
0
        return(NULL);
7472
384k
    NEXT;
7473
384k
    name = xmlParseName(ctxt);
7474
384k
    if (name == NULL) {
7475
10.2k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7476
10.2k
           "xmlParseEntityRef: no name\n");
7477
10.2k
        return(NULL);
7478
10.2k
    }
7479
374k
    if (RAW != ';') {
7480
1.30k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7481
1.30k
  return(NULL);
7482
1.30k
    }
7483
373k
    NEXT;
7484
7485
373k
    return(name);
7486
374k
}
7487
7488
/**
7489
 * @deprecated Internal function, don't use.
7490
 *
7491
 * @param ctxt  an XML parser context
7492
 * @returns the xmlEntity if found, or NULL otherwise.
7493
 */
7494
xmlEntity *
7495
0
xmlParseEntityRef(xmlParserCtxt *ctxt) {
7496
0
    const xmlChar *name;
7497
7498
0
    if (ctxt == NULL)
7499
0
        return(NULL);
7500
7501
0
    name = xmlParseEntityRefInternal(ctxt);
7502
0
    if (name == NULL)
7503
0
        return(NULL);
7504
7505
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7506
0
}
7507
7508
/**
7509
 * Parse ENTITY references declarations, but this version parses it from
7510
 * a string value.
7511
 *
7512
 *     [68] EntityRef ::= '&' Name ';'
7513
 *
7514
 * [ WFC: Entity Declared ]
7515
 * In a document without any DTD, a document with only an internal DTD
7516
 * subset which contains no parameter entity references, or a document
7517
 * with "standalone='yes'", the Name given in the entity reference
7518
 * must match that in an entity declaration, except that well-formed
7519
 * documents need not declare any of the following entities: amp, lt,
7520
 * gt, apos, quot.  The declaration of a parameter entity must precede
7521
 * any reference to it.  Similarly, the declaration of a general entity
7522
 * must precede any reference to it which appears in a default value in an
7523
 * attribute-list declaration. Note that if entities are declared in the
7524
 * external subset or in external parameter entities, a non-validating
7525
 * processor is not obligated to read and process their declarations;
7526
 * for such documents, the rule that an entity must be declared is a
7527
 * well-formedness constraint only if standalone='yes'.
7528
 *
7529
 * [ WFC: Parsed Entity ]
7530
 * An entity reference must not contain the name of an unparsed entity
7531
 *
7532
 * @param ctxt  an XML parser context
7533
 * @param str  a pointer to an index in the string
7534
 * @returns the allocated entity name, or NULL on error. The str pointer
7535
 * is updated to the current location in the string.
7536
 */
7537
static xmlChar *
7538
118k
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7539
118k
    xmlChar *name;
7540
118k
    const xmlChar *ptr;
7541
118k
    xmlChar cur;
7542
7543
118k
    if ((str == NULL) || (*str == NULL))
7544
0
        return(NULL);
7545
118k
    ptr = *str;
7546
118k
    cur = *ptr;
7547
118k
    if (cur != '&')
7548
0
  return(NULL);
7549
7550
118k
    ptr++;
7551
118k
    name = xmlParseStringName(ctxt, &ptr);
7552
118k
    if (name == NULL) {
7553
10
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7554
10
           "xmlParseStringEntityRef: no name\n");
7555
10
  *str = ptr;
7556
10
  return(NULL);
7557
10
    }
7558
118k
    if (*ptr != ';') {
7559
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7560
0
        xmlFree(name);
7561
0
  *str = ptr;
7562
0
  return(NULL);
7563
0
    }
7564
118k
    ptr++;
7565
7566
118k
    *str = ptr;
7567
118k
    return(name);
7568
118k
}
7569
7570
/**
7571
 * Parse a parameter entity reference. Always consumes '%'.
7572
 *
7573
 * The entity content is handled directly by pushing its content as
7574
 * a new input stream.
7575
 *
7576
 *     [69] PEReference ::= '%' Name ';'
7577
 *
7578
 * [ WFC: No Recursion ]
7579
 * A parsed entity must not contain a recursive
7580
 * reference to itself, either directly or indirectly.
7581
 *
7582
 * [ WFC: Entity Declared ]
7583
 * In a document without any DTD, a document with only an internal DTD
7584
 * subset which contains no parameter entity references, or a document
7585
 * with "standalone='yes'", ...  ... The declaration of a parameter
7586
 * entity must precede any reference to it...
7587
 *
7588
 * [ VC: Entity Declared ]
7589
 * In a document with an external subset or external parameter entities
7590
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7591
 * must precede any reference to it...
7592
 *
7593
 * [ WFC: In DTD ]
7594
 * Parameter-entity references may only appear in the DTD.
7595
 * NOTE: misleading but this is handled.
7596
 *
7597
 * @param ctxt  an XML parser context
7598
 * @param markupDecl  whether the PERef starts a markup declaration
7599
 */
7600
static void
7601
707
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl) {
7602
707
    const xmlChar *name;
7603
707
    xmlEntityPtr entity = NULL;
7604
707
    xmlParserInputPtr input;
7605
7606
707
    if (RAW != '%')
7607
0
        return;
7608
707
    NEXT;
7609
707
    name = xmlParseName(ctxt);
7610
707
    if (name == NULL) {
7611
262
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7612
262
  return;
7613
262
    }
7614
445
    if (RAW != ';') {
7615
210
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7616
210
        return;
7617
210
    }
7618
7619
235
    NEXT;
7620
7621
    /* Must be set before xmlHandleUndeclaredEntity */
7622
235
    ctxt->hasPErefs = 1;
7623
7624
    /*
7625
     * Request the entity from SAX
7626
     */
7627
235
    if ((ctxt->sax != NULL) &&
7628
235
  (ctxt->sax->getParameterEntity != NULL))
7629
235
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7630
7631
235
    if (entity == NULL) {
7632
235
        xmlHandleUndeclaredEntity(ctxt, name);
7633
235
    } else {
7634
  /*
7635
   * Internal checking in case the entity quest barfed
7636
   */
7637
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7638
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7639
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7640
0
      "Internal: %%%s; is not a parameter entity\n",
7641
0
        name, NULL);
7642
0
  } else {
7643
0
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7644
0
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7645
0
     (((ctxt->loadsubset & ~XML_SKIP_IDS) == 0) &&
7646
0
      (ctxt->replaceEntities == 0) &&
7647
0
      (ctxt->validate == 0))))
7648
0
    return;
7649
7650
0
            if (entity->flags & XML_ENT_EXPANDING) {
7651
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7652
0
                return;
7653
0
            }
7654
7655
0
      input = xmlNewEntityInputStream(ctxt, entity);
7656
0
      if (xmlCtxtPushInput(ctxt, input) < 0) {
7657
0
                xmlFreeInputStream(input);
7658
0
    return;
7659
0
            }
7660
7661
0
            entity->flags |= XML_ENT_EXPANDING;
7662
7663
0
            if (markupDecl)
7664
0
                input->flags |= XML_INPUT_MARKUP_DECL;
7665
7666
0
            GROW;
7667
7668
0
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7669
0
                xmlDetectEncoding(ctxt);
7670
7671
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7672
0
                    (IS_BLANK_CH(NXT(5)))) {
7673
0
                    xmlParseTextDecl(ctxt);
7674
0
                }
7675
0
            }
7676
0
  }
7677
0
    }
7678
235
}
7679
7680
/**
7681
 * Parse a parameter entity reference.
7682
 *
7683
 * @deprecated Internal function, don't use.
7684
 *
7685
 * @param ctxt  an XML parser context
7686
 */
7687
void
7688
0
xmlParsePEReference(xmlParserCtxt *ctxt) {
7689
0
    xmlParsePERefInternal(ctxt, 0);
7690
0
}
7691
7692
/**
7693
 * Load the content of an entity.
7694
 *
7695
 * @param ctxt  an XML parser context
7696
 * @param entity  an unloaded system entity
7697
 * @returns 0 in case of success and -1 in case of failure
7698
 */
7699
static int
7700
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7701
0
    xmlParserInputPtr oldinput, input = NULL;
7702
0
    xmlParserInputPtr *oldinputTab;
7703
0
    xmlChar *oldencoding;
7704
0
    xmlChar *content = NULL;
7705
0
    xmlResourceType rtype;
7706
0
    size_t length, i;
7707
0
    int oldinputNr, oldinputMax;
7708
0
    int ret = -1;
7709
0
    int res;
7710
7711
0
    if ((ctxt == NULL) || (entity == NULL) ||
7712
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7713
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7714
0
  (entity->content != NULL)) {
7715
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7716
0
              "xmlLoadEntityContent parameter error");
7717
0
        return(-1);
7718
0
    }
7719
7720
0
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7721
0
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7722
0
    else
7723
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7724
7725
0
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7726
0
                            (char *) entity->ExternalID, rtype);
7727
0
    if (input == NULL)
7728
0
        return(-1);
7729
7730
0
    oldinput = ctxt->input;
7731
0
    oldinputNr = ctxt->inputNr;
7732
0
    oldinputMax = ctxt->inputMax;
7733
0
    oldinputTab = ctxt->inputTab;
7734
0
    oldencoding = ctxt->encoding;
7735
7736
0
    ctxt->input = NULL;
7737
0
    ctxt->inputNr = 0;
7738
0
    ctxt->inputMax = 1;
7739
0
    ctxt->encoding = NULL;
7740
0
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7741
0
    if (ctxt->inputTab == NULL) {
7742
0
        xmlErrMemory(ctxt);
7743
0
        xmlFreeInputStream(input);
7744
0
        goto error;
7745
0
    }
7746
7747
0
    xmlBufResetInput(input->buf->buffer, input);
7748
7749
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
7750
0
        xmlFreeInputStream(input);
7751
0
        goto error;
7752
0
    }
7753
7754
0
    xmlDetectEncoding(ctxt);
7755
7756
    /*
7757
     * Parse a possible text declaration first
7758
     */
7759
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7760
0
  xmlParseTextDecl(ctxt);
7761
        /*
7762
         * An XML-1.0 document can't reference an entity not XML-1.0
7763
         */
7764
0
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7765
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7766
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7767
0
                           "Version mismatch between document and entity\n");
7768
0
        }
7769
0
    }
7770
7771
0
    length = input->cur - input->base;
7772
0
    xmlBufShrink(input->buf->buffer, length);
7773
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7774
7775
0
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7776
0
        ;
7777
7778
0
    xmlBufResetInput(input->buf->buffer, input);
7779
7780
0
    if (res < 0) {
7781
0
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7782
0
        goto error;
7783
0
    }
7784
7785
0
    length = xmlBufUse(input->buf->buffer);
7786
0
    if (length > INT_MAX) {
7787
0
        xmlErrMemory(ctxt);
7788
0
        goto error;
7789
0
    }
7790
7791
0
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
7792
0
    if (content == NULL) {
7793
0
        xmlErrMemory(ctxt);
7794
0
        goto error;
7795
0
    }
7796
7797
0
    for (i = 0; i < length; ) {
7798
0
        int clen = length - i;
7799
0
        int c = xmlGetUTF8Char(content + i, &clen);
7800
7801
0
        if ((c < 0) || (!IS_CHAR(c))) {
7802
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7803
0
                              "xmlLoadEntityContent: invalid char value %d\n",
7804
0
                              content[i]);
7805
0
            goto error;
7806
0
        }
7807
0
        i += clen;
7808
0
    }
7809
7810
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7811
0
    entity->content = content;
7812
0
    entity->length = length;
7813
0
    content = NULL;
7814
0
    ret = 0;
7815
7816
0
error:
7817
0
    while (ctxt->inputNr > 0)
7818
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
7819
0
    xmlFree(ctxt->inputTab);
7820
0
    xmlFree(ctxt->encoding);
7821
7822
0
    ctxt->input = oldinput;
7823
0
    ctxt->inputNr = oldinputNr;
7824
0
    ctxt->inputMax = oldinputMax;
7825
0
    ctxt->inputTab = oldinputTab;
7826
0
    ctxt->encoding = oldencoding;
7827
7828
0
    xmlFree(content);
7829
7830
0
    return(ret);
7831
0
}
7832
7833
/**
7834
 * Parse PEReference declarations
7835
 *
7836
 *     [69] PEReference ::= '%' Name ';'
7837
 *
7838
 * [ WFC: No Recursion ]
7839
 * A parsed entity must not contain a recursive
7840
 * reference to itself, either directly or indirectly.
7841
 *
7842
 * [ WFC: Entity Declared ]
7843
 * In a document without any DTD, a document with only an internal DTD
7844
 * subset which contains no parameter entity references, or a document
7845
 * with "standalone='yes'", ...  ... The declaration of a parameter
7846
 * entity must precede any reference to it...
7847
 *
7848
 * [ VC: Entity Declared ]
7849
 * In a document with an external subset or external parameter entities
7850
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7851
 * must precede any reference to it...
7852
 *
7853
 * [ WFC: In DTD ]
7854
 * Parameter-entity references may only appear in the DTD.
7855
 * NOTE: misleading but this is handled.
7856
 *
7857
 * @param ctxt  an XML parser context
7858
 * @param str  a pointer to an index in the string
7859
 * @returns the string of the entity content.
7860
 *         str is updated to the current value of the index
7861
 */
7862
static xmlEntityPtr
7863
1.85k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7864
1.85k
    const xmlChar *ptr;
7865
1.85k
    xmlChar cur;
7866
1.85k
    xmlChar *name;
7867
1.85k
    xmlEntityPtr entity = NULL;
7868
7869
1.85k
    if ((str == NULL) || (*str == NULL)) return(NULL);
7870
1.85k
    ptr = *str;
7871
1.85k
    cur = *ptr;
7872
1.85k
    if (cur != '%')
7873
0
        return(NULL);
7874
1.85k
    ptr++;
7875
1.85k
    name = xmlParseStringName(ctxt, &ptr);
7876
1.85k
    if (name == NULL) {
7877
1.23k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7878
1.23k
           "xmlParseStringPEReference: no name\n");
7879
1.23k
  *str = ptr;
7880
1.23k
  return(NULL);
7881
1.23k
    }
7882
618
    cur = *ptr;
7883
618
    if (cur != ';') {
7884
356
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7885
356
  xmlFree(name);
7886
356
  *str = ptr;
7887
356
  return(NULL);
7888
356
    }
7889
262
    ptr++;
7890
7891
    /* Must be set before xmlHandleUndeclaredEntity */
7892
262
    ctxt->hasPErefs = 1;
7893
7894
    /*
7895
     * Request the entity from SAX
7896
     */
7897
262
    if ((ctxt->sax != NULL) &&
7898
262
  (ctxt->sax->getParameterEntity != NULL))
7899
262
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7900
7901
262
    if (entity == NULL) {
7902
262
        xmlHandleUndeclaredEntity(ctxt, name);
7903
262
    } else {
7904
  /*
7905
   * Internal checking in case the entity quest barfed
7906
   */
7907
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7908
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7909
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7910
0
        "%%%s; is not a parameter entity\n",
7911
0
        name, NULL);
7912
0
  }
7913
0
    }
7914
7915
262
    xmlFree(name);
7916
262
    *str = ptr;
7917
262
    return(entity);
7918
618
}
7919
7920
/**
7921
 * Parse a DOCTYPE declaration
7922
 *
7923
 * @deprecated Internal function, don't use.
7924
 *
7925
 *     [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7926
 *                          ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7927
 *
7928
 * [ VC: Root Element Type ]
7929
 * The Name in the document type declaration must match the element
7930
 * type of the root element.
7931
 *
7932
 * @param ctxt  an XML parser context
7933
 */
7934
7935
void
7936
105k
xmlParseDocTypeDecl(xmlParserCtxt *ctxt) {
7937
105k
    const xmlChar *name = NULL;
7938
105k
    xmlChar *publicId = NULL;
7939
105k
    xmlChar *URI = NULL;
7940
7941
    /*
7942
     * We know that '<!DOCTYPE' has been detected.
7943
     */
7944
105k
    SKIP(9);
7945
7946
105k
    if (SKIP_BLANKS == 0) {
7947
283
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7948
283
                       "Space required after 'DOCTYPE'\n");
7949
283
    }
7950
7951
    /*
7952
     * Parse the DOCTYPE name.
7953
     */
7954
105k
    name = xmlParseName(ctxt);
7955
105k
    if (name == NULL) {
7956
778
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7957
778
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7958
778
    }
7959
105k
    ctxt->intSubName = name;
7960
7961
105k
    SKIP_BLANKS;
7962
7963
    /*
7964
     * Check for public and system identifier (URI)
7965
     */
7966
105k
    URI = xmlParseExternalID(ctxt, &publicId, 1);
7967
7968
105k
    if ((URI != NULL) || (publicId != NULL)) {
7969
101k
        ctxt->hasExternalSubset = 1;
7970
101k
    }
7971
105k
    ctxt->extSubURI = URI;
7972
105k
    ctxt->extSubSystem = publicId;
7973
7974
105k
    SKIP_BLANKS;
7975
7976
    /*
7977
     * Create and update the internal subset.
7978
     */
7979
105k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7980
105k
  (!ctxt->disableSAX))
7981
88.4k
  ctxt->sax->internalSubset(ctxt->userData, name, publicId, URI);
7982
7983
105k
    if ((RAW != '[') && (RAW != '>')) {
7984
5.04k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7985
5.04k
    }
7986
105k
}
7987
7988
/**
7989
 * Parse the internal subset declaration
7990
 *
7991
 *     [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7992
 * @param ctxt  an XML parser context
7993
 */
7994
7995
static void
7996
81.4k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7997
    /*
7998
     * Is there any DTD definition ?
7999
     */
8000
81.4k
    if (RAW == '[') {
8001
81.4k
        int oldInputNr = ctxt->inputNr;
8002
8003
81.4k
        NEXT;
8004
  /*
8005
   * Parse the succession of Markup declarations and
8006
   * PEReferences.
8007
   * Subsequence (markupdecl | PEReference | S)*
8008
   */
8009
81.4k
  SKIP_BLANKS;
8010
200k
        while (1) {
8011
200k
            if (PARSER_STOPPED(ctxt)) {
8012
515
                return;
8013
199k
            } else if (ctxt->input->cur >= ctxt->input->end) {
8014
5.96k
                if (ctxt->inputNr <= oldInputNr) {
8015
5.96k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8016
5.96k
                    return;
8017
5.96k
                }
8018
0
                xmlPopPE(ctxt);
8019
193k
            } else if ((RAW == ']') && (ctxt->inputNr <= oldInputNr)) {
8020
36.6k
                NEXT;
8021
36.6k
                SKIP_BLANKS;
8022
36.6k
                break;
8023
156k
            } else if ((PARSER_EXTERNAL(ctxt)) &&
8024
156k
                       (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8025
                /*
8026
                 * Conditional sections are allowed in external entities
8027
                 * included by PE References in the internal subset.
8028
                 */
8029
0
                xmlParseConditionalSections(ctxt);
8030
156k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8031
117k
                xmlParseMarkupDecl(ctxt);
8032
117k
            } else if (RAW == '%') {
8033
707
                xmlParsePERefInternal(ctxt, 1);
8034
38.3k
            } else {
8035
38.3k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8036
8037
38.3k
                while (ctxt->inputNr > oldInputNr)
8038
0
                    xmlPopPE(ctxt);
8039
38.3k
                return;
8040
38.3k
            }
8041
118k
            SKIP_BLANKS;
8042
118k
            SHRINK;
8043
118k
            GROW;
8044
118k
        }
8045
81.4k
    }
8046
8047
    /*
8048
     * We should be at the end of the DOCTYPE declaration.
8049
     */
8050
36.6k
    if (RAW != '>') {
8051
214
        xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8052
214
        return;
8053
214
    }
8054
36.4k
    NEXT;
8055
36.4k
}
8056
8057
#ifdef LIBXML_SAX1_ENABLED
8058
/**
8059
 * Parse an attribute
8060
 *
8061
 * @deprecated Internal function, don't use.
8062
 *
8063
 *     [41] Attribute ::= Name Eq AttValue
8064
 *
8065
 * [ WFC: No External Entity References ]
8066
 * Attribute values cannot contain direct or indirect entity references
8067
 * to external entities.
8068
 *
8069
 * [ WFC: No < in Attribute Values ]
8070
 * The replacement text of any entity referred to directly or indirectly in
8071
 * an attribute value (other than "&lt;") must not contain a <.
8072
 *
8073
 * [ VC: Attribute Value Type ]
8074
 * The attribute must have been declared; the value must be of the type
8075
 * declared for it.
8076
 *
8077
 *     [25] Eq ::= S? '=' S?
8078
 *
8079
 * With namespace:
8080
 *
8081
 *     [NS 11] Attribute ::= QName Eq AttValue
8082
 *
8083
 * Also the case QName == xmlns:??? is handled independently as a namespace
8084
 * definition.
8085
 *
8086
 * @param ctxt  an XML parser context
8087
 * @param value  a xmlChar ** used to store the value of the attribute
8088
 * @returns the attribute name, and the value in *value.
8089
 */
8090
8091
const xmlChar *
8092
75.0k
xmlParseAttribute(xmlParserCtxt *ctxt, xmlChar **value) {
8093
75.0k
    const xmlChar *name;
8094
75.0k
    xmlChar *val;
8095
8096
75.0k
    *value = NULL;
8097
75.0k
    GROW;
8098
75.0k
    name = xmlParseName(ctxt);
8099
75.0k
    if (name == NULL) {
8100
4.28k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8101
4.28k
                 "error parsing attribute name\n");
8102
4.28k
        return(NULL);
8103
4.28k
    }
8104
8105
    /*
8106
     * read the value
8107
     */
8108
70.7k
    SKIP_BLANKS;
8109
70.7k
    if (RAW == '=') {
8110
60.9k
        NEXT;
8111
60.9k
  SKIP_BLANKS;
8112
60.9k
  val = xmlParseAttValue(ctxt);
8113
60.9k
    } else {
8114
9.81k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8115
9.81k
         "Specification mandates value for attribute %s\n", name);
8116
9.81k
  return(name);
8117
9.81k
    }
8118
8119
    /*
8120
     * Check that xml:lang conforms to the specification
8121
     * No more registered as an error, just generate a warning now
8122
     * since this was deprecated in XML second edition
8123
     */
8124
60.9k
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8125
9.55k
  if (!xmlCheckLanguageID(val)) {
8126
6.96k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8127
6.96k
              "Malformed value for xml:lang : %s\n",
8128
6.96k
        val, NULL);
8129
6.96k
  }
8130
9.55k
    }
8131
8132
    /*
8133
     * Check that xml:space conforms to the specification
8134
     */
8135
60.9k
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8136
3.35k
  if (xmlStrEqual(val, BAD_CAST "default"))
8137
1.68k
      *(ctxt->space) = 0;
8138
1.66k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8139
1.23k
      *(ctxt->space) = 1;
8140
431
  else {
8141
431
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8142
431
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8143
431
                                 val, NULL);
8144
431
  }
8145
3.35k
    }
8146
8147
60.9k
    *value = val;
8148
60.9k
    return(name);
8149
70.7k
}
8150
8151
/**
8152
 * Parse a start tag. Always consumes '<'.
8153
 *
8154
 * @deprecated Internal function, don't use.
8155
 *
8156
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8157
 *
8158
 * [ WFC: Unique Att Spec ]
8159
 * No attribute name may appear more than once in the same start-tag or
8160
 * empty-element tag.
8161
 *
8162
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8163
 *
8164
 * [ WFC: Unique Att Spec ]
8165
 * No attribute name may appear more than once in the same start-tag or
8166
 * empty-element tag.
8167
 *
8168
 * With namespace:
8169
 *
8170
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8171
 *
8172
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8173
 *
8174
 * @param ctxt  an XML parser context
8175
 * @returns the element name parsed
8176
 */
8177
8178
const xmlChar *
8179
101k
xmlParseStartTag(xmlParserCtxt *ctxt) {
8180
101k
    const xmlChar *name;
8181
101k
    const xmlChar *attname;
8182
101k
    xmlChar *attvalue;
8183
101k
    const xmlChar **atts = ctxt->atts;
8184
101k
    int nbatts = 0;
8185
101k
    int maxatts = ctxt->maxatts;
8186
101k
    int i;
8187
8188
101k
    if (RAW != '<') return(NULL);
8189
101k
    NEXT1;
8190
8191
101k
    name = xmlParseName(ctxt);
8192
101k
    if (name == NULL) {
8193
13.4k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8194
13.4k
       "xmlParseStartTag: invalid element name\n");
8195
13.4k
        return(NULL);
8196
13.4k
    }
8197
8198
    /*
8199
     * Now parse the attributes, it ends up with the ending
8200
     *
8201
     * (S Attribute)* S?
8202
     */
8203
88.2k
    SKIP_BLANKS;
8204
88.2k
    GROW;
8205
8206
114k
    while (((RAW != '>') &&
8207
114k
     ((RAW != '/') || (NXT(1) != '>')) &&
8208
114k
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8209
75.0k
  attname = xmlParseAttribute(ctxt, &attvalue);
8210
75.0k
        if (attname == NULL)
8211
4.28k
      break;
8212
70.7k
        if (attvalue != NULL) {
8213
      /*
8214
       * [ WFC: Unique Att Spec ]
8215
       * No attribute name may appear more than once in the same
8216
       * start-tag or empty-element tag.
8217
       */
8218
84.0k
      for (i = 0; i < nbatts;i += 2) {
8219
26.2k
          if (xmlStrEqual(atts[i], attname)) {
8220
1.86k
        xmlErrAttributeDup(ctxt, NULL, attname);
8221
1.86k
        goto failed;
8222
1.86k
    }
8223
26.2k
      }
8224
      /*
8225
       * Add the pair to atts
8226
       */
8227
57.8k
      if (nbatts + 4 > maxatts) {
8228
20.6k
          const xmlChar **n;
8229
20.6k
                int newSize;
8230
8231
20.6k
                newSize = xmlGrowCapacity(maxatts, sizeof(n[0]) * 2,
8232
20.6k
                                          11, XML_MAX_ATTRS);
8233
20.6k
                if (newSize < 0) {
8234
0
        xmlErrMemory(ctxt);
8235
0
        goto failed;
8236
0
    }
8237
20.6k
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
8238
20.6k
                if (newSize < 2)
8239
16.7k
                    newSize = 2;
8240
20.6k
#endif
8241
20.6k
          n = xmlRealloc(atts, newSize * sizeof(n[0]) * 2);
8242
20.6k
    if (n == NULL) {
8243
2
        xmlErrMemory(ctxt);
8244
2
        goto failed;
8245
2
    }
8246
20.6k
    atts = n;
8247
20.6k
                maxatts = newSize * 2;
8248
20.6k
    ctxt->atts = atts;
8249
20.6k
    ctxt->maxatts = maxatts;
8250
20.6k
      }
8251
8252
57.8k
      atts[nbatts++] = attname;
8253
57.8k
      atts[nbatts++] = attvalue;
8254
57.8k
      atts[nbatts] = NULL;
8255
57.8k
      atts[nbatts + 1] = NULL;
8256
8257
57.8k
            attvalue = NULL;
8258
57.8k
  }
8259
8260
70.7k
failed:
8261
8262
70.7k
        if (attvalue != NULL)
8263
1.86k
            xmlFree(attvalue);
8264
8265
70.7k
  GROW
8266
70.7k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8267
44.1k
      break;
8268
26.6k
  if (SKIP_BLANKS == 0) {
8269
10.1k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8270
10.1k
         "attributes construct error\n");
8271
10.1k
  }
8272
26.6k
  SHRINK;
8273
26.6k
        GROW;
8274
26.6k
    }
8275
8276
    /*
8277
     * SAX: Start of Element !
8278
     */
8279
88.2k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8280
88.2k
  (!ctxt->disableSAX)) {
8281
84.9k
  if (nbatts > 0)
8282
43.1k
      ctxt->sax->startElement(ctxt->userData, name, atts);
8283
41.7k
  else
8284
41.7k
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8285
84.9k
    }
8286
8287
88.2k
    if (atts != NULL) {
8288
        /* Free only the content strings */
8289
112k
        for (i = 1;i < nbatts;i+=2)
8290
57.8k
      if (atts[i] != NULL)
8291
57.8k
         xmlFree((xmlChar *) atts[i]);
8292
54.9k
    }
8293
88.2k
    return(name);
8294
88.2k
}
8295
8296
/**
8297
 * Parse an end tag. Always consumes '</'.
8298
 *
8299
 *     [42] ETag ::= '</' Name S? '>'
8300
 *
8301
 * With namespace
8302
 *
8303
 *     [NS 9] ETag ::= '</' QName S? '>'
8304
 * @param ctxt  an XML parser context
8305
 * @param line  line of the start tag
8306
 */
8307
8308
static void
8309
58.1k
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8310
58.1k
    const xmlChar *name;
8311
8312
58.1k
    GROW;
8313
58.1k
    if ((RAW != '<') || (NXT(1) != '/')) {
8314
738
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8315
738
           "xmlParseEndTag: '</' not found\n");
8316
738
  return;
8317
738
    }
8318
57.4k
    SKIP(2);
8319
8320
57.4k
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8321
8322
    /*
8323
     * We should definitely be at the ending "S? '>'" part
8324
     */
8325
57.4k
    GROW;
8326
57.4k
    SKIP_BLANKS;
8327
57.4k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8328
7.33k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8329
7.33k
    } else
8330
50.1k
  NEXT1;
8331
8332
    /*
8333
     * [ WFC: Element Type Match ]
8334
     * The Name in an element's end-tag must match the element type in the
8335
     * start-tag.
8336
     *
8337
     */
8338
57.4k
    if (name != (xmlChar*)1) {
8339
5.04k
        if (name == NULL) name = BAD_CAST "unparsable";
8340
5.04k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8341
5.04k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8342
5.04k
                    ctxt->name, line, name);
8343
5.04k
    }
8344
8345
    /*
8346
     * SAX: End of Tag
8347
     */
8348
57.4k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8349
57.4k
  (!ctxt->disableSAX))
8350
54.6k
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8351
8352
57.4k
    namePop(ctxt);
8353
57.4k
    spacePop(ctxt);
8354
57.4k
}
8355
8356
/**
8357
 * Parse an end of tag
8358
 *
8359
 * @deprecated Internal function, don't use.
8360
 *
8361
 *     [42] ETag ::= '</' Name S? '>'
8362
 *
8363
 * With namespace
8364
 *
8365
 *     [NS 9] ETag ::= '</' QName S? '>'
8366
 * @param ctxt  an XML parser context
8367
 */
8368
8369
void
8370
0
xmlParseEndTag(xmlParserCtxt *ctxt) {
8371
0
    xmlParseEndTag1(ctxt, 0);
8372
0
}
8373
#endif /* LIBXML_SAX1_ENABLED */
8374
8375
/************************************************************************
8376
 *                  *
8377
 *          SAX 2 specific operations       *
8378
 *                  *
8379
 ************************************************************************/
8380
8381
/**
8382
 * Parse an XML Namespace QName
8383
 *
8384
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8385
 *     [7]  Prefix  ::= NCName
8386
 *     [8]  LocalPart  ::= NCName
8387
 *
8388
 * @param ctxt  an XML parser context
8389
 * @param prefix  pointer to store the prefix part
8390
 * @returns the Name parsed or NULL
8391
 */
8392
8393
static xmlHashedString
8394
386k
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8395
386k
    xmlHashedString l, p;
8396
386k
    int start, isNCName = 0;
8397
8398
386k
    l.name = NULL;
8399
386k
    p.name = NULL;
8400
8401
386k
    GROW;
8402
386k
    start = CUR_PTR - BASE_PTR;
8403
8404
386k
    l = xmlParseNCName(ctxt);
8405
386k
    if (l.name != NULL) {
8406
330k
        isNCName = 1;
8407
330k
        if (CUR == ':') {
8408
189k
            NEXT;
8409
189k
            p = l;
8410
189k
            l = xmlParseNCName(ctxt);
8411
189k
        }
8412
330k
    }
8413
386k
    if ((l.name == NULL) || (CUR == ':')) {
8414
81.4k
        xmlChar *tmp;
8415
8416
81.4k
        l.name = NULL;
8417
81.4k
        p.name = NULL;
8418
81.4k
        if ((isNCName == 0) && (CUR != ':'))
8419
45.2k
            return(l);
8420
36.1k
        tmp = xmlParseNmtoken(ctxt);
8421
36.1k
        if (tmp != NULL)
8422
16.4k
            xmlFree(tmp);
8423
36.1k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8424
36.1k
                                CUR_PTR - (BASE_PTR + start));
8425
36.1k
        if (l.name == NULL) {
8426
1
            xmlErrMemory(ctxt);
8427
1
            return(l);
8428
1
        }
8429
36.1k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8430
36.1k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8431
36.1k
    }
8432
8433
341k
    *prefix = p;
8434
341k
    return(l);
8435
386k
}
8436
8437
/**
8438
 * Parse an XML Namespace QName
8439
 *
8440
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8441
 *     [7]  Prefix  ::= NCName
8442
 *     [8]  LocalPart  ::= NCName
8443
 *
8444
 * @param ctxt  an XML parser context
8445
 * @param prefix  pointer to store the prefix part
8446
 * @returns the Name parsed or NULL
8447
 */
8448
8449
static const xmlChar *
8450
6.97k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8451
6.97k
    xmlHashedString n, p;
8452
8453
6.97k
    n = xmlParseQNameHashed(ctxt, &p);
8454
6.97k
    if (n.name == NULL)
8455
2.86k
        return(NULL);
8456
4.11k
    *prefix = p.name;
8457
4.11k
    return(n.name);
8458
6.97k
}
8459
8460
/**
8461
 * Parse an XML name and compares for match
8462
 * (specialized for endtag parsing)
8463
 *
8464
 * @param ctxt  an XML parser context
8465
 * @param name  the localname
8466
 * @param prefix  the prefix, if any.
8467
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
8468
 * and the name for mismatch
8469
 */
8470
8471
static const xmlChar *
8472
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8473
40.5k
                        xmlChar const *prefix) {
8474
40.5k
    const xmlChar *cmp;
8475
40.5k
    const xmlChar *in;
8476
40.5k
    const xmlChar *ret;
8477
40.5k
    const xmlChar *prefix2;
8478
8479
40.5k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8480
8481
40.5k
    GROW;
8482
40.5k
    in = ctxt->input->cur;
8483
8484
40.5k
    cmp = prefix;
8485
151k
    while (*in != 0 && *in == *cmp) {
8486
111k
  ++in;
8487
111k
  ++cmp;
8488
111k
    }
8489
40.5k
    if ((*cmp == 0) && (*in == ':')) {
8490
34.9k
        in++;
8491
34.9k
  cmp = name;
8492
124k
  while (*in != 0 && *in == *cmp) {
8493
89.9k
      ++in;
8494
89.9k
      ++cmp;
8495
89.9k
  }
8496
34.9k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8497
      /* success */
8498
33.6k
            ctxt->input->col += in - ctxt->input->cur;
8499
33.6k
      ctxt->input->cur = in;
8500
33.6k
      return((const xmlChar*) 1);
8501
33.6k
  }
8502
34.9k
    }
8503
    /*
8504
     * all strings coms from the dictionary, equality can be done directly
8505
     */
8506
6.97k
    ret = xmlParseQName (ctxt, &prefix2);
8507
6.97k
    if (ret == NULL)
8508
2.86k
        return(NULL);
8509
4.11k
    if ((ret == name) && (prefix == prefix2))
8510
257
  return((const xmlChar*) 1);
8511
3.85k
    return ret;
8512
4.11k
}
8513
8514
/**
8515
 * Parse an attribute in the new SAX2 framework.
8516
 *
8517
 * @param ctxt  an XML parser context
8518
 * @param pref  the element prefix
8519
 * @param elem  the element name
8520
 * @param hprefix  resulting attribute prefix
8521
 * @param value  resulting value of the attribute
8522
 * @param len  resulting length of the attribute
8523
 * @param alloc  resulting indicator if the attribute was allocated
8524
 * @returns the attribute name, and the value in *value, .
8525
 */
8526
8527
static xmlHashedString
8528
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8529
                   const xmlChar * pref, const xmlChar * elem,
8530
                   xmlHashedString * hprefix, xmlChar ** value,
8531
                   int *len, int *alloc)
8532
161k
{
8533
161k
    xmlHashedString hname;
8534
161k
    const xmlChar *prefix, *name;
8535
161k
    xmlChar *val = NULL, *internal_val = NULL;
8536
161k
    int special = 0;
8537
161k
    int isNamespace;
8538
161k
    int flags;
8539
8540
161k
    *value = NULL;
8541
161k
    GROW;
8542
161k
    hname = xmlParseQNameHashed(ctxt, hprefix);
8543
161k
    if (hname.name == NULL) {
8544
6.13k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8545
6.13k
                       "error parsing attribute name\n");
8546
6.13k
        return(hname);
8547
6.13k
    }
8548
154k
    name = hname.name;
8549
154k
    prefix = hprefix->name;
8550
8551
    /*
8552
     * get the type if needed
8553
     */
8554
154k
    if (ctxt->attsSpecial != NULL) {
8555
17.5k
        special = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
8556
17.5k
                                              prefix, name));
8557
17.5k
    }
8558
8559
    /*
8560
     * read the value
8561
     */
8562
154k
    SKIP_BLANKS;
8563
154k
    if (RAW != '=') {
8564
6.45k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8565
6.45k
                          "Specification mandates value for attribute %s\n",
8566
6.45k
                          name);
8567
6.45k
        goto error;
8568
6.45k
    }
8569
8570
8571
148k
    NEXT;
8572
148k
    SKIP_BLANKS;
8573
148k
    flags = 0;
8574
148k
    isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8575
148k
                   (prefix == ctxt->str_xmlns));
8576
148k
    val = xmlParseAttValueInternal(ctxt, len, &flags, special,
8577
148k
                                   isNamespace);
8578
148k
    if (val == NULL)
8579
2.44k
        goto error;
8580
8581
146k
    *alloc = (flags & XML_ATTVAL_ALLOC) != 0;
8582
8583
146k
#ifdef LIBXML_VALID_ENABLED
8584
146k
    if ((ctxt->validate) &&
8585
146k
        (ctxt->standalone) &&
8586
146k
        (special & XML_SPECIAL_EXTERNAL) &&
8587
146k
        (flags & XML_ATTVAL_NORM_CHANGE)) {
8588
0
        xmlValidityError(ctxt, XML_DTD_NOT_STANDALONE,
8589
0
                         "standalone: normalization of attribute %s on %s "
8590
0
                         "by external subset declaration\n",
8591
0
                         name, elem);
8592
0
    }
8593
146k
#endif
8594
8595
146k
    if (prefix == ctxt->str_xml) {
8596
        /*
8597
         * Check that xml:lang conforms to the specification
8598
         * No more registered as an error, just generate a warning now
8599
         * since this was deprecated in XML second edition
8600
         */
8601
25.0k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8602
6.24k
            internal_val = xmlStrndup(val, *len);
8603
6.24k
            if (internal_val == NULL)
8604
3
                goto mem_error;
8605
6.24k
            if (!xmlCheckLanguageID(internal_val)) {
8606
3.59k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8607
3.59k
                              "Malformed value for xml:lang : %s\n",
8608
3.59k
                              internal_val, NULL);
8609
3.59k
            }
8610
6.24k
        }
8611
8612
        /*
8613
         * Check that xml:space conforms to the specification
8614
         */
8615
25.0k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8616
5.35k
            internal_val = xmlStrndup(val, *len);
8617
5.35k
            if (internal_val == NULL)
8618
3
                goto mem_error;
8619
5.35k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8620
3.17k
                *(ctxt->space) = 0;
8621
2.17k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8622
1.93k
                *(ctxt->space) = 1;
8623
245
            else {
8624
245
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8625
245
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8626
245
                              internal_val, NULL);
8627
245
            }
8628
5.35k
        }
8629
25.0k
        if (internal_val) {
8630
11.5k
            xmlFree(internal_val);
8631
11.5k
        }
8632
25.0k
    }
8633
8634
146k
    *value = val;
8635
146k
    return (hname);
8636
8637
6
mem_error:
8638
6
    xmlErrMemory(ctxt);
8639
8.90k
error:
8640
8.90k
    if ((val != NULL) && (*alloc != 0))
8641
1
        xmlFree(val);
8642
8.90k
    return(hname);
8643
6
}
8644
8645
/**
8646
 * Inserts a new attribute into the hash table.
8647
 *
8648
 * @param ctxt  parser context
8649
 * @param size  size of the hash table
8650
 * @param name  attribute name
8651
 * @param uri  namespace uri
8652
 * @param hashValue  combined hash value of name and uri
8653
 * @param aindex  attribute index (this is a multiple of 5)
8654
 * @returns INT_MAX if no existing attribute was found, the attribute
8655
 * index if an attribute was found, -1 if a memory allocation failed.
8656
 */
8657
static int
8658
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8659
58.6k
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8660
58.6k
    xmlAttrHashBucket *table = ctxt->attrHash;
8661
58.6k
    xmlAttrHashBucket *bucket;
8662
58.6k
    unsigned hindex;
8663
8664
58.6k
    hindex = hashValue & (size - 1);
8665
58.6k
    bucket = &table[hindex];
8666
8667
64.4k
    while (bucket->index >= 0) {
8668
19.8k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8669
8670
19.8k
        if (name == atts[0]) {
8671
15.7k
            int nsIndex = XML_PTR_TO_INT(atts[2]);
8672
8673
15.7k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8674
15.7k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8675
6.79k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8676
14.0k
                return(bucket->index);
8677
15.7k
        }
8678
8679
5.83k
        hindex++;
8680
5.83k
        bucket++;
8681
5.83k
        if (hindex >= size) {
8682
1.08k
            hindex = 0;
8683
1.08k
            bucket = table;
8684
1.08k
        }
8685
5.83k
    }
8686
8687
44.5k
    bucket->index = aindex;
8688
8689
44.5k
    return(INT_MAX);
8690
58.6k
}
8691
8692
static int
8693
xmlAttrHashInsertQName(xmlParserCtxtPtr ctxt, unsigned size,
8694
                       const xmlChar *name, const xmlChar *prefix,
8695
3.81k
                       unsigned hashValue, int aindex) {
8696
3.81k
    xmlAttrHashBucket *table = ctxt->attrHash;
8697
3.81k
    xmlAttrHashBucket *bucket;
8698
3.81k
    unsigned hindex;
8699
8700
3.81k
    hindex = hashValue & (size - 1);
8701
3.81k
    bucket = &table[hindex];
8702
8703
5.96k
    while (bucket->index >= 0) {
8704
3.42k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8705
8706
3.42k
        if ((name == atts[0]) && (prefix == atts[1]))
8707
1.27k
            return(bucket->index);
8708
8709
2.14k
        hindex++;
8710
2.14k
        bucket++;
8711
2.14k
        if (hindex >= size) {
8712
565
            hindex = 0;
8713
565
            bucket = table;
8714
565
        }
8715
2.14k
    }
8716
8717
2.54k
    bucket->index = aindex;
8718
8719
2.54k
    return(INT_MAX);
8720
3.81k
}
8721
/**
8722
 * Parse a start tag. Always consumes '<'.
8723
 *
8724
 * This routine is called when running SAX2 parsing
8725
 *
8726
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8727
 *
8728
 * [ WFC: Unique Att Spec ]
8729
 * No attribute name may appear more than once in the same start-tag or
8730
 * empty-element tag.
8731
 *
8732
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8733
 *
8734
 * [ WFC: Unique Att Spec ]
8735
 * No attribute name may appear more than once in the same start-tag or
8736
 * empty-element tag.
8737
 *
8738
 * With namespace:
8739
 *
8740
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8741
 *
8742
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8743
 *
8744
 * @param ctxt  an XML parser context
8745
 * @param pref  resulting namespace prefix
8746
 * @param URI  resulting namespace URI
8747
 * @param nbNsPtr  resulting number of namespace declarations
8748
 * @returns the element name parsed
8749
 */
8750
8751
static const xmlChar *
8752
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8753
218k
                  const xmlChar **URI, int *nbNsPtr) {
8754
218k
    xmlHashedString hlocalname;
8755
218k
    xmlHashedString hprefix;
8756
218k
    xmlHashedString hattname;
8757
218k
    xmlHashedString haprefix;
8758
218k
    const xmlChar *localname;
8759
218k
    const xmlChar *prefix;
8760
218k
    const xmlChar *attname;
8761
218k
    const xmlChar *aprefix;
8762
218k
    const xmlChar *uri;
8763
218k
    xmlChar *attvalue = NULL;
8764
218k
    const xmlChar **atts = ctxt->atts;
8765
218k
    unsigned attrHashSize = 0;
8766
218k
    int maxatts = ctxt->maxatts;
8767
218k
    int nratts, nbatts, nbdef;
8768
218k
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8769
218k
    int alloc = 0;
8770
218k
    int numNsErr = 0;
8771
218k
    int numDupErr = 0;
8772
8773
218k
    if (RAW != '<') return(NULL);
8774
218k
    NEXT1;
8775
8776
218k
    nbatts = 0;
8777
218k
    nratts = 0;
8778
218k
    nbdef = 0;
8779
218k
    nbNs = 0;
8780
218k
    nbTotalDef = 0;
8781
218k
    attval = 0;
8782
8783
218k
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8784
0
        xmlErrMemory(ctxt);
8785
0
        return(NULL);
8786
0
    }
8787
8788
218k
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8789
218k
    if (hlocalname.name == NULL) {
8790
36.2k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8791
36.2k
           "StartTag: invalid element name\n");
8792
36.2k
        return(NULL);
8793
36.2k
    }
8794
182k
    localname = hlocalname.name;
8795
182k
    prefix = hprefix.name;
8796
8797
    /*
8798
     * Now parse the attributes, it ends up with the ending
8799
     *
8800
     * (S Attribute)* S?
8801
     */
8802
182k
    SKIP_BLANKS;
8803
182k
    GROW;
8804
8805
    /*
8806
     * The ctxt->atts array will be ultimately passed to the SAX callback
8807
     * containing five xmlChar pointers for each attribute:
8808
     *
8809
     * [0] attribute name
8810
     * [1] attribute prefix
8811
     * [2] namespace URI
8812
     * [3] attribute value
8813
     * [4] end of attribute value
8814
     *
8815
     * To save memory, we reuse this array temporarily and store integers
8816
     * in these pointer variables.
8817
     *
8818
     * [0] attribute name
8819
     * [1] attribute prefix
8820
     * [2] hash value of attribute prefix, and later namespace index
8821
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
8822
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
8823
     *
8824
     * The ctxt->attallocs array contains an additional unsigned int for
8825
     * each attribute, containing the hash value of the attribute name
8826
     * and the alloc flag in bit 31.
8827
     */
8828
8829
249k
    while (((RAW != '>') &&
8830
249k
     ((RAW != '/') || (NXT(1) != '>')) &&
8831
249k
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8832
161k
  int len = -1;
8833
8834
161k
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
8835
161k
                                          &haprefix, &attvalue, &len,
8836
161k
                                          &alloc);
8837
161k
        if (hattname.name == NULL)
8838
6.13k
      break;
8839
154k
        if (attvalue == NULL)
8840
8.90k
            goto next_attr;
8841
146k
        attname = hattname.name;
8842
146k
        aprefix = haprefix.name;
8843
146k
  if (len < 0) len = xmlStrlen(attvalue);
8844
8845
146k
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8846
14.2k
            xmlHashedString huri;
8847
14.2k
            xmlURIPtr parsedUri;
8848
8849
14.2k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8850
14.2k
            uri = huri.name;
8851
14.2k
            if (uri == NULL) {
8852
1
                xmlErrMemory(ctxt);
8853
1
                goto next_attr;
8854
1
            }
8855
14.2k
            if (*uri != 0) {
8856
13.6k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8857
8
                    xmlErrMemory(ctxt);
8858
8
                    goto next_attr;
8859
8
                }
8860
13.6k
                if (parsedUri == NULL) {
8861
3.84k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8862
3.84k
                             "xmlns: '%s' is not a valid URI\n",
8863
3.84k
                                       uri, NULL, NULL);
8864
9.81k
                } else {
8865
9.81k
                    if (parsedUri->scheme == NULL) {
8866
8.15k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8867
8.15k
                                  "xmlns: URI %s is not absolute\n",
8868
8.15k
                                  uri, NULL, NULL);
8869
8.15k
                    }
8870
9.81k
                    xmlFreeURI(parsedUri);
8871
9.81k
                }
8872
13.6k
                if (uri == ctxt->str_xml_ns) {
8873
208
                    if (attname != ctxt->str_xml) {
8874
208
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8875
208
                     "xml namespace URI cannot be the default namespace\n",
8876
208
                                 NULL, NULL, NULL);
8877
208
                    }
8878
208
                    goto next_attr;
8879
208
                }
8880
13.4k
                if ((len == 29) &&
8881
13.4k
                    (xmlStrEqual(uri,
8882
399
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8883
194
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8884
194
                         "reuse of the xmlns namespace name is forbidden\n",
8885
194
                             NULL, NULL, NULL);
8886
194
                    goto next_attr;
8887
194
                }
8888
13.4k
            }
8889
8890
13.8k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
8891
12.7k
                nbNs++;
8892
131k
        } else if (aprefix == ctxt->str_xmlns) {
8893
72.3k
            xmlHashedString huri;
8894
72.3k
            xmlURIPtr parsedUri;
8895
8896
72.3k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8897
72.3k
            uri = huri.name;
8898
72.3k
            if (uri == NULL) {
8899
1
                xmlErrMemory(ctxt);
8900
1
                goto next_attr;
8901
1
            }
8902
8903
72.3k
            if (attname == ctxt->str_xml) {
8904
201
                if (uri != ctxt->str_xml_ns) {
8905
201
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8906
201
                             "xml namespace prefix mapped to wrong URI\n",
8907
201
                             NULL, NULL, NULL);
8908
201
                }
8909
                /*
8910
                 * Do not keep a namespace definition node
8911
                 */
8912
201
                goto next_attr;
8913
201
            }
8914
72.1k
            if (uri == ctxt->str_xml_ns) {
8915
197
                if (attname != ctxt->str_xml) {
8916
197
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8917
197
                             "xml namespace URI mapped to wrong prefix\n",
8918
197
                             NULL, NULL, NULL);
8919
197
                }
8920
197
                goto next_attr;
8921
197
            }
8922
71.9k
            if (attname == ctxt->str_xmlns) {
8923
547
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8924
547
                         "redefinition of the xmlns prefix is forbidden\n",
8925
547
                         NULL, NULL, NULL);
8926
547
                goto next_attr;
8927
547
            }
8928
71.3k
            if ((len == 29) &&
8929
71.3k
                (xmlStrEqual(uri,
8930
420
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8931
194
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8932
194
                         "reuse of the xmlns namespace name is forbidden\n",
8933
194
                         NULL, NULL, NULL);
8934
194
                goto next_attr;
8935
194
            }
8936
71.1k
            if ((uri == NULL) || (uri[0] == 0)) {
8937
2.83k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8938
2.83k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
8939
2.83k
                              attname, NULL, NULL);
8940
2.83k
                goto next_attr;
8941
68.3k
            } else {
8942
68.3k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8943
25
                    xmlErrMemory(ctxt);
8944
25
                    goto next_attr;
8945
25
                }
8946
68.3k
                if (parsedUri == NULL) {
8947
19.2k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8948
19.2k
                         "xmlns:%s: '%s' is not a valid URI\n",
8949
19.2k
                                       attname, uri, NULL);
8950
49.0k
                } else {
8951
49.0k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
8952
18.4k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8953
18.4k
                                  "xmlns:%s: URI %s is not absolute\n",
8954
18.4k
                                  attname, uri, NULL);
8955
18.4k
                    }
8956
49.0k
                    xmlFreeURI(parsedUri);
8957
49.0k
                }
8958
68.3k
            }
8959
8960
68.3k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
8961
66.7k
                nbNs++;
8962
68.3k
        } else {
8963
            /*
8964
             * Populate attributes array, see above for repurposing
8965
             * of xmlChar pointers.
8966
             */
8967
59.5k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8968
26.5k
                int res = xmlCtxtGrowAttrs(ctxt);
8969
8970
26.5k
                maxatts = ctxt->maxatts;
8971
26.5k
                atts = ctxt->atts;
8972
8973
26.5k
                if (res < 0)
8974
8
                    goto next_attr;
8975
26.5k
            }
8976
59.5k
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
8977
59.5k
                                        ((unsigned) alloc << 31);
8978
59.5k
            atts[nbatts++] = attname;
8979
59.5k
            atts[nbatts++] = aprefix;
8980
59.5k
            atts[nbatts++] = XML_INT_TO_PTR(haprefix.hashValue);
8981
59.5k
            if (alloc) {
8982
15.4k
                atts[nbatts++] = attvalue;
8983
15.4k
                attvalue += len;
8984
15.4k
                atts[nbatts++] = attvalue;
8985
44.0k
            } else {
8986
                /*
8987
                 * attvalue points into the input buffer which can be
8988
                 * reallocated. Store differences to input->base instead.
8989
                 * The pointers will be reconstructed later.
8990
                 */
8991
44.0k
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
8992
44.0k
                attvalue += len;
8993
44.0k
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
8994
44.0k
            }
8995
            /*
8996
             * tag if some deallocation is needed
8997
             */
8998
59.5k
            if (alloc != 0) attval = 1;
8999
59.5k
            attvalue = NULL; /* moved into atts */
9000
59.5k
        }
9001
9002
154k
next_attr:
9003
154k
        if ((attvalue != NULL) && (alloc != 0)) {
9004
20.7k
            xmlFree(attvalue);
9005
20.7k
            attvalue = NULL;
9006
20.7k
        }
9007
9008
154k
  GROW
9009
154k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9010
77.2k
      break;
9011
77.7k
  if (SKIP_BLANKS == 0) {
9012
10.2k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9013
10.2k
         "attributes construct error\n");
9014
10.2k
      break;
9015
10.2k
  }
9016
67.4k
        GROW;
9017
67.4k
    }
9018
9019
    /*
9020
     * Namespaces from default attributes
9021
     */
9022
182k
    if (ctxt->attsDefault != NULL) {
9023
26.7k
        xmlDefAttrsPtr defaults;
9024
9025
26.7k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9026
26.7k
  if (defaults != NULL) {
9027
65.3k
      for (i = 0; i < defaults->nbAttrs; i++) {
9028
39.8k
                xmlDefAttr *attr = &defaults->attrs[i];
9029
9030
39.8k
          attname = attr->name.name;
9031
39.8k
    aprefix = attr->prefix.name;
9032
9033
39.8k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9034
3.14k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9035
9036
3.14k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9037
2.68k
                        nbNs++;
9038
36.7k
    } else if (aprefix == ctxt->str_xmlns) {
9039
4.31k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9040
9041
4.31k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9042
4.31k
                                      NULL, 1) > 0)
9043
3.71k
                        nbNs++;
9044
32.3k
    } else {
9045
32.3k
                    if (nratts + nbTotalDef >= XML_MAX_ATTRS) {
9046
0
                        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
9047
0
                                    "Maximum number of attributes exceeded");
9048
0
                        break;
9049
0
                    }
9050
32.3k
                    nbTotalDef += 1;
9051
32.3k
                }
9052
39.8k
      }
9053
25.5k
  }
9054
26.7k
    }
9055
9056
    /*
9057
     * Resolve attribute namespaces
9058
     */
9059
241k
    for (i = 0; i < nbatts; i += 5) {
9060
59.5k
        attname = atts[i];
9061
59.5k
        aprefix = atts[i+1];
9062
9063
        /*
9064
  * The default namespace does not apply to attribute names.
9065
  */
9066
59.5k
  if (aprefix == NULL) {
9067
19.4k
            nsIndex = NS_INDEX_EMPTY;
9068
40.0k
        } else if (aprefix == ctxt->str_xml) {
9069
25.0k
            nsIndex = NS_INDEX_XML;
9070
25.0k
        } else {
9071
15.0k
            haprefix.name = aprefix;
9072
15.0k
            haprefix.hashValue = (size_t) atts[i+2];
9073
15.0k
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9074
9075
15.0k
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9076
5.79k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9077
5.79k
        "Namespace prefix %s for %s on %s is not defined\n",
9078
5.79k
        aprefix, attname, localname);
9079
5.79k
                nsIndex = NS_INDEX_EMPTY;
9080
5.79k
            }
9081
15.0k
        }
9082
9083
59.5k
        atts[i+2] = XML_INT_TO_PTR(nsIndex);
9084
59.5k
    }
9085
9086
    /*
9087
     * Maximum number of attributes including default attributes.
9088
     */
9089
182k
    maxAtts = nratts + nbTotalDef;
9090
9091
    /*
9092
     * Verify that attribute names are unique.
9093
     */
9094
182k
    if (maxAtts > 1) {
9095
23.8k
        attrHashSize = 4;
9096
33.7k
        while (attrHashSize / 2 < (unsigned) maxAtts)
9097
9.89k
            attrHashSize *= 2;
9098
9099
23.8k
        if (attrHashSize > ctxt->attrHashMax) {
9100
11.6k
            xmlAttrHashBucket *tmp;
9101
9102
11.6k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9103
11.6k
            if (tmp == NULL) {
9104
2
                xmlErrMemory(ctxt);
9105
2
                goto done;
9106
2
            }
9107
9108
11.6k
            ctxt->attrHash = tmp;
9109
11.6k
            ctxt->attrHashMax = attrHashSize;
9110
11.6k
        }
9111
9112
23.8k
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9113
9114
62.5k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9115
38.7k
            const xmlChar *nsuri;
9116
38.7k
            unsigned hashValue, nameHashValue, uriHashValue;
9117
38.7k
            int res;
9118
9119
38.7k
            attname = atts[i];
9120
38.7k
            aprefix = atts[i+1];
9121
38.7k
            nsIndex = XML_PTR_TO_INT(atts[i+2]);
9122
            /* Hash values always have bit 31 set, see dict.c */
9123
38.7k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9124
9125
38.7k
            if (nsIndex == NS_INDEX_EMPTY) {
9126
                /*
9127
                 * Prefix with empty namespace means an undeclared
9128
                 * prefix which was already reported above.
9129
                 */
9130
19.0k
                if (aprefix != NULL)
9131
5.11k
                    continue;
9132
13.8k
                nsuri = NULL;
9133
13.8k
                uriHashValue = URI_HASH_EMPTY;
9134
19.6k
            } else if (nsIndex == NS_INDEX_XML) {
9135
11.2k
                nsuri = ctxt->str_xml_ns;
9136
11.2k
                uriHashValue = URI_HASH_XML;
9137
11.2k
            } else {
9138
8.39k
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9139
8.39k
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9140
8.39k
            }
9141
9142
33.5k
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9143
33.5k
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9144
33.5k
                                    hashValue, i);
9145
33.5k
            if (res < 0)
9146
0
                continue;
9147
9148
            /*
9149
             * [ WFC: Unique Att Spec ]
9150
             * No attribute name may appear more than once in the same
9151
             * start-tag or empty-element tag.
9152
             * As extended by the Namespace in XML REC.
9153
             */
9154
33.5k
            if (res < INT_MAX) {
9155
6.82k
                if (aprefix == atts[res+1]) {
9156
4.10k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9157
4.10k
                    numDupErr += 1;
9158
4.10k
                } else {
9159
2.72k
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9160
2.72k
                             "Namespaced Attribute %s in '%s' redefined\n",
9161
2.72k
                             attname, nsuri, NULL);
9162
2.72k
                    numNsErr += 1;
9163
2.72k
                }
9164
6.82k
            }
9165
33.5k
        }
9166
23.8k
    }
9167
9168
    /*
9169
     * Default attributes
9170
     */
9171
182k
    if (ctxt->attsDefault != NULL) {
9172
26.7k
        xmlDefAttrsPtr defaults;
9173
9174
26.7k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9175
26.7k
  if (defaults != NULL) {
9176
65.3k
      for (i = 0; i < defaults->nbAttrs; i++) {
9177
39.8k
                xmlDefAttr *attr = &defaults->attrs[i];
9178
39.8k
                const xmlChar *nsuri = NULL;
9179
39.8k
                unsigned hashValue, uriHashValue = 0;
9180
39.8k
                int res;
9181
9182
39.8k
          attname = attr->name.name;
9183
39.8k
    aprefix = attr->prefix.name;
9184
9185
39.8k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9186
3.14k
                    continue;
9187
36.6k
    if (aprefix == ctxt->str_xmlns)
9188
4.31k
                    continue;
9189
9190
32.3k
                if (aprefix == NULL) {
9191
10.3k
                    nsIndex = NS_INDEX_EMPTY;
9192
10.3k
                    nsuri = NULL;
9193
10.3k
                    uriHashValue = URI_HASH_EMPTY;
9194
22.0k
                } else if (aprefix == ctxt->str_xml) {
9195
1.22k
                    nsIndex = NS_INDEX_XML;
9196
1.22k
                    nsuri = ctxt->str_xml_ns;
9197
1.22k
                    uriHashValue = URI_HASH_XML;
9198
20.8k
                } else {
9199
20.8k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9200
20.8k
                    if ((nsIndex == INT_MAX) ||
9201
20.8k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9202
17.5k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9203
17.5k
                                 "Namespace prefix %s for %s on %s is not "
9204
17.5k
                                 "defined\n",
9205
17.5k
                                 aprefix, attname, localname);
9206
17.5k
                        nsIndex = NS_INDEX_EMPTY;
9207
17.5k
                        nsuri = NULL;
9208
17.5k
                        uriHashValue = URI_HASH_EMPTY;
9209
17.5k
                    } else {
9210
3.28k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9211
3.28k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9212
3.28k
                    }
9213
20.8k
                }
9214
9215
                /*
9216
                 * Check whether the attribute exists
9217
                 */
9218
32.3k
                if (maxAtts > 1) {
9219
25.0k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9220
25.0k
                                                   uriHashValue);
9221
25.0k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9222
25.0k
                                            hashValue, nbatts);
9223
25.0k
                    if (res < 0)
9224
0
                        continue;
9225
25.0k
                    if (res < INT_MAX) {
9226
7.22k
                        if (aprefix == atts[res+1])
9227
3.83k
                            continue;
9228
3.38k
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9229
3.38k
                                 "Namespaced Attribute %s in '%s' redefined\n",
9230
3.38k
                                 attname, nsuri, NULL);
9231
3.38k
                    }
9232
25.0k
                }
9233
9234
28.5k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9235
9236
28.5k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9237
9.42k
                    res = xmlCtxtGrowAttrs(ctxt);
9238
9239
9.42k
                    maxatts = ctxt->maxatts;
9240
9.42k
                    atts = ctxt->atts;
9241
9242
9.42k
                    if (res < 0) {
9243
8
                        localname = NULL;
9244
8
                        goto done;
9245
8
                    }
9246
9.42k
                }
9247
9248
28.5k
                atts[nbatts++] = attname;
9249
28.5k
                atts[nbatts++] = aprefix;
9250
28.5k
                atts[nbatts++] = XML_INT_TO_PTR(nsIndex);
9251
28.5k
                atts[nbatts++] = attr->value.name;
9252
28.5k
                atts[nbatts++] = attr->valueEnd;
9253
9254
28.5k
#ifdef LIBXML_VALID_ENABLED
9255
                /*
9256
                 * This should be moved to valid.c, but we don't keep track
9257
                 * whether an attribute was defaulted.
9258
                 */
9259
28.5k
                if ((ctxt->validate) &&
9260
28.5k
                    (ctxt->standalone == 1) &&
9261
28.5k
                    (attr->external != 0)) {
9262
0
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9263
0
                            "standalone: attribute %s on %s defaulted "
9264
0
                            "from external subset\n",
9265
0
                            attname, localname);
9266
0
                }
9267
28.5k
#endif
9268
28.5k
                nbdef++;
9269
28.5k
      }
9270
25.5k
  }
9271
26.7k
    }
9272
9273
    /*
9274
     * Using a single hash table for nsUri/localName pairs cannot
9275
     * detect duplicate QNames reliably. The following example will
9276
     * only result in two namespace errors.
9277
     *
9278
     * <doc xmlns:a="a" xmlns:b="a">
9279
     *   <elem a:a="" b:a="" b:a=""/>
9280
     * </doc>
9281
     *
9282
     * If we saw more than one namespace error but no duplicate QNames
9283
     * were found, we have to scan for duplicate QNames.
9284
     */
9285
182k
    if ((numDupErr == 0) && (numNsErr > 1)) {
9286
973
        memset(ctxt->attrHash, -1,
9287
973
               attrHashSize * sizeof(ctxt->attrHash[0]));
9288
9289
5.15k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9290
4.18k
            unsigned hashValue, nameHashValue, prefixHashValue;
9291
4.18k
            int res;
9292
9293
4.18k
            aprefix = atts[i+1];
9294
4.18k
            if (aprefix == NULL)
9295
369
                continue;
9296
9297
3.81k
            attname = atts[i];
9298
            /* Hash values always have bit 31 set, see dict.c */
9299
3.81k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9300
3.81k
            prefixHashValue = xmlDictComputeHash(ctxt->dict, aprefix);
9301
9302
3.81k
            hashValue = xmlDictCombineHash(nameHashValue, prefixHashValue);
9303
3.81k
            res = xmlAttrHashInsertQName(ctxt, attrHashSize, attname,
9304
3.81k
                                         aprefix, hashValue, i);
9305
3.81k
            if (res < INT_MAX)
9306
1.27k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9307
3.81k
        }
9308
973
    }
9309
9310
    /*
9311
     * Reconstruct attribute pointers
9312
     */
9313
270k
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9314
        /* namespace URI */
9315
88.0k
        nsIndex = XML_PTR_TO_INT(atts[i+2]);
9316
88.0k
        if (nsIndex == INT_MAX)
9317
50.6k
            atts[i+2] = NULL;
9318
37.4k
        else if (nsIndex == INT_MAX - 1)
9319
25.9k
            atts[i+2] = ctxt->str_xml_ns;
9320
11.5k
        else
9321
11.5k
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9322
9323
88.0k
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9324
44.0k
            atts[i+3] = BASE_PTR + XML_PTR_TO_INT(atts[i+3]);  /* value */
9325
44.0k
            atts[i+4] = BASE_PTR + XML_PTR_TO_INT(atts[i+4]);  /* valuend */
9326
44.0k
        }
9327
88.0k
    }
9328
9329
182k
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9330
182k
    if ((prefix != NULL) && (uri == NULL)) {
9331
13.8k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9332
13.8k
           "Namespace prefix %s on %s is not defined\n",
9333
13.8k
     prefix, localname, NULL);
9334
13.8k
    }
9335
182k
    *pref = prefix;
9336
182k
    *URI = uri;
9337
9338
    /*
9339
     * SAX callback
9340
     */
9341
182k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9342
182k
  (!ctxt->disableSAX)) {
9343
128k
  if (nbNs > 0)
9344
41.2k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9345
41.2k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9346
41.2k
        nbatts / 5, nbdef, atts);
9347
86.8k
  else
9348
86.8k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9349
86.8k
                          0, NULL, nbatts / 5, nbdef, atts);
9350
128k
    }
9351
9352
182k
done:
9353
    /*
9354
     * Free allocated attribute values
9355
     */
9356
182k
    if (attval != 0) {
9357
28.5k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9358
17.5k
      if (ctxt->attallocs[j] & 0x80000000)
9359
15.4k
          xmlFree((xmlChar *) atts[i+3]);
9360
10.9k
    }
9361
9362
182k
    *nbNsPtr = nbNs;
9363
182k
    return(localname);
9364
182k
}
9365
9366
/**
9367
 * Parse an end tag. Always consumes '</'.
9368
 *
9369
 *     [42] ETag ::= '</' Name S? '>'
9370
 *
9371
 * With namespace
9372
 *
9373
 *     [NS 9] ETag ::= '</' QName S? '>'
9374
 * @param ctxt  an XML parser context
9375
 * @param tag  the corresponding start tag
9376
 */
9377
9378
static void
9379
128k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9380
128k
    const xmlChar *name;
9381
9382
128k
    GROW;
9383
128k
    if ((RAW != '<') || (NXT(1) != '/')) {
9384
1.05k
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9385
1.05k
  return;
9386
1.05k
    }
9387
127k
    SKIP(2);
9388
9389
127k
    if (tag->prefix == NULL)
9390
86.9k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9391
40.5k
    else
9392
40.5k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9393
9394
    /*
9395
     * We should definitely be at the ending "S? '>'" part
9396
     */
9397
127k
    GROW;
9398
127k
    SKIP_BLANKS;
9399
127k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9400
13.9k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9401
13.9k
    } else
9402
113k
  NEXT1;
9403
9404
    /*
9405
     * [ WFC: Element Type Match ]
9406
     * The Name in an element's end-tag must match the element type in the
9407
     * start-tag.
9408
     *
9409
     */
9410
127k
    if (name != (xmlChar*)1) {
9411
17.6k
        if (name == NULL) name = BAD_CAST "unparsable";
9412
17.6k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9413
17.6k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9414
17.6k
                    ctxt->name, tag->line, name);
9415
17.6k
    }
9416
9417
    /*
9418
     * SAX: End of Tag
9419
     */
9420
127k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9421
127k
  (!ctxt->disableSAX))
9422
77.7k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9423
77.7k
                                tag->URI);
9424
9425
127k
    spacePop(ctxt);
9426
127k
    if (tag->nsNr != 0)
9427
47.5k
  xmlParserNsPop(ctxt, tag->nsNr);
9428
127k
}
9429
9430
/**
9431
 * Parse escaped pure raw content. Always consumes '<!['.
9432
 *
9433
 * @deprecated Internal function, don't use.
9434
 *
9435
 *     [18] CDSect ::= CDStart CData CDEnd
9436
 *
9437
 *     [19] CDStart ::= '<![CDATA['
9438
 *
9439
 *     [20] Data ::= (Char* - (Char* ']]>' Char*))
9440
 *
9441
 *     [21] CDEnd ::= ']]>'
9442
 * @param ctxt  an XML parser context
9443
 */
9444
void
9445
5.13k
xmlParseCDSect(xmlParserCtxt *ctxt) {
9446
5.13k
    xmlChar *buf = NULL;
9447
5.13k
    int len = 0;
9448
5.13k
    int size = XML_PARSER_BUFFER_SIZE;
9449
5.13k
    int r, rl;
9450
5.13k
    int s, sl;
9451
5.13k
    int cur, l;
9452
5.13k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9453
589
                    XML_MAX_HUGE_LENGTH :
9454
5.13k
                    XML_MAX_TEXT_LENGTH;
9455
9456
5.13k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9457
0
        return;
9458
5.13k
    SKIP(3);
9459
9460
5.13k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9461
0
        return;
9462
5.13k
    SKIP(6);
9463
9464
5.13k
    r = xmlCurrentCharRecover(ctxt, &rl);
9465
5.13k
    if (!IS_CHAR(r)) {
9466
759
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9467
759
        goto out;
9468
759
    }
9469
4.37k
    NEXTL(rl);
9470
4.37k
    s = xmlCurrentCharRecover(ctxt, &sl);
9471
4.37k
    if (!IS_CHAR(s)) {
9472
448
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9473
448
        goto out;
9474
448
    }
9475
3.92k
    NEXTL(sl);
9476
3.92k
    cur = xmlCurrentCharRecover(ctxt, &l);
9477
3.92k
    buf = xmlMalloc(size);
9478
3.92k
    if (buf == NULL) {
9479
1
  xmlErrMemory(ctxt);
9480
1
        goto out;
9481
1
    }
9482
95.6k
    while (IS_CHAR(cur) &&
9483
95.6k
           ((r != ']') || (s != ']') || (cur != '>'))) {
9484
91.7k
  if (len + 5 >= size) {
9485
482
      xmlChar *tmp;
9486
482
            int newSize;
9487
9488
482
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9489
482
            if (newSize < 0) {
9490
0
                xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9491
0
                               "CData section too big found\n");
9492
0
                goto out;
9493
0
            }
9494
482
      tmp = xmlRealloc(buf, newSize);
9495
482
      if (tmp == NULL) {
9496
1
    xmlErrMemory(ctxt);
9497
1
                goto out;
9498
1
      }
9499
481
      buf = tmp;
9500
481
      size = newSize;
9501
481
  }
9502
91.7k
  COPY_BUF(buf, len, r);
9503
91.7k
  r = s;
9504
91.7k
  rl = sl;
9505
91.7k
  s = cur;
9506
91.7k
  sl = l;
9507
91.7k
  NEXTL(l);
9508
91.7k
  cur = xmlCurrentCharRecover(ctxt, &l);
9509
91.7k
    }
9510
3.92k
    buf[len] = 0;
9511
3.92k
    if (cur != '>') {
9512
2.58k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9513
2.58k
                       "CData section not finished\n%.50s\n", buf);
9514
2.58k
        goto out;
9515
2.58k
    }
9516
1.34k
    NEXTL(l);
9517
9518
    /*
9519
     * OK the buffer is to be consumed as cdata.
9520
     */
9521
1.34k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9522
1.11k
        if ((ctxt->sax->cdataBlock != NULL) &&
9523
1.11k
            ((ctxt->options & XML_PARSE_NOCDATA) == 0)) {
9524
918
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9525
918
        } else if (ctxt->sax->characters != NULL) {
9526
197
            ctxt->sax->characters(ctxt->userData, buf, len);
9527
197
        }
9528
1.11k
    }
9529
9530
5.13k
out:
9531
5.13k
    xmlFree(buf);
9532
5.13k
}
9533
9534
/**
9535
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9536
 * unexpected EOF to the caller.
9537
 *
9538
 * @param ctxt  an XML parser context
9539
 */
9540
9541
static void
9542
52.9k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9543
52.9k
    int oldNameNr = ctxt->nameNr;
9544
52.9k
    int oldSpaceNr = ctxt->spaceNr;
9545
52.9k
    int oldNodeNr = ctxt->nodeNr;
9546
9547
52.9k
    GROW;
9548
1.93M
    while ((ctxt->input->cur < ctxt->input->end) &&
9549
1.93M
     (PARSER_STOPPED(ctxt) == 0)) {
9550
1.91M
  const xmlChar *cur = ctxt->input->cur;
9551
9552
  /*
9553
   * First case : a Processing Instruction.
9554
   */
9555
1.91M
  if ((*cur == '<') && (cur[1] == '?')) {
9556
19.2k
      xmlParsePI(ctxt);
9557
19.2k
  }
9558
9559
  /*
9560
   * Second case : a CDSection
9561
   */
9562
  /* 2.6.0 test was *cur not RAW */
9563
1.89M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9564
5.13k
      xmlParseCDSect(ctxt);
9565
5.13k
  }
9566
9567
  /*
9568
   * Third case :  a comment
9569
   */
9570
1.89M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9571
1.89M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9572
3.55k
      xmlParseComment(ctxt);
9573
3.55k
  }
9574
9575
  /*
9576
   * Fourth case :  a sub-element.
9577
   */
9578
1.88M
  else if (*cur == '<') {
9579
433k
            if (NXT(1) == '/') {
9580
184k
                if (ctxt->nameNr <= oldNameNr)
9581
39.3k
                    break;
9582
145k
          xmlParseElementEnd(ctxt);
9583
248k
            } else {
9584
248k
          xmlParseElementStart(ctxt);
9585
248k
            }
9586
433k
  }
9587
9588
  /*
9589
   * Fifth case : a reference. If if has not been resolved,
9590
   *    parsing returns it's Name, create the node
9591
   */
9592
9593
1.45M
  else if (*cur == '&') {
9594
130k
      xmlParseReference(ctxt);
9595
130k
  }
9596
9597
  /*
9598
   * Last case, text. Note that References are handled directly.
9599
   */
9600
1.32M
  else {
9601
1.32M
      xmlParseCharDataInternal(ctxt, 0);
9602
1.32M
  }
9603
9604
1.87M
  SHRINK;
9605
1.87M
  GROW;
9606
1.87M
    }
9607
9608
52.9k
    if ((ctxt->nameNr > oldNameNr) &&
9609
52.9k
        (ctxt->input->cur >= ctxt->input->end) &&
9610
52.9k
        (ctxt->wellFormed)) {
9611
473
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9612
473
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9613
473
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9614
473
                "Premature end of data in tag %s line %d\n",
9615
473
                name, line, NULL);
9616
473
    }
9617
9618
    /*
9619
     * Clean up in error case
9620
     */
9621
9622
72.0k
    while (ctxt->nodeNr > oldNodeNr)
9623
19.0k
        nodePop(ctxt);
9624
9625
69.0k
    while (ctxt->nameNr > oldNameNr) {
9626
16.0k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9627
9628
16.0k
        if (tag->nsNr != 0)
9629
2.91k
            xmlParserNsPop(ctxt, tag->nsNr);
9630
9631
16.0k
        namePop(ctxt);
9632
16.0k
    }
9633
9634
69.0k
    while (ctxt->spaceNr > oldSpaceNr)
9635
16.1k
        spacePop(ctxt);
9636
52.9k
}
9637
9638
/**
9639
 * Parse XML element content. This is useful if you're only interested
9640
 * in custom SAX callbacks. If you want a node list, use
9641
 * #xmlCtxtParseContent.
9642
 *
9643
 * @param ctxt  an XML parser context
9644
 */
9645
void
9646
0
xmlParseContent(xmlParserCtxt *ctxt) {
9647
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9648
0
        return;
9649
9650
0
    xmlCtxtInitializeLate(ctxt);
9651
9652
0
    xmlParseContentInternal(ctxt);
9653
9654
0
    xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
9655
0
}
9656
9657
/**
9658
 * Parse an XML element
9659
 *
9660
 * @deprecated Internal function, don't use.
9661
 *
9662
 *     [39] element ::= EmptyElemTag | STag content ETag
9663
 *
9664
 * [ WFC: Element Type Match ]
9665
 * The Name in an element's end-tag must match the element type in the
9666
 * start-tag.
9667
 *
9668
 * @param ctxt  an XML parser context
9669
 */
9670
9671
void
9672
71.4k
xmlParseElement(xmlParserCtxt *ctxt) {
9673
71.4k
    if (xmlParseElementStart(ctxt) != 0)
9674
22.4k
        return;
9675
9676
48.9k
    xmlParseContentInternal(ctxt);
9677
9678
48.9k
    if (ctxt->input->cur >= ctxt->input->end) {
9679
7.44k
        if (ctxt->wellFormed) {
9680
606
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9681
606
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9682
606
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9683
606
                    "Premature end of data in tag %s line %d\n",
9684
606
                    name, line, NULL);
9685
606
        }
9686
7.44k
        return;
9687
7.44k
    }
9688
9689
41.5k
    xmlParseElementEnd(ctxt);
9690
41.5k
}
9691
9692
/**
9693
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9694
 * opening tag was parsed, 1 if an empty element was parsed.
9695
 *
9696
 * Always consumes '<'.
9697
 *
9698
 * @param ctxt  an XML parser context
9699
 */
9700
static int
9701
320k
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9702
320k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9703
320k
    const xmlChar *name;
9704
320k
    const xmlChar *prefix = NULL;
9705
320k
    const xmlChar *URI = NULL;
9706
320k
    xmlParserNodeInfo node_info;
9707
320k
    int line;
9708
320k
    xmlNodePtr cur;
9709
320k
    int nbNs = 0;
9710
9711
320k
    if (ctxt->nameNr > maxDepth) {
9712
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9713
0
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9714
0
                ctxt->nameNr);
9715
0
  return(-1);
9716
0
    }
9717
9718
    /* Capture start position */
9719
320k
    if (ctxt->record_info) {
9720
0
        node_info.begin_pos = ctxt->input->consumed +
9721
0
                          (CUR_PTR - ctxt->input->base);
9722
0
  node_info.begin_line = ctxt->input->line;
9723
0
    }
9724
9725
320k
    if (ctxt->spaceNr == 0)
9726
0
  spacePush(ctxt, -1);
9727
320k
    else if (*ctxt->space == -2)
9728
36.2k
  spacePush(ctxt, -1);
9729
283k
    else
9730
283k
  spacePush(ctxt, *ctxt->space);
9731
9732
320k
    line = ctxt->input->line;
9733
320k
#ifdef LIBXML_SAX1_ENABLED
9734
320k
    if (ctxt->sax2)
9735
218k
#endif /* LIBXML_SAX1_ENABLED */
9736
218k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9737
101k
#ifdef LIBXML_SAX1_ENABLED
9738
101k
    else
9739
101k
  name = xmlParseStartTag(ctxt);
9740
320k
#endif /* LIBXML_SAX1_ENABLED */
9741
320k
    if (name == NULL) {
9742
49.6k
  spacePop(ctxt);
9743
49.6k
        return(-1);
9744
49.6k
    }
9745
270k
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9746
270k
    cur = ctxt->node;
9747
9748
270k
#ifdef LIBXML_VALID_ENABLED
9749
    /*
9750
     * [ VC: Root Element Type ]
9751
     * The Name in the document type declaration must match the element
9752
     * type of the root element.
9753
     */
9754
270k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9755
270k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9756
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9757
270k
#endif /* LIBXML_VALID_ENABLED */
9758
9759
    /*
9760
     * Check for an Empty Element.
9761
     */
9762
270k
    if ((RAW == '/') && (NXT(1) == '>')) {
9763
36.3k
        SKIP(2);
9764
36.3k
  if (ctxt->sax2) {
9765
17.9k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9766
17.9k
    (!ctxt->disableSAX))
9767
12.3k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9768
17.9k
#ifdef LIBXML_SAX1_ENABLED
9769
18.3k
  } else {
9770
18.3k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9771
18.3k
    (!ctxt->disableSAX))
9772
17.7k
    ctxt->sax->endElement(ctxt->userData, name);
9773
18.3k
#endif /* LIBXML_SAX1_ENABLED */
9774
18.3k
  }
9775
36.3k
  namePop(ctxt);
9776
36.3k
  spacePop(ctxt);
9777
36.3k
  if (nbNs > 0)
9778
7.25k
      xmlParserNsPop(ctxt, nbNs);
9779
36.3k
  if (cur != NULL && ctxt->record_info) {
9780
0
            node_info.node = cur;
9781
0
            node_info.end_pos = ctxt->input->consumed +
9782
0
                                (CUR_PTR - ctxt->input->base);
9783
0
            node_info.end_line = ctxt->input->line;
9784
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9785
0
  }
9786
36.3k
  return(1);
9787
36.3k
    }
9788
234k
    if (RAW == '>') {
9789
210k
        NEXT1;
9790
210k
        if (cur != NULL && ctxt->record_info) {
9791
0
            node_info.node = cur;
9792
0
            node_info.end_pos = 0;
9793
0
            node_info.end_line = 0;
9794
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9795
0
        }
9796
210k
    } else {
9797
23.6k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9798
23.6k
         "Couldn't find end of Start Tag %s line %d\n",
9799
23.6k
                    name, line, NULL);
9800
9801
  /*
9802
   * end of parsing of this node.
9803
   */
9804
23.6k
  nodePop(ctxt);
9805
23.6k
  namePop(ctxt);
9806
23.6k
  spacePop(ctxt);
9807
23.6k
  if (nbNs > 0)
9808
5.55k
      xmlParserNsPop(ctxt, nbNs);
9809
23.6k
  return(-1);
9810
23.6k
    }
9811
9812
210k
    return(0);
9813
234k
}
9814
9815
/**
9816
 * Parse the end of an XML element. Always consumes '</'.
9817
 *
9818
 * @param ctxt  an XML parser context
9819
 */
9820
static void
9821
186k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9822
186k
    xmlNodePtr cur = ctxt->node;
9823
9824
186k
    if (ctxt->nameNr <= 0) {
9825
20
        if ((RAW == '<') && (NXT(1) == '/'))
9826
9
            SKIP(2);
9827
20
        return;
9828
20
    }
9829
9830
    /*
9831
     * parse the end of tag: '</' should be here.
9832
     */
9833
186k
    if (ctxt->sax2) {
9834
128k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9835
128k
  namePop(ctxt);
9836
128k
    }
9837
58.1k
#ifdef LIBXML_SAX1_ENABLED
9838
58.1k
    else
9839
58.1k
  xmlParseEndTag1(ctxt, 0);
9840
186k
#endif /* LIBXML_SAX1_ENABLED */
9841
9842
    /*
9843
     * Capture end position
9844
     */
9845
186k
    if (cur != NULL && ctxt->record_info) {
9846
0
        xmlParserNodeInfoPtr node_info;
9847
9848
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9849
0
        if (node_info != NULL) {
9850
0
            node_info->end_pos = ctxt->input->consumed +
9851
0
                                 (CUR_PTR - ctxt->input->base);
9852
0
            node_info->end_line = ctxt->input->line;
9853
0
        }
9854
0
    }
9855
186k
}
9856
9857
/**
9858
 * Parse the XML version value.
9859
 *
9860
 * @deprecated Internal function, don't use.
9861
 *
9862
 *     [26] VersionNum ::= '1.' [0-9]+
9863
 *
9864
 * In practice allow [0-9].[0-9]+ at that level
9865
 *
9866
 * @param ctxt  an XML parser context
9867
 * @returns the string giving the XML version number, or NULL
9868
 */
9869
xmlChar *
9870
95.6k
xmlParseVersionNum(xmlParserCtxt *ctxt) {
9871
95.6k
    xmlChar *buf = NULL;
9872
95.6k
    int len = 0;
9873
95.6k
    int size = 10;
9874
95.6k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9875
24.2k
                    XML_MAX_TEXT_LENGTH :
9876
95.6k
                    XML_MAX_NAME_LENGTH;
9877
95.6k
    xmlChar cur;
9878
9879
95.6k
    buf = xmlMalloc(size);
9880
95.6k
    if (buf == NULL) {
9881
4
  xmlErrMemory(ctxt);
9882
4
  return(NULL);
9883
4
    }
9884
95.6k
    cur = CUR;
9885
95.6k
    if (!((cur >= '0') && (cur <= '9'))) {
9886
4.88k
  xmlFree(buf);
9887
4.88k
  return(NULL);
9888
4.88k
    }
9889
90.7k
    buf[len++] = cur;
9890
90.7k
    NEXT;
9891
90.7k
    cur=CUR;
9892
90.7k
    if (cur != '.') {
9893
451
  xmlFree(buf);
9894
451
  return(NULL);
9895
451
    }
9896
90.2k
    buf[len++] = cur;
9897
90.2k
    NEXT;
9898
90.2k
    cur=CUR;
9899
182k
    while ((cur >= '0') && (cur <= '9')) {
9900
91.7k
  if (len + 1 >= size) {
9901
249
      xmlChar *tmp;
9902
249
            int newSize;
9903
9904
249
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9905
249
            if (newSize < 0) {
9906
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "VersionNum");
9907
0
                xmlFree(buf);
9908
0
                return(NULL);
9909
0
            }
9910
249
      tmp = xmlRealloc(buf, newSize);
9911
249
      if (tmp == NULL) {
9912
2
    xmlErrMemory(ctxt);
9913
2
          xmlFree(buf);
9914
2
    return(NULL);
9915
2
      }
9916
247
      buf = tmp;
9917
247
            size = newSize;
9918
247
  }
9919
91.7k
  buf[len++] = cur;
9920
91.7k
  NEXT;
9921
91.7k
  cur=CUR;
9922
91.7k
    }
9923
90.2k
    buf[len] = 0;
9924
90.2k
    return(buf);
9925
90.2k
}
9926
9927
/**
9928
 * Parse the XML version.
9929
 *
9930
 * @deprecated Internal function, don't use.
9931
 *
9932
 *     [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9933
 *
9934
 *     [25] Eq ::= S? '=' S?
9935
 *
9936
 * @param ctxt  an XML parser context
9937
 * @returns the version string, e.g. "1.0"
9938
 */
9939
9940
xmlChar *
9941
105k
xmlParseVersionInfo(xmlParserCtxt *ctxt) {
9942
105k
    xmlChar *version = NULL;
9943
9944
105k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9945
97.4k
  SKIP(7);
9946
97.4k
  SKIP_BLANKS;
9947
97.4k
  if (RAW != '=') {
9948
621
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9949
621
      return(NULL);
9950
621
        }
9951
96.8k
  NEXT;
9952
96.8k
  SKIP_BLANKS;
9953
96.8k
  if (RAW == '"') {
9954
95.2k
      NEXT;
9955
95.2k
      version = xmlParseVersionNum(ctxt);
9956
95.2k
      if (RAW != '"') {
9957
5.52k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9958
5.52k
      } else
9959
89.7k
          NEXT;
9960
95.2k
  } else if (RAW == '\''){
9961
388
      NEXT;
9962
388
      version = xmlParseVersionNum(ctxt);
9963
388
      if (RAW != '\'') {
9964
194
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9965
194
      } else
9966
194
          NEXT;
9967
1.18k
  } else {
9968
1.18k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9969
1.18k
  }
9970
96.8k
    }
9971
104k
    return(version);
9972
105k
}
9973
9974
/**
9975
 * Parse the XML encoding name
9976
 *
9977
 * @deprecated Internal function, don't use.
9978
 *
9979
 *     [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9980
 *
9981
 * @param ctxt  an XML parser context
9982
 * @returns the encoding name value or NULL
9983
 */
9984
xmlChar *
9985
18.6k
xmlParseEncName(xmlParserCtxt *ctxt) {
9986
18.6k
    xmlChar *buf = NULL;
9987
18.6k
    int len = 0;
9988
18.6k
    int size = 10;
9989
18.6k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9990
3.74k
                    XML_MAX_TEXT_LENGTH :
9991
18.6k
                    XML_MAX_NAME_LENGTH;
9992
18.6k
    xmlChar cur;
9993
9994
18.6k
    cur = CUR;
9995
18.6k
    if (((cur >= 'a') && (cur <= 'z')) ||
9996
18.6k
        ((cur >= 'A') && (cur <= 'Z'))) {
9997
9.14k
  buf = xmlMalloc(size);
9998
9.14k
  if (buf == NULL) {
9999
1
      xmlErrMemory(ctxt);
10000
1
      return(NULL);
10001
1
  }
10002
10003
9.13k
  buf[len++] = cur;
10004
9.13k
  NEXT;
10005
9.13k
  cur = CUR;
10006
62.3k
  while (((cur >= 'a') && (cur <= 'z')) ||
10007
62.3k
         ((cur >= 'A') && (cur <= 'Z')) ||
10008
62.3k
         ((cur >= '0') && (cur <= '9')) ||
10009
62.3k
         (cur == '.') || (cur == '_') ||
10010
62.3k
         (cur == '-')) {
10011
53.2k
      if (len + 1 >= size) {
10012
2.65k
          xmlChar *tmp;
10013
2.65k
                int newSize;
10014
10015
2.65k
                newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10016
2.65k
                if (newSize < 0) {
10017
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10018
0
                    xmlFree(buf);
10019
0
                    return(NULL);
10020
0
                }
10021
2.65k
    tmp = xmlRealloc(buf, newSize);
10022
2.65k
    if (tmp == NULL) {
10023
1
        xmlErrMemory(ctxt);
10024
1
        xmlFree(buf);
10025
1
        return(NULL);
10026
1
    }
10027
2.65k
    buf = tmp;
10028
2.65k
                size = newSize;
10029
2.65k
      }
10030
53.2k
      buf[len++] = cur;
10031
53.2k
      NEXT;
10032
53.2k
      cur = CUR;
10033
53.2k
        }
10034
9.13k
  buf[len] = 0;
10035
9.51k
    } else {
10036
9.51k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10037
9.51k
    }
10038
18.6k
    return(buf);
10039
18.6k
}
10040
10041
/**
10042
 * Parse the XML encoding declaration
10043
 *
10044
 * @deprecated Internal function, don't use.
10045
 *
10046
 *     [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | 
10047
 *                           "'" EncName "'")
10048
 *
10049
 * this setups the conversion filters.
10050
 *
10051
 * @param ctxt  an XML parser context
10052
 * @returns the encoding value or NULL
10053
 */
10054
10055
const xmlChar *
10056
100k
xmlParseEncodingDecl(xmlParserCtxt *ctxt) {
10057
100k
    xmlChar *encoding = NULL;
10058
10059
100k
    SKIP_BLANKS;
10060
100k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10061
81.3k
        return(NULL);
10062
10063
19.1k
    SKIP(8);
10064
19.1k
    SKIP_BLANKS;
10065
19.1k
    if (RAW != '=') {
10066
197
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10067
197
        return(NULL);
10068
197
    }
10069
18.9k
    NEXT;
10070
18.9k
    SKIP_BLANKS;
10071
18.9k
    if (RAW == '"') {
10072
18.2k
        NEXT;
10073
18.2k
        encoding = xmlParseEncName(ctxt);
10074
18.2k
        if (RAW != '"') {
10075
4.86k
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10076
4.86k
            xmlFree(encoding);
10077
4.86k
            return(NULL);
10078
4.86k
        } else
10079
13.4k
            NEXT;
10080
18.2k
    } else if (RAW == '\''){
10081
392
        NEXT;
10082
392
        encoding = xmlParseEncName(ctxt);
10083
392
        if (RAW != '\'') {
10084
198
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10085
198
            xmlFree(encoding);
10086
198
            return(NULL);
10087
198
        } else
10088
194
            NEXT;
10089
392
    } else {
10090
274
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10091
274
    }
10092
10093
13.8k
    if (encoding == NULL)
10094
5.24k
        return(NULL);
10095
10096
8.63k
    xmlSetDeclaredEncoding(ctxt, encoding);
10097
10098
8.63k
    return(ctxt->encoding);
10099
13.8k
}
10100
10101
/**
10102
 * Parse the XML standalone declaration
10103
 *
10104
 * @deprecated Internal function, don't use.
10105
 *
10106
 *     [32] SDDecl ::= S 'standalone' Eq
10107
 *                     (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10108
 *
10109
 * [ VC: Standalone Document Declaration ]
10110
 * TODO The standalone document declaration must have the value "no"
10111
 * if any external markup declarations contain declarations of:
10112
 *  - attributes with default values, if elements to which these
10113
 *    attributes apply appear in the document without specifications
10114
 *    of values for these attributes, or
10115
 *  - entities (other than amp, lt, gt, apos, quot), if references
10116
 *    to those entities appear in the document, or
10117
 *  - attributes with values subject to normalization, where the
10118
 *    attribute appears in the document with a value which will change
10119
 *    as a result of normalization, or
10120
 *  - element types with element content, if white space occurs directly
10121
 *    within any instance of those types.
10122
 *
10123
 * @param ctxt  an XML parser context
10124
 * @returns
10125
 *   1 if standalone="yes"
10126
 *   0 if standalone="no"
10127
 *  -2 if standalone attribute is missing or invalid
10128
 *    (A standalone value of -2 means that the XML declaration was found,
10129
 *     but no value was specified for the standalone attribute).
10130
 */
10131
10132
int
10133
99.3k
xmlParseSDDecl(xmlParserCtxt *ctxt) {
10134
99.3k
    int standalone = -2;
10135
10136
99.3k
    SKIP_BLANKS;
10137
99.3k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10138
81.3k
  SKIP(10);
10139
81.3k
        SKIP_BLANKS;
10140
81.3k
  if (RAW != '=') {
10141
195
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10142
195
      return(standalone);
10143
195
        }
10144
81.1k
  NEXT;
10145
81.1k
  SKIP_BLANKS;
10146
81.1k
        if (RAW == '\''){
10147
1.65k
      NEXT;
10148
1.65k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10149
388
          standalone = 0;
10150
388
                SKIP(2);
10151
1.27k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10152
1.27k
                 (NXT(2) == 's')) {
10153
388
          standalone = 1;
10154
388
    SKIP(3);
10155
882
            } else {
10156
882
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10157
882
      }
10158
1.65k
      if (RAW != '\'') {
10159
1.45k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10160
1.45k
      } else
10161
202
          NEXT;
10162
79.4k
  } else if (RAW == '"'){
10163
79.2k
      NEXT;
10164
79.2k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10165
488
          standalone = 0;
10166
488
    SKIP(2);
10167
78.7k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10168
78.7k
                 (NXT(2) == 's')) {
10169
78.0k
          standalone = 1;
10170
78.0k
                SKIP(3);
10171
78.0k
            } else {
10172
764
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10173
764
      }
10174
79.2k
      if (RAW != '"') {
10175
1.15k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10176
1.15k
      } else
10177
78.1k
          NEXT;
10178
79.2k
  } else {
10179
198
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10180
198
        }
10181
81.1k
    }
10182
99.1k
    return(standalone);
10183
99.3k
}
10184
10185
/**
10186
 * Parse an XML declaration header
10187
 *
10188
 * @deprecated Internal function, don't use.
10189
 *
10190
 *     [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10191
 * @param ctxt  an XML parser context
10192
 */
10193
10194
void
10195
105k
xmlParseXMLDecl(xmlParserCtxt *ctxt) {
10196
105k
    xmlChar *version;
10197
10198
    /*
10199
     * This value for standalone indicates that the document has an
10200
     * XML declaration but it does not have a standalone attribute.
10201
     * It will be overwritten later if a standalone attribute is found.
10202
     */
10203
10204
105k
    ctxt->standalone = -2;
10205
10206
    /*
10207
     * We know that '<?xml' is here.
10208
     */
10209
105k
    SKIP(5);
10210
10211
105k
    if (!IS_BLANK_CH(RAW)) {
10212
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10213
0
                 "Blank needed after '<?xml'\n");
10214
0
    }
10215
105k
    SKIP_BLANKS;
10216
10217
    /*
10218
     * We must have the VersionInfo here.
10219
     */
10220
105k
    version = xmlParseVersionInfo(ctxt);
10221
105k
    if (version == NULL) {
10222
14.9k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10223
90.2k
    } else {
10224
90.2k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10225
      /*
10226
       * Changed here for XML-1.0 5th edition
10227
       */
10228
1.08k
      if (ctxt->options & XML_PARSE_OLD10) {
10229
293
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10230
293
                "Unsupported version '%s'\n",
10231
293
                version);
10232
793
      } else {
10233
793
          if ((version[0] == '1') && ((version[1] == '.'))) {
10234
346
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10235
346
                      "Unsupported version '%s'\n",
10236
346
          version, NULL);
10237
447
    } else {
10238
447
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10239
447
              "Unsupported version '%s'\n",
10240
447
              version);
10241
447
    }
10242
793
      }
10243
1.08k
  }
10244
90.2k
  if (ctxt->version != NULL)
10245
0
      xmlFree(ctxt->version);
10246
90.2k
  ctxt->version = version;
10247
90.2k
    }
10248
10249
    /*
10250
     * We may have the encoding declaration
10251
     */
10252
105k
    if (!IS_BLANK_CH(RAW)) {
10253
19.4k
        if ((RAW == '?') && (NXT(1) == '>')) {
10254
4.79k
      SKIP(2);
10255
4.79k
      return;
10256
4.79k
  }
10257
14.6k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10258
14.6k
    }
10259
100k
    xmlParseEncodingDecl(ctxt);
10260
10261
    /*
10262
     * We may have the standalone status.
10263
     */
10264
100k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10265
2.09k
        if ((RAW == '?') && (NXT(1) == '>')) {
10266
1.05k
      SKIP(2);
10267
1.05k
      return;
10268
1.05k
  }
10269
1.04k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10270
1.04k
    }
10271
10272
    /*
10273
     * We can grow the input buffer freely at that point
10274
     */
10275
99.3k
    GROW;
10276
10277
99.3k
    SKIP_BLANKS;
10278
99.3k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10279
10280
99.3k
    SKIP_BLANKS;
10281
99.3k
    if ((RAW == '?') && (NXT(1) == '>')) {
10282
78.3k
        SKIP(2);
10283
78.3k
    } else if (RAW == '>') {
10284
        /* Deprecated old WD ... */
10285
212
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10286
212
  NEXT;
10287
20.7k
    } else {
10288
20.7k
        int c;
10289
10290
20.7k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10291
355k
        while ((PARSER_STOPPED(ctxt) == 0) &&
10292
355k
               ((c = CUR) != 0)) {
10293
344k
            NEXT;
10294
344k
            if (c == '>')
10295
9.06k
                break;
10296
344k
        }
10297
20.7k
    }
10298
99.3k
}
10299
10300
/**
10301
 * @since 2.14.0
10302
 *
10303
 * @param ctxt  parser context
10304
 * @returns the version from the XML declaration.
10305
 */
10306
const xmlChar *
10307
0
xmlCtxtGetVersion(xmlParserCtxt *ctxt) {
10308
0
    if (ctxt == NULL)
10309
0
        return(NULL);
10310
10311
0
    return(ctxt->version);
10312
0
}
10313
10314
/**
10315
 * @since 2.14.0
10316
 *
10317
 * @param ctxt  parser context
10318
 * @returns the value from the standalone document declaration.
10319
 */
10320
int
10321
0
xmlCtxtGetStandalone(xmlParserCtxt *ctxt) {
10322
0
    if (ctxt == NULL)
10323
0
        return(0);
10324
10325
0
    return(ctxt->standalone);
10326
0
}
10327
10328
/**
10329
 * Parse an XML Misc* optional field.
10330
 *
10331
 * @deprecated Internal function, don't use.
10332
 *
10333
 *     [27] Misc ::= Comment | PI |  S
10334
 * @param ctxt  an XML parser context
10335
 */
10336
10337
void
10338
341k
xmlParseMisc(xmlParserCtxt *ctxt) {
10339
386k
    while (PARSER_STOPPED(ctxt) == 0) {
10340
381k
        SKIP_BLANKS;
10341
381k
        GROW;
10342
381k
        if ((RAW == '<') && (NXT(1) == '?')) {
10343
38.7k
      xmlParsePI(ctxt);
10344
342k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10345
5.75k
      xmlParseComment(ctxt);
10346
337k
        } else {
10347
337k
            break;
10348
337k
        }
10349
381k
    }
10350
341k
}
10351
10352
static void
10353
165k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10354
165k
    xmlDocPtr doc;
10355
10356
    /*
10357
     * SAX: end of the document processing.
10358
     */
10359
165k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10360
165k
        ctxt->sax->endDocument(ctxt->userData);
10361
10362
    /*
10363
     * Remove locally kept entity definitions if the tree was not built
10364
     */
10365
165k
    doc = ctxt->myDoc;
10366
165k
    if ((doc != NULL) &&
10367
165k
        (xmlStrEqual(doc->version, SAX_COMPAT_MODE))) {
10368
1.96k
        xmlFreeDoc(doc);
10369
1.96k
        ctxt->myDoc = NULL;
10370
1.96k
    }
10371
165k
}
10372
10373
/**
10374
 * Parse an XML document and invoke the SAX handlers. This is useful
10375
 * if you're only interested in custom SAX callbacks. If you want a
10376
 * document tree, use #xmlCtxtParseDocument.
10377
 *
10378
 * @param ctxt  an XML parser context
10379
 * @returns 0, -1 in case of error.
10380
 */
10381
10382
int
10383
167k
xmlParseDocument(xmlParserCtxt *ctxt) {
10384
167k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10385
0
        return(-1);
10386
10387
167k
    GROW;
10388
10389
    /*
10390
     * SAX: detecting the level.
10391
     */
10392
167k
    xmlCtxtInitializeLate(ctxt);
10393
10394
167k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10395
167k
        ctxt->sax->setDocumentLocator(ctxt->userData,
10396
167k
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10397
167k
    }
10398
10399
167k
    xmlDetectEncoding(ctxt);
10400
10401
167k
    if (CUR == 0) {
10402
2.31k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10403
2.31k
  return(-1);
10404
2.31k
    }
10405
10406
165k
    GROW;
10407
165k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10408
10409
  /*
10410
   * Note that we will switch encoding on the fly.
10411
   */
10412
105k
  xmlParseXMLDecl(ctxt);
10413
105k
  SKIP_BLANKS;
10414
105k
    } else {
10415
59.8k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10416
59.8k
        if (ctxt->version == NULL) {
10417
12
            xmlErrMemory(ctxt);
10418
12
            return(-1);
10419
12
        }
10420
59.8k
    }
10421
165k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10422
143k
        ctxt->sax->startDocument(ctxt->userData);
10423
165k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10424
165k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10425
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10426
0
    }
10427
10428
    /*
10429
     * The Misc part of the Prolog
10430
     */
10431
165k
    xmlParseMisc(ctxt);
10432
10433
    /*
10434
     * Then possibly doc type declaration(s) and more Misc
10435
     * (doctypedecl Misc*)?
10436
     */
10437
165k
    GROW;
10438
165k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10439
10440
105k
  ctxt->inSubset = 1;
10441
105k
  xmlParseDocTypeDecl(ctxt);
10442
105k
  if (RAW == '[') {
10443
81.4k
      xmlParseInternalSubset(ctxt);
10444
81.4k
  } else if (RAW == '>') {
10445
19.0k
            NEXT;
10446
19.0k
        }
10447
10448
  /*
10449
   * Create and update the external subset.
10450
   */
10451
105k
  ctxt->inSubset = 2;
10452
105k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10453
105k
      (!ctxt->disableSAX))
10454
78.4k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10455
78.4k
                                ctxt->extSubSystem, ctxt->extSubURI);
10456
105k
  ctxt->inSubset = 0;
10457
10458
105k
        xmlCleanSpecialAttr(ctxt);
10459
10460
105k
  xmlParseMisc(ctxt);
10461
105k
    }
10462
10463
    /*
10464
     * Time to start parsing the tree itself
10465
     */
10466
165k
    GROW;
10467
165k
    if (RAW != '<') {
10468
93.6k
        if (ctxt->wellFormed)
10469
15.6k
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10470
15.6k
                           "Start tag expected, '<' not found\n");
10471
93.6k
    } else {
10472
71.4k
  xmlParseElement(ctxt);
10473
10474
  /*
10475
   * The Misc part at the end
10476
   */
10477
71.4k
  xmlParseMisc(ctxt);
10478
10479
71.4k
        xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
10480
71.4k
    }
10481
10482
165k
    ctxt->instate = XML_PARSER_EOF;
10483
165k
    xmlFinishDocument(ctxt);
10484
10485
165k
    if (! ctxt->wellFormed) {
10486
132k
  ctxt->valid = 0;
10487
132k
  return(-1);
10488
132k
    }
10489
10490
32.2k
    return(0);
10491
165k
}
10492
10493
/**
10494
 * Parse a general parsed entity
10495
 * An external general parsed entity is well-formed if it matches the
10496
 * production labeled extParsedEnt.
10497
 *
10498
 * @deprecated Internal function, don't use.
10499
 *
10500
 *     [78] extParsedEnt ::= TextDecl? content
10501
 *
10502
 * @param ctxt  an XML parser context
10503
 * @returns 0, -1 in case of error. the parser context is augmented
10504
 *                as a result of the parsing.
10505
 */
10506
10507
int
10508
0
xmlParseExtParsedEnt(xmlParserCtxt *ctxt) {
10509
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10510
0
        return(-1);
10511
10512
0
    xmlCtxtInitializeLate(ctxt);
10513
10514
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10515
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10516
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10517
0
    }
10518
10519
0
    xmlDetectEncoding(ctxt);
10520
10521
0
    if (CUR == 0) {
10522
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10523
0
    }
10524
10525
    /*
10526
     * Check for the XMLDecl in the Prolog.
10527
     */
10528
0
    GROW;
10529
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10530
10531
  /*
10532
   * Note that we will switch encoding on the fly.
10533
   */
10534
0
  xmlParseXMLDecl(ctxt);
10535
0
  SKIP_BLANKS;
10536
0
    } else {
10537
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10538
0
    }
10539
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10540
0
        ctxt->sax->startDocument(ctxt->userData);
10541
10542
    /*
10543
     * Doing validity checking on chunk doesn't make sense
10544
     */
10545
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10546
0
    ctxt->validate = 0;
10547
0
    ctxt->depth = 0;
10548
10549
0
    xmlParseContentInternal(ctxt);
10550
10551
0
    if (ctxt->input->cur < ctxt->input->end)
10552
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10553
10554
    /*
10555
     * SAX: end of the document processing.
10556
     */
10557
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10558
0
        ctxt->sax->endDocument(ctxt->userData);
10559
10560
0
    if (! ctxt->wellFormed) return(-1);
10561
0
    return(0);
10562
0
}
10563
10564
#ifdef LIBXML_PUSH_ENABLED
10565
/************************************************************************
10566
 *                  *
10567
 *    Progressive parsing interfaces        *
10568
 *                  *
10569
 ************************************************************************/
10570
10571
/**
10572
 * Check whether the input buffer contains a character.
10573
 *
10574
 * @param ctxt  an XML parser context
10575
 * @param c  character
10576
 */
10577
static int
10578
0
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10579
0
    const xmlChar *cur;
10580
10581
0
    if (ctxt->checkIndex == 0) {
10582
0
        cur = ctxt->input->cur + 1;
10583
0
    } else {
10584
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10585
0
    }
10586
10587
0
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10588
0
        size_t index = ctxt->input->end - ctxt->input->cur;
10589
10590
0
        if (index > LONG_MAX) {
10591
0
            ctxt->checkIndex = 0;
10592
0
            return(1);
10593
0
        }
10594
0
        ctxt->checkIndex = index;
10595
0
        return(0);
10596
0
    } else {
10597
0
        ctxt->checkIndex = 0;
10598
0
        return(1);
10599
0
    }
10600
0
}
10601
10602
/**
10603
 * Check whether the input buffer contains a string.
10604
 *
10605
 * @param ctxt  an XML parser context
10606
 * @param startDelta  delta to apply at the start
10607
 * @param str  string
10608
 * @param strLen  length of string
10609
 */
10610
static const xmlChar *
10611
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10612
0
                     const char *str, size_t strLen) {
10613
0
    const xmlChar *cur, *term;
10614
10615
0
    if (ctxt->checkIndex == 0) {
10616
0
        cur = ctxt->input->cur + startDelta;
10617
0
    } else {
10618
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10619
0
    }
10620
10621
0
    term = BAD_CAST strstr((const char *) cur, str);
10622
0
    if (term == NULL) {
10623
0
        const xmlChar *end = ctxt->input->end;
10624
0
        size_t index;
10625
10626
        /* Rescan (strLen - 1) characters. */
10627
0
        if ((size_t) (end - cur) < strLen)
10628
0
            end = cur;
10629
0
        else
10630
0
            end -= strLen - 1;
10631
0
        index = end - ctxt->input->cur;
10632
0
        if (index > LONG_MAX) {
10633
0
            ctxt->checkIndex = 0;
10634
0
            return(ctxt->input->end - strLen);
10635
0
        }
10636
0
        ctxt->checkIndex = index;
10637
0
    } else {
10638
0
        ctxt->checkIndex = 0;
10639
0
    }
10640
10641
0
    return(term);
10642
0
}
10643
10644
/**
10645
 * Check whether the input buffer contains terminated char data.
10646
 *
10647
 * @param ctxt  an XML parser context
10648
 */
10649
static int
10650
0
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10651
0
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10652
0
    const xmlChar *end = ctxt->input->end;
10653
0
    size_t index;
10654
10655
0
    while (cur < end) {
10656
0
        if ((*cur == '<') || (*cur == '&')) {
10657
0
            ctxt->checkIndex = 0;
10658
0
            return(1);
10659
0
        }
10660
0
        cur++;
10661
0
    }
10662
10663
0
    index = cur - ctxt->input->cur;
10664
0
    if (index > LONG_MAX) {
10665
0
        ctxt->checkIndex = 0;
10666
0
        return(1);
10667
0
    }
10668
0
    ctxt->checkIndex = index;
10669
0
    return(0);
10670
0
}
10671
10672
/**
10673
 * Check whether there's enough data in the input buffer to finish parsing
10674
 * a start tag. This has to take quotes into account.
10675
 *
10676
 * @param ctxt  an XML parser context
10677
 */
10678
static int
10679
0
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10680
0
    const xmlChar *cur;
10681
0
    const xmlChar *end = ctxt->input->end;
10682
0
    int state = ctxt->endCheckState;
10683
0
    size_t index;
10684
10685
0
    if (ctxt->checkIndex == 0)
10686
0
        cur = ctxt->input->cur + 1;
10687
0
    else
10688
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10689
10690
0
    while (cur < end) {
10691
0
        if (state) {
10692
0
            if (*cur == state)
10693
0
                state = 0;
10694
0
        } else if (*cur == '\'' || *cur == '"') {
10695
0
            state = *cur;
10696
0
        } else if (*cur == '>') {
10697
0
            ctxt->checkIndex = 0;
10698
0
            ctxt->endCheckState = 0;
10699
0
            return(1);
10700
0
        }
10701
0
        cur++;
10702
0
    }
10703
10704
0
    index = cur - ctxt->input->cur;
10705
0
    if (index > LONG_MAX) {
10706
0
        ctxt->checkIndex = 0;
10707
0
        ctxt->endCheckState = 0;
10708
0
        return(1);
10709
0
    }
10710
0
    ctxt->checkIndex = index;
10711
0
    ctxt->endCheckState = state;
10712
0
    return(0);
10713
0
}
10714
10715
/**
10716
 * Check whether there's enough data in the input buffer to finish parsing
10717
 * the internal subset.
10718
 *
10719
 * @param ctxt  an XML parser context
10720
 */
10721
static int
10722
0
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10723
    /*
10724
     * Sorry, but progressive parsing of the internal subset is not
10725
     * supported. We first check that the full content of the internal
10726
     * subset is available and parsing is launched only at that point.
10727
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10728
     * not in a ']]>' sequence which are conditional sections.
10729
     */
10730
0
    const xmlChar *cur, *start;
10731
0
    const xmlChar *end = ctxt->input->end;
10732
0
    int state = ctxt->endCheckState;
10733
0
    size_t index;
10734
10735
0
    if (ctxt->checkIndex == 0) {
10736
0
        cur = ctxt->input->cur + 1;
10737
0
    } else {
10738
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10739
0
    }
10740
0
    start = cur;
10741
10742
0
    while (cur < end) {
10743
0
        if (state == '-') {
10744
0
            if ((*cur == '-') &&
10745
0
                (cur[1] == '-') &&
10746
0
                (cur[2] == '>')) {
10747
0
                state = 0;
10748
0
                cur += 3;
10749
0
                start = cur;
10750
0
                continue;
10751
0
            }
10752
0
        }
10753
0
        else if (state == ']') {
10754
0
            if (*cur == '>') {
10755
0
                ctxt->checkIndex = 0;
10756
0
                ctxt->endCheckState = 0;
10757
0
                return(1);
10758
0
            }
10759
0
            if (IS_BLANK_CH(*cur)) {
10760
0
                state = ' ';
10761
0
            } else if (*cur != ']') {
10762
0
                state = 0;
10763
0
                start = cur;
10764
0
                continue;
10765
0
            }
10766
0
        }
10767
0
        else if (state == ' ') {
10768
0
            if (*cur == '>') {
10769
0
                ctxt->checkIndex = 0;
10770
0
                ctxt->endCheckState = 0;
10771
0
                return(1);
10772
0
            }
10773
0
            if (!IS_BLANK_CH(*cur)) {
10774
0
                state = 0;
10775
0
                start = cur;
10776
0
                continue;
10777
0
            }
10778
0
        }
10779
0
        else if (state != 0) {
10780
0
            if (*cur == state) {
10781
0
                state = 0;
10782
0
                start = cur + 1;
10783
0
            }
10784
0
        }
10785
0
        else if (*cur == '<') {
10786
0
            if ((cur[1] == '!') &&
10787
0
                (cur[2] == '-') &&
10788
0
                (cur[3] == '-')) {
10789
0
                state = '-';
10790
0
                cur += 4;
10791
                /* Don't treat <!--> as comment */
10792
0
                start = cur;
10793
0
                continue;
10794
0
            }
10795
0
        }
10796
0
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10797
0
            state = *cur;
10798
0
        }
10799
10800
0
        cur++;
10801
0
    }
10802
10803
    /*
10804
     * Rescan the three last characters to detect "<!--" and "-->"
10805
     * split across chunks.
10806
     */
10807
0
    if ((state == 0) || (state == '-')) {
10808
0
        if (cur - start < 3)
10809
0
            cur = start;
10810
0
        else
10811
0
            cur -= 3;
10812
0
    }
10813
0
    index = cur - ctxt->input->cur;
10814
0
    if (index > LONG_MAX) {
10815
0
        ctxt->checkIndex = 0;
10816
0
        ctxt->endCheckState = 0;
10817
0
        return(1);
10818
0
    }
10819
0
    ctxt->checkIndex = index;
10820
0
    ctxt->endCheckState = state;
10821
0
    return(0);
10822
0
}
10823
10824
/**
10825
 * Try to progress on parsing
10826
 *
10827
 * @param ctxt  an XML parser context
10828
 * @param terminate  last chunk indicator
10829
 * @returns zero if no parsing was possible
10830
 */
10831
static int
10832
0
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10833
0
    int ret = 0;
10834
0
    size_t avail;
10835
0
    xmlChar cur, next;
10836
10837
0
    if (ctxt->input == NULL)
10838
0
        return(0);
10839
10840
0
    if ((ctxt->input != NULL) &&
10841
0
        (ctxt->input->cur - ctxt->input->base > 4096)) {
10842
0
        xmlParserShrink(ctxt);
10843
0
    }
10844
10845
0
    while (ctxt->disableSAX == 0) {
10846
0
        avail = ctxt->input->end - ctxt->input->cur;
10847
0
        if (avail < 1)
10848
0
      goto done;
10849
0
        switch (ctxt->instate) {
10850
0
            case XML_PARSER_EOF:
10851
          /*
10852
     * Document parsing is done !
10853
     */
10854
0
          goto done;
10855
0
            case XML_PARSER_START:
10856
                /*
10857
                 * Very first chars read from the document flow.
10858
                 */
10859
0
                if ((!terminate) && (avail < 4))
10860
0
                    goto done;
10861
10862
                /*
10863
                 * We need more bytes to detect EBCDIC code pages.
10864
                 * See xmlDetectEBCDIC.
10865
                 */
10866
0
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
10867
0
                    (!terminate) && (avail < 200))
10868
0
                    goto done;
10869
10870
0
                xmlDetectEncoding(ctxt);
10871
0
                ctxt->instate = XML_PARSER_XML_DECL;
10872
0
    break;
10873
10874
0
            case XML_PARSER_XML_DECL:
10875
0
    if ((!terminate) && (avail < 2))
10876
0
        goto done;
10877
0
    cur = ctxt->input->cur[0];
10878
0
    next = ctxt->input->cur[1];
10879
0
          if ((cur == '<') && (next == '?')) {
10880
        /* PI or XML decl */
10881
0
        if ((!terminate) &&
10882
0
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
10883
0
      goto done;
10884
0
        if ((ctxt->input->cur[2] == 'x') &&
10885
0
      (ctxt->input->cur[3] == 'm') &&
10886
0
      (ctxt->input->cur[4] == 'l') &&
10887
0
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
10888
0
      ret += 5;
10889
0
      xmlParseXMLDecl(ctxt);
10890
0
        } else {
10891
0
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10892
0
                        if (ctxt->version == NULL) {
10893
0
                            xmlErrMemory(ctxt);
10894
0
                            break;
10895
0
                        }
10896
0
        }
10897
0
    } else {
10898
0
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10899
0
        if (ctxt->version == NULL) {
10900
0
            xmlErrMemory(ctxt);
10901
0
      break;
10902
0
        }
10903
0
    }
10904
0
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10905
0
                    ctxt->sax->setDocumentLocator(ctxt->userData,
10906
0
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
10907
0
                }
10908
0
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10909
0
                    (!ctxt->disableSAX))
10910
0
                    ctxt->sax->startDocument(ctxt->userData);
10911
0
                ctxt->instate = XML_PARSER_MISC;
10912
0
    break;
10913
0
            case XML_PARSER_START_TAG: {
10914
0
          const xmlChar *name;
10915
0
    const xmlChar *prefix = NULL;
10916
0
    const xmlChar *URI = NULL;
10917
0
                int line = ctxt->input->line;
10918
0
    int nbNs = 0;
10919
10920
0
    if ((!terminate) && (avail < 2))
10921
0
        goto done;
10922
0
    cur = ctxt->input->cur[0];
10923
0
          if (cur != '<') {
10924
0
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10925
0
                                   "Start tag expected, '<' not found");
10926
0
                    ctxt->instate = XML_PARSER_EOF;
10927
0
                    xmlFinishDocument(ctxt);
10928
0
        goto done;
10929
0
    }
10930
0
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
10931
0
                    goto done;
10932
0
    if (ctxt->spaceNr == 0)
10933
0
        spacePush(ctxt, -1);
10934
0
    else if (*ctxt->space == -2)
10935
0
        spacePush(ctxt, -1);
10936
0
    else
10937
0
        spacePush(ctxt, *ctxt->space);
10938
0
#ifdef LIBXML_SAX1_ENABLED
10939
0
    if (ctxt->sax2)
10940
0
#endif /* LIBXML_SAX1_ENABLED */
10941
0
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
10942
0
#ifdef LIBXML_SAX1_ENABLED
10943
0
    else
10944
0
        name = xmlParseStartTag(ctxt);
10945
0
#endif /* LIBXML_SAX1_ENABLED */
10946
0
    if (name == NULL) {
10947
0
        spacePop(ctxt);
10948
0
                    ctxt->instate = XML_PARSER_EOF;
10949
0
                    xmlFinishDocument(ctxt);
10950
0
        goto done;
10951
0
    }
10952
0
#ifdef LIBXML_VALID_ENABLED
10953
    /*
10954
     * [ VC: Root Element Type ]
10955
     * The Name in the document type declaration must match
10956
     * the element type of the root element.
10957
     */
10958
0
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10959
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10960
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10961
0
#endif /* LIBXML_VALID_ENABLED */
10962
10963
    /*
10964
     * Check for an Empty Element.
10965
     */
10966
0
    if ((RAW == '/') && (NXT(1) == '>')) {
10967
0
        SKIP(2);
10968
10969
0
        if (ctxt->sax2) {
10970
0
      if ((ctxt->sax != NULL) &&
10971
0
          (ctxt->sax->endElementNs != NULL) &&
10972
0
          (!ctxt->disableSAX))
10973
0
          ctxt->sax->endElementNs(ctxt->userData, name,
10974
0
                                  prefix, URI);
10975
0
      if (nbNs > 0)
10976
0
          xmlParserNsPop(ctxt, nbNs);
10977
0
#ifdef LIBXML_SAX1_ENABLED
10978
0
        } else {
10979
0
      if ((ctxt->sax != NULL) &&
10980
0
          (ctxt->sax->endElement != NULL) &&
10981
0
          (!ctxt->disableSAX))
10982
0
          ctxt->sax->endElement(ctxt->userData, name);
10983
0
#endif /* LIBXML_SAX1_ENABLED */
10984
0
        }
10985
0
        spacePop(ctxt);
10986
0
    } else if (RAW == '>') {
10987
0
        NEXT;
10988
0
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
10989
0
    } else {
10990
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
10991
0
           "Couldn't find end of Start Tag %s\n",
10992
0
           name);
10993
0
        nodePop(ctxt);
10994
0
        spacePop(ctxt);
10995
0
                    if (nbNs > 0)
10996
0
                        xmlParserNsPop(ctxt, nbNs);
10997
0
    }
10998
10999
0
                if (ctxt->nameNr == 0)
11000
0
                    ctxt->instate = XML_PARSER_EPILOG;
11001
0
                else
11002
0
                    ctxt->instate = XML_PARSER_CONTENT;
11003
0
                break;
11004
0
      }
11005
0
            case XML_PARSER_CONTENT: {
11006
0
    cur = ctxt->input->cur[0];
11007
11008
0
    if (cur == '<') {
11009
0
                    if ((!terminate) && (avail < 2))
11010
0
                        goto done;
11011
0
        next = ctxt->input->cur[1];
11012
11013
0
                    if (next == '/') {
11014
0
                        ctxt->instate = XML_PARSER_END_TAG;
11015
0
                        break;
11016
0
                    } else if (next == '?') {
11017
0
                        if ((!terminate) &&
11018
0
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11019
0
                            goto done;
11020
0
                        xmlParsePI(ctxt);
11021
0
                        ctxt->instate = XML_PARSER_CONTENT;
11022
0
                        break;
11023
0
                    } else if (next == '!') {
11024
0
                        if ((!terminate) && (avail < 3))
11025
0
                            goto done;
11026
0
                        next = ctxt->input->cur[2];
11027
11028
0
                        if (next == '-') {
11029
0
                            if ((!terminate) && (avail < 4))
11030
0
                                goto done;
11031
0
                            if (ctxt->input->cur[3] == '-') {
11032
0
                                if ((!terminate) &&
11033
0
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11034
0
                                    goto done;
11035
0
                                xmlParseComment(ctxt);
11036
0
                                ctxt->instate = XML_PARSER_CONTENT;
11037
0
                                break;
11038
0
                            }
11039
0
                        } else if (next == '[') {
11040
0
                            if ((!terminate) && (avail < 9))
11041
0
                                goto done;
11042
0
                            if ((ctxt->input->cur[2] == '[') &&
11043
0
                                (ctxt->input->cur[3] == 'C') &&
11044
0
                                (ctxt->input->cur[4] == 'D') &&
11045
0
                                (ctxt->input->cur[5] == 'A') &&
11046
0
                                (ctxt->input->cur[6] == 'T') &&
11047
0
                                (ctxt->input->cur[7] == 'A') &&
11048
0
                                (ctxt->input->cur[8] == '[')) {
11049
0
                                if ((!terminate) &&
11050
0
                                    (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11051
0
                                    goto done;
11052
0
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11053
0
                                xmlParseCDSect(ctxt);
11054
0
                                ctxt->instate = XML_PARSER_CONTENT;
11055
0
                                break;
11056
0
                            }
11057
0
                        }
11058
0
                    }
11059
0
    } else if (cur == '&') {
11060
0
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11061
0
      goto done;
11062
0
        xmlParseReference(ctxt);
11063
0
                    break;
11064
0
    } else {
11065
        /* TODO Avoid the extra copy, handle directly !!! */
11066
        /*
11067
         * Goal of the following test is:
11068
         *  - minimize calls to the SAX 'character' callback
11069
         *    when they are mergeable
11070
         *  - handle an problem for isBlank when we only parse
11071
         *    a sequence of blank chars and the next one is
11072
         *    not available to check against '<' presence.
11073
         *  - tries to homogenize the differences in SAX
11074
         *    callbacks between the push and pull versions
11075
         *    of the parser.
11076
         */
11077
0
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11078
0
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11079
0
          goto done;
11080
0
                    }
11081
0
                    ctxt->checkIndex = 0;
11082
0
        xmlParseCharDataInternal(ctxt, !terminate);
11083
0
                    break;
11084
0
    }
11085
11086
0
                ctxt->instate = XML_PARSER_START_TAG;
11087
0
    break;
11088
0
      }
11089
0
            case XML_PARSER_END_TAG:
11090
0
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11091
0
        goto done;
11092
0
    if (ctxt->sax2) {
11093
0
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11094
0
        nameNsPop(ctxt);
11095
0
    }
11096
0
#ifdef LIBXML_SAX1_ENABLED
11097
0
      else
11098
0
        xmlParseEndTag1(ctxt, 0);
11099
0
#endif /* LIBXML_SAX1_ENABLED */
11100
0
    if (ctxt->nameNr == 0) {
11101
0
        ctxt->instate = XML_PARSER_EPILOG;
11102
0
    } else {
11103
0
        ctxt->instate = XML_PARSER_CONTENT;
11104
0
    }
11105
0
    break;
11106
0
            case XML_PARSER_MISC:
11107
0
            case XML_PARSER_PROLOG:
11108
0
            case XML_PARSER_EPILOG:
11109
0
    SKIP_BLANKS;
11110
0
                avail = ctxt->input->end - ctxt->input->cur;
11111
0
    if (avail < 1)
11112
0
        goto done;
11113
0
    if (ctxt->input->cur[0] == '<') {
11114
0
                    if ((!terminate) && (avail < 2))
11115
0
                        goto done;
11116
0
                    next = ctxt->input->cur[1];
11117
0
                    if (next == '?') {
11118
0
                        if ((!terminate) &&
11119
0
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11120
0
                            goto done;
11121
0
                        xmlParsePI(ctxt);
11122
0
                        break;
11123
0
                    } else if (next == '!') {
11124
0
                        if ((!terminate) && (avail < 3))
11125
0
                            goto done;
11126
11127
0
                        if (ctxt->input->cur[2] == '-') {
11128
0
                            if ((!terminate) && (avail < 4))
11129
0
                                goto done;
11130
0
                            if (ctxt->input->cur[3] == '-') {
11131
0
                                if ((!terminate) &&
11132
0
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11133
0
                                    goto done;
11134
0
                                xmlParseComment(ctxt);
11135
0
                                break;
11136
0
                            }
11137
0
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11138
0
                            if ((!terminate) && (avail < 9))
11139
0
                                goto done;
11140
0
                            if ((ctxt->input->cur[2] == 'D') &&
11141
0
                                (ctxt->input->cur[3] == 'O') &&
11142
0
                                (ctxt->input->cur[4] == 'C') &&
11143
0
                                (ctxt->input->cur[5] == 'T') &&
11144
0
                                (ctxt->input->cur[6] == 'Y') &&
11145
0
                                (ctxt->input->cur[7] == 'P') &&
11146
0
                                (ctxt->input->cur[8] == 'E')) {
11147
0
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11148
0
                                    goto done;
11149
0
                                ctxt->inSubset = 1;
11150
0
                                xmlParseDocTypeDecl(ctxt);
11151
0
                                if (RAW == '[') {
11152
0
                                    ctxt->instate = XML_PARSER_DTD;
11153
0
                                } else {
11154
0
                                    if (RAW == '>')
11155
0
                                        NEXT;
11156
                                    /*
11157
                                     * Create and update the external subset.
11158
                                     */
11159
0
                                    ctxt->inSubset = 2;
11160
0
                                    if ((ctxt->sax != NULL) &&
11161
0
                                        (!ctxt->disableSAX) &&
11162
0
                                        (ctxt->sax->externalSubset != NULL))
11163
0
                                        ctxt->sax->externalSubset(
11164
0
                                                ctxt->userData,
11165
0
                                                ctxt->intSubName,
11166
0
                                                ctxt->extSubSystem,
11167
0
                                                ctxt->extSubURI);
11168
0
                                    ctxt->inSubset = 0;
11169
0
                                    xmlCleanSpecialAttr(ctxt);
11170
0
                                    ctxt->instate = XML_PARSER_PROLOG;
11171
0
                                }
11172
0
                                break;
11173
0
                            }
11174
0
                        }
11175
0
                    }
11176
0
                }
11177
11178
0
                if (ctxt->instate == XML_PARSER_EPILOG) {
11179
0
                    if (ctxt->errNo == XML_ERR_OK)
11180
0
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11181
0
        ctxt->instate = XML_PARSER_EOF;
11182
0
                    xmlFinishDocument(ctxt);
11183
0
                } else {
11184
0
        ctxt->instate = XML_PARSER_START_TAG;
11185
0
    }
11186
0
    break;
11187
0
            case XML_PARSER_DTD: {
11188
0
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11189
0
                    goto done;
11190
0
    xmlParseInternalSubset(ctxt);
11191
0
    ctxt->inSubset = 2;
11192
0
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11193
0
        (ctxt->sax->externalSubset != NULL))
11194
0
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11195
0
          ctxt->extSubSystem, ctxt->extSubURI);
11196
0
    ctxt->inSubset = 0;
11197
0
    xmlCleanSpecialAttr(ctxt);
11198
0
    ctxt->instate = XML_PARSER_PROLOG;
11199
0
                break;
11200
0
      }
11201
0
            default:
11202
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11203
0
      "PP: internal error\n");
11204
0
    ctxt->instate = XML_PARSER_EOF;
11205
0
    break;
11206
0
  }
11207
0
    }
11208
0
done:
11209
0
    return(ret);
11210
0
}
11211
11212
/**
11213
 * Parse a chunk of memory in push parser mode.
11214
 *
11215
 * Assumes that the parser context was initialized with
11216
 * #xmlCreatePushParserCtxt.
11217
 *
11218
 * The last chunk, which will often be empty, must be marked with
11219
 * the `terminate` flag. With the default SAX callbacks, the resulting
11220
 * document will be available in ctxt->myDoc. This pointer will not
11221
 * be freed when calling #xmlFreeParserCtxt and must be freed by the
11222
 * caller. If the document isn't well-formed, it will still be returned
11223
 * in ctxt->myDoc.
11224
 *
11225
 * As an exception, #xmlCtxtResetPush will free the document in
11226
 * ctxt->myDoc. So ctxt->myDoc should be set to NULL after extracting
11227
 * the document.
11228
 *
11229
 * Since 2.14.0, #xmlCtxtGetDocument can be used to retrieve the
11230
 * result document.
11231
 *
11232
 * @param ctxt  an XML parser context
11233
 * @param chunk  chunk of memory
11234
 * @param size  size of chunk in bytes
11235
 * @param terminate  last chunk indicator
11236
 * @returns an xmlParserErrors code (0 on success).
11237
 */
11238
int
11239
xmlParseChunk(xmlParserCtxt *ctxt, const char *chunk, int size,
11240
0
              int terminate) {
11241
0
    size_t curBase;
11242
0
    size_t maxLength;
11243
0
    size_t pos;
11244
0
    int end_in_lf = 0;
11245
0
    int res;
11246
11247
0
    if ((ctxt == NULL) || (size < 0))
11248
0
        return(XML_ERR_ARGUMENT);
11249
0
    if ((chunk == NULL) && (size > 0))
11250
0
        return(XML_ERR_ARGUMENT);
11251
0
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11252
0
        return(XML_ERR_ARGUMENT);
11253
0
    if (ctxt->disableSAX != 0)
11254
0
        return(ctxt->errNo);
11255
11256
0
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11257
0
    if (ctxt->instate == XML_PARSER_START)
11258
0
        xmlCtxtInitializeLate(ctxt);
11259
0
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11260
0
        (chunk[size - 1] == '\r')) {
11261
0
  end_in_lf = 1;
11262
0
  size--;
11263
0
    }
11264
11265
    /*
11266
     * Also push an empty chunk to make sure that the raw buffer
11267
     * will be flushed if there is an encoder.
11268
     */
11269
0
    pos = ctxt->input->cur - ctxt->input->base;
11270
0
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11271
0
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11272
0
    if (res < 0) {
11273
0
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11274
0
        return(ctxt->errNo);
11275
0
    }
11276
11277
0
    xmlParseTryOrFinish(ctxt, terminate);
11278
11279
0
    curBase = ctxt->input->cur - ctxt->input->base;
11280
0
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11281
0
                XML_MAX_HUGE_LENGTH :
11282
0
                XML_MAX_LOOKUP_LIMIT;
11283
0
    if (curBase > maxLength) {
11284
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11285
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11286
0
    }
11287
11288
0
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11289
0
        return(ctxt->errNo);
11290
11291
0
    if (end_in_lf == 1) {
11292
0
  pos = ctxt->input->cur - ctxt->input->base;
11293
0
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11294
0
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11295
0
        if (res < 0) {
11296
0
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11297
0
            return(ctxt->errNo);
11298
0
        }
11299
0
    }
11300
0
    if (terminate) {
11301
  /*
11302
   * Check for termination
11303
   */
11304
0
        if ((ctxt->instate != XML_PARSER_EOF) &&
11305
0
            (ctxt->instate != XML_PARSER_EPILOG)) {
11306
0
            if (ctxt->nameNr > 0) {
11307
0
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11308
0
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11309
0
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11310
0
                        "Premature end of data in tag %s line %d\n",
11311
0
                        name, line, NULL);
11312
0
            } else if (ctxt->instate == XML_PARSER_START) {
11313
0
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11314
0
            } else {
11315
0
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11316
0
                               "Start tag expected, '<' not found\n");
11317
0
            }
11318
0
        } else {
11319
0
            xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
11320
0
        }
11321
0
  if (ctxt->instate != XML_PARSER_EOF) {
11322
0
            ctxt->instate = XML_PARSER_EOF;
11323
0
            xmlFinishDocument(ctxt);
11324
0
  }
11325
0
    }
11326
0
    if (ctxt->wellFormed == 0)
11327
0
  return((xmlParserErrors) ctxt->errNo);
11328
0
    else
11329
0
        return(0);
11330
0
}
11331
11332
/************************************************************************
11333
 *                  *
11334
 *    I/O front end functions to the parser     *
11335
 *                  *
11336
 ************************************************************************/
11337
11338
/**
11339
 * Create a parser context for using the XML parser in push mode.
11340
 * See #xmlParseChunk.
11341
 *
11342
 * Passing an initial chunk is useless and deprecated.
11343
 *
11344
 * The push parser doesn't support recovery mode or the
11345
 * XML_PARSE_NOBLANKS option.
11346
 *
11347
 * `filename` is used as base URI to fetch external entities and for
11348
 * error reports.
11349
 *
11350
 * @param sax  a SAX handler (optional)
11351
 * @param user_data  user data for SAX callbacks (optional)
11352
 * @param chunk  initial chunk (optional, deprecated)
11353
 * @param size  size of initial chunk in bytes
11354
 * @param filename  file name or URI (optional)
11355
 * @returns the new parser context or NULL if a memory allocation
11356
 * failed.
11357
 */
11358
11359
xmlParserCtxt *
11360
xmlCreatePushParserCtxt(xmlSAXHandler *sax, void *user_data,
11361
0
                        const char *chunk, int size, const char *filename) {
11362
0
    xmlParserCtxtPtr ctxt;
11363
0
    xmlParserInputPtr input;
11364
11365
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11366
0
    if (ctxt == NULL)
11367
0
  return(NULL);
11368
11369
0
    ctxt->options &= ~XML_PARSE_NODICT;
11370
0
    ctxt->dictNames = 1;
11371
11372
0
    input = xmlNewPushInput(filename, chunk, size);
11373
0
    if (input == NULL) {
11374
0
  xmlFreeParserCtxt(ctxt);
11375
0
  return(NULL);
11376
0
    }
11377
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11378
0
        xmlFreeInputStream(input);
11379
0
        xmlFreeParserCtxt(ctxt);
11380
0
        return(NULL);
11381
0
    }
11382
11383
0
    return(ctxt);
11384
0
}
11385
#endif /* LIBXML_PUSH_ENABLED */
11386
11387
/**
11388
 * Blocks further parser processing
11389
 *
11390
 * @param ctxt  an XML parser context
11391
 */
11392
void
11393
0
xmlStopParser(xmlParserCtxt *ctxt) {
11394
0
    if (ctxt == NULL)
11395
0
        return;
11396
11397
    /* This stops the parser */
11398
0
    ctxt->disableSAX = 2;
11399
11400
    /*
11401
     * xmlStopParser is often called from error handlers,
11402
     * so we can't raise an error here to avoid infinite
11403
     * loops. Just make sure that an error condition is
11404
     * reported.
11405
     */
11406
0
    if (ctxt->errNo == XML_ERR_OK) {
11407
0
        ctxt->errNo = XML_ERR_USER_STOP;
11408
0
        ctxt->lastError.code = XML_ERR_USER_STOP;
11409
0
        ctxt->wellFormed = 0;
11410
0
    }
11411
0
}
11412
11413
/**
11414
 * Create a parser context for using the XML parser with an existing
11415
 * I/O stream
11416
 *
11417
 * @param sax  a SAX handler (optional)
11418
 * @param user_data  user data for SAX callbacks (optional)
11419
 * @param ioread  an I/O read function
11420
 * @param ioclose  an I/O close function (optional)
11421
 * @param ioctx  an I/O handler
11422
 * @param enc  the charset encoding if known (deprecated)
11423
 * @returns the new parser context or NULL
11424
 */
11425
xmlParserCtxt *
11426
xmlCreateIOParserCtxt(xmlSAXHandler *sax, void *user_data,
11427
                      xmlInputReadCallback ioread,
11428
                      xmlInputCloseCallback ioclose,
11429
0
                      void *ioctx, xmlCharEncoding enc) {
11430
0
    xmlParserCtxtPtr ctxt;
11431
0
    xmlParserInputPtr input;
11432
0
    const char *encoding;
11433
11434
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11435
0
    if (ctxt == NULL)
11436
0
  return(NULL);
11437
11438
0
    encoding = xmlGetCharEncodingName(enc);
11439
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11440
0
                                  encoding, 0);
11441
0
    if (input == NULL) {
11442
0
  xmlFreeParserCtxt(ctxt);
11443
0
        return (NULL);
11444
0
    }
11445
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11446
0
        xmlFreeInputStream(input);
11447
0
        xmlFreeParserCtxt(ctxt);
11448
0
        return(NULL);
11449
0
    }
11450
11451
0
    return(ctxt);
11452
0
}
11453
11454
#ifdef LIBXML_VALID_ENABLED
11455
/************************************************************************
11456
 *                  *
11457
 *    Front ends when parsing a DTD       *
11458
 *                  *
11459
 ************************************************************************/
11460
11461
/**
11462
 * Parse a DTD.
11463
 *
11464
 * Option XML_PARSE_DTDLOAD should be enabled in the parser context
11465
 * to make external entities work.
11466
 *
11467
 * @since 2.14.0
11468
 *
11469
 * @param ctxt  a parser context
11470
 * @param input  a parser input
11471
 * @param publicId  public ID of the DTD (optional)
11472
 * @param systemId  system ID of the DTD (optional)
11473
 * @returns the resulting xmlDtd or NULL in case of error.
11474
 * `input` will be freed by the function in any case.
11475
 */
11476
xmlDtd *
11477
xmlCtxtParseDtd(xmlParserCtxt *ctxt, xmlParserInput *input,
11478
1.08k
                const xmlChar *publicId, const xmlChar *systemId) {
11479
1.08k
    xmlDtdPtr ret = NULL;
11480
11481
1.08k
    if ((ctxt == NULL) || (input == NULL)) {
11482
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
11483
0
        xmlFreeInputStream(input);
11484
0
        return(NULL);
11485
0
    }
11486
11487
1.08k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11488
1
        xmlFreeInputStream(input);
11489
1
        return(NULL);
11490
1
    }
11491
11492
1.08k
    if (publicId == NULL)
11493
785
        publicId = BAD_CAST "none";
11494
1.08k
    if (systemId == NULL)
11495
0
        systemId = BAD_CAST "none";
11496
11497
1.08k
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11498
1.08k
    if (ctxt->myDoc == NULL) {
11499
4
        xmlErrMemory(ctxt);
11500
4
        goto error;
11501
4
    }
11502
1.07k
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11503
1.07k
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11504
1.07k
                                       publicId, systemId);
11505
1.07k
    if (ctxt->myDoc->extSubset == NULL) {
11506
4
        xmlErrMemory(ctxt);
11507
4
        xmlFreeDoc(ctxt->myDoc);
11508
4
        goto error;
11509
4
    }
11510
11511
1.07k
    xmlParseExternalSubset(ctxt, publicId, systemId);
11512
11513
1.07k
    if (ctxt->wellFormed) {
11514
0
        ret = ctxt->myDoc->extSubset;
11515
0
        ctxt->myDoc->extSubset = NULL;
11516
0
        if (ret != NULL) {
11517
0
            xmlNodePtr tmp;
11518
11519
0
            ret->doc = NULL;
11520
0
            tmp = ret->children;
11521
0
            while (tmp != NULL) {
11522
0
                tmp->doc = NULL;
11523
0
                tmp = tmp->next;
11524
0
            }
11525
0
        }
11526
1.07k
    } else {
11527
1.07k
        ret = NULL;
11528
1.07k
    }
11529
1.07k
    xmlFreeDoc(ctxt->myDoc);
11530
1.07k
    ctxt->myDoc = NULL;
11531
11532
1.08k
error:
11533
1.08k
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
11534
11535
1.08k
    return(ret);
11536
1.07k
}
11537
11538
/**
11539
 * Load and parse a DTD
11540
 *
11541
 * @deprecated Use #xmlCtxtParseDtd.
11542
 *
11543
 * @param sax  the SAX handler block or NULL
11544
 * @param input  an Input Buffer
11545
 * @param enc  the charset encoding if known
11546
 * @returns the resulting xmlDtd or NULL in case of error.
11547
 * `input` will be freed by the function in any case.
11548
 */
11549
11550
xmlDtd *
11551
xmlIOParseDTD(xmlSAXHandler *sax, xmlParserInputBuffer *input,
11552
0
        xmlCharEncoding enc) {
11553
0
    xmlDtdPtr ret = NULL;
11554
0
    xmlParserCtxtPtr ctxt;
11555
0
    xmlParserInputPtr pinput = NULL;
11556
11557
0
    if (input == NULL)
11558
0
  return(NULL);
11559
11560
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11561
0
    if (ctxt == NULL) {
11562
0
        xmlFreeParserInputBuffer(input);
11563
0
  return(NULL);
11564
0
    }
11565
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11566
11567
    /*
11568
     * generate a parser input from the I/O handler
11569
     */
11570
11571
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11572
0
    if (pinput == NULL) {
11573
0
        xmlFreeParserInputBuffer(input);
11574
0
  xmlFreeParserCtxt(ctxt);
11575
0
  return(NULL);
11576
0
    }
11577
11578
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11579
0
        xmlSwitchEncoding(ctxt, enc);
11580
0
    }
11581
11582
0
    ret = xmlCtxtParseDtd(ctxt, pinput, NULL, NULL);
11583
11584
0
    xmlFreeParserCtxt(ctxt);
11585
0
    return(ret);
11586
0
}
11587
11588
/**
11589
 * Load and parse an external subset.
11590
 *
11591
 * @deprecated Use #xmlCtxtParseDtd.
11592
 *
11593
 * @param sax  the SAX handler block
11594
 * @param publicId  public identifier of the DTD (optional)
11595
 * @param systemId  system identifier (URL) of the DTD
11596
 * @returns the resulting xmlDtd or NULL in case of error.
11597
 */
11598
11599
xmlDtd *
11600
xmlSAXParseDTD(xmlSAXHandler *sax, const xmlChar *publicId,
11601
12.1k
               const xmlChar *systemId) {
11602
12.1k
    xmlDtdPtr ret = NULL;
11603
12.1k
    xmlParserCtxtPtr ctxt;
11604
12.1k
    xmlParserInputPtr input = NULL;
11605
12.1k
    xmlChar* systemIdCanonic;
11606
11607
12.1k
    if ((publicId == NULL) && (systemId == NULL)) return(NULL);
11608
11609
12.1k
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11610
12.1k
    if (ctxt == NULL) {
11611
60
  return(NULL);
11612
60
    }
11613
12.0k
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11614
11615
    /*
11616
     * Canonicalise the system ID
11617
     */
11618
12.0k
    systemIdCanonic = xmlCanonicPath(systemId);
11619
12.0k
    if ((systemId != NULL) && (systemIdCanonic == NULL)) {
11620
12
  xmlFreeParserCtxt(ctxt);
11621
12
  return(NULL);
11622
12
    }
11623
11624
    /*
11625
     * Ask the Entity resolver to load the damn thing
11626
     */
11627
11628
12.0k
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11629
12.0k
  input = ctxt->sax->resolveEntity(ctxt->userData, publicId,
11630
12.0k
                                   systemIdCanonic);
11631
12.0k
    if (input == NULL) {
11632
10.9k
  xmlFreeParserCtxt(ctxt);
11633
10.9k
  if (systemIdCanonic != NULL)
11634
10.7k
      xmlFree(systemIdCanonic);
11635
10.9k
  return(NULL);
11636
10.9k
    }
11637
11638
1.08k
    if (input->filename == NULL)
11639
0
  input->filename = (char *) systemIdCanonic;
11640
1.08k
    else
11641
1.08k
  xmlFree(systemIdCanonic);
11642
11643
1.08k
    ret = xmlCtxtParseDtd(ctxt, input, publicId, systemId);
11644
11645
1.08k
    xmlFreeParserCtxt(ctxt);
11646
1.08k
    return(ret);
11647
12.0k
}
11648
11649
11650
/**
11651
 * Load and parse an external subset.
11652
 *
11653
 * @param publicId  public identifier of the DTD (optional)
11654
 * @param systemId  system identifier (URL) of the DTD
11655
 * @returns the resulting xmlDtd or NULL in case of error.
11656
 */
11657
11658
xmlDtd *
11659
12.1k
xmlParseDTD(const xmlChar *publicId, const xmlChar *systemId) {
11660
12.1k
    return(xmlSAXParseDTD(NULL, publicId, systemId));
11661
12.1k
}
11662
#endif /* LIBXML_VALID_ENABLED */
11663
11664
/************************************************************************
11665
 *                  *
11666
 *    Front ends when parsing an Entity     *
11667
 *                  *
11668
 ************************************************************************/
11669
11670
static xmlNodePtr
11671
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11672
3.94k
                            int hasTextDecl, int buildTree) {
11673
3.94k
    xmlNodePtr root = NULL;
11674
3.94k
    xmlNodePtr list = NULL;
11675
3.94k
    xmlChar *rootName = BAD_CAST "#root";
11676
3.94k
    int result;
11677
11678
3.94k
    if (buildTree) {
11679
3.94k
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11680
3.94k
        if (root == NULL) {
11681
2
            xmlErrMemory(ctxt);
11682
2
            goto error;
11683
2
        }
11684
3.94k
    }
11685
11686
3.94k
    if (xmlCtxtPushInput(ctxt, input) < 0)
11687
2
        goto error;
11688
11689
3.94k
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11690
3.94k
    spacePush(ctxt, -1);
11691
11692
3.94k
    if (buildTree)
11693
3.94k
        nodePush(ctxt, root);
11694
11695
3.94k
    if (hasTextDecl) {
11696
266
        xmlDetectEncoding(ctxt);
11697
11698
        /*
11699
         * Parse a possible text declaration first
11700
         */
11701
266
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11702
266
            (IS_BLANK_CH(NXT(5)))) {
11703
0
            xmlParseTextDecl(ctxt);
11704
            /*
11705
             * An XML-1.0 document can't reference an entity not XML-1.0
11706
             */
11707
0
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11708
0
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11709
0
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11710
0
                               "Version mismatch between document and "
11711
0
                               "entity\n");
11712
0
            }
11713
0
        }
11714
266
    }
11715
11716
3.94k
    xmlParseContentInternal(ctxt);
11717
11718
3.94k
    if (ctxt->input->cur < ctxt->input->end)
11719
580
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11720
11721
3.94k
    if ((ctxt->wellFormed) ||
11722
3.94k
        ((ctxt->recovery) && (!xmlCtxtIsCatastrophicError(ctxt)))) {
11723
2.53k
        if (root != NULL) {
11724
2.53k
            xmlNodePtr cur;
11725
11726
            /*
11727
             * Unlink newly created node list.
11728
             */
11729
2.53k
            list = root->children;
11730
2.53k
            root->children = NULL;
11731
2.53k
            root->last = NULL;
11732
4.55k
            for (cur = list; cur != NULL; cur = cur->next)
11733
2.01k
                cur->parent = NULL;
11734
2.53k
        }
11735
2.53k
    }
11736
11737
    /*
11738
     * Read the rest of the stream in case of errors. We want
11739
     * to account for the whole entity size.
11740
     */
11741
19.5k
    do {
11742
19.5k
        ctxt->input->cur = ctxt->input->end;
11743
19.5k
        xmlParserShrink(ctxt);
11744
19.5k
        result = xmlParserGrow(ctxt);
11745
19.5k
    } while (result > 0);
11746
11747
3.94k
    if (buildTree)
11748
3.94k
        nodePop(ctxt);
11749
11750
3.94k
    namePop(ctxt);
11751
3.94k
    spacePop(ctxt);
11752
11753
3.94k
    xmlCtxtPopInput(ctxt);
11754
11755
3.94k
error:
11756
3.94k
    xmlFreeNode(root);
11757
11758
3.94k
    return(list);
11759
3.94k
}
11760
11761
static void
11762
5.05k
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
11763
5.05k
    xmlParserInputPtr input;
11764
5.05k
    xmlNodePtr list;
11765
5.05k
    unsigned long consumed;
11766
5.05k
    int isExternal;
11767
5.05k
    int buildTree;
11768
5.05k
    int oldMinNsIndex;
11769
5.05k
    int oldNodelen, oldNodemem;
11770
11771
5.05k
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
11772
5.05k
    buildTree = (ctxt->node != NULL);
11773
11774
    /*
11775
     * Recursion check
11776
     */
11777
5.05k
    if (ent->flags & XML_ENT_EXPANDING) {
11778
772
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
11779
772
        goto error;
11780
772
    }
11781
11782
    /*
11783
     * Load entity
11784
     */
11785
4.28k
    input = xmlNewEntityInputStream(ctxt, ent);
11786
4.28k
    if (input == NULL)
11787
333
        goto error;
11788
11789
    /*
11790
     * When building a tree, we need to limit the scope of namespace
11791
     * declarations, so that entities don't reference xmlNs structs
11792
     * from the parent of a reference.
11793
     */
11794
3.94k
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
11795
3.94k
    if (buildTree)
11796
3.94k
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
11797
11798
3.94k
    oldNodelen = ctxt->nodelen;
11799
3.94k
    oldNodemem = ctxt->nodemem;
11800
3.94k
    ctxt->nodelen = 0;
11801
3.94k
    ctxt->nodemem = 0;
11802
11803
    /*
11804
     * Parse content
11805
     *
11806
     * This initiates a recursive call chain:
11807
     *
11808
     * - xmlCtxtParseContentInternal
11809
     * - xmlParseContentInternal
11810
     * - xmlParseReference
11811
     * - xmlCtxtParseEntity
11812
     *
11813
     * The nesting depth is limited by the maximum number of inputs,
11814
     * see xmlCtxtPushInput.
11815
     *
11816
     * It's possible to make this non-recursive (minNsIndex must be
11817
     * stored in the input struct) at the expense of code readability.
11818
     */
11819
11820
3.94k
    ent->flags |= XML_ENT_EXPANDING;
11821
11822
3.94k
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
11823
11824
3.94k
    ent->flags &= ~XML_ENT_EXPANDING;
11825
11826
3.94k
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
11827
3.94k
    ctxt->nodelen = oldNodelen;
11828
3.94k
    ctxt->nodemem = oldNodemem;
11829
11830
    /*
11831
     * Entity size accounting
11832
     */
11833
3.94k
    consumed = input->consumed;
11834
3.94k
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
11835
11836
3.94k
    if ((ent->flags & XML_ENT_CHECKED) == 0)
11837
2.05k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
11838
11839
3.94k
    if ((ent->flags & XML_ENT_PARSED) == 0) {
11840
2.05k
        if (isExternal)
11841
266
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
11842
11843
2.05k
        ent->children = list;
11844
11845
4.07k
        while (list != NULL) {
11846
2.01k
            list->parent = (xmlNodePtr) ent;
11847
11848
            /*
11849
             * Downstream code like the nginx xslt module can set
11850
             * ctxt->myDoc->extSubset to a separate DTD, so the entity
11851
             * might have a different or a NULL document.
11852
             */
11853
2.01k
            if (list->doc != ent->doc)
11854
0
                xmlSetTreeDoc(list, ent->doc);
11855
11856
2.01k
            if (list->next == NULL)
11857
829
                ent->last = list;
11858
2.01k
            list = list->next;
11859
2.01k
        }
11860
2.05k
    } else {
11861
1.89k
        xmlFreeNodeList(list);
11862
1.89k
    }
11863
11864
3.94k
    xmlFreeInputStream(input);
11865
11866
5.05k
error:
11867
5.05k
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
11868
5.05k
}
11869
11870
/**
11871
 * Parse an external general entity within an existing parsing context
11872
 * An external general parsed entity is well-formed if it matches the
11873
 * production labeled extParsedEnt.
11874
 *
11875
 *     [78] extParsedEnt ::= TextDecl? content
11876
 *
11877
 * @param ctxt  the existing parsing context
11878
 * @param URL  the URL for the entity to load
11879
 * @param ID  the System ID for the entity to load
11880
 * @param listOut  the return value for the set of parsed nodes
11881
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11882
 *    the parser error code otherwise
11883
 */
11884
11885
int
11886
xmlParseCtxtExternalEntity(xmlParserCtxt *ctxt, const xmlChar *URL,
11887
0
                           const xmlChar *ID, xmlNode **listOut) {
11888
0
    xmlParserInputPtr input;
11889
0
    xmlNodePtr list;
11890
11891
0
    if (listOut != NULL)
11892
0
        *listOut = NULL;
11893
11894
0
    if (ctxt == NULL)
11895
0
        return(XML_ERR_ARGUMENT);
11896
11897
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
11898
0
                            XML_RESOURCE_GENERAL_ENTITY);
11899
0
    if (input == NULL)
11900
0
        return(ctxt->errNo);
11901
11902
0
    xmlCtxtInitializeLate(ctxt);
11903
11904
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
11905
0
    if (listOut != NULL)
11906
0
        *listOut = list;
11907
0
    else
11908
0
        xmlFreeNodeList(list);
11909
11910
0
    xmlFreeInputStream(input);
11911
0
    return(ctxt->errNo);
11912
0
}
11913
11914
#ifdef LIBXML_SAX1_ENABLED
11915
/**
11916
 * Parse an external general entity
11917
 * An external general parsed entity is well-formed if it matches the
11918
 * production labeled extParsedEnt.
11919
 *
11920
 * This function uses deprecated global variables to set parser options
11921
 * which default to XML_PARSE_NODICT.
11922
 *
11923
 * @deprecated Use #xmlParseCtxtExternalEntity.
11924
 *
11925
 *     [78] extParsedEnt ::= TextDecl? content
11926
 *
11927
 * @param doc  the document the chunk pertains to
11928
 * @param sax  the SAX handler block (possibly NULL)
11929
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11930
 * @param depth  Used for loop detection, use 0
11931
 * @param URL  the URL for the entity to load
11932
 * @param ID  the System ID for the entity to load
11933
 * @param list  the return value for the set of parsed nodes
11934
 * @returns 0 if the entity is well formed, XML_ERR_ARGUMENT if doc is NULL and
11935
 *    the parser error code otherwise
11936
 */
11937
11938
int
11939
xmlParseExternalEntity(xmlDoc *doc, xmlSAXHandler *sax, void *user_data,
11940
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNode **list) {
11941
0
    xmlParserCtxtPtr ctxt;
11942
0
    int ret;
11943
11944
0
    if (list != NULL)
11945
0
        *list = NULL;
11946
11947
0
    if (doc == NULL)
11948
0
        return(XML_ERR_ARGUMENT);
11949
11950
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11951
0
    if (ctxt == NULL)
11952
0
        return(XML_ERR_NO_MEMORY);
11953
11954
0
    ctxt->depth = depth;
11955
0
    ctxt->myDoc = doc;
11956
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
11957
11958
0
    xmlFreeParserCtxt(ctxt);
11959
0
    return(ret);
11960
0
}
11961
11962
/**
11963
 * Parse a well-balanced chunk of an XML document
11964
 * called by the parser
11965
 * The allowed sequence for the Well Balanced Chunk is the one defined by
11966
 * the content production in the XML grammar:
11967
 *
11968
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
11969
 *                       Comment)*
11970
 *
11971
 * This function uses deprecated global variables to set parser options
11972
 * which default to XML_PARSE_NODICT.
11973
 *
11974
 * @param doc  the document the chunk pertains to (must not be NULL)
11975
 * @param sax  the SAX handler block (possibly NULL)
11976
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11977
 * @param depth  Used for loop detection, use 0
11978
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
11979
 * @param lst  the return value for the set of parsed nodes
11980
 * @returns 0 if the chunk is well balanced, XML_ERR_ARGUMENT if string is NULL and
11981
 *    the parser error code otherwise
11982
 */
11983
11984
int
11985
xmlParseBalancedChunkMemory(xmlDoc *doc, xmlSAXHandler *sax,
11986
0
     void *user_data, int depth, const xmlChar *string, xmlNode **lst) {
11987
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11988
0
                                                depth, string, lst, 0 );
11989
0
}
11990
#endif /* LIBXML_SAX1_ENABLED */
11991
11992
/**
11993
 * Parse a well-balanced chunk of XML matching the 'content' production.
11994
 *
11995
 * Namespaces in scope of `node` and entities of `node`'s document are
11996
 * recognized. When validating, the DTD of `node`'s document is used.
11997
 *
11998
 * Always consumes `input` even in error case.
11999
 *
12000
 * @since 2.14.0
12001
 *
12002
 * @param ctxt  parser context
12003
 * @param input  parser input
12004
 * @param node  target node or document
12005
 * @param hasTextDecl  whether to parse text declaration
12006
 * @returns a node list or NULL in case of error.
12007
 */
12008
xmlNode *
12009
xmlCtxtParseContent(xmlParserCtxt *ctxt, xmlParserInput *input,
12010
0
                    xmlNode *node, int hasTextDecl) {
12011
0
    xmlDocPtr doc;
12012
0
    xmlNodePtr cur, list = NULL;
12013
0
    int nsnr = 0;
12014
0
    xmlDictPtr oldDict;
12015
0
    int oldOptions, oldDictNames, oldLoadSubset;
12016
12017
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12018
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12019
0
        goto exit;
12020
0
    }
12021
12022
0
    doc = node->doc;
12023
0
    if (doc == NULL) {
12024
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12025
0
        goto exit;
12026
0
    }
12027
12028
0
    switch (node->type) {
12029
0
        case XML_ELEMENT_NODE:
12030
0
        case XML_DOCUMENT_NODE:
12031
0
        case XML_HTML_DOCUMENT_NODE:
12032
0
            break;
12033
12034
0
        case XML_ATTRIBUTE_NODE:
12035
0
        case XML_TEXT_NODE:
12036
0
        case XML_CDATA_SECTION_NODE:
12037
0
        case XML_ENTITY_REF_NODE:
12038
0
        case XML_PI_NODE:
12039
0
        case XML_COMMENT_NODE:
12040
0
            for (cur = node->parent; cur != NULL; cur = node->parent) {
12041
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12042
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12043
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12044
0
                    node = cur;
12045
0
                    break;
12046
0
                }
12047
0
            }
12048
0
            break;
12049
12050
0
        default:
12051
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12052
0
            goto exit;
12053
0
    }
12054
12055
0
    xmlCtxtReset(ctxt);
12056
12057
0
    oldDict = ctxt->dict;
12058
0
    oldOptions = ctxt->options;
12059
0
    oldDictNames = ctxt->dictNames;
12060
0
    oldLoadSubset = ctxt->loadsubset;
12061
12062
    /*
12063
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12064
     */
12065
0
    if (doc->dict != NULL) {
12066
0
        ctxt->dict = doc->dict;
12067
0
    } else {
12068
0
        ctxt->options |= XML_PARSE_NODICT;
12069
0
        ctxt->dictNames = 0;
12070
0
    }
12071
12072
    /*
12073
     * Disable IDs
12074
     */
12075
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12076
0
    ctxt->options |= XML_PARSE_SKIP_IDS;
12077
12078
0
    ctxt->myDoc = doc;
12079
12080
0
#ifdef LIBXML_HTML_ENABLED
12081
0
    if (ctxt->html) {
12082
        /*
12083
         * When parsing in context, it makes no sense to add implied
12084
         * elements like html/body/etc...
12085
         */
12086
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12087
12088
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12089
0
    } else
12090
0
#endif
12091
0
    {
12092
0
        xmlCtxtInitializeLate(ctxt);
12093
12094
        /*
12095
         * initialize the SAX2 namespaces stack
12096
         */
12097
0
        cur = node;
12098
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12099
0
            xmlNsPtr ns = cur->nsDef;
12100
0
            xmlHashedString hprefix, huri;
12101
12102
0
            while (ns != NULL) {
12103
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12104
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12105
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12106
0
                    nsnr++;
12107
0
                ns = ns->next;
12108
0
            }
12109
0
            cur = cur->parent;
12110
0
        }
12111
12112
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12113
12114
0
        if (nsnr > 0)
12115
0
            xmlParserNsPop(ctxt, nsnr);
12116
0
    }
12117
12118
0
    ctxt->dict = oldDict;
12119
0
    ctxt->options = oldOptions;
12120
0
    ctxt->dictNames = oldDictNames;
12121
0
    ctxt->loadsubset = oldLoadSubset;
12122
0
    ctxt->myDoc = NULL;
12123
0
    ctxt->node = NULL;
12124
12125
0
exit:
12126
0
    xmlFreeInputStream(input);
12127
0
    return(list);
12128
0
}
12129
12130
/**
12131
 * Parse a well-balanced chunk of an XML document
12132
 * within the context (DTD, namespaces, etc ...) of the given node.
12133
 *
12134
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12135
 * the content production in the XML grammar:
12136
 *
12137
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12138
 *                       Comment)*
12139
 *
12140
 * This function assumes the encoding of `node`'s document which is
12141
 * typically not what you want. A better alternative is
12142
 * #xmlCtxtParseContent.
12143
 *
12144
 * @param node  the context node
12145
 * @param data  the input string
12146
 * @param datalen  the input string length in bytes
12147
 * @param options  a combination of xmlParserOption
12148
 * @param listOut  the return value for the set of parsed nodes
12149
 * @returns XML_ERR_OK if the chunk is well balanced, and the parser
12150
 * error code otherwise
12151
 */
12152
xmlParserErrors
12153
xmlParseInNodeContext(xmlNode *node, const char *data, int datalen,
12154
0
                      int options, xmlNode **listOut) {
12155
0
    xmlParserCtxtPtr ctxt;
12156
0
    xmlParserInputPtr input;
12157
0
    xmlDocPtr doc;
12158
0
    xmlNodePtr list;
12159
0
    xmlParserErrors ret;
12160
12161
0
    if (listOut == NULL)
12162
0
        return(XML_ERR_INTERNAL_ERROR);
12163
0
    *listOut = NULL;
12164
12165
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12166
0
        return(XML_ERR_INTERNAL_ERROR);
12167
12168
0
    doc = node->doc;
12169
0
    if (doc == NULL)
12170
0
        return(XML_ERR_INTERNAL_ERROR);
12171
12172
0
#ifdef LIBXML_HTML_ENABLED
12173
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12174
0
        ctxt = htmlNewParserCtxt();
12175
0
    }
12176
0
    else
12177
0
#endif
12178
0
        ctxt = xmlNewParserCtxt();
12179
12180
0
    if (ctxt == NULL)
12181
0
        return(XML_ERR_NO_MEMORY);
12182
12183
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12184
0
                                      (const char *) doc->encoding,
12185
0
                                      XML_INPUT_BUF_STATIC);
12186
0
    if (input == NULL) {
12187
0
        xmlFreeParserCtxt(ctxt);
12188
0
        return(XML_ERR_NO_MEMORY);
12189
0
    }
12190
12191
0
    xmlCtxtUseOptions(ctxt, options);
12192
12193
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12194
12195
0
    if (list == NULL) {
12196
0
        ret = ctxt->errNo;
12197
0
        if (ret == XML_ERR_ARGUMENT)
12198
0
            ret = XML_ERR_INTERNAL_ERROR;
12199
0
    } else {
12200
0
        ret = XML_ERR_OK;
12201
0
        *listOut = list;
12202
0
    }
12203
12204
0
    xmlFreeParserCtxt(ctxt);
12205
12206
0
    return(ret);
12207
0
}
12208
12209
#ifdef LIBXML_SAX1_ENABLED
12210
/**
12211
 * Parse a well-balanced chunk of an XML document
12212
 *
12213
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12214
 * the content production in the XML grammar:
12215
 *
12216
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12217
 *                       Comment)*
12218
 *
12219
 * In case recover is set to 1, the nodelist will not be empty even if
12220
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12221
 * some extent.
12222
 *
12223
 * This function uses deprecated global variables to set parser options
12224
 * which default to XML_PARSE_NODICT.
12225
 *
12226
 * @param doc  the document the chunk pertains to (must not be NULL)
12227
 * @param sax  the SAX handler block (possibly NULL)
12228
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
12229
 * @param depth  Used for loop detection, use 0
12230
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
12231
 * @param listOut  the return value for the set of parsed nodes
12232
 * @param recover  return nodes even if the data is broken (use 0)
12233
 * @returns 0 if the chunk is well balanced, or the parser error code
12234
 * otherwise.
12235
 */
12236
int
12237
xmlParseBalancedChunkMemoryRecover(xmlDoc *doc, xmlSAXHandler *sax,
12238
     void *user_data, int depth, const xmlChar *string, xmlNode **listOut,
12239
0
     int recover) {
12240
0
    xmlParserCtxtPtr ctxt;
12241
0
    xmlParserInputPtr input;
12242
0
    xmlNodePtr list;
12243
0
    int ret;
12244
12245
0
    if (listOut != NULL)
12246
0
        *listOut = NULL;
12247
12248
0
    if (string == NULL)
12249
0
        return(XML_ERR_ARGUMENT);
12250
12251
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12252
0
    if (ctxt == NULL)
12253
0
        return(XML_ERR_NO_MEMORY);
12254
12255
0
    xmlCtxtInitializeLate(ctxt);
12256
12257
0
    ctxt->depth = depth;
12258
0
    ctxt->myDoc = doc;
12259
0
    if (recover) {
12260
0
        ctxt->options |= XML_PARSE_RECOVER;
12261
0
        ctxt->recovery = 1;
12262
0
    }
12263
12264
0
    input = xmlNewStringInputStream(ctxt, string);
12265
0
    if (input == NULL) {
12266
0
        ret = ctxt->errNo;
12267
0
        goto error;
12268
0
    }
12269
12270
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12271
0
    if (listOut != NULL)
12272
0
        *listOut = list;
12273
0
    else
12274
0
        xmlFreeNodeList(list);
12275
12276
0
    if (!ctxt->wellFormed)
12277
0
        ret = ctxt->errNo;
12278
0
    else
12279
0
        ret = XML_ERR_OK;
12280
12281
0
error:
12282
0
    xmlFreeInputStream(input);
12283
0
    xmlFreeParserCtxt(ctxt);
12284
0
    return(ret);
12285
0
}
12286
12287
/**
12288
 * Parse an XML external entity out of context and build a tree.
12289
 * It uses the given SAX function block to handle the parsing callback.
12290
 * If sax is NULL, fallback to the default DOM tree building routines.
12291
 *
12292
 * @deprecated Don't use.
12293
 *
12294
 *     [78] extParsedEnt ::= TextDecl? content
12295
 *
12296
 * This corresponds to a "Well Balanced" chunk
12297
 *
12298
 * This function uses deprecated global variables to set parser options
12299
 * which default to XML_PARSE_NODICT.
12300
 *
12301
 * @param sax  the SAX handler block
12302
 * @param filename  the filename
12303
 * @returns the resulting document tree
12304
 */
12305
12306
xmlDoc *
12307
0
xmlSAXParseEntity(xmlSAXHandler *sax, const char *filename) {
12308
0
    xmlDocPtr ret;
12309
0
    xmlParserCtxtPtr ctxt;
12310
12311
0
    ctxt = xmlCreateFileParserCtxt(filename);
12312
0
    if (ctxt == NULL) {
12313
0
  return(NULL);
12314
0
    }
12315
0
    if (sax != NULL) {
12316
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12317
0
            *ctxt->sax = *sax;
12318
0
        } else {
12319
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12320
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12321
0
        }
12322
0
        ctxt->userData = NULL;
12323
0
    }
12324
12325
0
    xmlParseExtParsedEnt(ctxt);
12326
12327
0
    if (ctxt->wellFormed) {
12328
0
  ret = ctxt->myDoc;
12329
0
    } else {
12330
0
        ret = NULL;
12331
0
        xmlFreeDoc(ctxt->myDoc);
12332
0
    }
12333
12334
0
    xmlFreeParserCtxt(ctxt);
12335
12336
0
    return(ret);
12337
0
}
12338
12339
/**
12340
 * Parse an XML external entity out of context and build a tree.
12341
 *
12342
 *     [78] extParsedEnt ::= TextDecl? content
12343
 *
12344
 * This corresponds to a "Well Balanced" chunk
12345
 *
12346
 * This function uses deprecated global variables to set parser options
12347
 * which default to XML_PARSE_NODICT.
12348
 *
12349
 * @deprecated Don't use.
12350
 *
12351
 * @param filename  the filename
12352
 * @returns the resulting document tree
12353
 */
12354
12355
xmlDoc *
12356
0
xmlParseEntity(const char *filename) {
12357
0
    return(xmlSAXParseEntity(NULL, filename));
12358
0
}
12359
#endif /* LIBXML_SAX1_ENABLED */
12360
12361
/**
12362
 * Create a parser context for an external entity
12363
 * Automatic support for ZLIB/Compress compressed document is provided
12364
 * by default if found at compile-time.
12365
 *
12366
 * @deprecated Don't use.
12367
 *
12368
 * @param URL  the entity URL
12369
 * @param ID  the entity PUBLIC ID
12370
 * @param base  a possible base for the target URI
12371
 * @returns the new parser context or NULL
12372
 */
12373
xmlParserCtxt *
12374
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12375
0
                    const xmlChar *base) {
12376
0
    xmlParserCtxtPtr ctxt;
12377
0
    xmlParserInputPtr input;
12378
0
    xmlChar *uri = NULL;
12379
12380
0
    ctxt = xmlNewParserCtxt();
12381
0
    if (ctxt == NULL)
12382
0
  return(NULL);
12383
12384
0
    if (base != NULL) {
12385
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12386
0
            goto error;
12387
0
        if (uri != NULL)
12388
0
            URL = uri;
12389
0
    }
12390
12391
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12392
0
                            XML_RESOURCE_UNKNOWN);
12393
0
    if (input == NULL)
12394
0
        goto error;
12395
12396
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12397
0
        xmlFreeInputStream(input);
12398
0
        goto error;
12399
0
    }
12400
12401
0
    xmlFree(uri);
12402
0
    return(ctxt);
12403
12404
0
error:
12405
0
    xmlFree(uri);
12406
0
    xmlFreeParserCtxt(ctxt);
12407
0
    return(NULL);
12408
0
}
12409
12410
/************************************************************************
12411
 *                  *
12412
 *    Front ends when parsing from a file     *
12413
 *                  *
12414
 ************************************************************************/
12415
12416
/**
12417
 * Create a parser context for a file or URL content.
12418
 * Automatic support for ZLIB/Compress compressed document is provided
12419
 * by default if found at compile-time and for file accesses
12420
 *
12421
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12422
 *
12423
 * @param filename  the filename or URL
12424
 * @param options  a combination of xmlParserOption
12425
 * @returns the new parser context or NULL
12426
 */
12427
xmlParserCtxt *
12428
xmlCreateURLParserCtxt(const char *filename, int options)
12429
0
{
12430
0
    xmlParserCtxtPtr ctxt;
12431
0
    xmlParserInputPtr input;
12432
12433
0
    ctxt = xmlNewParserCtxt();
12434
0
    if (ctxt == NULL)
12435
0
  return(NULL);
12436
12437
0
    options |= XML_PARSE_UNZIP;
12438
12439
0
    xmlCtxtUseOptions(ctxt, options);
12440
12441
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12442
0
    if (input == NULL) {
12443
0
  xmlFreeParserCtxt(ctxt);
12444
0
  return(NULL);
12445
0
    }
12446
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12447
0
        xmlFreeInputStream(input);
12448
0
        xmlFreeParserCtxt(ctxt);
12449
0
        return(NULL);
12450
0
    }
12451
12452
0
    return(ctxt);
12453
0
}
12454
12455
/**
12456
 * Create a parser context for a file content.
12457
 * Automatic support for ZLIB/Compress compressed document is provided
12458
 * by default if found at compile-time.
12459
 *
12460
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12461
 *
12462
 * @param filename  the filename
12463
 * @returns the new parser context or NULL
12464
 */
12465
xmlParserCtxt *
xmlCreateFileParserCtxt(const char *filename)
{
    /* Same as the URL variant with no extra parser options requested. */
    const int noOptions = 0;

    return xmlCreateURLParserCtxt(filename, noOptions);
}
12470
12471
#ifdef LIBXML_SAX1_ENABLED
12472
/**
12473
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12474
 * compressed document is provided by default if found at compile-time.
12475
 * It use the given SAX function block to handle the parsing callback.
12476
 * If sax is NULL, fallback to the default DOM tree building routines.
12477
 *
12478
 * This function uses deprecated global variables to set parser options
12479
 * which default to XML_PARSE_NODICT.
12480
 *
12481
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12482
 *
12483
 * User data (void *) is stored within the parser context in the
12484
 * context's _private member, so it is available nearly everywhere in libxml
12485
 *
12486
 * @param sax  the SAX handler block
12487
 * @param filename  the filename
12488
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12489
 *             documents
12490
 * @param data  the userdata
12491
 * @returns the resulting document tree
12492
 */
12493
12494
xmlDoc *
12495
xmlSAXParseFileWithData(xmlSAXHandler *sax, const char *filename,
12496
0
                        int recovery, void *data) {
12497
0
    xmlDocPtr ret = NULL;
12498
0
    xmlParserCtxtPtr ctxt;
12499
0
    xmlParserInputPtr input;
12500
12501
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12502
0
    if (ctxt == NULL)
12503
0
  return(NULL);
12504
12505
0
    if (data != NULL)
12506
0
  ctxt->_private = data;
12507
12508
0
    if (recovery) {
12509
0
        ctxt->options |= XML_PARSE_RECOVER;
12510
0
        ctxt->recovery = 1;
12511
0
    }
12512
12513
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12514
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12515
0
    else
12516
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12517
12518
0
    if (input != NULL)
12519
0
        ret = xmlCtxtParseDocument(ctxt, input);
12520
12521
0
    xmlFreeParserCtxt(ctxt);
12522
0
    return(ret);
12523
0
}
12524
12525
/**
12526
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12527
 * compressed document is provided by default if found at compile-time.
12528
 * It use the given SAX function block to handle the parsing callback.
12529
 * If sax is NULL, fallback to the default DOM tree building routines.
12530
 *
12531
 * This function uses deprecated global variables to set parser options
12532
 * which default to XML_PARSE_NODICT.
12533
 *
12534
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12535
 *
12536
 * @param sax  the SAX handler block
12537
 * @param filename  the filename
12538
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12539
 *             documents
12540
 * @returns the resulting document tree
12541
 */
12542
12543
xmlDoc *
12544
xmlSAXParseFile(xmlSAXHandler *sax, const char *filename,
12545
0
                          int recovery) {
12546
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12547
0
}
12548
12549
/**
12550
 * Parse an XML in-memory document and build a tree.
12551
 * In the case the document is not Well Formed, an attempt to build a
12552
 * tree is tried anyway
12553
 *
12554
 * This function uses deprecated global variables to set parser options
12555
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12556
 *
12557
 * @deprecated Use #xmlReadDoc with XML_PARSE_RECOVER.
12558
 *
12559
 * @param cur  a pointer to an array of xmlChar
12560
 * @returns the resulting document tree or NULL in case of failure
12561
 */
12562
12563
xmlDoc *
12564
0
xmlRecoverDoc(const xmlChar *cur) {
12565
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12566
0
}
12567
12568
/**
12569
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12570
 * compressed document is provided by default if found at compile-time.
12571
 *
12572
 * This function uses deprecated global variables to set parser options
12573
 * which default to XML_PARSE_NODICT.
12574
 *
12575
 * @deprecated Use #xmlReadFile.
12576
 *
12577
 * @param filename  the filename
12578
 * @returns the resulting document tree if the file was wellformed,
12579
 * NULL otherwise.
12580
 */
12581
12582
xmlDoc *
xmlParseFile(const char *filename) {
    /* No SAX block, recovery mode disabled. */
    const int recovery = 0;

    return xmlSAXParseFile(NULL, filename, recovery);
}
12586
12587
/**
12588
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12589
 * compressed document is provided by default if found at compile-time.
12590
 * In the case the document is not Well Formed, it attempts to build
12591
 * a tree anyway
12592
 *
12593
 * This function uses deprecated global variables to set parser options
12594
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12595
 *
12596
 * @deprecated Use #xmlReadFile with XML_PARSE_RECOVER.
12597
 *
12598
 * @param filename  the filename
12599
 * @returns the resulting document tree or NULL in case of failure
12600
 */
12601
12602
xmlDoc *
xmlRecoverFile(const char *filename) {
    /* No SAX block, recovery mode enabled. */
    const int recovery = 1;

    return xmlSAXParseFile(NULL, filename, recovery);
}
12606
12607
12608
/**
12609
 * Setup the parser context to parse a new buffer; Clears any prior
12610
 * contents from the parser context. The buffer parameter must not be
12611
 * NULL, but the filename parameter can be
12612
 *
12613
 * @deprecated Don't use.
12614
 *
12615
 * @param ctxt  an XML parser context
12616
 * @param buffer  a xmlChar * buffer
12617
 * @param filename  a file name
12618
 */
12619
void
12620
xmlSetupParserForBuffer(xmlParserCtxt *ctxt, const xmlChar* buffer,
12621
                             const char* filename)
12622
0
{
12623
0
    xmlParserInputPtr input;
12624
12625
0
    if ((ctxt == NULL) || (buffer == NULL))
12626
0
        return;
12627
12628
0
    xmlCtxtReset(ctxt);
12629
12630
0
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12631
0
                                      NULL, 0);
12632
0
    if (input == NULL)
12633
0
        return;
12634
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
12635
0
        xmlFreeInputStream(input);
12636
0
}
12637
12638
/**
12639
 * Parse an XML file and call the given SAX handler routines.
12640
 * Automatic support for ZLIB/Compress compressed document is provided
12641
 *
12642
 * This function uses deprecated global variables to set parser options
12643
 * which default to XML_PARSE_NODICT.
12644
 *
12645
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12646
 *
12647
 * @param sax  a SAX handler
12648
 * @param user_data  The user data returned on SAX callbacks
12649
 * @param filename  a file name
12650
 * @returns 0 in case of success or a error number otherwise
12651
 */
12652
int
12653
xmlSAXUserParseFile(xmlSAXHandler *sax, void *user_data,
12654
0
                    const char *filename) {
12655
0
    int ret = 0;
12656
0
    xmlParserCtxtPtr ctxt;
12657
12658
0
    ctxt = xmlCreateFileParserCtxt(filename);
12659
0
    if (ctxt == NULL) return -1;
12660
0
    if (sax != NULL) {
12661
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12662
0
            *ctxt->sax = *sax;
12663
0
        } else {
12664
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12665
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12666
0
        }
12667
0
  ctxt->userData = user_data;
12668
0
    }
12669
12670
0
    xmlParseDocument(ctxt);
12671
12672
0
    if (ctxt->wellFormed)
12673
0
  ret = 0;
12674
0
    else {
12675
0
        if (ctxt->errNo != 0)
12676
0
      ret = ctxt->errNo;
12677
0
  else
12678
0
      ret = -1;
12679
0
    }
12680
0
    if (ctxt->myDoc != NULL) {
12681
0
        xmlFreeDoc(ctxt->myDoc);
12682
0
  ctxt->myDoc = NULL;
12683
0
    }
12684
0
    xmlFreeParserCtxt(ctxt);
12685
12686
0
    return ret;
12687
0
}
12688
#endif /* LIBXML_SAX1_ENABLED */
12689
12690
/************************************************************************
12691
 *                  *
12692
 *    Front ends when parsing from memory     *
12693
 *                  *
12694
 ************************************************************************/
12695
12696
/**
12697
 * Create a parser context for an XML in-memory document. The input buffer
12698
 * must not contain a terminating null byte.
12699
 *
12700
 * @param buffer  a pointer to a char array
12701
 * @param size  the size of the array
12702
 * @returns the new parser context or NULL
12703
 */
12704
xmlParserCtxt *
12705
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12706
0
    xmlParserCtxtPtr ctxt;
12707
0
    xmlParserInputPtr input;
12708
12709
0
    if (size < 0)
12710
0
  return(NULL);
12711
12712
0
    ctxt = xmlNewParserCtxt();
12713
0
    if (ctxt == NULL)
12714
0
  return(NULL);
12715
12716
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
12717
0
    if (input == NULL) {
12718
0
  xmlFreeParserCtxt(ctxt);
12719
0
  return(NULL);
12720
0
    }
12721
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12722
0
        xmlFreeInputStream(input);
12723
0
        xmlFreeParserCtxt(ctxt);
12724
0
        return(NULL);
12725
0
    }
12726
12727
0
    return(ctxt);
12728
0
}
12729
12730
#ifdef LIBXML_SAX1_ENABLED
12731
/**
12732
 * Parse an XML in-memory block and use the given SAX function block
12733
 * to handle the parsing callback. If sax is NULL, fallback to the default
12734
 * DOM tree building routines.
12735
 *
12736
 * This function uses deprecated global variables to set parser options
12737
 * which default to XML_PARSE_NODICT.
12738
 *
12739
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12740
 *
12741
 * User data (void *) is stored within the parser context in the
12742
 * context's _private member, so it is available nearly everywhere in libxml
12743
 *
12744
 * @param sax  the SAX handler block
12745
 * @param buffer  an pointer to a char array
12746
 * @param size  the size of the array
12747
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12748
 *             documents
12749
 * @param data  the userdata
12750
 * @returns the resulting document tree
12751
 */
12752
12753
xmlDoc *
12754
xmlSAXParseMemoryWithData(xmlSAXHandler *sax, const char *buffer,
12755
0
                          int size, int recovery, void *data) {
12756
0
    xmlDocPtr ret = NULL;
12757
0
    xmlParserCtxtPtr ctxt;
12758
0
    xmlParserInputPtr input;
12759
12760
0
    if (size < 0)
12761
0
        return(NULL);
12762
12763
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12764
0
    if (ctxt == NULL)
12765
0
        return(NULL);
12766
12767
0
    if (data != NULL)
12768
0
  ctxt->_private=data;
12769
12770
0
    if (recovery) {
12771
0
        ctxt->options |= XML_PARSE_RECOVER;
12772
0
        ctxt->recovery = 1;
12773
0
    }
12774
12775
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
12776
0
                                      XML_INPUT_BUF_STATIC);
12777
12778
0
    if (input != NULL)
12779
0
        ret = xmlCtxtParseDocument(ctxt, input);
12780
12781
0
    xmlFreeParserCtxt(ctxt);
12782
0
    return(ret);
12783
0
}
12784
12785
/**
12786
 * Parse an XML in-memory block and use the given SAX function block
12787
 * to handle the parsing callback. If sax is NULL, fallback to the default
12788
 * DOM tree building routines.
12789
 *
12790
 * This function uses deprecated global variables to set parser options
12791
 * which default to XML_PARSE_NODICT.
12792
 *
12793
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12794
 *
12795
 * @param sax  the SAX handler block
12796
 * @param buffer  an pointer to a char array
12797
 * @param size  the size of the array
12798
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12799
 *             documents
12800
 * @returns the resulting document tree
12801
 */
12802
xmlDoc *
12803
xmlSAXParseMemory(xmlSAXHandler *sax, const char *buffer,
12804
0
            int size, int recovery) {
12805
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12806
0
}
12807
12808
/**
12809
 * Parse an XML in-memory block and build a tree.
12810
 *
12811
 * This function uses deprecated global variables to set parser options
12812
 * which default to XML_PARSE_NODICT.
12813
 *
12814
 * @deprecated Use #xmlReadMemory.
12815
 *
12816
 * @param buffer  an pointer to a char array
12817
 * @param size  the size of the array
12818
 * @returns the resulting document tree
12819
 */
12820
12821
0
xmlDoc *
xmlParseMemory(const char *buffer, int size) {
    /* No SAX block, recovery mode disabled. */
    const int recovery = 0;

    return xmlSAXParseMemory(NULL, buffer, size, recovery);
}
12824
12825
/**
12826
 * Parse an XML in-memory block and build a tree.
12827
 * In the case the document is not Well Formed, an attempt to
12828
 * build a tree is tried anyway
12829
 *
12830
 * This function uses deprecated global variables to set parser options
12831
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12832
 *
12833
 * @deprecated Use #xmlReadMemory with XML_PARSE_RECOVER.
12834
 *
12835
 * @param buffer  an pointer to a char array
12836
 * @param size  the size of the array
12837
 * @returns the resulting document tree or NULL in case of error
12838
 */
12839
12840
0
xmlDoc *
xmlRecoverMemory(const char *buffer, int size) {
    /* No SAX block, recovery mode enabled. */
    const int recovery = 1;

    return xmlSAXParseMemory(NULL, buffer, size, recovery);
}
12843
12844
/**
12845
 * Parse an XML in-memory buffer and call the given SAX handler routines.
12846
 *
12847
 * This function uses deprecated global variables to set parser options
12848
 * which default to XML_PARSE_NODICT.
12849
 *
12850
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12851
 *
12852
 * @param sax  a SAX handler
12853
 * @param user_data  The user data returned on SAX callbacks
12854
 * @param buffer  an in-memory XML document input
12855
 * @param size  the length of the XML document in bytes
12856
 * @returns 0 in case of success or a error number otherwise
12857
 */
12858
int xmlSAXUserParseMemory(xmlSAXHandler *sax, void *user_data,
12859
0
        const char *buffer, int size) {
12860
0
    int ret = 0;
12861
0
    xmlParserCtxtPtr ctxt;
12862
12863
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12864
0
    if (ctxt == NULL) return -1;
12865
0
    if (sax != NULL) {
12866
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12867
0
            *ctxt->sax = *sax;
12868
0
        } else {
12869
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12870
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12871
0
        }
12872
0
  ctxt->userData = user_data;
12873
0
    }
12874
12875
0
    xmlParseDocument(ctxt);
12876
12877
0
    if (ctxt->wellFormed)
12878
0
  ret = 0;
12879
0
    else {
12880
0
        if (ctxt->errNo != 0)
12881
0
      ret = ctxt->errNo;
12882
0
  else
12883
0
      ret = -1;
12884
0
    }
12885
0
    if (ctxt->myDoc != NULL) {
12886
0
        xmlFreeDoc(ctxt->myDoc);
12887
0
  ctxt->myDoc = NULL;
12888
0
    }
12889
0
    xmlFreeParserCtxt(ctxt);
12890
12891
0
    return ret;
12892
0
}
12893
#endif /* LIBXML_SAX1_ENABLED */
12894
12895
/**
12896
 * Creates a parser context for an XML in-memory document.
12897
 *
12898
 * @param str  a pointer to an array of xmlChar
12899
 * @returns the new parser context or NULL
12900
 */
12901
xmlParserCtxt *
12902
0
xmlCreateDocParserCtxt(const xmlChar *str) {
12903
0
    xmlParserCtxtPtr ctxt;
12904
0
    xmlParserInputPtr input;
12905
12906
0
    ctxt = xmlNewParserCtxt();
12907
0
    if (ctxt == NULL)
12908
0
  return(NULL);
12909
12910
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
12911
0
    if (input == NULL) {
12912
0
  xmlFreeParserCtxt(ctxt);
12913
0
  return(NULL);
12914
0
    }
12915
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12916
0
        xmlFreeInputStream(input);
12917
0
        xmlFreeParserCtxt(ctxt);
12918
0
        return(NULL);
12919
0
    }
12920
12921
0
    return(ctxt);
12922
0
}
12923
12924
#ifdef LIBXML_SAX1_ENABLED
12925
/**
12926
 * Parse an XML in-memory document and build a tree.
12927
 * It use the given SAX function block to handle the parsing callback.
12928
 * If sax is NULL, fallback to the default DOM tree building routines.
12929
 *
12930
 * This function uses deprecated global variables to set parser options
12931
 * which default to XML_PARSE_NODICT.
12932
 *
12933
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadDoc.
12934
 *
12935
 * @param sax  the SAX handler block
12936
 * @param cur  a pointer to an array of xmlChar
12937
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12938
 *             documents
12939
 * @returns the resulting document tree
12940
 */
12941
12942
xmlDoc *
12943
0
xmlSAXParseDoc(xmlSAXHandler *sax, const xmlChar *cur, int recovery) {
12944
0
    xmlDocPtr ret;
12945
0
    xmlParserCtxtPtr ctxt;
12946
0
    xmlSAXHandlerPtr oldsax = NULL;
12947
12948
0
    if (cur == NULL) return(NULL);
12949
12950
12951
0
    ctxt = xmlCreateDocParserCtxt(cur);
12952
0
    if (ctxt == NULL) return(NULL);
12953
0
    if (sax != NULL) {
12954
0
        oldsax = ctxt->sax;
12955
0
        ctxt->sax = sax;
12956
0
        ctxt->userData = NULL;
12957
0
    }
12958
12959
0
    xmlParseDocument(ctxt);
12960
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12961
0
    else {
12962
0
       ret = NULL;
12963
0
       xmlFreeDoc(ctxt->myDoc);
12964
0
       ctxt->myDoc = NULL;
12965
0
    }
12966
0
    if (sax != NULL)
12967
0
  ctxt->sax = oldsax;
12968
0
    xmlFreeParserCtxt(ctxt);
12969
12970
0
    return(ret);
12971
0
}
12972
12973
/**
12974
 * Parse an XML in-memory document and build a tree.
12975
 *
12976
 * This function uses deprecated global variables to set parser options
12977
 * which default to XML_PARSE_NODICT.
12978
 *
12979
 * @deprecated Use #xmlReadDoc.
12980
 *
12981
 * @param cur  a pointer to an array of xmlChar
12982
 * @returns the resulting document tree
12983
 */
12984
12985
xmlDoc *
12986
0
xmlParseDoc(const xmlChar *cur) {
12987
0
    return(xmlSAXParseDoc(NULL, cur, 0));
12988
0
}
12989
#endif /* LIBXML_SAX1_ENABLED */
12990
12991
/************************************************************************
12992
 *                  *
12993
 *  New set (2.6.0) of simpler and more flexible APIs   *
12994
 *                  *
12995
 ************************************************************************/
12996
12997
/**
12998
 * Reset a parser context
12999
 *
13000
 * @param ctxt  an XML parser context
13001
 */
13002
void
13003
xmlCtxtReset(xmlParserCtxt *ctxt)
13004
0
{
13005
0
    xmlParserInputPtr input;
13006
13007
0
    if (ctxt == NULL)
13008
0
        return;
13009
13010
0
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
13011
0
        xmlFreeInputStream(input);
13012
0
    }
13013
0
    ctxt->inputNr = 0;
13014
0
    ctxt->input = NULL;
13015
13016
0
    ctxt->spaceNr = 0;
13017
0
    if (ctxt->spaceTab != NULL) {
13018
0
  ctxt->spaceTab[0] = -1;
13019
0
  ctxt->space = &ctxt->spaceTab[0];
13020
0
    } else {
13021
0
        ctxt->space = NULL;
13022
0
    }
13023
13024
13025
0
    ctxt->nodeNr = 0;
13026
0
    ctxt->node = NULL;
13027
13028
0
    ctxt->nameNr = 0;
13029
0
    ctxt->name = NULL;
13030
13031
0
    ctxt->nsNr = 0;
13032
0
    xmlParserNsReset(ctxt->nsdb);
13033
13034
0
    if (ctxt->version != NULL) {
13035
0
        xmlFree(ctxt->version);
13036
0
        ctxt->version = NULL;
13037
0
    }
13038
0
    if (ctxt->encoding != NULL) {
13039
0
        xmlFree(ctxt->encoding);
13040
0
        ctxt->encoding = NULL;
13041
0
    }
13042
0
    if (ctxt->extSubURI != NULL) {
13043
0
        xmlFree(ctxt->extSubURI);
13044
0
        ctxt->extSubURI = NULL;
13045
0
    }
13046
0
    if (ctxt->extSubSystem != NULL) {
13047
0
        xmlFree(ctxt->extSubSystem);
13048
0
        ctxt->extSubSystem = NULL;
13049
0
    }
13050
0
    if (ctxt->directory != NULL) {
13051
0
        xmlFree(ctxt->directory);
13052
0
        ctxt->directory = NULL;
13053
0
    }
13054
13055
0
    if (ctxt->myDoc != NULL)
13056
0
        xmlFreeDoc(ctxt->myDoc);
13057
0
    ctxt->myDoc = NULL;
13058
13059
0
    ctxt->standalone = -1;
13060
0
    ctxt->hasExternalSubset = 0;
13061
0
    ctxt->hasPErefs = 0;
13062
0
    ctxt->html = ctxt->html ? 1 : 0;
13063
0
    ctxt->instate = XML_PARSER_START;
13064
13065
0
    ctxt->wellFormed = 1;
13066
0
    ctxt->nsWellFormed = 1;
13067
0
    ctxt->disableSAX = 0;
13068
0
    ctxt->valid = 1;
13069
0
    ctxt->record_info = 0;
13070
0
    ctxt->checkIndex = 0;
13071
0
    ctxt->endCheckState = 0;
13072
0
    ctxt->inSubset = 0;
13073
0
    ctxt->errNo = XML_ERR_OK;
13074
0
    ctxt->depth = 0;
13075
0
    ctxt->catalogs = NULL;
13076
0
    ctxt->sizeentities = 0;
13077
0
    ctxt->sizeentcopy = 0;
13078
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13079
13080
0
    if (ctxt->attsDefault != NULL) {
13081
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13082
0
        ctxt->attsDefault = NULL;
13083
0
    }
13084
0
    if (ctxt->attsSpecial != NULL) {
13085
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13086
0
        ctxt->attsSpecial = NULL;
13087
0
    }
13088
13089
0
#ifdef LIBXML_CATALOG_ENABLED
13090
0
    if (ctxt->catalogs != NULL)
13091
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13092
0
#endif
13093
0
    ctxt->nbErrors = 0;
13094
0
    ctxt->nbWarnings = 0;
13095
0
    if (ctxt->lastError.code != XML_ERR_OK)
13096
0
        xmlResetError(&ctxt->lastError);
13097
0
}
13098
13099
/**
13100
 * Reset a push parser context
13101
 *
13102
 * @param ctxt  an XML parser context
13103
 * @param chunk  a pointer to an array of chars
13104
 * @param size  number of chars in the array
13105
 * @param filename  an optional file name or URI
13106
 * @param encoding  the document encoding, or NULL
13107
 * @returns 0 in case of success and 1 in case of error
13108
 */
13109
int
13110
xmlCtxtResetPush(xmlParserCtxt *ctxt, const char *chunk,
13111
                 int size, const char *filename, const char *encoding)
13112
0
{
13113
0
    xmlParserInputPtr input;
13114
13115
0
    if (ctxt == NULL)
13116
0
        return(1);
13117
13118
0
    xmlCtxtReset(ctxt);
13119
13120
0
    input = xmlNewPushInput(filename, chunk, size);
13121
0
    if (input == NULL)
13122
0
        return(1);
13123
13124
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13125
0
        xmlFreeInputStream(input);
13126
0
        return(1);
13127
0
    }
13128
13129
0
    if (encoding != NULL)
13130
0
        xmlSwitchEncodingName(ctxt, encoding);
13131
13132
0
    return(0);
13133
0
}
13134
13135
static int
13136
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13137
182k
{
13138
182k
    int allMask;
13139
13140
182k
    if (ctxt == NULL)
13141
0
        return(-1);
13142
13143
    /*
13144
     * XInclude options aren't handled by the parser.
13145
     *
13146
     * XML_PARSE_XINCLUDE
13147
     * XML_PARSE_NOXINCNODE
13148
     * XML_PARSE_NOBASEFIX
13149
     */
13150
182k
    allMask = XML_PARSE_RECOVER |
13151
182k
              XML_PARSE_NOENT |
13152
182k
              XML_PARSE_DTDLOAD |
13153
182k
              XML_PARSE_DTDATTR |
13154
182k
              XML_PARSE_DTDVALID |
13155
182k
              XML_PARSE_NOERROR |
13156
182k
              XML_PARSE_NOWARNING |
13157
182k
              XML_PARSE_PEDANTIC |
13158
182k
              XML_PARSE_NOBLANKS |
13159
182k
#ifdef LIBXML_SAX1_ENABLED
13160
182k
              XML_PARSE_SAX1 |
13161
182k
#endif
13162
182k
              XML_PARSE_NONET |
13163
182k
              XML_PARSE_NODICT |
13164
182k
              XML_PARSE_NSCLEAN |
13165
182k
              XML_PARSE_NOCDATA |
13166
182k
              XML_PARSE_COMPACT |
13167
182k
              XML_PARSE_OLD10 |
13168
182k
              XML_PARSE_HUGE |
13169
182k
              XML_PARSE_OLDSAX |
13170
182k
              XML_PARSE_IGNORE_ENC |
13171
182k
              XML_PARSE_BIG_LINES |
13172
182k
              XML_PARSE_NO_XXE |
13173
182k
              XML_PARSE_UNZIP |
13174
182k
              XML_PARSE_NO_SYS_CATALOG |
13175
182k
              XML_PARSE_CATALOG_PI;
13176
13177
182k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13178
13179
    /*
13180
     * For some options, struct members are historically the source
13181
     * of truth. The values are initalized from global variables and
13182
     * old code could also modify them directly. Several older API
13183
     * functions that don't take an options argument rely on these
13184
     * deprecated mechanisms.
13185
     *
13186
     * Once public access to struct members and the globals are
13187
     * disabled, we can use the options bitmask as source of
13188
     * truth, making all these struct members obsolete.
13189
     *
13190
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13191
     * loading of the external subset.
13192
     */
13193
182k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13194
182k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13195
182k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13196
182k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13197
182k
    ctxt->loadsubset |= (options & XML_PARSE_SKIP_IDS) ? XML_SKIP_IDS : 0;
13198
182k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13199
182k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13200
182k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13201
182k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13202
13203
182k
    return(options & ~allMask);
13204
182k
}
13205
13206
/**
13207
 * Applies the options to the parser context. Unset options are
13208
 * cleared.
13209
 *
13210
 * @since 2.13.0
13211
 *
13212
 * With older versions, you can use #xmlCtxtUseOptions.
13213
 *
13214
 * @param ctxt  an XML parser context
13215
 * @param options  a bitmask of xmlParserOption values
13216
 * @returns 0 in case of success, the set of unknown or unimplemented options
13217
 *         in case of error.
13218
 */
13219
int
13220
xmlCtxtSetOptions(xmlParserCtxt *ctxt, int options)
13221
12.0k
{
13222
12.0k
#ifdef LIBXML_HTML_ENABLED
13223
12.0k
    if ((ctxt != NULL) && (ctxt->html))
13224
0
        return(htmlCtxtSetOptions(ctxt, options));
13225
12.0k
#endif
13226
13227
12.0k
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13228
12.0k
}
13229
13230
/**
13231
 * Get the current options of the parser context.
13232
 *
13233
 * @since 2.14.0
13234
 *
13235
 * @param ctxt  an XML parser context
13236
 * @returns the current options set in the parser context, or -1 if ctxt is NULL.
13237
 */
13238
int
13239
xmlCtxtGetOptions(xmlParserCtxt *ctxt)
13240
0
{
13241
0
    if (ctxt == NULL)
13242
0
        return(-1);
13243
13244
0
    return(ctxt->options);
13245
0
}
13246
13247
/**
13248
 * Applies the options to the parser context. The following options
13249
 * are never cleared and can only be enabled:
13250
 *
13251
 * - XML_PARSE_NOERROR
13252
 * - XML_PARSE_NOWARNING
13253
 * - XML_PARSE_NONET
13254
 * - XML_PARSE_NSCLEAN
13255
 * - XML_PARSE_NOCDATA
13256
 * - XML_PARSE_COMPACT
13257
 * - XML_PARSE_OLD10
13258
 * - XML_PARSE_HUGE
13259
 * - XML_PARSE_OLDSAX
13260
 * - XML_PARSE_IGNORE_ENC
13261
 * - XML_PARSE_BIG_LINES
13262
 *
13263
 * @deprecated Use #xmlCtxtSetOptions.
13264
 *
13265
 * @param ctxt  an XML parser context
13266
 * @param options  a combination of xmlParserOption
13267
 * @returns 0 in case of success, the set of unknown or unimplemented options
13268
 *         in case of error.
13269
 */
13270
int
13271
xmlCtxtUseOptions(xmlParserCtxt *ctxt, int options)
13272
170k
{
13273
170k
    int keepMask;
13274
13275
170k
#ifdef LIBXML_HTML_ENABLED
13276
170k
    if ((ctxt != NULL) && (ctxt->html))
13277
0
        return(htmlCtxtUseOptions(ctxt, options));
13278
170k
#endif
13279
13280
    /*
13281
     * For historic reasons, some options can only be enabled.
13282
     */
13283
170k
    keepMask = XML_PARSE_NOERROR |
13284
170k
               XML_PARSE_NOWARNING |
13285
170k
               XML_PARSE_NONET |
13286
170k
               XML_PARSE_NSCLEAN |
13287
170k
               XML_PARSE_NOCDATA |
13288
170k
               XML_PARSE_COMPACT |
13289
170k
               XML_PARSE_OLD10 |
13290
170k
               XML_PARSE_HUGE |
13291
170k
               XML_PARSE_OLDSAX |
13292
170k
               XML_PARSE_IGNORE_ENC |
13293
170k
               XML_PARSE_BIG_LINES;
13294
13295
170k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13296
170k
}
13297
13298
/**
13299
 * To protect against exponential entity expansion ("billion laughs"), the
13300
 * size of serialized output is (roughly) limited to the input size
13301
 * multiplied by this factor. The default value is 5.
13302
 *
13303
 * When working with documents making heavy use of entity expansion, it can
13304
 * be necessary to increase the value. For security reasons, this should only
13305
 * be considered when processing trusted input.
13306
 *
13307
 * @param ctxt  an XML parser context
13308
 * @param maxAmpl  maximum amplification factor
13309
 */
13310
void
13311
xmlCtxtSetMaxAmplification(xmlParserCtxt *ctxt, unsigned maxAmpl)
13312
0
{
13313
0
    if (ctxt == NULL)
13314
0
        return;
13315
0
    ctxt->maxAmpl = maxAmpl;
13316
0
}
13317
13318
/**
13319
 * Parse an XML document and return the resulting document tree.
13320
 * Takes ownership of the input object.
13321
 *
13322
 * @since 2.13.0
13323
 *
13324
 * @param ctxt  an XML parser context
13325
 * @param input  parser input
13326
 * @returns the resulting document tree or NULL
13327
 */
13328
xmlDoc *
13329
xmlCtxtParseDocument(xmlParserCtxt *ctxt, xmlParserInput *input)
13330
167k
{
13331
167k
    xmlDocPtr ret = NULL;
13332
13333
167k
    if ((ctxt == NULL) || (input == NULL)) {
13334
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
13335
0
        xmlFreeInputStream(input);
13336
0
        return(NULL);
13337
0
    }
13338
13339
    /* assert(ctxt->inputNr == 0); */
13340
167k
    while (ctxt->inputNr > 0)
13341
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13342
13343
167k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13344
13
        xmlFreeInputStream(input);
13345
13
        return(NULL);
13346
13
    }
13347
13348
167k
    xmlParseDocument(ctxt);
13349
13350
167k
    ret = xmlCtxtGetDocument(ctxt);
13351
13352
    /* assert(ctxt->inputNr == 1); */
13353
334k
    while (ctxt->inputNr > 0)
13354
167k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13355
13356
167k
    return(ret);
13357
167k
}
13358
13359
/**
13360
 * Convenience function to parse an XML document from a
13361
 * zero-terminated string.
13362
 *
13363
 * See #xmlCtxtReadDoc for details.
13364
 *
13365
 * @param cur  a pointer to a zero terminated string
13366
 * @param URL  base URL (optional)
13367
 * @param encoding  the document encoding (optional)
13368
 * @param options  a combination of xmlParserOption
13369
 * @returns the resulting document tree
13370
 */
13371
xmlDoc *
13372
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13373
           int options)
13374
170k
{
13375
170k
    xmlParserCtxtPtr ctxt;
13376
170k
    xmlParserInputPtr input;
13377
170k
    xmlDocPtr doc = NULL;
13378
13379
170k
    ctxt = xmlNewParserCtxt();
13380
170k
    if (ctxt == NULL)
13381
87
        return(NULL);
13382
13383
170k
    xmlCtxtUseOptions(ctxt, options);
13384
13385
170k
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13386
170k
                                      XML_INPUT_BUF_STATIC);
13387
13388
170k
    if (input != NULL)
13389
167k
        doc = xmlCtxtParseDocument(ctxt, input);
13390
13391
170k
    xmlFreeParserCtxt(ctxt);
13392
170k
    return(doc);
13393
170k
}
13394
13395
/**
13396
 * Convenience function to parse an XML file from the filesystem
13397
 * or a global, user-defined resource loader.
13398
 *
13399
 * This function always enables the XML_PARSE_UNZIP option for
13400
 * backward compatibility. If a "-" filename is passed, it will
13401
 * read from stdin. Both of these features are potentially
13402
 * insecure and might be removed from later versions.
13403
 *
13404
 * See #xmlCtxtReadFile for details.
13405
 *
13406
 * @param filename  a file or URL
13407
 * @param encoding  the document encoding (optional)
13408
 * @param options  a combination of xmlParserOption
13409
 * @returns the resulting document tree
13410
 */
13411
xmlDoc *
13412
xmlReadFile(const char *filename, const char *encoding, int options)
13413
0
{
13414
0
    xmlParserCtxtPtr ctxt;
13415
0
    xmlParserInputPtr input;
13416
0
    xmlDocPtr doc = NULL;
13417
13418
0
    ctxt = xmlNewParserCtxt();
13419
0
    if (ctxt == NULL)
13420
0
        return(NULL);
13421
13422
0
    options |= XML_PARSE_UNZIP;
13423
13424
0
    xmlCtxtUseOptions(ctxt, options);
13425
13426
    /*
13427
     * Backward compatibility for users of command line utilities like
13428
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13429
     * should be removed at some point.
13430
     */
13431
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13432
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13433
0
                                      encoding, 0);
13434
0
    else
13435
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13436
13437
0
    if (input != NULL)
13438
0
        doc = xmlCtxtParseDocument(ctxt, input);
13439
13440
0
    xmlFreeParserCtxt(ctxt);
13441
0
    return(doc);
13442
0
}
13443
13444
/**
13445
 * Parse an XML in-memory document and build a tree. The input buffer must
13446
 * not contain a terminating null byte.
13447
 *
13448
 * See #xmlCtxtReadMemory for details.
13449
 *
13450
 * @param buffer  a pointer to a char array
13451
 * @param size  the size of the array
13452
 * @param url  base URL (optional)
13453
 * @param encoding  the document encoding (optional)
13454
 * @param options  a combination of xmlParserOption
13455
 * @returns the resulting document tree
13456
 */
13457
xmlDoc *
13458
xmlReadMemory(const char *buffer, int size, const char *url,
13459
              const char *encoding, int options)
13460
0
{
13461
0
    xmlParserCtxtPtr ctxt;
13462
0
    xmlParserInputPtr input;
13463
0
    xmlDocPtr doc = NULL;
13464
13465
0
    if (size < 0)
13466
0
  return(NULL);
13467
13468
0
    ctxt = xmlNewParserCtxt();
13469
0
    if (ctxt == NULL)
13470
0
        return(NULL);
13471
13472
0
    xmlCtxtUseOptions(ctxt, options);
13473
13474
0
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13475
0
                                      XML_INPUT_BUF_STATIC);
13476
13477
0
    if (input != NULL)
13478
0
        doc = xmlCtxtParseDocument(ctxt, input);
13479
13480
0
    xmlFreeParserCtxt(ctxt);
13481
0
    return(doc);
13482
0
}
13483
13484
/**
13485
 * Parse an XML from a file descriptor and build a tree.
13486
 *
13487
 * See #xmlCtxtReadFd for details.
13488
 *
13489
 * NOTE that the file descriptor will not be closed when the
13490
 * context is freed or reset.
13491
 *
13492
 * @param fd  an open file descriptor
13493
 * @param URL  base URL (optional)
13494
 * @param encoding  the document encoding (optional)
13495
 * @param options  a combination of xmlParserOption
13496
 * @returns the resulting document tree
13497
 */
13498
xmlDoc *
13499
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13500
0
{
13501
0
    xmlParserCtxtPtr ctxt;
13502
0
    xmlParserInputPtr input;
13503
0
    xmlDocPtr doc = NULL;
13504
13505
0
    ctxt = xmlNewParserCtxt();
13506
0
    if (ctxt == NULL)
13507
0
        return(NULL);
13508
13509
0
    xmlCtxtUseOptions(ctxt, options);
13510
13511
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13512
13513
0
    if (input != NULL)
13514
0
        doc = xmlCtxtParseDocument(ctxt, input);
13515
13516
0
    xmlFreeParserCtxt(ctxt);
13517
0
    return(doc);
13518
0
}
13519
13520
/**
13521
 * Parse an XML document from I/O functions and context and build a tree.
13522
 *
13523
 * See #xmlCtxtReadIO for details.
13524
 *
13525
 * @param ioread  an I/O read function
13526
 * @param ioclose  an I/O close function (optional)
13527
 * @param ioctx  an I/O handler
13528
 * @param URL  base URL (optional)
13529
 * @param encoding  the document encoding (optional)
13530
 * @param options  a combination of xmlParserOption
13531
 * @returns the resulting document tree
13532
 */
13533
xmlDoc *
13534
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13535
          void *ioctx, const char *URL, const char *encoding, int options)
13536
0
{
13537
0
    xmlParserCtxtPtr ctxt;
13538
0
    xmlParserInputPtr input;
13539
0
    xmlDocPtr doc = NULL;
13540
13541
0
    ctxt = xmlNewParserCtxt();
13542
0
    if (ctxt == NULL)
13543
0
        return(NULL);
13544
13545
0
    xmlCtxtUseOptions(ctxt, options);
13546
13547
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13548
0
                                  encoding, 0);
13549
13550
0
    if (input != NULL)
13551
0
        doc = xmlCtxtParseDocument(ctxt, input);
13552
13553
0
    xmlFreeParserCtxt(ctxt);
13554
0
    return(doc);
13555
0
}
13556
13557
/**
13558
 * Parse an XML in-memory document and build a tree.
13559
 *
13560
 * `URL` is used as base to resolve external entities and for error
13561
 * reporting.
13562
 *
13563
 * @param ctxt  an XML parser context
13564
 * @param str  a pointer to a zero terminated string
13565
 * @param URL  base URL (optional)
13566
 * @param encoding  the document encoding (optional)
13567
 * @param options  a combination of xmlParserOption
13568
 * @returns the resulting document tree
13569
 */
13570
xmlDoc *
13571
xmlCtxtReadDoc(xmlParserCtxt *ctxt, const xmlChar *str,
13572
               const char *URL, const char *encoding, int options)
13573
0
{
13574
0
    xmlParserInputPtr input;
13575
13576
0
    if (ctxt == NULL)
13577
0
        return(NULL);
13578
13579
0
    xmlCtxtReset(ctxt);
13580
0
    xmlCtxtUseOptions(ctxt, options);
13581
13582
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
13583
0
                                      XML_INPUT_BUF_STATIC);
13584
0
    if (input == NULL)
13585
0
        return(NULL);
13586
13587
0
    return(xmlCtxtParseDocument(ctxt, input));
13588
0
}
13589
13590
/**
13591
 * Parse an XML file from the filesystem or a global, user-defined
13592
 * resource loader.
13593
 *
13594
 * This function always enables the XML_PARSE_UNZIP option for
13595
 * backward compatibility. This feature is potentially insecure
13596
 * and might be removed from later versions.
13597
 *
13598
 * @param ctxt  an XML parser context
13599
 * @param filename  a file or URL
13600
 * @param encoding  the document encoding (optional)
13601
 * @param options  a combination of xmlParserOption
13602
 * @returns the resulting document tree
13603
 */
13604
xmlDoc *
13605
xmlCtxtReadFile(xmlParserCtxt *ctxt, const char *filename,
13606
                const char *encoding, int options)
13607
0
{
13608
0
    xmlParserInputPtr input;
13609
13610
0
    if (ctxt == NULL)
13611
0
        return(NULL);
13612
13613
0
    options |= XML_PARSE_UNZIP;
13614
13615
0
    xmlCtxtReset(ctxt);
13616
0
    xmlCtxtUseOptions(ctxt, options);
13617
13618
0
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13619
0
    if (input == NULL)
13620
0
        return(NULL);
13621
13622
0
    return(xmlCtxtParseDocument(ctxt, input));
13623
0
}
13624
13625
/**
13626
 * Parse an XML in-memory document and build a tree. The input buffer must
13627
 * not contain a terminating null byte.
13628
 *
13629
 * `URL` is used as base to resolve external entities and for error
13630
 * reporting.
13631
 *
13632
 * @param ctxt  an XML parser context
13633
 * @param buffer  a pointer to a char array
13634
 * @param size  the size of the array
13635
 * @param URL  base URL (optional)
13636
 * @param encoding  the document encoding (optional)
13637
 * @param options  a combination of xmlParserOption
13638
 * @returns the resulting document tree
13639
 */
13640
xmlDoc *
13641
xmlCtxtReadMemory(xmlParserCtxt *ctxt, const char *buffer, int size,
13642
                  const char *URL, const char *encoding, int options)
13643
0
{
13644
0
    xmlParserInputPtr input;
13645
13646
0
    if ((ctxt == NULL) || (size < 0))
13647
0
        return(NULL);
13648
13649
0
    xmlCtxtReset(ctxt);
13650
0
    xmlCtxtUseOptions(ctxt, options);
13651
13652
0
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
13653
0
                                      XML_INPUT_BUF_STATIC);
13654
0
    if (input == NULL)
13655
0
        return(NULL);
13656
13657
0
    return(xmlCtxtParseDocument(ctxt, input));
13658
0
}
13659
13660
/**
13661
 * Parse an XML document from a file descriptor and build a tree.
13662
 *
13663
 * NOTE that the file descriptor will not be closed when the
13664
 * context is freed or reset.
13665
 *
13666
 * `URL` is used as base to resolve external entities and for error
13667
 * reporting.
13668
 *
13669
 * @param ctxt  an XML parser context
13670
 * @param fd  an open file descriptor
13671
 * @param URL  base URL (optional)
13672
 * @param encoding  the document encoding (optional)
13673
 * @param options  a combination of xmlParserOption
13674
 * @returns the resulting document tree
13675
 */
13676
xmlDoc *
13677
xmlCtxtReadFd(xmlParserCtxt *ctxt, int fd,
13678
              const char *URL, const char *encoding, int options)
13679
0
{
13680
0
    xmlParserInputPtr input;
13681
13682
0
    if (ctxt == NULL)
13683
0
        return(NULL);
13684
13685
0
    xmlCtxtReset(ctxt);
13686
0
    xmlCtxtUseOptions(ctxt, options);
13687
13688
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13689
0
    if (input == NULL)
13690
0
        return(NULL);
13691
13692
0
    return(xmlCtxtParseDocument(ctxt, input));
13693
0
}
13694
13695
/**
13696
 * Parse an XML document from I/O functions and source and build a tree.
13697
 * This reuses the existing `ctxt` parser context
13698
 *
13699
 * `URL` is used as base to resolve external entities and for error
13700
 * reporting.
13701
 *
13702
 * @param ctxt  an XML parser context
13703
 * @param ioread  an I/O read function
13704
 * @param ioclose  an I/O close function
13705
 * @param ioctx  an I/O handler
13706
 * @param URL  the base URL to use for the document
13707
 * @param encoding  the document encoding, or NULL
13708
 * @param options  a combination of xmlParserOption
13709
 * @returns the resulting document tree
13710
 */
13711
xmlDoc *
13712
xmlCtxtReadIO(xmlParserCtxt *ctxt, xmlInputReadCallback ioread,
13713
              xmlInputCloseCallback ioclose, void *ioctx,
13714
        const char *URL,
13715
              const char *encoding, int options)
13716
0
{
13717
0
    xmlParserInputPtr input;
13718
13719
0
    if (ctxt == NULL)
13720
0
        return(NULL);
13721
13722
0
    xmlCtxtReset(ctxt);
13723
0
    xmlCtxtUseOptions(ctxt, options);
13724
13725
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13726
0
                                  encoding, 0);
13727
0
    if (input == NULL)
13728
0
        return(NULL);
13729
13730
0
    return(xmlCtxtParseDocument(ctxt, input));
13731
0
}
13732