Coverage Report

Created: 2026-06-30 11:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/workdir/UnpackedTarball/libxml2/parser.c
Line
Count
Source
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * Author: Daniel Veillard
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#include <libxml/HTMLparser.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
70
#include "private/buf.h"
71
#include "private/dict.h"
72
#include "private/entities.h"
73
#include "private/error.h"
74
#include "private/html.h"
75
#include "private/io.h"
76
#include "private/memory.h"
77
#include "private/parser.h"
78
#include "private/tree.h"
79
80
28.6M
#define NS_INDEX_EMPTY  INT_MAX
81
4.26M
#define NS_INDEX_XML    (INT_MAX - 1)
82
10.6M
#define URI_HASH_EMPTY  0xD943A04E
83
37.3k
#define URI_HASH_XML    0xF0451F02
84
85
#ifndef STDIN_FILENO
86
0
  #define STDIN_FILENO 0
87
#endif
88
89
#ifndef SIZE_MAX
90
  #define SIZE_MAX ((size_t) -1)
91
#endif
92
93
605k
#define XML_MAX_ATTRS 100000000 /* 100 million */
94
95
19.7M
#define XML_SPECIAL_EXTERNAL    (1 << 20)
96
19.7M
#define XML_SPECIAL_TYPE_MASK   (XML_SPECIAL_EXTERNAL - 1)
97
98
19.8M
#define XML_ATTVAL_ALLOC        (1 << 0)
99
44.1k
#define XML_ATTVAL_NORM_CHANGE  (1 << 1)
100
101
/*
 * Record describing one start tag.
 * NOTE(review): presumably one entry is kept per open element so the end
 * tag can be checked against it - confirm against the users of this struct.
 */
struct _xmlStartTag {
102
    const xmlChar *prefix; /* presumably the element's namespace prefix - TODO confirm */
103
    const xmlChar *URI; /* presumably the element's namespace URI - TODO confirm */
104
    int line; /* presumably the input line of the start tag - TODO confirm */
105
    int nsNr; /* presumably the number of namespaces declared by the tag - TODO confirm */
106
};
107
108
/*
 * Extra per-namespace data, stored in the array referenced by
 * _xmlParserNsData.extra.
 * NOTE(review): field meanings below are inferred from their names only -
 * confirm against the namespace-table code.
 */
typedef struct {
109
    void *saxData; /* opaque pointer; presumably data attached for the SAX layer */
110
    unsigned prefixHashValue; /* presumably the hash of the namespace prefix */
111
    unsigned uriHashValue; /* presumably the hash of the namespace URI */
112
    unsigned elementId; /* presumably the id of the declaring element */
113
    int oldIndex; /* presumably the index of a shadowed binding - TODO confirm */
114
} xmlParserNsExtra;
115
116
/*
 * One slot of the hash table referenced by _xmlParserNsData.hash.
 */
typedef struct {
117
    unsigned hashValue; /* hash value stored in this slot */
118
    int index; /* NOTE(review): presumably an index into the namespace table - confirm */
119
} xmlParserNsBucket;
120
121
/*
 * Namespace bookkeeping data: an array of xmlParserNsExtra records plus a
 * hash table of xmlParserNsBucket slots.
 * NOTE(review): the remaining field descriptions are inferred from names -
 * confirm against the namespace-table code.
 */
struct _xmlParserNsData {
122
    xmlParserNsExtra *extra; /* array of per-namespace extra records */
123
124
    unsigned hashSize; /* presumably the number of slots in `hash` */
125
    unsigned hashElems; /* presumably the number of occupied slots in `hash` */
126
    xmlParserNsBucket *hash; /* hash table slots */
127
128
    unsigned elementId; /* presumably a counter identifying the current element */
129
    int defaultNsIndex; /* presumably the index of the default namespace binding */
130
    int minNsIndex; /* TODO confirm meaning against callers */
131
};
132
133
static int
134
xmlParseElementStart(xmlParserCtxtPtr ctxt);
135
136
static void
137
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
138
139
static xmlEntityPtr
140
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
141
142
static const xmlChar *
143
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
144
145
/************************************************************************
146
 *                  *
147
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
148
 *                  *
149
 ************************************************************************/
150
151
#define XML_PARSER_BIG_ENTITY 1000
152
#define XML_PARSER_LOT_ENTITY 5000
153
154
/*
155
 * Constants for protection against abusive entity expansion
156
 * ("billion laughs").
157
 */
158
159
/*
160
 * A certain amount of entity expansion which is always allowed.
161
 */
162
252k
#define XML_PARSER_ALLOWED_EXPANSION 1000000
163
164
/*
165
 * Fixed cost for each entity reference. This crudely models processing time
166
 * as well to protect, for example, against exponential expansion of empty
167
 * or very short entities.
168
 */
169
252k
#define XML_ENT_FIXED_COST 20
170
171
51.0M
#define XML_PARSER_BIG_BUFFER_SIZE 300
172
117k
#define XML_PARSER_BUFFER_SIZE 100
173
21.4k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
174
175
/**
176
 * XML_PARSER_CHUNK_SIZE
177
 *
178
 * When calling GROW that's the minimal amount of data
179
 * the parser expects to have received. It is not a hard
180
 * limit but an optimization when reading strings like Names
181
 * It is not strictly needed as long as the input's available characters
182
 * are followed by 0, which should be provided by the I/O level
183
 */
184
#define XML_PARSER_CHUNK_SIZE 100
185
186
/**
187
 * Constant string describing the version of the library used at
188
 * run-time.
189
 */
190
const char *const
191
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
192
193
/*
194
 * List of XML prefixed PI allowed by W3C specs
195
 */
196
197
static const char* const xmlW3CPIs[] = {
198
    "xml-stylesheet",
199
    "xml-model",
200
    NULL
201
};
202
203
204
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
205
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206
                                              const xmlChar **str);
207
208
static void
209
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
210
211
static int
212
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
213
214
static void
215
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl);
216
217
/************************************************************************
218
 *                  *
219
 *    Some factorized error routines        *
220
 *                  *
221
 ************************************************************************/
222
223
static void
224
0
xmlErrMemory(xmlParserCtxtPtr ctxt) {
225
0
    xmlCtxtErrMemory(ctxt);
226
0
}
227
228
/**
229
 * Handle a redefinition of attribute error
230
 *
231
 * @param ctxt  an XML parser context
232
 * @param prefix  the attribute prefix
233
 * @param localname  the attribute localname
234
 */
235
static void
236
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
237
                   const xmlChar * localname)
238
154k
{
239
154k
    if (prefix == NULL)
240
118k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
118k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
242
118k
                   "Attribute %s redefined\n", localname);
243
36.0k
    else
244
36.0k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
245
36.0k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
246
36.0k
                   "Attribute %s:%s redefined\n", prefix, localname);
247
154k
}
248
249
/**
250
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
251
 *
252
 * @param ctxt  an XML parser context
253
 * @param error  the error number
254
 * @param msg  the error message
255
 */
256
static void LIBXML_ATTR_FORMAT(3,0)
257
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
258
               const char *msg)
259
1.33M
{
260
1.33M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
261
1.33M
               NULL, NULL, NULL, 0, "%s", msg);
262
1.33M
}
263
264
/**
265
 * Handle a warning.
266
 *
267
 * @param ctxt  an XML parser context
268
 * @param error  the error number
269
 * @param msg  the error message
270
 * @param str1  extra data
271
 * @param str2  extra data
272
 */
273
void LIBXML_ATTR_FORMAT(3,0)
274
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
275
              const char *msg, const xmlChar *str1, const xmlChar *str2)
276
11.3k
{
277
11.3k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
278
11.3k
               str1, str2, NULL, 0, msg, str1, str2);
279
11.3k
}
280
281
#ifdef LIBXML_VALID_ENABLED
282
/**
283
 * Handle a validity error.
284
 *
285
 * @param ctxt  an XML parser context
286
 * @param error  the error number
287
 * @param msg  the error message
288
 * @param str1  extra data
289
 * @param str2  extra data
290
 */
291
static void LIBXML_ATTR_FORMAT(3,0)
292
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
293
              const char *msg, const xmlChar *str1, const xmlChar *str2)
294
0
{
295
0
    ctxt->valid = 0;
296
297
0
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
298
0
               str1, str2, NULL, 0, msg, str1, str2);
299
0
}
300
#endif
301
302
/**
303
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
304
 *
305
 * @param ctxt  an XML parser context
306
 * @param error  the error number
307
 * @param msg  the error message
308
 * @param val  an integer value
309
 */
310
static void LIBXML_ATTR_FORMAT(3,0)
311
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
312
                  const char *msg, int val)
313
7.58k
{
314
7.58k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
315
7.58k
               NULL, NULL, NULL, val, msg, val);
316
7.58k
}
317
318
/**
319
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
320
 *
321
 * @param ctxt  an XML parser context
322
 * @param error  the error number
323
 * @param msg  the error message
324
 * @param str1  an string info
325
 * @param val  an integer value
326
 * @param str2  an string info
327
 */
328
static void LIBXML_ATTR_FORMAT(3,0)
329
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
330
                  const char *msg, const xmlChar *str1, int val,
331
      const xmlChar *str2)
332
64.7k
{
333
64.7k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
334
64.7k
               str1, str2, NULL, val, msg, str1, val, str2);
335
64.7k
}
336
337
/**
338
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
339
 *
340
 * @param ctxt  an XML parser context
341
 * @param error  the error number
342
 * @param msg  the error message
343
 * @param val  a string value
344
 */
345
static void LIBXML_ATTR_FORMAT(3,0)
346
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
347
                  const char *msg, const xmlChar * val)
348
65.8k
{
349
65.8k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
350
65.8k
               val, NULL, NULL, 0, msg, val);
351
65.8k
}
352
353
/**
354
 * Handle a non fatal parser error
355
 *
356
 * @param ctxt  an XML parser context
357
 * @param error  the error number
358
 * @param msg  the error message
359
 * @param val  a string value
360
 */
361
static void LIBXML_ATTR_FORMAT(3,0)
362
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363
                  const char *msg, const xmlChar * val)
364
7.57k
{
365
7.57k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366
7.57k
               val, NULL, NULL, 0, msg, val);
367
7.57k
}
368
369
/**
370
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
371
 *
372
 * @param ctxt  an XML parser context
373
 * @param error  the error number
374
 * @param msg  the message
375
 * @param info1  extra information string
376
 * @param info2  extra information string
377
 * @param info3  extra information string
378
 */
379
static void LIBXML_ATTR_FORMAT(3,0)
380
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381
         const char *msg,
382
         const xmlChar * info1, const xmlChar * info2,
383
         const xmlChar * info3)
384
1.06M
{
385
1.06M
    ctxt->nsWellFormed = 0;
386
387
1.06M
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388
1.06M
               info1, info2, info3, 0, msg, info1, info2, info3);
389
1.06M
}
390
391
/**
392
 * Handle a namespace warning error
393
 *
394
 * @param ctxt  an XML parser context
395
 * @param error  the error number
396
 * @param msg  the message
397
 * @param info1  extra information string
398
 * @param info2  extra information string
399
 * @param info3  extra information string
400
 */
401
static void LIBXML_ATTR_FORMAT(3,0)
402
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403
         const char *msg,
404
         const xmlChar * info1, const xmlChar * info2,
405
         const xmlChar * info3)
406
35.9k
{
407
35.9k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408
35.9k
               info1, info2, info3, 0, msg, info1, info2, info3);
409
35.9k
}
410
411
/**
412
 * Check for non-linear entity expansion behaviour.
413
 *
414
 * In some cases like xmlExpandEntityInAttValue, this function is called
415
 * for each, possibly nested entity and its unexpanded content length.
416
 *
417
 * In other cases like #xmlParseReference, it's only called for each
418
 * top-level entity with its unexpanded content length plus the sum of
419
 * the unexpanded content lengths (plus fixed cost) of all nested
420
 * entities.
421
 *
422
 * Summing the unexpanded lengths also adds the length of the reference.
423
 * This is by design. Taking the length of the entity name into account
424
 * discourages attacks that try to waste CPU time with abusively long
425
 * entity names. See test/recurse/lol6.xml for example. Each call also
426
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
427
 * short entities.
428
 *
429
 * @param ctxt  parser context
430
 * @param extra  sum of unexpanded entity sizes
431
 * @returns 1 on error, 0 on success.
432
 */
433
static int
434
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
435
252k
{
436
252k
    unsigned long consumed;
437
252k
    unsigned long *expandedSize;
438
252k
    xmlParserInputPtr input = ctxt->input;
439
252k
    xmlEntityPtr entity = input->entity;
440
441
252k
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
442
0
        return(0);
443
444
    /*
445
     * Compute total consumed bytes so far, including input streams of
446
     * external entities.
447
     */
448
252k
    consumed = input->consumed;
449
252k
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
450
252k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
451
452
252k
    if (entity)
453
0
        expandedSize = &entity->expandedSize;
454
252k
    else
455
252k
        expandedSize = &ctxt->sizeentcopy;
456
457
    /*
458
     * Add extra cost and some fixed cost.
459
     */
460
252k
    xmlSaturatedAdd(expandedSize, extra);
461
252k
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
462
463
    /*
464
     * It's important to always use saturation arithmetic when tracking
465
     * entity sizes to make the size checks reliable. If "sizeentcopy"
466
     * overflows, we have to abort.
467
     */
468
252k
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
469
209
        ((*expandedSize >= ULONG_MAX) ||
470
209
         (*expandedSize / ctxt->maxAmpl > consumed))) {
471
209
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
472
209
                       "Maximum entity amplification factor exceeded, see "
473
209
                       "xmlCtxtSetMaxAmplification.\n");
474
209
        return(1);
475
209
    }
476
477
252k
    return(0);
478
252k
}
479
480
/************************************************************************
481
 *                  *
482
 *    Library wide options          *
483
 *                  *
484
 ************************************************************************/
485
486
/**
487
 * Examines if the library has been compiled with a given feature.
488
 *
489
 * @param feature  the feature to be examined
490
 * @returns zero (0) if the feature does not exist or an unknown
491
 * feature is requested, non-zero otherwise.
492
 */
493
int
494
xmlHasFeature(xmlFeature feature)
495
0
{
496
0
    switch (feature) {
497
0
  case XML_WITH_THREAD:
498
0
#ifdef LIBXML_THREAD_ENABLED
499
0
      return(1);
500
#else
501
      return(0);
502
#endif
503
0
        case XML_WITH_TREE:
504
0
            return(1);
505
0
        case XML_WITH_OUTPUT:
506
0
#ifdef LIBXML_OUTPUT_ENABLED
507
0
            return(1);
508
#else
509
            return(0);
510
#endif
511
0
        case XML_WITH_PUSH:
512
0
#ifdef LIBXML_PUSH_ENABLED
513
0
            return(1);
514
#else
515
            return(0);
516
#endif
517
0
        case XML_WITH_READER:
518
0
#ifdef LIBXML_READER_ENABLED
519
0
            return(1);
520
#else
521
            return(0);
522
#endif
523
0
        case XML_WITH_PATTERN:
524
0
#ifdef LIBXML_PATTERN_ENABLED
525
0
            return(1);
526
#else
527
            return(0);
528
#endif
529
0
        case XML_WITH_WRITER:
530
0
#ifdef LIBXML_WRITER_ENABLED
531
0
            return(1);
532
#else
533
            return(0);
534
#endif
535
0
        case XML_WITH_SAX1:
536
0
#ifdef LIBXML_SAX1_ENABLED
537
0
            return(1);
538
#else
539
            return(0);
540
#endif
541
0
        case XML_WITH_HTTP:
542
0
            return(0);
543
0
        case XML_WITH_VALID:
544
0
#ifdef LIBXML_VALID_ENABLED
545
0
            return(1);
546
#else
547
            return(0);
548
#endif
549
0
        case XML_WITH_HTML:
550
0
#ifdef LIBXML_HTML_ENABLED
551
0
            return(1);
552
#else
553
            return(0);
554
#endif
555
0
        case XML_WITH_LEGACY:
556
0
            return(0);
557
0
        case XML_WITH_C14N:
558
0
#ifdef LIBXML_C14N_ENABLED
559
0
            return(1);
560
#else
561
            return(0);
562
#endif
563
0
        case XML_WITH_CATALOG:
564
0
#ifdef LIBXML_CATALOG_ENABLED
565
0
            return(1);
566
#else
567
            return(0);
568
#endif
569
0
        case XML_WITH_XPATH:
570
0
#ifdef LIBXML_XPATH_ENABLED
571
0
            return(1);
572
#else
573
            return(0);
574
#endif
575
0
        case XML_WITH_XPTR:
576
0
#ifdef LIBXML_XPTR_ENABLED
577
0
            return(1);
578
#else
579
            return(0);
580
#endif
581
0
        case XML_WITH_XINCLUDE:
582
0
#ifdef LIBXML_XINCLUDE_ENABLED
583
0
            return(1);
584
#else
585
            return(0);
586
#endif
587
0
        case XML_WITH_ICONV:
588
#ifdef LIBXML_ICONV_ENABLED
589
            return(1);
590
#else
591
0
            return(0);
592
0
#endif
593
0
        case XML_WITH_ISO8859X:
594
0
#ifdef LIBXML_ISO8859X_ENABLED
595
0
            return(1);
596
#else
597
            return(0);
598
#endif
599
0
        case XML_WITH_UNICODE:
600
0
            return(0);
601
0
        case XML_WITH_REGEXP:
602
0
#ifdef LIBXML_REGEXP_ENABLED
603
0
            return(1);
604
#else
605
            return(0);
606
#endif
607
0
        case XML_WITH_AUTOMATA:
608
0
#ifdef LIBXML_REGEXP_ENABLED
609
0
            return(1);
610
#else
611
            return(0);
612
#endif
613
0
        case XML_WITH_EXPR:
614
0
            return(0);
615
0
        case XML_WITH_RELAXNG:
616
0
#ifdef LIBXML_RELAXNG_ENABLED
617
0
            return(1);
618
#else
619
            return(0);
620
#endif
621
0
        case XML_WITH_SCHEMAS:
622
0
#ifdef LIBXML_SCHEMAS_ENABLED
623
0
            return(1);
624
#else
625
            return(0);
626
#endif
627
0
        case XML_WITH_SCHEMATRON:
628
#ifdef LIBXML_SCHEMATRON_ENABLED
629
            return(1);
630
#else
631
0
            return(0);
632
0
#endif
633
0
        case XML_WITH_MODULES:
634
0
#ifdef LIBXML_MODULES_ENABLED
635
0
            return(1);
636
#else
637
            return(0);
638
#endif
639
0
        case XML_WITH_DEBUG:
640
0
#ifdef LIBXML_DEBUG_ENABLED
641
0
            return(1);
642
#else
643
            return(0);
644
#endif
645
0
        case XML_WITH_DEBUG_MEM:
646
0
            return(0);
647
0
        case XML_WITH_ZLIB:
648
#ifdef LIBXML_ZLIB_ENABLED
649
            return(1);
650
#else
651
0
            return(0);
652
0
#endif
653
0
        case XML_WITH_LZMA:
654
0
            return(0);
655
0
        case XML_WITH_ICU:
656
#ifdef LIBXML_ICU_ENABLED
657
            return(1);
658
#else
659
0
            return(0);
660
0
#endif
661
0
        default:
662
0
      break;
663
0
     }
664
0
     return(0);
665
0
}
666
667
/************************************************************************
668
 *                  *
669
 *      Simple string buffer        *
670
 *                  *
671
 ************************************************************************/
672
673
/*
 * Growable byte buffer with a size limit and a sticky error code.
 *
 * The xmlSBufAdd* functions record a failure in `code` instead of
 * returning it; the error is reported later by xmlSBufFinish() or
 * xmlSBufCleanup().
 */
typedef struct {
674
    xmlChar *mem; /* heap buffer, NULL until the first grow */
675
    unsigned size; /* number of bytes currently stored */
676
    unsigned cap; /* size < cap */
677
    unsigned max; /* size <= max */
678
    xmlParserErrors code; /* XML_ERR_OK, or the recorded failure */
679
} xmlSBuf;
680
681
static void
682
19.7M
xmlSBufInit(xmlSBuf *buf, unsigned max) {
683
19.7M
    buf->mem = NULL;
684
19.7M
    buf->size = 0;
685
19.7M
    buf->cap = 0;
686
19.7M
    buf->max = max;
687
19.7M
    buf->code = XML_ERR_OK;
688
19.7M
}
689
690
static int
691
161k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
692
161k
    xmlChar *mem;
693
161k
    unsigned cap;
694
695
161k
    if (len >= UINT_MAX / 2 - buf->size) {
696
0
        if (buf->code == XML_ERR_OK)
697
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
698
0
        return(-1);
699
0
    }
700
701
161k
    cap = (buf->size + len) * 2;
702
161k
    if (cap < 240)
703
135k
        cap = 240;
704
705
161k
    mem = xmlRealloc(buf->mem, cap);
706
161k
    if (mem == NULL) {
707
0
        buf->code = XML_ERR_NO_MEMORY;
708
0
        return(-1);
709
0
    }
710
711
161k
    buf->mem = mem;
712
161k
    buf->cap = cap;
713
714
161k
    return(0);
715
161k
}
716
717
static void
718
3.33M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
719
3.33M
    if (buf->max - buf->size < len) {
720
0
        if (buf->code == XML_ERR_OK)
721
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
722
0
        return;
723
0
    }
724
725
3.33M
    if (buf->cap - buf->size <= len) {
726
159k
        if (xmlSBufGrow(buf, len) < 0)
727
0
            return;
728
159k
    }
729
730
3.33M
    if (len > 0)
731
3.33M
        memcpy(buf->mem + buf->size, str, len);
732
3.33M
    buf->size += len;
733
3.33M
}
734
735
static void
736
2.67M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
737
2.67M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
738
2.67M
}
739
740
static void
741
39.6k
xmlSBufAddChar(xmlSBuf *buf, int c) {
742
39.6k
    xmlChar *end;
743
744
39.6k
    if (buf->max - buf->size < 4) {
745
0
        if (buf->code == XML_ERR_OK)
746
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
747
0
        return;
748
0
    }
749
750
39.6k
    if (buf->cap - buf->size <= 4) {
751
2.00k
        if (xmlSBufGrow(buf, 4) < 0)
752
0
            return;
753
2.00k
    }
754
755
39.6k
    end = buf->mem + buf->size;
756
757
39.6k
    if (c < 0x80) {
758
12.3k
        *end = (xmlChar) c;
759
12.3k
        buf->size += 1;
760
27.2k
    } else {
761
27.2k
        buf->size += xmlCopyCharMultiByte(end, c);
762
27.2k
    }
763
39.6k
}
764
765
static void
766
2.22M
xmlSBufAddReplChar(xmlSBuf *buf) {
767
2.22M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
768
2.22M
}
769
770
static void
771
0
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
772
0
    if (buf->code == XML_ERR_NO_MEMORY)
773
0
        xmlCtxtErrMemory(ctxt);
774
0
    else
775
0
        xmlFatalErr(ctxt, buf->code, errMsg);
776
0
}
777
778
static xmlChar *
779
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
780
147k
              const char *errMsg) {
781
147k
    if (buf->mem == NULL) {
782
8.43k
        buf->mem = xmlMalloc(1);
783
8.43k
        if (buf->mem == NULL) {
784
0
            buf->code = XML_ERR_NO_MEMORY;
785
8.43k
        } else {
786
8.43k
            buf->mem[0] = 0;
787
8.43k
        }
788
139k
    } else {
789
139k
        buf->mem[buf->size] = 0;
790
139k
    }
791
792
147k
    if (buf->code == XML_ERR_OK) {
793
147k
        if (sizeOut != NULL)
794
121k
            *sizeOut = buf->size;
795
147k
        return(buf->mem);
796
147k
    }
797
798
0
    xmlSBufReportError(buf, ctxt, errMsg);
799
800
0
    xmlFree(buf->mem);
801
802
0
    if (sizeOut != NULL)
803
0
        *sizeOut = 0;
804
0
    return(NULL);
805
147k
}
806
807
static void
808
19.6M
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
809
19.6M
    if (buf->code != XML_ERR_OK)
810
0
        xmlSBufReportError(buf, ctxt, errMsg);
811
812
19.6M
    xmlFree(buf->mem);
813
19.6M
}
814
815
static int
816
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
817
4.18M
                    const char *errMsg) {
818
4.18M
    int c = str[0];
819
4.18M
    int c1 = str[1];
820
821
4.18M
    if ((c1 & 0xC0) != 0x80)
822
872k
        goto encoding_error;
823
824
3.31M
    if (c < 0xE0) {
825
        /* 2-byte sequence */
826
766k
        if (c < 0xC2)
827
218k
            goto encoding_error;
828
829
548k
        return(2);
830
2.54M
    } else {
831
2.54M
        int c2 = str[2];
832
833
2.54M
        if ((c2 & 0xC0) != 0x80)
834
14.0k
            goto encoding_error;
835
836
2.52M
        if (c < 0xF0) {
837
            /* 3-byte sequence */
838
2.44M
            if (c == 0xE0) {
839
                /* overlong */
840
130k
                if (c1 < 0xA0)
841
1.07k
                    goto encoding_error;
842
2.31M
            } else if (c == 0xED) {
843
                /* surrogate */
844
5.41k
                if (c1 >= 0xA0)
845
752
                    goto encoding_error;
846
2.30M
            } else if (c == 0xEF) {
847
                /* U+FFFE and U+FFFF are invalid Chars */
848
1.23M
                if ((c1 == 0xBF) && (c2 >= 0xBE))
849
26.1k
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
850
1.23M
            }
851
852
2.43M
            return(3);
853
2.44M
        } else {
854
            /* 4-byte sequence */
855
87.7k
            if ((str[3] & 0xC0) != 0x80)
856
3.97k
                goto encoding_error;
857
83.8k
            if (c == 0xF0) {
858
                /* overlong */
859
1.27k
                if (c1 < 0x90)
860
738
                    goto encoding_error;
861
82.5k
            } else if (c >= 0xF4) {
862
                /* greater than 0x10FFFF */
863
2.87k
                if ((c > 0xF4) || (c1 >= 0x90))
864
1.57k
                    goto encoding_error;
865
2.87k
            }
866
867
81.5k
            return(4);
868
83.8k
        }
869
2.52M
    }
870
871
1.11M
encoding_error:
872
    /* Only report the first error */
873
1.11M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
874
2.57k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
875
2.57k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
876
2.57k
    }
877
878
1.11M
    return(0);
879
3.31M
}
880
881
/************************************************************************
882
 *                  *
883
 *    SAX2 defaulted attributes handling      *
884
 *                  *
885
 ************************************************************************/
886
887
/**
888
 * Final initialization of the parser context before starting to parse.
889
 *
890
 * This accounts for users modifying struct members of parser context
891
 * directly.
892
 *
893
 * @param ctxt  an XML parser context
894
 */
895
static void
896
151k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
897
151k
    xmlSAXHandlerPtr sax;
898
899
    /* Avoid unused variable warning if features are disabled. */
900
151k
    (void) sax;
901
902
    /*
903
     * Changing the SAX struct directly is still widespread practice
904
     * in internal and external code.
905
     */
906
151k
    if (ctxt == NULL) return;
907
151k
    sax = ctxt->sax;
908
151k
#ifdef LIBXML_SAX1_ENABLED
909
    /*
910
     * Only enable SAX2 if there SAX2 element handlers, except when there
911
     * are no element handlers at all.
912
     */
913
151k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
914
151k
        (sax) &&
915
151k
        (sax->initialized == XML_SAX2_MAGIC) &&
916
151k
        ((sax->startElementNs != NULL) ||
917
0
         (sax->endElementNs != NULL) ||
918
0
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
919
151k
        ctxt->sax2 = 1;
920
#else
921
    ctxt->sax2 = 1;
922
#endif /* LIBXML_SAX1_ENABLED */
923
924
    /*
925
     * Some users replace the dictionary directly in the context struct.
926
     * We really need an API function to do that cleanly.
927
     */
928
151k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
929
151k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
930
151k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
931
151k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
932
151k
    (ctxt->str_xml_ns == NULL)) {
933
0
        xmlErrMemory(ctxt);
934
0
    }
935
936
151k
    xmlDictSetLimit(ctxt->dict,
937
151k
                    (ctxt->options & XML_PARSE_HUGE) ?
938
146k
                        0 :
939
151k
                        XML_MAX_DICTIONARY_LIMIT);
940
941
151k
#ifdef LIBXML_VALID_ENABLED
942
151k
    if (ctxt->validate)
943
0
        ctxt->vctxt.flags |= XML_VCTXT_VALIDATE;
944
151k
    else
945
151k
        ctxt->vctxt.flags &= ~XML_VCTXT_VALIDATE;
946
151k
#endif /* LIBXML_VALID_ENABLED */
947
151k
}
948
949
/*
 * One defaulted attribute, stored in the `attrs` array of xmlDefAttrs.
 * NOTE(review): the field descriptions below are inferred from names and
 * types only - confirm against xmlAddDefAttrs and the code reading them.
 */
typedef struct {
950
    xmlHashedString prefix; /* presumably the attribute's prefix */
951
    xmlHashedString name; /* presumably the attribute's local name */
952
    xmlHashedString value; /* presumably the default value */
953
    const xmlChar *valueEnd; /* presumably points at the end of the value */
954
    int external; /* presumably set when declared in external markup - TODO confirm */
955
    int expandedSize; /* presumably an entity-expansion cost of the value - TODO confirm */
956
} xmlDefAttr;
957
958
/*
 * Variable-length set of defaulted attributes for one element: a small
 * header followed by `maxAttrs` slots, of which `nbAttrs` are in use.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
959
typedef xmlDefAttrs *xmlDefAttrsPtr;
960
struct _xmlDefAttrs {
961
    int nbAttrs;  /* number of defaulted attributes on that element */
962
    int maxAttrs;       /* the size of the array */
963
#if __STDC_VERSION__ >= 199901L
964
    /* Using a C99 flexible array member avoids UBSan errors. */
965
    xmlDefAttr attrs[] ATTRIBUTE_COUNTED_BY(maxAttrs);
966
#else
967
    /* Pre-C99 fallback: trailing array declared with a single element. */
    xmlDefAttr attrs[1];
968
#endif
969
};
970
971
/**
 * Normalize the space in non CDATA attribute values:
 * If the attribute type is not CDATA, then the XML processor MUST further
 * process the normalized attribute value by discarding any leading and
 * trailing space (\#x20) characters, and by replacing sequences of space
 * (\#x20) characters by a single space (\#x20) character.
 *
 * The output is never longer than the input, so dst needs room for at
 * least strlen(src) + 1 bytes. Normalizing in place (dst == src) is fine
 * as long as the buffer is writable (not a dictionary or read-only string).
 *
 * @param src  the source string
 * @param dst  the target buffer
 * @returns NULL if either argument is NULL, or if the write and read
 *          cursors finish at the same address (in-place call where no
 *          byte was removed). Otherwise a pointer to the terminating
 *          NUL written into dst, i.e. the END of the normalized value.
 */
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
    const xmlChar *in = src;
    xmlChar *out = dst;

    if ((in == NULL) || (out == NULL))
        return(NULL);

    /* Leading spaces are dropped entirely. */
    while (*in == 0x20)
        in++;

    for (;;) {
        xmlChar c = *in;

        if (c == 0)
            break;

        if (c != 0x20) {
            *out++ = c;
            in++;
            continue;
        }

        /* Collapse a run of spaces; emit one unless it is trailing. */
        do {
            in++;
        } while (*in == 0x20);
        if (*in != 0)
            *out++ = 0x20;
    }

    *out = 0;

    return((out == in) ? NULL : out);
}
1007
1008
/**
1009
 * Add a defaulted attribute for an element
1010
 *
1011
 * @param ctxt  an XML parser context
1012
 * @param fullname  the element fullname
1013
 * @param fullattr  the attribute fullname
1014
 * @param value  the attribute value
1015
 */
1016
static void
1017
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1018
               const xmlChar *fullname,
1019
               const xmlChar *fullattr,
1020
8.12k
               const xmlChar *value) {
1021
8.12k
    xmlDefAttrsPtr defaults;
1022
8.12k
    xmlDefAttr *attr;
1023
8.12k
    int len, expandedSize;
1024
8.12k
    xmlHashedString name;
1025
8.12k
    xmlHashedString prefix;
1026
8.12k
    xmlHashedString hvalue;
1027
8.12k
    const xmlChar *localname;
1028
1029
    /*
1030
     * Allows to detect attribute redefinitions
1031
     */
1032
8.12k
    if (ctxt->attsSpecial != NULL) {
1033
7.51k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1034
2.29k
      return;
1035
7.51k
    }
1036
1037
5.83k
    if (ctxt->attsDefault == NULL) {
1038
618
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1039
618
  if (ctxt->attsDefault == NULL)
1040
0
      goto mem_error;
1041
618
    }
1042
1043
    /*
1044
     * split the element name into prefix:localname , the string found
1045
     * are within the DTD and then not associated to namespace names.
1046
     */
1047
5.83k
    localname = xmlSplitQName3(fullname, &len);
1048
5.83k
    if (localname == NULL) {
1049
5.30k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1050
5.30k
  prefix.name = NULL;
1051
5.30k
    } else {
1052
525
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1053
525
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1054
525
        if (prefix.name == NULL)
1055
0
            goto mem_error;
1056
525
    }
1057
5.83k
    if (name.name == NULL)
1058
0
        goto mem_error;
1059
1060
    /*
1061
     * make sure there is some storage
1062
     */
1063
5.83k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1064
5.83k
    if ((defaults == NULL) ||
1065
4.37k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1066
2.12k
        xmlDefAttrsPtr temp;
1067
2.12k
        int newSize;
1068
1069
2.12k
        if (defaults == NULL) {
1070
1.46k
            newSize = 4;
1071
1.46k
        } else {
1072
668
            if ((defaults->maxAttrs >= XML_MAX_ATTRS) ||
1073
668
                ((size_t) defaults->maxAttrs >
1074
668
                     SIZE_MAX / 2 / sizeof(temp[0]) - sizeof(*defaults)))
1075
0
                goto mem_error;
1076
1077
668
            if (defaults->maxAttrs > XML_MAX_ATTRS / 2)
1078
0
                newSize = XML_MAX_ATTRS;
1079
668
            else
1080
668
                newSize = defaults->maxAttrs * 2;
1081
668
        }
1082
2.12k
        temp = xmlRealloc(defaults,
1083
2.12k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1084
2.12k
  if (temp == NULL)
1085
0
      goto mem_error;
1086
2.12k
        if (defaults == NULL)
1087
1.46k
            temp->nbAttrs = 0;
1088
2.12k
  temp->maxAttrs = newSize;
1089
2.12k
        defaults = temp;
1090
2.12k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1091
2.12k
                          defaults, NULL) < 0) {
1092
0
      xmlFree(defaults);
1093
0
      goto mem_error;
1094
0
  }
1095
2.12k
    }
1096
1097
    /*
1098
     * Split the attribute name into prefix:localname , the string found
1099
     * are within the DTD and hen not associated to namespace names.
1100
     */
1101
5.83k
    localname = xmlSplitQName3(fullattr, &len);
1102
5.83k
    if (localname == NULL) {
1103
2.41k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1104
2.41k
  prefix.name = NULL;
1105
3.41k
    } else {
1106
3.41k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1107
3.41k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1108
3.41k
        if (prefix.name == NULL)
1109
0
            goto mem_error;
1110
3.41k
    }
1111
5.83k
    if (name.name == NULL)
1112
0
        goto mem_error;
1113
1114
    /* intern the string and precompute the end */
1115
5.83k
    len = strlen((const char *) value);
1116
5.83k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1117
5.83k
    if (hvalue.name == NULL)
1118
0
        goto mem_error;
1119
1120
5.83k
    expandedSize = strlen((const char *) name.name);
1121
5.83k
    if (prefix.name != NULL)
1122
3.41k
        expandedSize += strlen((const char *) prefix.name);
1123
5.83k
    expandedSize += len;
1124
1125
5.83k
    attr = &defaults->attrs[defaults->nbAttrs++];
1126
5.83k
    attr->name = name;
1127
5.83k
    attr->prefix = prefix;
1128
5.83k
    attr->value = hvalue;
1129
5.83k
    attr->valueEnd = hvalue.name + len;
1130
5.83k
    attr->external = PARSER_EXTERNAL(ctxt);
1131
5.83k
    attr->expandedSize = expandedSize;
1132
1133
5.83k
    return;
1134
1135
0
mem_error:
1136
0
    xmlErrMemory(ctxt);
1137
0
}
1138
1139
/**
1140
 * Register this attribute type
1141
 *
1142
 * @param ctxt  an XML parser context
1143
 * @param fullname  the element fullname
1144
 * @param fullattr  the attribute fullname
1145
 * @param type  the attribute type
1146
 */
1147
static void
1148
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1149
      const xmlChar *fullname,
1150
      const xmlChar *fullattr,
1151
      int type)
1152
12.4k
{
1153
12.4k
    if (ctxt->attsSpecial == NULL) {
1154
695
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1155
695
  if (ctxt->attsSpecial == NULL)
1156
0
      goto mem_error;
1157
695
    }
1158
1159
12.4k
    if (PARSER_EXTERNAL(ctxt))
1160
0
        type |= XML_SPECIAL_EXTERNAL;
1161
1162
12.4k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1163
12.4k
                    XML_INT_TO_PTR(type)) < 0)
1164
0
        goto mem_error;
1165
12.4k
    return;
1166
1167
12.4k
mem_error:
1168
0
    xmlErrMemory(ctxt);
1169
0
}
1170
1171
/**
1172
 * Removes CDATA attributes from the special attribute table
1173
 */
1174
static void
1175
xmlCleanSpecialAttrCallback(void *payload, void *data,
1176
                            const xmlChar *fullname, const xmlChar *fullattr,
1177
7.26k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1178
7.26k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1179
1180
7.26k
    if (XML_PTR_TO_INT(payload) == XML_ATTRIBUTE_CDATA) {
1181
1.02k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1182
1.02k
    }
1183
7.26k
}
1184
1185
/**
1186
 * Trim the list of attributes defined to remove all those of type
1187
 * CDATA as they are not special. This call should be done when finishing
1188
 * to parse the DTD and before starting to parse the document root.
1189
 *
1190
 * @param ctxt  an XML parser context
1191
 */
1192
static void
1193
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1194
1.78k
{
1195
1.78k
    if (ctxt->attsSpecial == NULL)
1196
1.08k
        return;
1197
1198
695
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1199
1200
695
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1201
79
        xmlHashFree(ctxt->attsSpecial, NULL);
1202
79
        ctxt->attsSpecial = NULL;
1203
79
    }
1204
695
}
1205
1206
/* Returns the first position at or after p that is not an ASCII letter. */
static const xmlChar *
xmlLangSkipAlpha(const xmlChar *p) {
    while (((p[0] >= 'A') && (p[0] <= 'Z')) ||
           ((p[0] >= 'a') && (p[0] <= 'z')))
        p++;
    return(p);
}

/* Returns non-zero if c is an ASCII decimal digit. */
static int
xmlLangIsDigit(int c) {
    return((c >= '0') && (c <= '9'));
}

/**
 * Checks that the value conforms to the LanguageID production:
 *
 * @deprecated Internal function, do not use.
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 * the XML Second edition.
 *
 *     [33] LanguageID ::= Langcode ('-' Subcode)*
 *     [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 *     [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 *     [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 *     [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 *     [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC reference the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 *
 *     langtag       = language
 *                     ["-" script]
 *                     ["-" region]
 *                     *("-" variant)
 *                     *("-" extension)
 *                     ["-" privateuse]
 *     language      = 2*3ALPHA            ; shortest ISO 639 code
 *                     ["-" extlang]       ; sometimes followed by
 *                                         ; extended language subtags
 *                   / 4ALPHA              ; or reserved for future use
 *                   / 5*8ALPHA            ; or registered language subtag
 *
 *     extlang       = 3ALPHA              ; selected ISO 639 codes
 *                     *2("-" 3ALPHA)      ; permanently reserved
 *
 *     script        = 4ALPHA              ; ISO 15924 code
 *
 *     region        = 2ALPHA              ; ISO 3166-1 code
 *                   / 3DIGIT              ; UN M.49 code
 *
 *     variant       = 5*8alphanum         ; registered variants
 *                   / (DIGIT 3alphanum)
 *
 *     extension     = singleton 1*("-" (2*8alphanum))
 *
 * What this implementation actually accepts:
 *  - the legacy "i-" / "x-" forms followed only by ASCII letters;
 *  - a 4 to 8 letter language with nothing after it;
 *  - a 2 or 3 letter language optionally followed, in order, by one
 *    extlang (3 letters), a script (4 letters), a region (2 letters or
 *    3 digits) and a variant (5 to 8 letters).
 * Once a variant has been read, anything introduced by '-' is accepted
 * unchecked; extensions and private use subtags are not parsed.
 *
 * @param lang  pointer to the string value
 * @returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang, *nxt;
    ptrdiff_t len;

    if (cur == NULL)
        return (0);

    /*
     * Still allow IANA code and user code which were coming
     * from the previous version of the XML-1.0 specification
     * it's deprecated but we should not fail
     */
    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        nxt = xmlLangSkipAlpha(cur + 2);
        return(nxt[0] == 0);
    }

    /* primary language subtag */
    nxt = xmlLangSkipAlpha(cur);
    len = nxt - cur;
    if (len >= 4) {
        /* reserved / registered language: must stand alone */
        if ((len > 8) || (nxt[0] != 0))
            return(0);
        return(1);
    }
    if (len < 2)
        return(0);
    /* we got an ISO 639 code */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    /* now we can have extlang or script or region or variant */
    nxt++;
    cur = nxt;
    if (xmlLangIsDigit(nxt[0]))
        goto region_m49;

    nxt = xmlLangSkipAlpha(nxt);
    len = nxt - cur;
    if (len == 4)
        goto script;
    if (len == 2)
        goto region;
    if ((len >= 5) && (len <= 8))
        goto variant;
    if (len != 3)
        return(0);
    /* we parsed an extlang */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    /* now we can have script or region or variant */
    nxt++;
    cur = nxt;
    if (xmlLangIsDigit(nxt[0]))
        goto region_m49;

    nxt = xmlLangSkipAlpha(nxt);
    len = nxt - cur;
    if (len == 2)
        goto region;
    if ((len >= 5) && (len <= 8))
        goto variant;
    if (len != 4)
        return(0);
    /* we parsed a script */
script:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    /* now we can have region or variant */
    nxt++;
    cur = nxt;
    if (xmlLangIsDigit(nxt[0]))
        goto region_m49;

    nxt = xmlLangSkipAlpha(nxt);
    len = nxt - cur;
    if ((len >= 5) && (len <= 8))
        goto variant;
    if (len != 2)
        return(0);
    /* we parsed a region */
region:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    /* now we can just have a variant */
    nxt++;
    cur = nxt;
    nxt = xmlLangSkipAlpha(nxt);
    len = nxt - cur;
    if ((len < 5) || (len > 8))
        return(0);

    /* we parsed a variant */
variant:
    /* extensions and private use subtags not checked */
    return((nxt[0] == 0) || (nxt[0] == '-'));

region_m49:
    /* nxt[0] is already known to be a digit; need two more */
    if (xmlLangIsDigit(nxt[1]) && xmlLangIsDigit(nxt[2])) {
        nxt += 3;
        goto region;
    }
    return(0);
}
1400
1401
/************************************************************************
1402
 *                  *
1403
 *    Parser stacks related functions and macros    *
1404
 *                  *
1405
 ************************************************************************/
1406
1407
static xmlChar *
1408
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1409
1410
/**
1411
 * Create a new namespace database.
1412
 *
1413
 * @returns the new obejct.
1414
 */
1415
xmlParserNsData *
1416
151k
xmlParserNsCreate(void) {
1417
151k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1418
1419
151k
    if (nsdb == NULL)
1420
0
        return(NULL);
1421
151k
    memset(nsdb, 0, sizeof(*nsdb));
1422
151k
    nsdb->defaultNsIndex = INT_MAX;
1423
1424
151k
    return(nsdb);
1425
151k
}
1426
1427
/**
1428
 * Free a namespace database.
1429
 *
1430
 * @param nsdb  namespace database
1431
 */
1432
void
1433
151k
xmlParserNsFree(xmlParserNsData *nsdb) {
1434
151k
    if (nsdb == NULL)
1435
0
        return;
1436
1437
151k
    xmlFree(nsdb->extra);
1438
151k
    xmlFree(nsdb->hash);
1439
151k
    xmlFree(nsdb);
1440
151k
}
1441
1442
/**
1443
 * Reset a namespace database.
1444
 *
1445
 * @param nsdb  namespace database
1446
 */
1447
static void
1448
5.12k
xmlParserNsReset(xmlParserNsData *nsdb) {
1449
5.12k
    if (nsdb == NULL)
1450
0
        return;
1451
1452
5.12k
    nsdb->hashElems = 0;
1453
5.12k
    nsdb->elementId = 0;
1454
5.12k
    nsdb->defaultNsIndex = INT_MAX;
1455
1456
5.12k
    if (nsdb->hash)
1457
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1458
5.12k
}
1459
1460
/**
1461
 * Signal that a new element has started.
1462
 *
1463
 * @param nsdb  namespace database
1464
 * @returns 0 on success, -1 if the element counter overflowed.
1465
 */
1466
static int
1467
24.1M
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1468
24.1M
    if (nsdb->elementId == UINT_MAX)
1469
0
        return(-1);
1470
24.1M
    nsdb->elementId++;
1471
1472
24.1M
    return(0);
1473
24.1M
}
1474
1475
/**
1476
 * Lookup namespace with given prefix. If `bucketPtr` is non-NULL, it will
1477
 * be set to the matching bucket, or the first empty bucket if no match
1478
 * was found.
1479
 *
1480
 * @param ctxt  parser context
1481
 * @param prefix  namespace prefix
1482
 * @param bucketPtr  optional bucket (return value)
1483
 * @returns the namespace index on success, INT_MAX if no namespace was
1484
 * found.
1485
 */
1486
static int
1487
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1488
31.7M
                  xmlParserNsBucket **bucketPtr) {
1489
31.7M
    xmlParserNsBucket *bucket, *tombstone;
1490
31.7M
    unsigned index, hashValue;
1491
1492
31.7M
    if (prefix->name == NULL)
1493
13.5M
        return(ctxt->nsdb->defaultNsIndex);
1494
1495
18.2M
    if (ctxt->nsdb->hashSize == 0)
1496
335k
        return(INT_MAX);
1497
1498
17.8M
    hashValue = prefix->hashValue;
1499
17.8M
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1500
17.8M
    bucket = &ctxt->nsdb->hash[index];
1501
17.8M
    tombstone = NULL;
1502
1503
18.7M
    while (bucket->hashValue) {
1504
17.7M
        if (bucket->index == INT_MAX) {
1505
126k
            if (tombstone == NULL)
1506
124k
                tombstone = bucket;
1507
17.6M
        } else if (bucket->hashValue == hashValue) {
1508
16.8M
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1509
16.8M
                if (bucketPtr != NULL)
1510
601k
                    *bucketPtr = bucket;
1511
16.8M
                return(bucket->index);
1512
16.8M
            }
1513
16.8M
        }
1514
1515
876k
        index++;
1516
876k
        bucket++;
1517
876k
        if (index == ctxt->nsdb->hashSize) {
1518
7.95k
            index = 0;
1519
7.95k
            bucket = ctxt->nsdb->hash;
1520
7.95k
        }
1521
876k
    }
1522
1523
975k
    if (bucketPtr != NULL)
1524
543k
        *bucketPtr = tombstone ? tombstone : bucket;
1525
975k
    return(INT_MAX);
1526
17.8M
}
1527
1528
/**
1529
 * Lookup namespace URI with given prefix.
1530
 *
1531
 * @param ctxt  parser context
1532
 * @param prefix  namespace prefix
1533
 * @returns the namespace URI on success, NULL if no namespace was found.
1534
 */
1535
static const xmlChar *
1536
24.1M
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1537
24.1M
    const xmlChar *ret;
1538
24.1M
    int nsIndex;
1539
1540
24.1M
    if (prefix->name == ctxt->str_xml)
1541
21.9k
        return(ctxt->str_xml_ns);
1542
1543
    /*
1544
     * minNsIndex is used when building an entity tree. We must
1545
     * ignore namespaces declared outside the entity.
1546
     */
1547
24.1M
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1548
24.1M
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1549
9.59M
        return(NULL);
1550
1551
14.5M
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1552
14.5M
    if (ret[0] == 0)
1553
2.90k
        ret = NULL;
1554
14.5M
    return(ret);
1555
24.1M
}
1556
1557
/**
1558
 * Lookup extra data for the given prefix. This returns data stored
1559
 * with xmlParserNsUdpateSax.
1560
 *
1561
 * @param ctxt  parser context
1562
 * @param prefix  namespace prefix
1563
 * @returns the data on success, NULL if no namespace was found.
1564
 */
1565
void *
1566
810k
xmlParserNsLookupSax(xmlParserCtxt *ctxt, const xmlChar *prefix) {
1567
810k
    xmlHashedString hprefix;
1568
810k
    int nsIndex;
1569
1570
810k
    if (prefix == ctxt->str_xml)
1571
0
        return(NULL);
1572
1573
810k
    hprefix.name = prefix;
1574
810k
    if (prefix != NULL)
1575
809k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1576
1.04k
    else
1577
1.04k
        hprefix.hashValue = 0;
1578
810k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1579
810k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1580
0
        return(NULL);
1581
1582
810k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1583
810k
}
1584
1585
/**
1586
 * Sets or updates extra data for the given prefix. This value will be
1587
 * returned by xmlParserNsLookupSax as long as the namespace with the
1588
 * given prefix is in scope.
1589
 *
1590
 * @param ctxt  parser context
1591
 * @param prefix  namespace prefix
1592
 * @param saxData  extra data for SAX handler
1593
 * @returns the data on success, NULL if no namespace was found.
1594
 */
1595
int
1596
xmlParserNsUpdateSax(xmlParserCtxt *ctxt, const xmlChar *prefix,
1597
10.8k
                     void *saxData) {
1598
10.8k
    xmlHashedString hprefix;
1599
10.8k
    int nsIndex;
1600
1601
10.8k
    if (prefix == ctxt->str_xml)
1602
0
        return(-1);
1603
1604
10.8k
    hprefix.name = prefix;
1605
10.8k
    if (prefix != NULL)
1606
10.1k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1607
736
    else
1608
736
        hprefix.hashValue = 0;
1609
10.8k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1610
10.8k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1611
0
        return(-1);
1612
1613
10.8k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1614
10.8k
    return(0);
1615
10.8k
}
1616
1617
/**
1618
 * Grows the namespace tables.
1619
 *
1620
 * @param ctxt  parser context
1621
 * @returns 0 on success, -1 if a memory allocation failed.
1622
 */
1623
static int
1624
389k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1625
389k
    const xmlChar **table;
1626
389k
    xmlParserNsExtra *extra;
1627
389k
    int newSize;
1628
1629
389k
    newSize = xmlGrowCapacity(ctxt->nsMax,
1630
389k
                              sizeof(table[0]) + sizeof(extra[0]),
1631
389k
                              16, XML_MAX_ITEMS);
1632
389k
    if (newSize < 0)
1633
0
        goto error;
1634
1635
389k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1636
389k
    if (table == NULL)
1637
0
        goto error;
1638
389k
    ctxt->nsTab = table;
1639
1640
389k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1641
389k
    if (extra == NULL)
1642
0
        goto error;
1643
389k
    ctxt->nsdb->extra = extra;
1644
1645
389k
    ctxt->nsMax = newSize;
1646
389k
    return(0);
1647
1648
0
error:
1649
0
    xmlErrMemory(ctxt);
1650
0
    return(-1);
1651
389k
}
1652
1653
/**
1654
 * Push a new namespace on the table.
1655
 *
1656
 * @param ctxt  parser context
1657
 * @param prefix  prefix with hash value
1658
 * @param uri  uri with hash value
1659
 * @param saxData  extra data for SAX handler
1660
 * @param defAttr  whether the namespace comes from a default attribute
1661
 * @returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1662
 * -1 if a memory allocation failed.
1663
 */
1664
static int
1665
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1666
970k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1667
970k
    xmlParserNsBucket *bucket = NULL;
1668
970k
    xmlParserNsExtra *extra;
1669
970k
    const xmlChar **ns;
1670
970k
    unsigned hashValue, nsIndex, oldIndex;
1671
1672
970k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1673
0
        return(0);
1674
1675
970k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1676
0
        xmlErrMemory(ctxt);
1677
0
        return(-1);
1678
0
    }
1679
1680
    /*
1681
     * Default namespace and 'xml' namespace
1682
     */
1683
970k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1684
207k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1685
1686
207k
        if (oldIndex != INT_MAX) {
1687
109k
            extra = &ctxt->nsdb->extra[oldIndex];
1688
1689
109k
            if (extra->elementId == ctxt->nsdb->elementId) {
1690
75.4k
                if (defAttr == 0)
1691
75.4k
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1692
75.4k
                return(0);
1693
75.4k
            }
1694
1695
33.5k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1696
0
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1697
0
                return(0);
1698
33.5k
        }
1699
1700
131k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1701
131k
        goto populate_entry;
1702
207k
    }
1703
1704
    /*
1705
     * Hash table lookup
1706
     */
1707
763k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1708
763k
    if (oldIndex != INT_MAX) {
1709
130k
        extra = &ctxt->nsdb->extra[oldIndex];
1710
1711
        /*
1712
         * Check for duplicate definitions on the same element.
1713
         */
1714
130k
        if (extra->elementId == ctxt->nsdb->elementId) {
1715
34.1k
            if (defAttr == 0)
1716
34.1k
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1717
34.1k
            return(0);
1718
34.1k
        }
1719
1720
96.2k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1721
0
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1722
0
            return(0);
1723
1724
96.2k
        bucket->index = ctxt->nsNr;
1725
96.2k
        goto populate_entry;
1726
96.2k
    }
1727
1728
    /*
1729
     * Insert new bucket
1730
     */
1731
1732
632k
    hashValue = prefix->hashValue;
1733
1734
    /*
1735
     * Grow hash table, 50% fill factor
1736
     */
1737
632k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1738
114k
        xmlParserNsBucket *newHash;
1739
114k
        unsigned newSize, i, index;
1740
1741
114k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1742
0
            xmlErrMemory(ctxt);
1743
0
            return(-1);
1744
0
        }
1745
114k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1746
114k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1747
114k
        if (newHash == NULL) {
1748
0
            xmlErrMemory(ctxt);
1749
0
            return(-1);
1750
0
        }
1751
114k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1752
1753
1.04M
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1754
929k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1755
929k
            unsigned newIndex;
1756
1757
929k
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1758
635k
                continue;
1759
294k
            newIndex = hv & (newSize - 1);
1760
1761
304k
            while (newHash[newIndex].hashValue != 0) {
1762
9.99k
                newIndex++;
1763
9.99k
                if (newIndex == newSize)
1764
12
                    newIndex = 0;
1765
9.99k
            }
1766
1767
294k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1768
294k
        }
1769
1770
114k
        xmlFree(ctxt->nsdb->hash);
1771
114k
        ctxt->nsdb->hash = newHash;
1772
114k
        ctxt->nsdb->hashSize = newSize;
1773
1774
        /*
1775
         * Relookup
1776
         */
1777
114k
        index = hashValue & (newSize - 1);
1778
1779
116k
        while (newHash[index].hashValue != 0) {
1780
1.74k
            index++;
1781
1.74k
            if (index == newSize)
1782
23
                index = 0;
1783
1.74k
        }
1784
1785
114k
        bucket = &newHash[index];
1786
114k
    }
1787
1788
632k
    bucket->hashValue = hashValue;
1789
632k
    bucket->index = ctxt->nsNr;
1790
632k
    ctxt->nsdb->hashElems++;
1791
632k
    oldIndex = INT_MAX;
1792
1793
860k
populate_entry:
1794
860k
    nsIndex = ctxt->nsNr;
1795
1796
860k
    ns = &ctxt->nsTab[nsIndex * 2];
1797
860k
    ns[0] = prefix ? prefix->name : NULL;
1798
860k
    ns[1] = uri->name;
1799
1800
860k
    extra = &ctxt->nsdb->extra[nsIndex];
1801
860k
    extra->saxData = saxData;
1802
860k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1803
860k
    extra->uriHashValue = uri->hashValue;
1804
860k
    extra->elementId = ctxt->nsdb->elementId;
1805
860k
    extra->oldIndex = oldIndex;
1806
1807
860k
    ctxt->nsNr++;
1808
1809
860k
    return(1);
1810
632k
}
1811
1812
/**
1813
 * Pops the top `nr` namespaces and restores the hash table.
1814
 *
1815
 * @param ctxt  an XML parser context
1816
 * @param nr  the number to pop
1817
 * @returns the number of namespaces popped.
1818
 */
1819
static int
1820
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1821
226k
{
1822
226k
    int i;
1823
1824
    /* assert(nr <= ctxt->nsNr); */
1825
1826
800k
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1827
574k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1828
574k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1829
1830
574k
        if (prefix == NULL) {
1831
103k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1832
471k
        } else {
1833
471k
            xmlHashedString hprefix;
1834
471k
            xmlParserNsBucket *bucket = NULL;
1835
1836
471k
            hprefix.name = prefix;
1837
471k
            hprefix.hashValue = extra->prefixHashValue;
1838
471k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1839
            /* assert(bucket && bucket->hashValue); */
1840
471k
            bucket->index = extra->oldIndex;
1841
471k
        }
1842
574k
    }
1843
1844
226k
    ctxt->nsNr -= nr;
1845
226k
    return(nr);
1846
226k
}
1847
1848
static int
1849
488k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt) {
1850
488k
    const xmlChar **atts;
1851
488k
    unsigned *attallocs;
1852
488k
    int newSize;
1853
1854
488k
    newSize = xmlGrowCapacity(ctxt->maxatts / 5,
1855
488k
                              sizeof(atts[0]) * 5 + sizeof(attallocs[0]),
1856
488k
                              10, XML_MAX_ATTRS);
1857
488k
    if (newSize < 0) {
1858
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
1859
0
                    "Maximum number of attributes exceeded");
1860
0
        return(-1);
1861
0
    }
1862
1863
488k
    atts = xmlRealloc(ctxt->atts, newSize * sizeof(atts[0]) * 5);
1864
488k
    if (atts == NULL)
1865
0
        goto mem_error;
1866
488k
    ctxt->atts = atts;
1867
1868
488k
    attallocs = xmlRealloc(ctxt->attallocs,
1869
488k
                           newSize * sizeof(attallocs[0]));
1870
488k
    if (attallocs == NULL)
1871
0
        goto mem_error;
1872
488k
    ctxt->attallocs = attallocs;
1873
1874
488k
    ctxt->maxatts = newSize * 5;
1875
1876
488k
    return(0);
1877
1878
0
mem_error:
1879
0
    xmlErrMemory(ctxt);
1880
0
    return(-1);
1881
488k
}
1882
1883
/**
1884
 * Pushes a new parser input on top of the input stack
1885
 *
1886
 * @param ctxt  an XML parser context
1887
 * @param value  the parser input
1888
 * @returns -1 in case of error, the index in the stack otherwise
1889
 */
1890
int
1891
xmlCtxtPushInput(xmlParserCtxt *ctxt, xmlParserInput *value)
1892
151k
{
1893
151k
    char *directory = NULL;
1894
151k
    int maxDepth;
1895
1896
151k
    if ((ctxt == NULL) || (value == NULL))
1897
0
        return(-1);
1898
1899
151k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
1900
1901
151k
    if (ctxt->inputNr >= ctxt->inputMax) {
1902
0
        xmlParserInputPtr *tmp;
1903
0
        int newSize;
1904
1905
0
        newSize = xmlGrowCapacity(ctxt->inputMax, sizeof(tmp[0]),
1906
0
                                  5, maxDepth);
1907
0
        if (newSize < 0) {
1908
0
            xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
1909
0
                           "Maximum entity nesting depth exceeded");
1910
0
            return(-1);
1911
0
        }
1912
0
        tmp = xmlRealloc(ctxt->inputTab, newSize * sizeof(tmp[0]));
1913
0
        if (tmp == NULL) {
1914
0
            xmlErrMemory(ctxt);
1915
0
            return(-1);
1916
0
        }
1917
0
        ctxt->inputTab = tmp;
1918
0
        ctxt->inputMax = newSize;
1919
0
    }
1920
1921
151k
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1922
0
        directory = xmlParserGetDirectory(value->filename);
1923
0
        if (directory == NULL) {
1924
0
            xmlErrMemory(ctxt);
1925
0
            return(-1);
1926
0
        }
1927
0
    }
1928
1929
151k
    if (ctxt->input_id >= INT_MAX) {
1930
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1931
0
        return(-1);
1932
0
    }
1933
1934
151k
    ctxt->inputTab[ctxt->inputNr] = value;
1935
151k
    ctxt->input = value;
1936
1937
151k
    if (ctxt->inputNr == 0) {
1938
151k
        xmlFree(ctxt->directory);
1939
151k
        ctxt->directory = directory;
1940
151k
    }
1941
1942
    /*
1943
     * The input ID is unused internally, but there are entity
1944
     * loaders in downstream code that detect the main document
1945
     * by checking for "input_id == 1".
1946
     */
1947
151k
    value->id = ctxt->input_id++;
1948
1949
151k
    return(ctxt->inputNr++);
1950
151k
}
1951
1952
/**
1953
 * Pops the top parser input from the input stack
1954
 *
1955
 * @param ctxt  an XML parser context
1956
 * @returns the input just removed
1957
 */
1958
xmlParserInput *
1959
xmlCtxtPopInput(xmlParserCtxt *ctxt)
1960
459k
{
1961
459k
    xmlParserInputPtr ret;
1962
1963
459k
    if (ctxt == NULL)
1964
0
        return(NULL);
1965
459k
    if (ctxt->inputNr <= 0)
1966
308k
        return (NULL);
1967
151k
    ctxt->inputNr--;
1968
151k
    if (ctxt->inputNr > 0)
1969
0
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1970
151k
    else
1971
151k
        ctxt->input = NULL;
1972
151k
    ret = ctxt->inputTab[ctxt->inputNr];
1973
151k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1974
151k
    return (ret);
1975
459k
}
1976
1977
/**
1978
 * Pushes a new element node on top of the node stack
1979
 *
1980
 * @deprecated Internal function, do not use.
1981
 *
1982
 * @param ctxt  an XML parser context
1983
 * @param value  the element node
1984
 * @returns -1 in case of error, the index in the stack otherwise
1985
 */
1986
int
1987
nodePush(xmlParserCtxt *ctxt, xmlNode *value)
1988
811k
{
1989
811k
    if (ctxt == NULL)
1990
0
        return(0);
1991
1992
811k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1993
19.8k
        int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
1994
19.8k
        xmlNodePtr *tmp;
1995
19.8k
        int newSize;
1996
1997
19.8k
        newSize = xmlGrowCapacity(ctxt->nodeMax, sizeof(tmp[0]),
1998
19.8k
                                  10, maxDepth);
1999
19.8k
        if (newSize < 0) {
2000
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2001
0
                    "Excessive depth in document: %d,"
2002
0
                    " use XML_PARSE_HUGE option\n",
2003
0
                    ctxt->nodeNr);
2004
0
            return(-1);
2005
0
        }
2006
2007
19.8k
  tmp = xmlRealloc(ctxt->nodeTab, newSize * sizeof(tmp[0]));
2008
19.8k
        if (tmp == NULL) {
2009
0
            xmlErrMemory(ctxt);
2010
0
            return (-1);
2011
0
        }
2012
19.8k
        ctxt->nodeTab = tmp;
2013
19.8k
  ctxt->nodeMax = newSize;
2014
19.8k
    }
2015
2016
811k
    ctxt->nodeTab[ctxt->nodeNr] = value;
2017
811k
    ctxt->node = value;
2018
811k
    return (ctxt->nodeNr++);
2019
811k
}
2020
2021
/**
2022
 * Pops the top element node from the node stack
2023
 *
2024
 * @deprecated Internal function, do not use.
2025
 *
2026
 * @param ctxt  an XML parser context
2027
 * @returns the node just removed
2028
 */
2029
xmlNode *
2030
nodePop(xmlParserCtxt *ctxt)
2031
879k
{
2032
879k
    xmlNodePtr ret;
2033
2034
879k
    if (ctxt == NULL) return(NULL);
2035
879k
    if (ctxt->nodeNr <= 0)
2036
68.1k
        return (NULL);
2037
811k
    ctxt->nodeNr--;
2038
811k
    if (ctxt->nodeNr > 0)
2039
806k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2040
5.01k
    else
2041
5.01k
        ctxt->node = NULL;
2042
811k
    ret = ctxt->nodeTab[ctxt->nodeNr];
2043
811k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2044
811k
    return (ret);
2045
879k
}
2046
2047
/**
2048
 * Pushes a new element name/prefix/URL on top of the name stack
2049
 *
2050
 * @param ctxt  an XML parser context
2051
 * @param value  the element name
2052
 * @param prefix  the element prefix
2053
 * @param URI  the element namespace name
2054
 * @param line  the current line number for error messages
2055
 * @param nsNr  the number of namespaces pushed on the namespace table
2056
 * @returns -1 in case of error, the index in the stack otherwise
2057
 */
2058
static int
2059
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2060
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2061
17.1M
{
2062
17.1M
    xmlStartTag *tag;
2063
2064
17.1M
    if (ctxt->nameNr >= ctxt->nameMax) {
2065
432k
        const xmlChar **tmp;
2066
432k
        xmlStartTag *tmp2;
2067
432k
        int newSize;
2068
2069
432k
        newSize = xmlGrowCapacity(ctxt->nameMax,
2070
432k
                                  sizeof(tmp[0]) + sizeof(tmp2[0]),
2071
432k
                                  10, XML_MAX_ITEMS);
2072
432k
        if (newSize < 0)
2073
0
            goto mem_error;
2074
2075
432k
        tmp = xmlRealloc(ctxt->nameTab, newSize * sizeof(tmp[0]));
2076
432k
        if (tmp == NULL)
2077
0
      goto mem_error;
2078
432k
  ctxt->nameTab = tmp;
2079
2080
432k
        tmp2 = xmlRealloc(ctxt->pushTab, newSize * sizeof(tmp2[0]));
2081
432k
        if (tmp2 == NULL)
2082
0
      goto mem_error;
2083
432k
  ctxt->pushTab = tmp2;
2084
2085
432k
        ctxt->nameMax = newSize;
2086
16.6M
    } else if (ctxt->pushTab == NULL) {
2087
144k
        ctxt->pushTab = xmlMalloc(ctxt->nameMax * sizeof(ctxt->pushTab[0]));
2088
144k
        if (ctxt->pushTab == NULL)
2089
0
            goto mem_error;
2090
144k
    }
2091
17.1M
    ctxt->nameTab[ctxt->nameNr] = value;
2092
17.1M
    ctxt->name = value;
2093
17.1M
    tag = &ctxt->pushTab[ctxt->nameNr];
2094
17.1M
    tag->prefix = prefix;
2095
17.1M
    tag->URI = URI;
2096
17.1M
    tag->line = line;
2097
17.1M
    tag->nsNr = nsNr;
2098
17.1M
    return (ctxt->nameNr++);
2099
0
mem_error:
2100
0
    xmlErrMemory(ctxt);
2101
0
    return (-1);
2102
17.1M
}
2103
#ifdef LIBXML_PUSH_ENABLED
/**
 * Removes the topmost entry from the name stack and makes the name
 * below it (if any) the current one.
 *
 * @param ctxt  an XML parser context
 * @returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return (popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2127
2128
/**
2129
 * Pops the top element name from the name stack
2130
 *
2131
 * @deprecated Internal function, do not use.
2132
 *
2133
 * @param ctxt  an XML parser context
2134
 * @returns the name just removed
2135
 */
2136
static const xmlChar *
2137
namePop(xmlParserCtxtPtr ctxt)
2138
900k
{
2139
900k
    const xmlChar *ret;
2140
2141
900k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2142
0
        return (NULL);
2143
900k
    ctxt->nameNr--;
2144
900k
    if (ctxt->nameNr > 0)
2145
896k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2146
4.86k
    else
2147
4.86k
        ctxt->name = NULL;
2148
900k
    ret = ctxt->nameTab[ctxt->nameNr];
2149
900k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2150
900k
    return (ret);
2151
900k
}
2152
2153
24.1M
/*
 * Pushes `val` on the space stack and points ctxt->space at the new
 * top entry. Returns the index of the new entry, or -1 after reporting
 * a memory error when the stack cannot be grown.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int pos;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;
        int capacity;

        capacity = xmlGrowCapacity(ctxt->spaceMax, sizeof(grown[0]),
                                   10, XML_MAX_ITEMS);
        if (capacity < 0) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        grown = xmlRealloc(ctxt->spaceTab, capacity * sizeof(grown[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            return(-1);
        }

        ctxt->spaceTab = grown;
        ctxt->spaceMax = capacity;
    }

    pos = ctxt->spaceNr;
    ctxt->spaceTab[pos] = val;
    ctxt->space = &ctxt->spaceTab[pos];
    ctxt->spaceNr = pos + 1;
    return(pos);
}
2178
2179
15.0M
/*
 * Pops the top value of the space stack and returns it (0 when the
 * stack is empty). ctxt->space is moved to the entry below, or to slot
 * 0 when nothing is left; the vacated slot is overwritten with -1.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2191
2192
/*
2193
 * Macros for accessing the content. Those should be used only by the parser,
2194
 * and not exported.
2195
 *
2196
 * Dirty macros, i.e. one often needs to make assumptions about the context to
2197
 * use them
2198
 *
2199
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2200
 *           To be used with extreme caution since operations consuming
2201
 *           characters may move the input buffer to a different location !
2202
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2203
 *           This should be used internally by the parser
2204
 *           only to compare to ASCII values otherwise it would break when
2205
 *           running with UTF-8 encoding.
2206
 *   RAW     same as CUR but in the input buffer, bypass any token
2207
 *           extraction that may have been done
2208
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
2209
 *           to compare on ASCII based substring.
2210
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2211
 *           strings without newlines within the parser.
2212
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2213
 *           defined char within the parser.
2214
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2215
 *
2216
 *   NEXT    Skip to the next character, this does the proper decoding
2217
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2218
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2219
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2220
 *            the index
2221
 *   GROW, SHRINK  handling of input buffers
2222
 */
2223
2224
197M
/*
 * Input accessors. All of them expand to expressions on a variable
 * named `ctxt` that must be in scope at the point of use.
 *
 * RAW: the byte at the current input position.
 */
#define RAW (*ctxt->input->cur)
2225
307M
/* CUR: expands to exactly the same expression as RAW. */
#define CUR (*ctxt->input->cur)
2226
26.6M
/* NXT(val): the byte `val` positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
2227
464M
/* CUR_PTR: the current position pointer itself (ctxt->input->cur). */
#define CUR_PTR ctxt->input->cur
2228
83.7M
/* BASE_PTR: the `base` pointer of the current input. */
#define BASE_PTR ctxt->input->base
2229
2230
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at `s` equal
 * c1..cn, compared as unsigned char. The comparison short-circuits, so
 * no byte after the first mismatch is read.
 *
 * Every parameter is parenthesized so that expression arguments such as
 * `p + 1` are cast and indexed as a whole. `s` is still evaluated once
 * per compared byte, so it must not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2247
2248
16.0M
/*
 * SKIP(val): advance the current input position and the column counter
 * by `val`, then call xmlParserGrow() if the new position holds a 0
 * byte. `val` is evaluated twice.
 */
#define SKIP(val) do { \
    ctxt->input->cur += (val); \
    ctxt->input->col += (val); \
    if (*ctxt->input->cur == 0) \
        xmlParserGrow(ctxt); \
  } while (0)
2253
2254
/*
 * SKIPL(val): advance the current input position by `val` bytes one at
 * a time, keeping line and column in step (a '\n' starts a new line at
 * column 1). Afterwards xmlParserGrow() is called if the new position
 * holds a 0 byte.
 *
 * `val` is parenthesized so that lower-precedence expressions such as
 * a conditional are compared as a whole; it is re-evaluated on every
 * iteration.
 */
#define SKIPL(val) do { \
    int skipl; \
    for (skipl = 0; skipl < (val); skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
            ctxt->input->line++; \
            ctxt->input->col = 1; \
        } else { \
            ctxt->input->col++; \
        } \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == 0) \
        xmlParserGrow(ctxt); \
  } while (0)
2265
2266
/*
 * SHRINK: call xmlParserShrink() unless PARSER_PROGRESSIVE(ctxt) is
 * true. Expands to a bare `if` statement that already ends in ';', so
 * it is only safe as a standalone statement.
 */
#define SHRINK \
2267
2.35M
    if (!PARSER_PROGRESSIVE(ctxt)) \
2268
2.35M
  xmlParserShrink(ctxt);
2269
2270
/*
 * GROW: call xmlParserGrow() when PARSER_PROGRESSIVE(ctxt) is false and
 * fewer than INPUT_CHUNK bytes lie between the current position and the
 * end of the input. Same bare-`if` shape as SHRINK.
 */
#define GROW \
2271
172M
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
2272
172M
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2273
360k
  xmlParserGrow(ctxt);
2274
2275
83.7M
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars(ctxt); yields its result. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2276
2277
134k
/* SKIP_BLANKS_PE: shorthand for xmlSkipBlankCharsPE(ctxt). */
#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)
2278
2279
56.8M
/* NEXT: shorthand for xmlNextChar(ctxt). */
#define NEXT xmlNextChar(ctxt)
2280
2281
31.9M
/*
 * NEXT1: advance the column counter and the input position by one, then
 * call xmlParserGrow() if the new position holds a 0 byte. No newline
 * handling is done. Expands to a plain brace block, not do/while(0).
 */
#define NEXT1 {               \
2282
31.9M
  ctxt->input->col++;           \
2283
31.9M
  ctxt->input->cur++;           \
2284
31.9M
  if (*ctxt->input->cur == 0)         \
2285
31.9M
      xmlParserGrow(ctxt);           \
2286
31.9M
    }
2287
2288
306M
/*
 * NEXTL(l): step over the current character, which is `l` bytes long.
 * A '\n' at the current position starts a new line at column 1; any
 * other character advances the column by one. The input buffer is not
 * refilled here.
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) != '\n') { \
        ctxt->input->col++; \
    } else { \
        ctxt->input->line++; \
        ctxt->input->col = 1; \
    } \
    ctxt->input->cur += (l); \
  } while (0)
2294
2295
/*
 * COPY_BUF(b, i, v): append the character `v` to buffer `b` at index
 * `i` and advance `i`. Values below 0x80 are stored as a single byte;
 * anything else goes through xmlCopyCharMultiByte(), whose return value
 * is added to `i`.
 *
 * The arguments are expanded without parentheses and more than once,
 * and the expansion is an unbraced if/else without a trailing ';', so
 * pass plain variables and use it as a standalone statement.
 */
#define COPY_BUF(b, i, v)           \
2296
72.4M
    if (v < 0x80) b[i++] = v;           \
2297
72.4M
    else i += xmlCopyCharMultiByte(&b[i],v)
2298
2299
static int
2300
70.5M
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2301
70.5M
    int c = xmlCurrentChar(ctxt, len);
2302
2303
70.5M
    if (c == XML_INVALID_CHAR)
2304
237k
        c = 0xFFFD; /* replacement character */
2305
2306
70.5M
    return(c);
2307
70.5M
}
2308
2309
/**
2310
 * Skip whitespace in the input stream.
2311
 *
2312
 * @deprecated Internal function, do not use.
2313
 *
2314
 * @param ctxt  the XML parser context
2315
 * @returns the number of space chars skipped
2316
 */
2317
int
2318
83.8M
xmlSkipBlankChars(xmlParserCtxt *ctxt) {
2319
83.8M
    const xmlChar *cur;
2320
83.8M
    int res = 0;
2321
2322
83.8M
    cur = ctxt->input->cur;
2323
83.8M
    while (IS_BLANK_CH(*cur)) {
2324
26.1M
        if (*cur == '\n') {
2325
595k
            ctxt->input->line++; ctxt->input->col = 1;
2326
25.5M
        } else {
2327
25.5M
            ctxt->input->col++;
2328
25.5M
        }
2329
26.1M
        cur++;
2330
26.1M
        if (res < INT_MAX)
2331
26.1M
            res++;
2332
26.1M
        if (*cur == 0) {
2333
4.72k
            ctxt->input->cur = cur;
2334
4.72k
            xmlParserGrow(ctxt);
2335
4.72k
            cur = ctxt->input->cur;
2336
4.72k
        }
2337
26.1M
    }
2338
83.8M
    ctxt->input->cur = cur;
2339
2340
83.8M
    if (res > 4)
2341
331k
        GROW;
2342
2343
83.8M
    return(res);
2344
83.8M
}
2345
2346
static void
2347
0
xmlPopPE(xmlParserCtxtPtr ctxt) {
2348
0
    unsigned long consumed;
2349
0
    xmlEntityPtr ent;
2350
2351
0
    ent = ctxt->input->entity;
2352
2353
0
    ent->flags &= ~XML_ENT_EXPANDING;
2354
2355
0
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2356
0
        int result;
2357
2358
        /*
2359
         * Read the rest of the stream in case of errors. We want
2360
         * to account for the whole entity size.
2361
         */
2362
0
        do {
2363
0
            ctxt->input->cur = ctxt->input->end;
2364
0
            xmlParserShrink(ctxt);
2365
0
            result = xmlParserGrow(ctxt);
2366
0
        } while (result > 0);
2367
2368
0
        consumed = ctxt->input->consumed;
2369
0
        xmlSaturatedAddSizeT(&consumed,
2370
0
                             ctxt->input->end - ctxt->input->base);
2371
2372
0
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2373
2374
        /*
2375
         * Add to sizeentities when parsing an external entity
2376
         * for the first time.
2377
         */
2378
0
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2379
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2380
0
        }
2381
2382
0
        ent->flags |= XML_ENT_CHECKED;
2383
0
    }
2384
2385
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
2386
2387
0
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2388
2389
0
    GROW;
2390
0
}
2391
2392
/**
2393
 * Skip whitespace in the input stream, also handling parameter
2394
 * entities.
2395
 *
2396
 * @param ctxt  the XML parser context
2397
 * @returns the number of space chars skipped
2398
 */
2399
static int
2400
134k
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2401
134k
    int res = 0;
2402
134k
    int inParam;
2403
134k
    int expandParam;
2404
2405
134k
    inParam = PARSER_IN_PE(ctxt);
2406
134k
    expandParam = PARSER_EXTERNAL(ctxt);
2407
2408
134k
    if (!inParam && !expandParam)
2409
134k
        return(xmlSkipBlankChars(ctxt));
2410
2411
    /*
2412
     * It's Okay to use CUR/NEXT here since all the blanks are on
2413
     * the ASCII range.
2414
     */
2415
0
    while (PARSER_STOPPED(ctxt) == 0) {
2416
0
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2417
0
            NEXT;
2418
0
        } else if (CUR == '%') {
2419
0
            if ((expandParam == 0) ||
2420
0
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2421
0
                break;
2422
2423
            /*
2424
             * Expand parameter entity. We continue to consume
2425
             * whitespace at the start of the entity and possible
2426
             * even consume the whole entity and pop it. We might
2427
             * even pop multiple PEs in this loop.
2428
             */
2429
0
            xmlParsePERefInternal(ctxt, 0);
2430
2431
0
            inParam = PARSER_IN_PE(ctxt);
2432
0
            expandParam = PARSER_EXTERNAL(ctxt);
2433
0
        } else if (CUR == 0) {
2434
0
            if (inParam == 0)
2435
0
                break;
2436
2437
            /*
2438
             * Don't pop parameter entities that start a markup
2439
             * declaration to detect Well-formedness constraint:
2440
             * PE Between Declarations.
2441
             */
2442
0
            if (ctxt->input->flags & XML_INPUT_MARKUP_DECL)
2443
0
                break;
2444
2445
0
            xmlPopPE(ctxt);
2446
2447
0
            inParam = PARSER_IN_PE(ctxt);
2448
0
            expandParam = PARSER_EXTERNAL(ctxt);
2449
0
        } else {
2450
0
            break;
2451
0
        }
2452
2453
        /*
2454
         * Also increase the counter when entering or exiting a PERef.
2455
         * The spec says: "When a parameter-entity reference is recognized
2456
         * in the DTD and included, its replacement text MUST be enlarged
2457
         * by the attachment of one leading and one following space (#x20)
2458
         * character."
2459
         */
2460
0
        if (res < INT_MAX)
2461
0
            res++;
2462
0
    }
2463
2464
0
    return(res);
2465
134k
}
2466
2467
/************************************************************************
2468
 *                  *
2469
 *    Commodity functions to handle entities      *
2470
 *                  *
2471
 ************************************************************************/
2472
2473
/**
2474
 * @deprecated Internal function, don't use.
2475
 *
2476
 * @param ctxt  an XML parser context
2477
 * @returns the current xmlChar in the parser context
2478
 */
2479
xmlChar
2480
0
xmlPopInput(xmlParserCtxt *ctxt) {
2481
0
    xmlParserInputPtr input;
2482
2483
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2484
0
    input = xmlCtxtPopInput(ctxt);
2485
0
    xmlFreeInputStream(input);
2486
0
    if (*ctxt->input->cur == 0)
2487
0
        xmlParserGrow(ctxt);
2488
0
    return(CUR);
2489
0
}
2490
2491
/**
2492
 * Push an input stream onto the stack.
2493
 *
2494
 * @deprecated Internal function, don't use.
2495
 *
2496
 * @param ctxt  an XML parser context
2497
 * @param input  an XML parser input fragment (entity, XML fragment ...).
2498
 * @returns -1 in case of error or the index in the input stack
2499
 */
2500
int
2501
0
xmlPushInput(xmlParserCtxt *ctxt, xmlParserInput *input) {
2502
0
    int ret;
2503
2504
0
    if ((ctxt == NULL) || (input == NULL))
2505
0
        return(-1);
2506
2507
0
    ret = xmlCtxtPushInput(ctxt, input);
2508
0
    if (ret >= 0)
2509
0
        GROW;
2510
0
    return(ret);
2511
0
}
2512
2513
/**
2514
 * Parse a numeric character reference. Always consumes '&'.
2515
 *
2516
 * @deprecated Internal function, don't use.
2517
 *
2518
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2519
 *                      '&#x' [0-9a-fA-F]+ ';'
2520
 *
2521
 * [ WFC: Legal Character ]
2522
 * Characters referred to using character references must match the
2523
 * production for Char.
2524
 *
2525
 * @param ctxt  an XML parser context
2526
 * @returns the value parsed (as an int), 0 in case of error
2527
 */
2528
int
2529
90.8k
xmlParseCharRef(xmlParserCtxt *ctxt) {
2530
90.8k
    int val = 0;
2531
90.8k
    int count = 0;
2532
2533
    /*
2534
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2535
     */
2536
90.8k
    if ((RAW == '&') && (NXT(1) == '#') &&
2537
90.8k
        (NXT(2) == 'x')) {
2538
61.3k
  SKIP(3);
2539
61.3k
  GROW;
2540
292k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2541
231k
      if (count++ > 20) {
2542
225
    count = 0;
2543
225
    GROW;
2544
225
      }
2545
231k
      if ((RAW >= '0') && (RAW <= '9'))
2546
76.5k
          val = val * 16 + (CUR - '0');
2547
155k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2548
129k
          val = val * 16 + (CUR - 'a') + 10;
2549
26.1k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2550
26.0k
          val = val * 16 + (CUR - 'A') + 10;
2551
174
      else {
2552
174
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2553
174
    val = 0;
2554
174
    break;
2555
174
      }
2556
231k
      if (val > 0x110000)
2557
2.96k
          val = 0x110000;
2558
2559
231k
      NEXT;
2560
231k
      count++;
2561
231k
  }
2562
61.3k
  if (RAW == ';') {
2563
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2564
61.2k
      ctxt->input->col++;
2565
61.2k
      ctxt->input->cur++;
2566
61.2k
  }
2567
61.3k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2568
29.4k
  SKIP(2);
2569
29.4k
  GROW;
2570
94.5k
  while (RAW != ';') { /* loop blocked by count */
2571
65.1k
      if (count++ > 20) {
2572
208
    count = 0;
2573
208
    GROW;
2574
208
      }
2575
65.1k
      if ((RAW >= '0') && (RAW <= '9'))
2576
65.0k
          val = val * 10 + (CUR - '0');
2577
124
      else {
2578
124
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2579
124
    val = 0;
2580
124
    break;
2581
124
      }
2582
65.0k
      if (val > 0x110000)
2583
2.26k
          val = 0x110000;
2584
2585
65.0k
      NEXT;
2586
65.0k
      count++;
2587
65.0k
  }
2588
29.4k
  if (RAW == ';') {
2589
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2590
29.3k
      ctxt->input->col++;
2591
29.3k
      ctxt->input->cur++;
2592
29.3k
  }
2593
29.4k
    } else {
2594
0
        if (RAW == '&')
2595
0
            SKIP(1);
2596
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2597
0
    }
2598
2599
    /*
2600
     * [ WFC: Legal Character ]
2601
     * Characters referred to using character references must match the
2602
     * production for Char.
2603
     */
2604
90.8k
    if (val >= 0x110000) {
2605
179
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2606
179
                "xmlParseCharRef: character reference out of bounds\n",
2607
179
          val);
2608
179
        val = 0xFFFD;
2609
90.7k
    } else if (!IS_CHAR(val)) {
2610
1.27k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2611
1.27k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2612
1.27k
                    val);
2613
1.27k
    }
2614
90.8k
    return(val);
2615
90.8k
}
2616
2617
/**
2618
 * Parse Reference declarations, variant parsing from a string rather
2619
 * than an an input flow.
2620
 *
2621
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2622
 *                      '&#x' [0-9a-fA-F]+ ';'
2623
 *
2624
 * [ WFC: Legal Character ]
2625
 * Characters referred to using character references must match the
2626
 * production for Char.
2627
 *
2628
 * @param ctxt  an XML parser context
2629
 * @param str  a pointer to an index in the string
2630
 * @returns the value parsed (as an int), 0 in case of error, str will be
2631
 *         updated to the current value of the index
2632
 */
2633
static int
2634
27.3k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2635
27.3k
    const xmlChar *ptr;
2636
27.3k
    xmlChar cur;
2637
27.3k
    int val = 0;
2638
2639
27.3k
    if ((str == NULL) || (*str == NULL)) return(0);
2640
27.3k
    ptr = *str;
2641
27.3k
    cur = *ptr;
2642
27.3k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2643
9.78k
  ptr += 3;
2644
9.78k
  cur = *ptr;
2645
38.0k
  while (cur != ';') { /* Non input consuming loop */
2646
31.2k
      if ((cur >= '0') && (cur <= '9'))
2647
10.8k
          val = val * 16 + (cur - '0');
2648
20.3k
      else if ((cur >= 'a') && (cur <= 'f'))
2649
16.3k
          val = val * 16 + (cur - 'a') + 10;
2650
4.01k
      else if ((cur >= 'A') && (cur <= 'F'))
2651
1.10k
          val = val * 16 + (cur - 'A') + 10;
2652
2.90k
      else {
2653
2.90k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2654
2.90k
    val = 0;
2655
2.90k
    break;
2656
2.90k
      }
2657
28.2k
      if (val > 0x110000)
2658
2.60k
          val = 0x110000;
2659
2660
28.2k
      ptr++;
2661
28.2k
      cur = *ptr;
2662
28.2k
  }
2663
9.78k
  if (cur == ';')
2664
6.87k
      ptr++;
2665
17.6k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2666
17.6k
  ptr += 2;
2667
17.6k
  cur = *ptr;
2668
95.0k
  while (cur != ';') { /* Non input consuming loops */
2669
78.2k
      if ((cur >= '0') && (cur <= '9'))
2670
77.4k
          val = val * 10 + (cur - '0');
2671
870
      else {
2672
870
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2673
870
    val = 0;
2674
870
    break;
2675
870
      }
2676
77.4k
      if (val > 0x110000)
2677
414
          val = 0x110000;
2678
2679
77.4k
      ptr++;
2680
77.4k
      cur = *ptr;
2681
77.4k
  }
2682
17.6k
  if (cur == ';')
2683
16.7k
      ptr++;
2684
17.6k
    } else {
2685
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2686
0
  return(0);
2687
0
    }
2688
27.3k
    *str = ptr;
2689
2690
    /*
2691
     * [ WFC: Legal Character ]
2692
     * Characters referred to using character references must match the
2693
     * production for Char.
2694
     */
2695
27.3k
    if (val >= 0x110000) {
2696
99
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2697
99
                "xmlParseStringCharRef: character reference out of bounds\n",
2698
99
                val);
2699
27.2k
    } else if (IS_CHAR(val)) {
2700
22.7k
        return(val);
2701
22.7k
    } else {
2702
4.53k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2703
4.53k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2704
4.53k
        val);
2705
4.53k
    }
2706
4.63k
    return(0);
2707
27.3k
}
2708
2709
/**
2710
 *     [69] PEReference ::= '%' Name ';'
2711
 *
2712
 * @deprecated Internal function, do not use.
2713
 *
2714
 * [ WFC: No Recursion ]
2715
 * A parsed entity must not contain a recursive
2716
 * reference to itself, either directly or indirectly.
2717
 *
2718
 * [ WFC: Entity Declared ]
2719
 * In a document without any DTD, a document with only an internal DTD
2720
 * subset which contains no parameter entity references, or a document
2721
 * with "standalone='yes'", ...  ... The declaration of a parameter
2722
 * entity must precede any reference to it...
2723
 *
2724
 * [ VC: Entity Declared ]
2725
 * In a document with an external subset or external parameter entities
2726
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2727
 * must precede any reference to it...
2728
 *
2729
 * [ WFC: In DTD ]
2730
 * Parameter-entity references may only appear in the DTD.
2731
 * NOTE: misleading but this is handled.
2732
 *
2733
 * A PEReference may have been detected in the current input stream
2734
 * the handling is done accordingly to
2735
 *      http://www.w3.org/TR/REC-xml#entproc
2736
 * i.e.
2737
 *   - Included in literal in entity values
2738
 *   - Included as Parameter Entity reference within DTDs
2739
 * @param ctxt  the parser context
2740
 */
2741
void
2742
0
xmlParserHandlePEReference(xmlParserCtxt *ctxt) {
2743
0
    /* Thin wrapper: all the work is done by xmlParsePERefInternal(),
     * called with 0 as its second argument. */
    xmlParsePERefInternal(ctxt, 0);
2744
0
}
2745
2746
/**
2747
 * @deprecated Internal function, don't use.
2748
 *
2749
 * @param ctxt  the parser context
2750
 * @param str  the input string
2751
 * @param len  the string length
2752
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2753
 * @param end  an end marker xmlChar, 0 if none
2754
 * @param end2  an end marker xmlChar, 0 if none
2755
 * @param end3  an end marker xmlChar, 0 if none
2756
 * @returns A newly allocated string with the substitution done. The caller
2757
 *      must deallocate it !
2758
 */
2759
xmlChar *
2760
xmlStringLenDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str, int len,
2761
                           int what ATTRIBUTE_UNUSED,
2762
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2763
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2764
0
        return(NULL);
2765
2766
0
    if ((str[len] != 0) ||
2767
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2768
0
        return(NULL);
2769
2770
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2771
0
}
2772
2773
/**
2774
 * @deprecated Internal function, don't use.
2775
 *
2776
 * @param ctxt  the parser context
2777
 * @param str  the input string
2778
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2779
 * @param end  an end marker xmlChar, 0 if none
2780
 * @param end2  an end marker xmlChar, 0 if none
2781
 * @param end3  an end marker xmlChar, 0 if none
2782
 * @returns A newly allocated string with the substitution done. The caller
2783
 *      must deallocate it !
2784
 */
2785
xmlChar *
2786
xmlStringDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str,
2787
                        int what ATTRIBUTE_UNUSED,
2788
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2789
0
    if ((ctxt == NULL) || (str == NULL))
2790
0
        return(NULL);
2791
2792
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2793
0
        return(NULL);
2794
2795
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2796
0
}
2797
2798
/************************************************************************
2799
 *                  *
2800
 *    Commodity functions, cleanup needed ?     *
2801
 *                  *
2802
 ************************************************************************/
2803
2804
/**
2805
 * Is this a sequence of blank chars that one can ignore ?
2806
 *
2807
 * @param ctxt  an XML parser context
2808
 * @param str  a xmlChar *
2809
 * @param len  the size of `str`
2810
 * @param blank_chars  we know the chars are blanks
2811
 * @returns 1 if ignorable 0 otherwise.
2812
 */
2813
2814
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2815
7.58M
                     int blank_chars) {
2816
7.58M
    int i;
2817
7.58M
    xmlNodePtr lastChild;
2818
2819
    /*
2820
     * Check for xml:space value.
2821
     */
2822
7.58M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2823
7.48M
        (*(ctxt->space) == -2))
2824
3.10M
  return(0);
2825
2826
    /*
2827
     * Check that the string is made of blanks
2828
     */
2829
4.48M
    if (blank_chars == 0) {
2830
4.70M
  for (i = 0;i < len;i++)
2831
4.60M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2832
3.89M
    }
2833
2834
    /*
2835
     * Look if the element is mixed content in the DTD if available
2836
     */
2837
691k
    if (ctxt->node == NULL) return(0);
2838
18.4E
    if (ctxt->myDoc != NULL) {
2839
0
        xmlElementPtr elemDecl = NULL;
2840
0
        xmlDocPtr doc = ctxt->myDoc;
2841
0
        const xmlChar *prefix = NULL;
2842
2843
0
        if (ctxt->node->ns)
2844
0
            prefix = ctxt->node->ns->prefix;
2845
0
        if (doc->intSubset != NULL)
2846
0
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2847
0
                                      prefix);
2848
0
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2849
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2850
0
                                      prefix);
2851
0
        if (elemDecl != NULL) {
2852
0
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2853
0
                return(1);
2854
0
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2855
0
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2856
0
                return(0);
2857
0
        }
2858
0
    }
2859
2860
    /*
2861
     * Otherwise, heuristic :-\
2862
     *
2863
     * When push parsing, we could be at the end of a chunk.
2864
     * This makes the look-ahead and consequently the NOBLANKS
2865
     * option unreliable.
2866
     */
2867
18.4E
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2868
18.4E
    if ((ctxt->node->children == NULL) &&
2869
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2870
2871
18.4E
    lastChild = xmlGetLastChild(ctxt->node);
2872
18.4E
    if (lastChild == NULL) {
2873
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2874
0
            (ctxt->node->content != NULL)) return(0);
2875
18.4E
    } else if (xmlNodeIsText(lastChild))
2876
0
        return(0);
2877
18.4E
    else if ((ctxt->node->children != NULL) &&
2878
0
             (xmlNodeIsText(ctxt->node->children)))
2879
0
        return(0);
2880
18.4E
    return(1);
2881
18.4E
}
2882
2883
/************************************************************************
2884
 *                  *
2885
 *    Extra stuff for namespace support     *
2886
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2887
 *                  *
2888
 ************************************************************************/
2889
2890
/**
2891
 * Parse an UTF8 encoded XML qualified name string
2892
 *
2893
 * @deprecated Don't use.
2894
 *
2895
 * @param ctxt  an XML parser context
2896
 * @param name  an XML parser context
2897
 * @param prefixOut  a xmlChar **
2898
 * @returns the local part, and prefix is updated
2899
 *   to get the Prefix if any.
2900
 */
2901
2902
xmlChar *
2903
0
xmlSplitQName(xmlParserCtxt *ctxt, const xmlChar *name, xmlChar **prefixOut) {
2904
0
    xmlChar *ret;
2905
0
    const xmlChar *localname;
2906
2907
0
    localname = xmlSplitQName4(name, prefixOut);
2908
0
    if (localname == NULL) {
2909
0
        xmlCtxtErrMemory(ctxt);
2910
0
        return(NULL);
2911
0
    }
2912
2913
0
    ret = xmlStrdup(localname);
2914
0
    if (ret == NULL) {
2915
0
        xmlCtxtErrMemory(ctxt);
2916
0
        xmlFree(*prefixOut);
2917
0
    }
2918
2919
0
    return(ret);
2920
0
}
2921
2922
/************************************************************************
2923
 *                  *
2924
 *      The parser itself       *
2925
 *  Relates to http://www.w3.org/TR/REC-xml       *
2926
 *                  *
2927
 ************************************************************************/
2928
2929
/************************************************************************
2930
 *                  *
2931
 *  Routines to parse Name, NCName and NmToken      *
2932
 *                  *
2933
 ************************************************************************/
2934
2935
/*
2936
 * The two following functions are related to the change of accepted
2937
 * characters for Name and NmToken in the Revision 5 of XML-1.0
2938
 * They correspond to the modified production [4] and the new production [4a]
2939
 * changes in that revision. Also note that the macros used for the
2940
 * productions Letter, Digit, CombiningChar and Extender are not needed
2941
 * anymore.
2942
 * We still keep compatibility to pre-revision5 parsing semantic if the
2943
 * new XML_PARSE_OLD10 option is given to the parser.
2944
 */
2945
2946
/*
 * Test whether `c` may start a Name according to productions [4], [4a]
 * and [5] of the Fifth Edition of XML 1.0.
 *
 * The code point space is cut into consecutive bands so that every
 * value is classified by the first band it falls into. Values below
 * 0x80, including negative ones, are handled by the ASCII band.
 *
 * Returns 1 if `c` is a NameStartChar, 0 otherwise.
 */
static int
xmlIsNameStartCharNew(int c) {
    if (c < 0x80) {
        if ((c >= 'a') && (c <= 'z'))
            return(1);
        if ((c >= 'A') && (c <= 'Z'))
            return(1);
        return((c == '_') || (c == ':'));
    }

    /* 0x80 - 0x2FF: [C0-D6], [D8-F6], [F8-2FF] */
    if (c <= 0x2FF)
        return((c >= 0xC0) && (c != 0xD7) && (c != 0xF7));

    /* 0x300 - 0x1FFF: [370-37D], [37F-1FFF] */
    if (c <= 0x1FFF)
        return((c >= 0x370) && (c != 0x37E));

    /* 0x2000 - 0x2FEF: [200C-200D], [2070-218F], [2C00-2FEF] */
    if (c <= 0x2FEF)
        return(((c >= 0x200C) && (c <= 0x200D)) ||
               ((c >= 0x2070) && (c <= 0x218F)) ||
               (c >= 0x2C00));

    /* 0x2FF0 - 0xD7FF: [3001-D7FF] */
    if (c <= 0xD7FF)
        return(c >= 0x3001);

    /* 0xD800 - 0xFFFD: [F900-FDCF], [FDF0-FFFD] */
    if (c <= 0xFFFD)
        return(((c >= 0xF900) && (c <= 0xFDCF)) || (c >= 0xFDF0));

    /* everything above: [10000-EFFFF] */
    return((c >= 0x10000) && (c <= 0xEFFFF));
}
2971
2972
/*
 * Test whether `c` may appear inside a Name according to productions
 * [4], [4a] and [5] of the Fifth Edition of XML 1.0.
 *
 * Same banded layout as the NameStartChar test, extended with the
 * characters that are only allowed after the first position:
 * digits, '-', '.', 0xB7, [300-36F] and [203F-2040].
 *
 * Returns 1 if `c` is a NameChar, 0 otherwise.
 */
static int
xmlIsNameCharNew(int c) {
    if (c < 0x80) {
        if ((c >= 'a') && (c <= 'z'))
            return(1);
        if ((c >= 'A') && (c <= 'Z'))
            return(1);
        if ((c >= '0') && (c <= '9'))
            return(1);
        return((c == '_') || (c == ':') || (c == '-') || (c == '.'));
    }

    /* 0x80 - 0x2FF: B7, [C0-D6], [D8-F6], [F8-2FF] */
    if (c <= 0x2FF)
        return((c == 0xB7) ||
               ((c >= 0xC0) && (c != 0xD7) && (c != 0xF7)));

    /* 0x300 - 0x1FFF: [300-36F], [370-37D], [37F-1FFF] */
    if (c <= 0x1FFF)
        return(c != 0x37E);

    /* 0x2000 - 0x2FEF: [200C-200D], [203F-2040], [2070-218F], [2C00-2FEF] */
    if (c <= 0x2FEF)
        return(((c >= 0x200C) && (c <= 0x200D)) ||
               ((c >= 0x203F) && (c <= 0x2040)) ||
               ((c >= 0x2070) && (c <= 0x218F)) ||
               (c >= 0x2C00));

    /* 0x2FF0 - 0xD7FF: [3001-D7FF] */
    if (c <= 0xD7FF)
        return(c >= 0x3001);

    /* 0xD800 - 0xFFFD: [F900-FDCF], [FDF0-FFFD] */
    if (c <= 0xFFFD)
        return(((c >= 0xF900) && (c <= 0xFDCF)) || (c >= 0xFDF0));

    /* everything above: [10000-EFFFF] */
    return((c >= 0x10000) && (c <= 0xEFFFF));
}
3001
3002
/*
 * Pre-revision-5 test for the first character of a Name:
 * a Letter, '_' or ':'.
 *
 * Returns 1 if `c` is accepted, 0 otherwise.
 */
static int
xmlIsNameStartCharOld(int c) {
    /* accelerators: the most common terminators are rejected up front */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if (IS_LETTER(c))
        return(1);

    return((c == '_') || (c == ':'));
}
3009
3010
/*
 * Pre-revision-5 test for a character inside a Name: a Letter, a Digit,
 * one of '.', '-', '_', ':', a CombiningChar or an Extender.
 *
 * Returns 1 if `c` is accepted, 0 otherwise.
 */
static int
xmlIsNameCharOld(int c) {
    /* accelerators: the most common terminators are rejected up front */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if (IS_LETTER(c))
        return(1);
    if (IS_DIGIT(c))
        return(1);
    if ((c == '.') || (c == '-') || (c == '_') || (c == ':'))
        return(1);
    if (IS_COMBINING(c))
        return(1);
    if (IS_EXTENDER(c))
        return(1);

    return(0);
}
3021
3022
/*
 * Dispatch the NameStartChar test to the pre-revision-5 rules when
 * `old10` is non-zero, and to the current rules otherwise.
 */
static int
xmlIsNameStartChar(int c, int old10) {
    return(old10 ? xmlIsNameStartCharOld(c) : xmlIsNameStartCharNew(c));
}
3029
3030
/*
 * Dispatch the NameChar test to the pre-revision-5 rules when
 * `old10` is non-zero, and to the current rules otherwise.
 */
static int
xmlIsNameChar(int c, int old10) {
    return(old10 ? xmlIsNameCharOld(c) : xmlIsNameCharNew(c));
}
3037
3038
/*
3039
 * Scan an XML Name, NCName or Nmtoken.
3040
 *
3041
 * Returns a pointer to the end of the name on success. If the
3042
 * name is invalid, returns `ptr`. If the name is longer than
3043
 * `maxSize` bytes, returns NULL.
3044
 *
3045
 * @param ptr  pointer to the start of the name
3046
 * @param maxSize  maximum size in bytes
3047
 * @param flags  XML_SCAN_* flags
3048
 * @returns a pointer to the end of the name or NULL
3049
 */
3050
const xmlChar *
3051
36.8k
xmlScanName(const xmlChar *ptr, size_t maxSize, int flags) {
3052
36.8k
    int stop = flags & XML_SCAN_NC ? ':' : 0;
3053
36.8k
    int old10 = flags & XML_SCAN_OLD10 ? 1 : 0;
3054
3055
427k
    while (1) {
3056
427k
        int c, len;
3057
3058
427k
        c = *ptr;
3059
427k
        if (c < 0x80) {
3060
197k
            if (c == stop)
3061
1.29k
                break;
3062
196k
            len = 1;
3063
230k
        } else {
3064
230k
            len = 4;
3065
230k
            c = xmlGetUTF8Char(ptr, &len);
3066
230k
            if (c < 0)
3067
806
                break;
3068
230k
        }
3069
3070
425k
        if (flags & XML_SCAN_NMTOKEN ?
3071
389k
                !xmlIsNameChar(c, old10) :
3072
425k
                !xmlIsNameStartChar(c, old10))
3073
34.7k
            break;
3074
3075
390k
        if ((size_t) len > maxSize)
3076
0
            return(NULL);
3077
390k
        ptr += len;
3078
390k
        maxSize -= len;
3079
390k
        flags |= XML_SCAN_NMTOKEN;
3080
390k
    }
3081
3082
36.8k
    return(ptr);
3083
36.8k
}
3084
3085
static const xmlChar *
3086
164k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3087
164k
    const xmlChar *ret;
3088
164k
    int len = 0, l;
3089
164k
    int c;
3090
164k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3091
163k
                    XML_MAX_TEXT_LENGTH :
3092
164k
                    XML_MAX_NAME_LENGTH;
3093
164k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3094
3095
    /*
3096
     * Handler for more complex cases
3097
     */
3098
164k
    c = xmlCurrentChar(ctxt, &l);
3099
164k
    if (!xmlIsNameStartChar(c, old10))
3100
95.8k
        return(NULL);
3101
68.7k
    len += l;
3102
68.7k
    NEXTL(l);
3103
68.7k
    c = xmlCurrentChar(ctxt, &l);
3104
573k
    while (xmlIsNameChar(c, old10)) {
3105
504k
        if (len <= INT_MAX - l)
3106
504k
            len += l;
3107
504k
        NEXTL(l);
3108
504k
        c = xmlCurrentChar(ctxt, &l);
3109
504k
    }
3110
68.7k
    if (len > maxLength) {
3111
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3112
0
        return(NULL);
3113
0
    }
3114
68.7k
    if (ctxt->input->cur - ctxt->input->base < len) {
3115
        /*
3116
         * There were a couple of bugs where PERefs lead to to a change
3117
         * of the buffer. Check the buffer size to avoid passing an invalid
3118
         * pointer to xmlDictLookup.
3119
         */
3120
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3121
0
                    "unexpected change of input buffer");
3122
0
        return (NULL);
3123
0
    }
3124
68.7k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3125
401
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3126
68.3k
    else
3127
68.3k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3128
68.7k
    if (ret == NULL)
3129
0
        xmlErrMemory(ctxt);
3130
68.7k
    return(ret);
3131
68.7k
}
3132
3133
/**
3134
 * Parse an XML name.
3135
 *
3136
 * @deprecated Internal function, don't use.
3137
 *
3138
 *     [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3139
 *                      CombiningChar | Extender
3140
 *
3141
 *     [5] Name ::= (Letter | '_' | ':') (NameChar)*
3142
 *
3143
 *     [6] Names ::= Name (#x20 Name)*
3144
 *
3145
 * @param ctxt  an XML parser context
3146
 * @returns the Name parsed or NULL
3147
 */
3148
3149
const xmlChar *
3150
647k
xmlParseName(xmlParserCtxt *ctxt) {
3151
647k
    const xmlChar *in;
3152
647k
    const xmlChar *ret;
3153
647k
    size_t count = 0;
3154
647k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3155
642k
                       XML_MAX_TEXT_LENGTH :
3156
647k
                       XML_MAX_NAME_LENGTH;
3157
3158
647k
    GROW;
3159
3160
    /*
3161
     * Accelerator for simple ASCII names
3162
     */
3163
647k
    in = ctxt->input->cur;
3164
647k
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3165
171k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3166
512k
  (*in == '_') || (*in == ':')) {
3167
512k
  in++;
3168
2.46M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3169
926k
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3170
805k
         ((*in >= 0x30) && (*in <= 0x39)) ||
3171
584k
         (*in == '_') || (*in == '-') ||
3172
538k
         (*in == ':') || (*in == '.'))
3173
1.95M
      in++;
3174
512k
  if ((*in > 0) && (*in < 0x80)) {
3175
482k
      count = in - ctxt->input->cur;
3176
482k
            if (count > maxLength) {
3177
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3178
0
                return(NULL);
3179
0
            }
3180
482k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3181
482k
      ctxt->input->cur = in;
3182
482k
      ctxt->input->col += count;
3183
482k
      if (ret == NULL)
3184
0
          xmlErrMemory(ctxt);
3185
482k
      return(ret);
3186
482k
  }
3187
512k
    }
3188
    /* accelerator for special cases */
3189
164k
    return(xmlParseNameComplex(ctxt));
3190
647k
}
3191
3192
static xmlHashedString
3193
860k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3194
860k
    xmlHashedString ret;
3195
860k
    int len = 0, l;
3196
860k
    int c;
3197
860k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3198
816k
                    XML_MAX_TEXT_LENGTH :
3199
860k
                    XML_MAX_NAME_LENGTH;
3200
860k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3201
860k
    size_t startPosition = 0;
3202
3203
860k
    ret.name = NULL;
3204
860k
    ret.hashValue = 0;
3205
3206
    /*
3207
     * Handler for more complex cases
3208
     */
3209
860k
    startPosition = CUR_PTR - BASE_PTR;
3210
860k
    c = xmlCurrentChar(ctxt, &l);
3211
860k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3212
829k
  (!xmlIsNameStartChar(c, old10) || (c == ':'))) {
3213
191k
  return(ret);
3214
191k
    }
3215
3216
17.0M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3217
16.4M
     (xmlIsNameChar(c, old10) && (c != ':'))) {
3218
16.3M
        if (len <= INT_MAX - l)
3219
16.3M
      len += l;
3220
16.3M
  NEXTL(l);
3221
16.3M
  c = xmlCurrentChar(ctxt, &l);
3222
16.3M
    }
3223
668k
    if (len > maxLength) {
3224
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3225
0
        return(ret);
3226
0
    }
3227
668k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3228
668k
    if (ret.name == NULL)
3229
0
        xmlErrMemory(ctxt);
3230
668k
    return(ret);
3231
668k
}
3232
3233
/**
3234
 * Parse an XML name.
3235
 *
3236
 *     [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3237
 *                          CombiningChar | Extender
3238
 *
3239
 *     [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3240
 *
3241
 * @param ctxt  an XML parser context
3242
 * @returns the Name parsed or NULL
3243
 */
3244
3245
static xmlHashedString
3246
60.9M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3247
60.9M
    const xmlChar *in, *e;
3248
60.9M
    xmlHashedString ret;
3249
60.9M
    size_t count = 0;
3250
60.9M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3251
58.0M
                       XML_MAX_TEXT_LENGTH :
3252
60.9M
                       XML_MAX_NAME_LENGTH;
3253
3254
60.9M
    ret.name = NULL;
3255
3256
    /*
3257
     * Accelerator for simple ASCII names
3258
     */
3259
60.9M
    in = ctxt->input->cur;
3260
60.9M
    e = ctxt->input->end;
3261
60.9M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3262
3.34M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3263
60.7M
   (*in == '_')) && (in < e)) {
3264
60.7M
  in++;
3265
311M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3266
102M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3267
72.6M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3268
65.9M
          (*in == '_') || (*in == '-') ||
3269
250M
          (*in == '.')) && (in < e))
3270
250M
      in++;
3271
60.7M
  if (in >= e)
3272
5.67k
      goto complex;
3273
60.7M
  if ((*in > 0) && (*in < 0x80)) {
3274
60.1M
      count = in - ctxt->input->cur;
3275
60.1M
            if (count > maxLength) {
3276
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3277
0
                return(ret);
3278
0
            }
3279
60.1M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3280
60.1M
      ctxt->input->cur = in;
3281
60.1M
      ctxt->input->col += count;
3282
60.1M
      if (ret.name == NULL) {
3283
0
          xmlErrMemory(ctxt);
3284
0
      }
3285
60.1M
      return(ret);
3286
60.1M
  }
3287
60.7M
    }
3288
860k
complex:
3289
860k
    return(xmlParseNCNameComplex(ctxt));
3290
60.9M
}
3291
3292
/**
3293
 * Parse an XML name and compares for match
3294
 * (specialized for endtag parsing)
3295
 *
3296
 * @param ctxt  an XML parser context
3297
 * @param other  the name to compare with
3298
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
3299
 * and the name for mismatch
3300
 */
3301
3302
static const xmlChar *
3303
2.62M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3304
2.62M
    register const xmlChar *cmp = other;
3305
2.62M
    register const xmlChar *in;
3306
2.62M
    const xmlChar *ret;
3307
3308
2.62M
    GROW;
3309
3310
2.62M
    in = ctxt->input->cur;
3311
8.51M
    while (*in != 0 && *in == *cmp) {
3312
5.88M
  ++in;
3313
5.88M
  ++cmp;
3314
5.88M
    }
3315
2.62M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3316
  /* success */
3317
2.62M
  ctxt->input->col += in - ctxt->input->cur;
3318
2.62M
  ctxt->input->cur = in;
3319
2.62M
  return (const xmlChar*) 1;
3320
2.62M
    }
3321
    /* failure (or end of input buffer), check with full function */
3322
4.51k
    ret = xmlParseName (ctxt);
3323
    /* strings coming from the dictionary direct compare possible */
3324
4.51k
    if (ret == other) {
3325
19
  return (const xmlChar*) 1;
3326
19
    }
3327
4.49k
    return ret;
3328
4.51k
}
3329
3330
/**
3331
 * Parse an XML name.
3332
 *
3333
 * @param ctxt  an XML parser context
3334
 * @param str  a pointer to the string pointer (IN/OUT)
3335
 * @returns the Name parsed or NULL. The `str` pointer
3336
 * is updated to the current location in the string.
3337
 */
3338
3339
static xmlChar *
3340
27.9k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3341
27.9k
    xmlChar *ret;
3342
27.9k
    const xmlChar *cur = *str;
3343
27.9k
    int flags = 0;
3344
27.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3345
27.9k
                    XML_MAX_TEXT_LENGTH :
3346
27.9k
                    XML_MAX_NAME_LENGTH;
3347
3348
27.9k
    if (ctxt->options & XML_PARSE_OLD10)
3349
0
        flags |= XML_SCAN_OLD10;
3350
3351
27.9k
    cur = xmlScanName(*str, maxLength, flags);
3352
27.9k
    if (cur == NULL) {
3353
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3354
0
        return(NULL);
3355
0
    }
3356
27.9k
    if (cur == *str)
3357
3.43k
        return(NULL);
3358
3359
24.5k
    ret = xmlStrndup(*str, cur - *str);
3360
24.5k
    if (ret == NULL)
3361
0
        xmlErrMemory(ctxt);
3362
24.5k
    *str = cur;
3363
24.5k
    return(ret);
3364
27.9k
}
3365
3366
/**
3367
 * Parse an XML Nmtoken.
3368
 *
3369
 * @deprecated Internal function, don't use.
3370
 *
3371
 *     [7] Nmtoken ::= (NameChar)+
3372
 *
3373
 *     [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3374
 *
3375
 * @param ctxt  an XML parser context
3376
 * @returns the Nmtoken parsed or NULL
3377
 */
3378
3379
xmlChar *
3380
252k
xmlParseNmtoken(xmlParserCtxt *ctxt) {
3381
252k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3382
252k
    xmlChar *ret;
3383
252k
    int len = 0, l;
3384
252k
    int c;
3385
252k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3386
246k
                    XML_MAX_TEXT_LENGTH :
3387
252k
                    XML_MAX_NAME_LENGTH;
3388
252k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3389
3390
252k
    c = xmlCurrentChar(ctxt, &l);
3391
3392
2.36M
    while (xmlIsNameChar(c, old10)) {
3393
2.11M
  COPY_BUF(buf, len, c);
3394
2.11M
  NEXTL(l);
3395
2.11M
  c = xmlCurrentChar(ctxt, &l);
3396
2.11M
  if (len >= XML_MAX_NAMELEN) {
3397
      /*
3398
       * Okay someone managed to make a huge token, so he's ready to pay
3399
       * for the processing speed.
3400
       */
3401
1.83k
      xmlChar *buffer;
3402
1.83k
      int max = len * 2;
3403
3404
1.83k
      buffer = xmlMalloc(max);
3405
1.83k
      if (buffer == NULL) {
3406
0
          xmlErrMemory(ctxt);
3407
0
    return(NULL);
3408
0
      }
3409
1.83k
      memcpy(buffer, buf, len);
3410
345k
      while (xmlIsNameChar(c, old10)) {
3411
344k
    if (len + 10 > max) {
3412
835
        xmlChar *tmp;
3413
835
                    int newSize;
3414
3415
835
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3416
835
                    if (newSize < 0) {
3417
0
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3418
0
                        xmlFree(buffer);
3419
0
                        return(NULL);
3420
0
                    }
3421
835
        tmp = xmlRealloc(buffer, newSize);
3422
835
        if (tmp == NULL) {
3423
0
      xmlErrMemory(ctxt);
3424
0
      xmlFree(buffer);
3425
0
      return(NULL);
3426
0
        }
3427
835
        buffer = tmp;
3428
835
                    max = newSize;
3429
835
    }
3430
344k
    COPY_BUF(buffer, len, c);
3431
344k
    NEXTL(l);
3432
344k
    c = xmlCurrentChar(ctxt, &l);
3433
344k
      }
3434
1.83k
      buffer[len] = 0;
3435
1.83k
      return(buffer);
3436
1.83k
  }
3437
2.11M
    }
3438
251k
    if (len == 0)
3439
28.0k
        return(NULL);
3440
223k
    if (len > maxLength) {
3441
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3442
0
        return(NULL);
3443
0
    }
3444
223k
    ret = xmlStrndup(buf, len);
3445
223k
    if (ret == NULL)
3446
0
        xmlErrMemory(ctxt);
3447
223k
    return(ret);
3448
223k
}
3449
3450
/**
3451
 * Validate an entity value and expand parameter entities.
3452
 *
3453
 * @param ctxt  parser context
3454
 * @param buf  string buffer
3455
 * @param str  entity value
3456
 * @param length  size of entity value
3457
 * @param depth  nesting depth
3458
 */
3459
static void
3460
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3461
18.7k
                          const xmlChar *str, int length, int depth) {
3462
18.7k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3463
18.7k
    const xmlChar *end, *chunk;
3464
18.7k
    int c, l;
3465
3466
18.7k
    if (str == NULL)
3467
0
        return;
3468
3469
18.7k
    depth += 1;
3470
18.7k
    if (depth > maxDepth) {
3471
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3472
0
                       "Maximum entity nesting depth exceeded");
3473
0
  return;
3474
0
    }
3475
3476
18.7k
    end = str + length;
3477
18.7k
    chunk = str;
3478
3479
1.49M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3480
1.49M
        c = *str;
3481
3482
1.49M
        if (c >= 0x80) {
3483
314k
            l = xmlUTF8MultibyteLen(ctxt, str,
3484
314k
                    "invalid character in entity value\n");
3485
314k
            if (l == 0) {
3486
133k
                if (chunk < str)
3487
13.6k
                    xmlSBufAddString(buf, chunk, str - chunk);
3488
133k
                xmlSBufAddReplChar(buf);
3489
133k
                str += 1;
3490
133k
                chunk = str;
3491
181k
            } else {
3492
181k
                str += l;
3493
181k
            }
3494
1.17M
        } else if (c == '&') {
3495
53.7k
            if (str[1] == '#') {
3496
27.3k
                if (chunk < str)
3497
15.6k
                    xmlSBufAddString(buf, chunk, str - chunk);
3498
3499
27.3k
                c = xmlParseStringCharRef(ctxt, &str);
3500
27.3k
                if (c == 0)
3501
4.63k
                    return;
3502
3503
22.7k
                xmlSBufAddChar(buf, c);
3504
3505
22.7k
                chunk = str;
3506
26.3k
            } else {
3507
26.3k
                xmlChar *name;
3508
3509
                /*
3510
                 * General entity references are checked for
3511
                 * syntactic validity.
3512
                 */
3513
26.3k
                str++;
3514
26.3k
                name = xmlParseStringName(ctxt, &str);
3515
3516
26.3k
                if ((name == NULL) || (*str++ != ';')) {
3517
7.89k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3518
7.89k
                            "EntityValue: '&' forbidden except for entities "
3519
7.89k
                            "references\n");
3520
7.89k
                    xmlFree(name);
3521
7.89k
                    return;
3522
7.89k
                }
3523
3524
18.4k
                xmlFree(name);
3525
18.4k
            }
3526
1.12M
        } else if (c == '%') {
3527
1.64k
            xmlEntityPtr ent;
3528
3529
1.64k
            if (chunk < str)
3530
1.51k
                xmlSBufAddString(buf, chunk, str - chunk);
3531
3532
1.64k
            ent = xmlParseStringPEReference(ctxt, &str);
3533
1.64k
            if (ent == NULL)
3534
1.64k
                return;
3535
3536
0
            if (!PARSER_EXTERNAL(ctxt)) {
3537
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3538
0
                return;
3539
0
            }
3540
3541
0
            if (ent->content == NULL) {
3542
                /*
3543
                 * Note: external parsed entities will not be loaded,
3544
                 * it is not required for a non-validating parser to
3545
                 * complete external PEReferences coming from the
3546
                 * internal subset
3547
                 */
3548
0
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3549
0
                    ((ctxt->replaceEntities) ||
3550
0
                     (ctxt->validate))) {
3551
0
                    xmlLoadEntityContent(ctxt, ent);
3552
0
                } else {
3553
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3554
0
                                  "not validating will not read content for "
3555
0
                                  "PE entity %s\n", ent->name, NULL);
3556
0
                }
3557
0
            }
3558
3559
            /*
3560
             * TODO: Skip if ent->content is still NULL.
3561
             */
3562
3563
0
            if (xmlParserEntityCheck(ctxt, ent->length))
3564
0
                return;
3565
3566
0
            if (ent->flags & XML_ENT_EXPANDING) {
3567
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3568
0
                return;
3569
0
            }
3570
3571
0
            ent->flags |= XML_ENT_EXPANDING;
3572
0
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3573
0
                                      depth);
3574
0
            ent->flags &= ~XML_ENT_EXPANDING;
3575
3576
0
            chunk = str;
3577
1.11M
        } else {
3578
            /* Normal ASCII char */
3579
1.11M
            if (!IS_BYTE_CHAR(c)) {
3580
119k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3581
119k
                        "invalid character in entity value\n");
3582
119k
                if (chunk < str)
3583
4.71k
                    xmlSBufAddString(buf, chunk, str - chunk);
3584
119k
                xmlSBufAddReplChar(buf);
3585
119k
                str += 1;
3586
119k
                chunk = str;
3587
1.00M
            } else {
3588
1.00M
                str += 1;
3589
1.00M
            }
3590
1.11M
        }
3591
1.49M
    }
3592
3593
4.57k
    if (chunk < str)
3594
3.18k
        xmlSBufAddString(buf, chunk, str - chunk);
3595
4.57k
}
3596
3597
/**
3598
 * Parse a value for ENTITY declarations
3599
 *
3600
 * @deprecated Internal function, don't use.
3601
 *
3602
 *     [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3603
 *                         "'" ([^%&'] | PEReference | Reference)* "'"
3604
 *
3605
 * @param ctxt  an XML parser context
3606
 * @param orig  if non-NULL store a copy of the original entity value
3607
 * @returns the EntityValue parsed with reference substituted or NULL
3608
 */
3609
xmlChar *
3610
18.8k
xmlParseEntityValue(xmlParserCtxt *ctxt, xmlChar **orig) {
3611
18.8k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3612
18.8k
                         XML_MAX_HUGE_LENGTH :
3613
18.8k
                         XML_MAX_TEXT_LENGTH;
3614
18.8k
    xmlSBuf buf;
3615
18.8k
    const xmlChar *start;
3616
18.8k
    int quote, length;
3617
3618
18.8k
    xmlSBufInit(&buf, maxLength);
3619
3620
18.8k
    GROW;
3621
3622
18.8k
    quote = CUR;
3623
18.8k
    if ((quote != '"') && (quote != '\'')) {
3624
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3625
0
  return(NULL);
3626
0
    }
3627
18.8k
    CUR_PTR++;
3628
3629
18.8k
    length = 0;
3630
3631
    /*
3632
     * Copy raw content of the entity into a buffer
3633
     */
3634
4.21M
    while (1) {
3635
4.21M
        int c;
3636
3637
4.21M
        if (PARSER_STOPPED(ctxt))
3638
0
            goto error;
3639
3640
4.21M
        if (CUR_PTR >= ctxt->input->end) {
3641
121
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3642
121
            goto error;
3643
121
        }
3644
3645
4.21M
        c = CUR;
3646
3647
4.21M
        if (c == 0) {
3648
28
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3649
28
                    "invalid character in entity value\n");
3650
28
            goto error;
3651
28
        }
3652
4.21M
        if (c == quote)
3653
18.7k
            break;
3654
4.20M
        NEXTL(1);
3655
4.20M
        length += 1;
3656
3657
        /*
3658
         * TODO: Check growth threshold
3659
         */
3660
4.20M
        if (ctxt->input->end - CUR_PTR < 10)
3661
1.35k
            GROW;
3662
4.20M
    }
3663
3664
18.7k
    start = CUR_PTR - length;
3665
3666
18.7k
    if (orig != NULL) {
3667
18.7k
        *orig = xmlStrndup(start, length);
3668
18.7k
        if (*orig == NULL)
3669
0
            xmlErrMemory(ctxt);
3670
18.7k
    }
3671
3672
18.7k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3673
3674
18.7k
    NEXTL(1);
3675
3676
18.7k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3677
3678
149
error:
3679
149
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3680
149
    return(NULL);
3681
18.8k
}
3682
3683
/**
3684
 * Check an entity reference in an attribute value for validity
3685
 * without expanding it.
3686
 *
3687
 * @param ctxt  parser context
3688
 * @param pent  entity
3689
 * @param depth  nesting depth
3690
 */
3691
static void
3692
0
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3693
0
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3694
0
    const xmlChar *str;
3695
0
    unsigned long expandedSize = pent->length;
3696
0
    int c, flags;
3697
3698
0
    depth += 1;
3699
0
    if (depth > maxDepth) {
3700
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3701
0
                       "Maximum entity nesting depth exceeded");
3702
0
  return;
3703
0
    }
3704
3705
0
    if (pent->flags & XML_ENT_EXPANDING) {
3706
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3707
0
        return;
3708
0
    }
3709
3710
    /*
3711
     * If we're parsing a default attribute value in DTD content,
3712
     * the entity might reference other entities which weren't
3713
     * defined yet, so the check isn't reliable.
3714
     */
3715
0
    if (ctxt->inSubset == 0)
3716
0
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3717
0
    else
3718
0
        flags = XML_ENT_VALIDATED;
3719
3720
0
    str = pent->content;
3721
0
    if (str == NULL)
3722
0
        goto done;
3723
3724
    /*
3725
     * Note that entity values are already validated. We only check
3726
     * for illegal less-than signs and compute the expanded size
3727
     * of the entity. No special handling for multi-byte characters
3728
     * is needed.
3729
     */
3730
0
    while (!PARSER_STOPPED(ctxt)) {
3731
0
        c = *str;
3732
3733
0
  if (c != '&') {
3734
0
            if (c == 0)
3735
0
                break;
3736
3737
0
            if (c == '<')
3738
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3739
0
                        "'<' in entity '%s' is not allowed in attributes "
3740
0
                        "values\n", pent->name);
3741
3742
0
            str += 1;
3743
0
        } else if (str[1] == '#') {
3744
0
            int val;
3745
3746
0
      val = xmlParseStringCharRef(ctxt, &str);
3747
0
      if (val == 0) {
3748
0
                pent->content[0] = 0;
3749
0
                break;
3750
0
            }
3751
0
  } else {
3752
0
            xmlChar *name;
3753
0
            xmlEntityPtr ent;
3754
3755
0
      name = xmlParseStringEntityRef(ctxt, &str);
3756
0
      if (name == NULL) {
3757
0
                pent->content[0] = 0;
3758
0
                break;
3759
0
            }
3760
3761
0
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3762
0
            xmlFree(name);
3763
3764
0
            if ((ent != NULL) &&
3765
0
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
3766
0
                if ((ent->flags & flags) != flags) {
3767
0
                    pent->flags |= XML_ENT_EXPANDING;
3768
0
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
3769
0
                    pent->flags &= ~XML_ENT_EXPANDING;
3770
0
                }
3771
3772
0
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
3773
0
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
3774
0
            }
3775
0
        }
3776
0
    }
3777
3778
0
done:
3779
0
    if (ctxt->inSubset == 0)
3780
0
        pent->expandedSize = expandedSize;
3781
3782
0
    pent->flags |= flags;
3783
0
}
3784
3785
/**
3786
 * Expand general entity references in an entity or attribute value.
3787
 * Perform attribute value normalization.
3788
 *
3789
 * @param ctxt  parser context
3790
 * @param buf  string buffer
3791
 * @param str  entity or attribute value
3792
 * @param pent  entity for entity value, NULL for attribute values
3793
 * @param normalize  whether to collapse whitespace
3794
 * @param inSpace  whitespace state
3795
 * @param depth  nesting depth
3796
 * @param check  whether to check for amplification
3797
 * @returns  whether there was a normalization change
3798
 */
3799
static int
3800
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3801
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
3802
0
                          int *inSpace, int depth, int check) {
3803
0
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3804
0
    int c, chunkSize;
3805
0
    int normChange = 0;
3806
3807
0
    if (str == NULL)
3808
0
        return(0);
3809
3810
0
    depth += 1;
3811
0
    if (depth > maxDepth) {
3812
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3813
0
                       "Maximum entity nesting depth exceeded");
3814
0
  return(0);
3815
0
    }
3816
3817
0
    if (pent != NULL) {
3818
0
        if (pent->flags & XML_ENT_EXPANDING) {
3819
0
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3820
0
            return(0);
3821
0
        }
3822
3823
0
        if (check) {
3824
0
            if (xmlParserEntityCheck(ctxt, pent->length))
3825
0
                return(0);
3826
0
        }
3827
0
    }
3828
3829
0
    chunkSize = 0;
3830
3831
    /*
3832
     * Note that entity values are already validated. No special
3833
     * handling for multi-byte characters is needed.
3834
     */
3835
0
    while (!PARSER_STOPPED(ctxt)) {
3836
0
        c = *str;
3837
3838
0
  if (c != '&') {
3839
0
            if (c == 0)
3840
0
                break;
3841
3842
            /*
3843
             * If this function is called without an entity, it is used to
3844
             * expand entities in an attribute content where less-than was
3845
             * already unscaped and is allowed.
3846
             */
3847
0
            if ((pent != NULL) && (c == '<')) {
3848
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3849
0
                        "'<' in entity '%s' is not allowed in attributes "
3850
0
                        "values\n", pent->name);
3851
0
                break;
3852
0
            }
3853
3854
0
            if (c <= 0x20) {
3855
0
                if ((normalize) && (*inSpace)) {
3856
                    /* Skip char */
3857
0
                    if (chunkSize > 0) {
3858
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3859
0
                        chunkSize = 0;
3860
0
                    }
3861
0
                    normChange = 1;
3862
0
                } else if (c < 0x20) {
3863
0
                    if (chunkSize > 0) {
3864
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3865
0
                        chunkSize = 0;
3866
0
                    }
3867
3868
0
                    xmlSBufAddCString(buf, " ", 1);
3869
0
                } else {
3870
0
                    chunkSize += 1;
3871
0
                }
3872
3873
0
                *inSpace = 1;
3874
0
            } else {
3875
0
                chunkSize += 1;
3876
0
                *inSpace = 0;
3877
0
            }
3878
3879
0
            str += 1;
3880
0
        } else if (str[1] == '#') {
3881
0
            int val;
3882
3883
0
            if (chunkSize > 0) {
3884
0
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3885
0
                chunkSize = 0;
3886
0
            }
3887
3888
0
      val = xmlParseStringCharRef(ctxt, &str);
3889
0
      if (val == 0) {
3890
0
                if (pent != NULL)
3891
0
                    pent->content[0] = 0;
3892
0
                break;
3893
0
            }
3894
3895
0
            if (val == ' ') {
3896
0
                if ((normalize) && (*inSpace))
3897
0
                    normChange = 1;
3898
0
                else
3899
0
                    xmlSBufAddCString(buf, " ", 1);
3900
0
                *inSpace = 1;
3901
0
            } else {
3902
0
                xmlSBufAddChar(buf, val);
3903
0
                *inSpace = 0;
3904
0
            }
3905
0
  } else {
3906
0
            xmlChar *name;
3907
0
            xmlEntityPtr ent;
3908
3909
0
            if (chunkSize > 0) {
3910
0
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3911
0
                chunkSize = 0;
3912
0
            }
3913
3914
0
      name = xmlParseStringEntityRef(ctxt, &str);
3915
0
            if (name == NULL) {
3916
0
                if (pent != NULL)
3917
0
                    pent->content[0] = 0;
3918
0
                break;
3919
0
            }
3920
3921
0
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3922
0
            xmlFree(name);
3923
3924
0
      if ((ent != NULL) &&
3925
0
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3926
0
    if (ent->content == NULL) {
3927
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
3928
0
          "predefined entity has no content\n");
3929
0
                    break;
3930
0
                }
3931
3932
0
                xmlSBufAddString(buf, ent->content, ent->length);
3933
3934
0
                *inSpace = 0;
3935
0
      } else if ((ent != NULL) && (ent->content != NULL)) {
3936
0
                if (pent != NULL)
3937
0
                    pent->flags |= XML_ENT_EXPANDING;
3938
0
    normChange |= xmlExpandEntityInAttValue(ctxt, buf,
3939
0
                        ent->content, ent, normalize, inSpace, depth, check);
3940
0
                if (pent != NULL)
3941
0
                    pent->flags &= ~XML_ENT_EXPANDING;
3942
0
      }
3943
0
        }
3944
0
    }
3945
3946
0
    if (chunkSize > 0)
3947
0
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3948
3949
0
    return(normChange);
3950
0
}
3951
3952
/**
3953
 * Expand general entity references in an entity or attribute value.
3954
 * Perform attribute value normalization.
3955
 *
3956
 * @param ctxt  parser context
3957
 * @param str  entity or attribute value
3958
 * @param normalize  whether to collapse whitespace
3959
 * @returns the expanded attribtue value.
3960
 */
3961
xmlChar *
3962
xmlExpandEntitiesInAttValue(xmlParserCtxt *ctxt, const xmlChar *str,
3963
0
                            int normalize) {
3964
0
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3965
0
                         XML_MAX_HUGE_LENGTH :
3966
0
                         XML_MAX_TEXT_LENGTH;
3967
0
    xmlSBuf buf;
3968
0
    int inSpace = 1;
3969
3970
0
    xmlSBufInit(&buf, maxLength);
3971
3972
0
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
3973
0
                              ctxt->inputNr, /* check */ 0);
3974
3975
0
    if ((normalize) && (inSpace) && (buf.size > 0))
3976
0
        buf.size--;
3977
3978
0
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
3979
0
}
3980
3981
/**
3982
 * Parse a value for an attribute.
3983
 *
3984
 * NOTE: if no normalization is needed, the routine will return pointers
3985
 * directly from the data buffer.
3986
 *
3987
 * 3.3.3 Attribute-Value Normalization:
3988
 *
3989
 * Before the value of an attribute is passed to the application or
3990
 * checked for validity, the XML processor must normalize it as follows:
3991
 *
3992
 * - a character reference is processed by appending the referenced
3993
 *   character to the attribute value
3994
 * - an entity reference is processed by recursively processing the
3995
 *   replacement text of the entity
3996
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
3997
 *   appending \#x20 to the normalized value, except that only a single
3998
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
3999
 *   parsed entity or the literal entity value of an internal parsed entity
4000
 * - other characters are processed by appending them to the normalized value
4001
 *
4002
 * If the declared value is not CDATA, then the XML processor must further
4003
 * process the normalized attribute value by discarding any leading and
4004
 * trailing space (\#x20) characters, and by replacing sequences of space
4005
 * (\#x20) characters by a single space (\#x20) character.
4006
 * All attributes for which no declaration has been read should be treated
4007
 * by a non-validating parser as if declared CDATA.
4008
 *
4009
 * @param ctxt  an XML parser context
4010
 * @param attlen  attribute len result
4011
 * @param outFlags  resulting XML_ATTVAL_* flags
4012
 * @param special  value from attsSpecial
4013
 * @param isNamespace  whether this is a namespace declaration
4014
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4015
 *     caller if it was copied, this can be detected by val[*len] == 0.
4016
 */
4017
static xmlChar *
4018
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *outFlags,
4019
19.7M
                         int special, int isNamespace) {
4020
19.7M
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4021
18.7M
                         XML_MAX_HUGE_LENGTH :
4022
19.7M
                         XML_MAX_TEXT_LENGTH;
4023
19.7M
    xmlSBuf buf;
4024
19.7M
    xmlChar *ret;
4025
19.7M
    int c, l, quote, entFlags, chunkSize;
4026
19.7M
    int inSpace = 1;
4027
19.7M
    int replaceEntities;
4028
19.7M
    int normalize = (special & XML_SPECIAL_TYPE_MASK) > XML_ATTRIBUTE_CDATA;
4029
19.7M
    int attvalFlags = 0;
4030
4031
    /* Always expand namespace URIs */
4032
19.7M
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4033
4034
19.7M
    xmlSBufInit(&buf, maxLength);
4035
4036
19.7M
    GROW;
4037
4038
19.7M
    quote = CUR;
4039
19.7M
    if ((quote != '"') && (quote != '\'')) {
4040
2.77k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4041
2.77k
  return(NULL);
4042
2.77k
    }
4043
19.7M
    NEXTL(1);
4044
4045
19.7M
    if (ctxt->inSubset == 0)
4046
19.7M
        entFlags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4047
8.48k
    else
4048
8.48k
        entFlags = XML_ENT_VALIDATED;
4049
4050
19.7M
    inSpace = 1;
4051
19.7M
    chunkSize = 0;
4052
4053
193M
    while (1) {
4054
193M
        if (PARSER_STOPPED(ctxt))
4055
0
            goto error;
4056
4057
193M
        if (CUR_PTR >= ctxt->input->end) {
4058
2.44k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4059
2.44k
                           "AttValue: ' expected\n");
4060
2.44k
            goto error;
4061
2.44k
        }
4062
4063
        /*
4064
         * TODO: Check growth threshold
4065
         */
4066
193M
        if (ctxt->input->end - CUR_PTR < 10)
4067
27.7k
            GROW;
4068
4069
193M
        c = CUR;
4070
4071
193M
        if (c >= 0x80) {
4072
3.86M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4073
3.86M
                    "invalid character in attribute value\n");
4074
3.86M
            if (l == 0) {
4075
980k
                if (chunkSize > 0) {
4076
131k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4077
131k
                    chunkSize = 0;
4078
131k
                }
4079
980k
                xmlSBufAddReplChar(&buf);
4080
980k
                NEXTL(1);
4081
2.88M
            } else {
4082
2.88M
                chunkSize += l;
4083
2.88M
                NEXTL(l);
4084
2.88M
            }
4085
4086
3.86M
            inSpace = 0;
4087
189M
        } else if (c != '&') {
4088
189M
            if (c > 0x20) {
4089
186M
                if (c == quote)
4090
19.7M
                    break;
4091
4092
166M
                if (c == '<')
4093
91.4k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4094
4095
166M
                chunkSize += 1;
4096
166M
                inSpace = 0;
4097
166M
            } else if (!IS_BYTE_CHAR(c)) {
4098
993k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4099
993k
                        "invalid character in attribute value\n");
4100
993k
                if (chunkSize > 0) {
4101
35.4k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4102
35.4k
                    chunkSize = 0;
4103
35.4k
                }
4104
993k
                xmlSBufAddReplChar(&buf);
4105
993k
                inSpace = 0;
4106
1.84M
            } else {
4107
                /* Whitespace */
4108
1.84M
                if ((normalize) && (inSpace)) {
4109
                    /* Skip char */
4110
41.9k
                    if (chunkSize > 0) {
4111
1.13k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4112
1.13k
                        chunkSize = 0;
4113
1.13k
                    }
4114
41.9k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4115
1.80M
                } else if (c < 0x20) {
4116
                    /* Convert to space */
4117
427k
                    if (chunkSize > 0) {
4118
104k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4119
104k
                        chunkSize = 0;
4120
104k
                    }
4121
4122
427k
                    xmlSBufAddCString(&buf, " ", 1);
4123
1.37M
                } else {
4124
1.37M
                    chunkSize += 1;
4125
1.37M
                }
4126
4127
1.84M
                inSpace = 1;
4128
4129
1.84M
                if ((c == 0xD) && (NXT(1) == 0xA))
4130
21.1k
                    CUR_PTR++;
4131
1.84M
            }
4132
4133
169M
            NEXTL(1);
4134
169M
        } else if (NXT(1) == '#') {
4135
40.6k
            int val;
4136
4137
40.6k
            if (chunkSize > 0) {
4138
18.8k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4139
18.8k
                chunkSize = 0;
4140
18.8k
            }
4141
4142
40.6k
            val = xmlParseCharRef(ctxt);
4143
40.6k
            if (val == 0)
4144
305
                goto error;
4145
4146
40.3k
            if ((val == '&') && (!replaceEntities)) {
4147
                /*
4148
                 * The reparsing will be done in xmlNodeParseContent()
4149
                 * called from SAX2.c
4150
                 */
4151
0
                xmlSBufAddCString(&buf, "&#38;", 5);
4152
0
                inSpace = 0;
4153
40.3k
            } else if (val == ' ') {
4154
23.4k
                if ((normalize) && (inSpace))
4155
1.75k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4156
21.6k
                else
4157
21.6k
                    xmlSBufAddCString(&buf, " ", 1);
4158
23.4k
                inSpace = 1;
4159
23.4k
            } else {
4160
16.8k
                xmlSBufAddChar(&buf, val);
4161
16.8k
                inSpace = 0;
4162
16.8k
            }
4163
268k
        } else {
4164
268k
            const xmlChar *name;
4165
268k
            xmlEntityPtr ent;
4166
4167
268k
            if (chunkSize > 0) {
4168
142k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4169
142k
                chunkSize = 0;
4170
142k
            }
4171
4172
268k
            name = xmlParseEntityRefInternal(ctxt);
4173
268k
            if (name == NULL) {
4174
                /*
4175
                 * Probably a literal '&' which wasn't escaped.
4176
                 * TODO: Handle gracefully in recovery mode.
4177
                 */
4178
169k
                continue;
4179
169k
            }
4180
4181
99.2k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4182
99.2k
            if (ent == NULL)
4183
8.81k
                continue;
4184
4185
90.4k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4186
90.3k
                if ((ent->content[0] == '&') && (!replaceEntities))
4187
0
                    xmlSBufAddCString(&buf, "&#38;", 5);
4188
90.3k
                else
4189
90.3k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4190
90.3k
                inSpace = 0;
4191
90.3k
            } else if (replaceEntities) {
4192
0
                if (xmlExpandEntityInAttValue(ctxt, &buf,
4193
0
                        ent->content, ent, normalize, &inSpace, ctxt->inputNr,
4194
0
                        /* check */ 1) > 0)
4195
0
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4196
116
            } else {
4197
116
                if ((ent->flags & entFlags) != entFlags)
4198
0
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4199
4200
116
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4201
0
                    ent->content[0] = 0;
4202
0
                    goto error;
4203
0
                }
4204
4205
                /*
4206
                 * Just output the reference
4207
                 */
4208
116
                xmlSBufAddCString(&buf, "&", 1);
4209
116
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4210
116
                xmlSBufAddCString(&buf, ";", 1);
4211
4212
116
                inSpace = 0;
4213
116
            }
4214
90.4k
  }
4215
193M
    }
4216
4217
19.7M
    if ((buf.mem == NULL) && (outFlags != NULL)) {
4218
19.6M
        ret = (xmlChar *) CUR_PTR - chunkSize;
4219
4220
19.6M
        if (attlen != NULL)
4221
19.6M
            *attlen = chunkSize;
4222
19.6M
        if ((normalize) && (inSpace) && (chunkSize > 0)) {
4223
10
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4224
10
            *attlen -= 1;
4225
10
        }
4226
4227
        /* Report potential error */
4228
19.6M
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4229
19.6M
    } else {
4230
130k
        if (chunkSize > 0)
4231
97.6k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4232
4233
130k
        if ((normalize) && (inSpace) && (buf.size > 0)) {
4234
523
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4235
523
            buf.size--;
4236
523
        }
4237
4238
130k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4239
130k
        attvalFlags |= XML_ATTVAL_ALLOC;
4240
4241
130k
        if (ret != NULL) {
4242
129k
            if (attlen != NULL)
4243
121k
                *attlen = buf.size;
4244
129k
        }
4245
130k
    }
4246
4247
19.7M
    if (outFlags != NULL)
4248
19.7M
        *outFlags = attvalFlags;
4249
4250
19.7M
    NEXTL(1);
4251
4252
19.7M
    return(ret);
4253
4254
2.74k
error:
4255
2.74k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4256
2.74k
    return(NULL);
4257
19.7M
}
4258
4259
/**
4260
 * Parse a value for an attribute
4261
 * Note: the parser won't do substitution of entities here, this
4262
 * will be handled later in #xmlStringGetNodeList
4263
 *
4264
 * @deprecated Internal function, don't use.
4265
 *
4266
 *     [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4267
 *                       "'" ([^<&'] | Reference)* "'"
4268
 *
4269
 * 3.3.3 Attribute-Value Normalization:
4270
 *
4271
 * Before the value of an attribute is passed to the application or
4272
 * checked for validity, the XML processor must normalize it as follows:
4273
 *
4274
 * - a character reference is processed by appending the referenced
4275
 *   character to the attribute value
4276
 * - an entity reference is processed by recursively processing the
4277
 *   replacement text of the entity
4278
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4279
 *   appending \#x20 to the normalized value, except that only a single
4280
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4281
 *   parsed entity or the literal entity value of an internal parsed entity
4282
 * - other characters are processed by appending them to the normalized value
4283
 *
4284
 * If the declared value is not CDATA, then the XML processor must further
4285
 * process the normalized attribute value by discarding any leading and
4286
 * trailing space (\#x20) characters, and by replacing sequences of space
4287
 * (\#x20) characters by a single space (\#x20) character.
4288
 * All attributes for which no declaration has been read should be treated
4289
 * by a non-validating parser as if declared CDATA.
4290
 *
4291
 * @param ctxt  an XML parser context
4292
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4293
 * caller.
4294
 */
4295
xmlChar *
4296
8.36k
xmlParseAttValue(xmlParserCtxt *ctxt) {
4297
8.36k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4298
8.36k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4299
8.36k
}
4300
4301
/**
4302
 * Parse an XML Literal
4303
 *
4304
 * @deprecated Internal function, don't use.
4305
 *
4306
 *     [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4307
 *
4308
 * @param ctxt  an XML parser context
4309
 * @returns the SystemLiteral parsed or NULL
4310
 */
4311
4312
xmlChar *
4313
3.83k
xmlParseSystemLiteral(xmlParserCtxt *ctxt) {
4314
3.83k
    xmlChar *buf = NULL;
4315
3.83k
    int len = 0;
4316
3.83k
    int size = XML_PARSER_BUFFER_SIZE;
4317
3.83k
    int cur, l;
4318
3.83k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4319
3.83k
                    XML_MAX_TEXT_LENGTH :
4320
3.83k
                    XML_MAX_NAME_LENGTH;
4321
3.83k
    xmlChar stop;
4322
4323
3.83k
    if (RAW == '"') {
4324
3.23k
        NEXT;
4325
3.23k
  stop = '"';
4326
3.23k
    } else if (RAW == '\'') {
4327
370
        NEXT;
4328
370
  stop = '\'';
4329
370
    } else {
4330
236
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4331
236
  return(NULL);
4332
236
    }
4333
4334
3.60k
    buf = xmlMalloc(size);
4335
3.60k
    if (buf == NULL) {
4336
0
        xmlErrMemory(ctxt);
4337
0
  return(NULL);
4338
0
    }
4339
3.60k
    cur = xmlCurrentCharRecover(ctxt, &l);
4340
263k
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4341
259k
  if (len + 5 >= size) {
4342
975
      xmlChar *tmp;
4343
975
            int newSize;
4344
4345
975
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4346
975
            if (newSize < 0) {
4347
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4348
0
                xmlFree(buf);
4349
0
                return(NULL);
4350
0
            }
4351
975
      tmp = xmlRealloc(buf, newSize);
4352
975
      if (tmp == NULL) {
4353
0
          xmlFree(buf);
4354
0
    xmlErrMemory(ctxt);
4355
0
    return(NULL);
4356
0
      }
4357
975
      buf = tmp;
4358
975
            size = newSize;
4359
975
  }
4360
259k
  COPY_BUF(buf, len, cur);
4361
259k
  NEXTL(l);
4362
259k
  cur = xmlCurrentCharRecover(ctxt, &l);
4363
259k
    }
4364
3.60k
    buf[len] = 0;
4365
3.60k
    if (!IS_CHAR(cur)) {
4366
48
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4367
3.55k
    } else {
4368
3.55k
  NEXT;
4369
3.55k
    }
4370
3.60k
    return(buf);
4371
3.60k
}
4372
4373
/**
4374
 * Parse an XML public literal
4375
 *
4376
 * @deprecated Internal function, don't use.
4377
 *
4378
 *     [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4379
 *
4380
 * @param ctxt  an XML parser context
4381
 * @returns the PubidLiteral parsed or NULL.
4382
 */
4383
4384
xmlChar *
4385
1.86k
xmlParsePubidLiteral(xmlParserCtxt *ctxt) {
4386
1.86k
    xmlChar *buf = NULL;
4387
1.86k
    int len = 0;
4388
1.86k
    int size = XML_PARSER_BUFFER_SIZE;
4389
1.86k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4390
1.86k
                    XML_MAX_TEXT_LENGTH :
4391
1.86k
                    XML_MAX_NAME_LENGTH;
4392
1.86k
    xmlChar cur;
4393
1.86k
    xmlChar stop;
4394
4395
1.86k
    if (RAW == '"') {
4396
991
        NEXT;
4397
991
  stop = '"';
4398
991
    } else if (RAW == '\'') {
4399
677
        NEXT;
4400
677
  stop = '\'';
4401
677
    } else {
4402
196
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4403
196
  return(NULL);
4404
196
    }
4405
1.66k
    buf = xmlMalloc(size);
4406
1.66k
    if (buf == NULL) {
4407
0
  xmlErrMemory(ctxt);
4408
0
  return(NULL);
4409
0
    }
4410
1.66k
    cur = CUR;
4411
94.9k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4412
93.2k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4413
93.2k
  if (len + 1 >= size) {
4414
50
      xmlChar *tmp;
4415
50
            int newSize;
4416
4417
50
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4418
50
            if (newSize < 0) {
4419
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4420
0
                xmlFree(buf);
4421
0
                return(NULL);
4422
0
            }
4423
50
      tmp = xmlRealloc(buf, newSize);
4424
50
      if (tmp == NULL) {
4425
0
    xmlErrMemory(ctxt);
4426
0
    xmlFree(buf);
4427
0
    return(NULL);
4428
0
      }
4429
50
      buf = tmp;
4430
50
            size = newSize;
4431
50
  }
4432
93.2k
  buf[len++] = cur;
4433
93.2k
  NEXT;
4434
93.2k
  cur = CUR;
4435
93.2k
    }
4436
1.66k
    buf[len] = 0;
4437
1.66k
    if (cur != stop) {
4438
25
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4439
1.64k
    } else {
4440
1.64k
  NEXTL(1);
4441
1.64k
    }
4442
1.66k
    return(buf);
4443
1.66k
}
4444
4445
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4446
/*
 * Lookup table used for the test in the inner loop of the char data
 * scanning. An entry is non-zero (equal to its own index) for bytes that
 * the fast loop may skip over, and zero for every byte that stops it:
 * control characters other than 0x9 (CR and LF are handled separately),
 * '&', '<', ']' and all non-ASCII bytes.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only 0x9 passes, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops the loop */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the loop */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the loop */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /*
     * 0x80 - 0xFF: non-ascii bytes. The remaining 128 entries are not
     * listed and are therefore zero-initialized.
     */
};
4484
4485
static void
4486
xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size,
4487
7.94M
              int isBlank) {
4488
7.94M
    int checkBlanks;
4489
4490
7.94M
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
4491
66.2k
        return;
4492
4493
7.87M
    checkBlanks = (!ctxt->keepBlanks) ||
4494
7.87M
                  (ctxt->sax->ignorableWhitespace != ctxt->sax->characters);
4495
4496
    /*
4497
     * Calling areBlanks with only parts of a text node
4498
     * is fundamentally broken, making the NOBLANKS option
4499
     * essentially unusable.
4500
     */
4501
7.87M
    if ((checkBlanks) &&
4502
7.58M
        (areBlanks(ctxt, buf, size, isBlank))) {
4503
0
        if ((ctxt->sax->ignorableWhitespace != NULL) &&
4504
0
            (ctxt->keepBlanks))
4505
0
            ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
4506
7.87M
    } else {
4507
7.87M
        if (ctxt->sax->characters != NULL)
4508
7.87M
            ctxt->sax->characters(ctxt->userData, buf, size);
4509
4510
        /*
4511
         * The old code used to update this value for "complex" data
4512
         * even if checkBlanks was false. This was probably a bug.
4513
         */
4514
7.87M
        if ((checkBlanks) && (*ctxt->space == -1))
4515
4.48M
            *ctxt->space = -2;
4516
7.87M
    }
4517
7.87M
}
4518
4519
/**
4520
 * Parse character data. Always makes progress if the first char isn't
4521
 * '<' or '&'.
4522
 *
4523
 * The right angle bracket (>) may be represented using the string "&gt;",
4524
 * and must, for compatibility, be escaped using "&gt;" or a character
4525
 * reference when it appears in the string "]]>" in content, when that
4526
 * string is not marking the end of a CDATA section.
4527
 *
4528
 *     [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4529
 * @param ctxt  an XML parser context
4530
 * @param partial  buffer may contain partial UTF-8 sequences
4531
 */
4532
static void
4533
7.48M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4534
7.48M
    const xmlChar *in;
4535
7.48M
    int line = ctxt->input->line;
4536
7.48M
    int col = ctxt->input->col;
4537
7.48M
    int ccol;
4538
7.48M
    int terminate = 0;
4539
4540
7.48M
    GROW;
4541
    /*
4542
     * Accelerated common case where input don't need to be
4543
     * modified before passing it to the handler.
4544
     */
4545
7.48M
    in = ctxt->input->cur;
4546
7.79M
    do {
4547
10.0M
get_more_space:
4548
23.6M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4549
10.0M
        if (*in == 0xA) {
4550
2.36M
            do {
4551
2.36M
                ctxt->input->line++; ctxt->input->col = 1;
4552
2.36M
                in++;
4553
2.36M
            } while (*in == 0xA);
4554
2.22M
            goto get_more_space;
4555
2.22M
        }
4556
7.79M
        if (*in == '<') {
4557
4.94M
            while (in > ctxt->input->cur) {
4558
2.47M
                const xmlChar *tmp = ctxt->input->cur;
4559
2.47M
                size_t nbchar = in - tmp;
4560
4561
2.47M
                if (nbchar > XML_MAX_ITEMS)
4562
0
                    nbchar = XML_MAX_ITEMS;
4563
2.47M
                ctxt->input->cur += nbchar;
4564
4565
2.47M
                xmlCharacters(ctxt, tmp, nbchar, 1);
4566
2.47M
            }
4567
2.47M
            return;
4568
2.47M
        }
4569
4570
5.98M
get_more:
4571
5.98M
        ccol = ctxt->input->col;
4572
79.4M
        while (test_char_data[*in]) {
4573
73.4M
            in++;
4574
73.4M
            ccol++;
4575
73.4M
        }
4576
5.98M
        ctxt->input->col = ccol;
4577
5.98M
        if (*in == 0xA) {
4578
609k
            do {
4579
609k
                ctxt->input->line++; ctxt->input->col = 1;
4580
609k
                in++;
4581
609k
            } while (*in == 0xA);
4582
530k
            goto get_more;
4583
530k
        }
4584
5.45M
        if (*in == ']') {
4585
138k
            size_t avail = ctxt->input->end - in;
4586
4587
138k
            if (partial && avail < 2) {
4588
1
                terminate = 1;
4589
1
                goto invoke_callback;
4590
1
            }
4591
138k
            if (in[1] == ']') {
4592
22.0k
                if (partial && avail < 3) {
4593
1
                    terminate = 1;
4594
1
                    goto invoke_callback;
4595
1
                }
4596
22.0k
                if (in[2] == '>')
4597
17
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4598
22.0k
            }
4599
4600
138k
            in++;
4601
138k
            ctxt->input->col++;
4602
138k
            goto get_more;
4603
138k
        }
4604
4605
5.31M
invoke_callback:
4606
10.1M
        while (in > ctxt->input->cur) {
4607
4.86M
            const xmlChar *tmp = ctxt->input->cur;
4608
4.86M
            size_t nbchar = in - tmp;
4609
4610
4.86M
            if (nbchar > XML_MAX_ITEMS)
4611
0
                nbchar = XML_MAX_ITEMS;
4612
4.86M
            ctxt->input->cur += nbchar;
4613
4614
4.86M
            xmlCharacters(ctxt, tmp, nbchar, 0);
4615
4616
4.86M
            line = ctxt->input->line;
4617
4.86M
            col = ctxt->input->col;
4618
4.86M
        }
4619
5.31M
        ctxt->input->cur = in;
4620
5.31M
        if (*in == 0xD) {
4621
329k
            in++;
4622
329k
            if (*in == 0xA) {
4623
321k
                ctxt->input->cur = in;
4624
321k
                in++;
4625
321k
                ctxt->input->line++; ctxt->input->col = 1;
4626
321k
                continue; /* while */
4627
321k
            }
4628
8.24k
            in--;
4629
8.24k
        }
4630
4.99M
        if (*in == '<') {
4631
4.36M
            return;
4632
4.36M
        }
4633
630k
        if (*in == '&') {
4634
176k
            return;
4635
176k
        }
4636
453k
        if (terminate) {
4637
2
            return;
4638
2
        }
4639
453k
        SHRINK;
4640
453k
        GROW;
4641
453k
        in = ctxt->input->cur;
4642
774k
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4643
684k
             (*in == 0x09) || (*in == 0x0a));
4644
470k
    ctxt->input->line = line;
4645
470k
    ctxt->input->col = col;
4646
470k
    xmlParseCharDataComplex(ctxt, partial);
4647
470k
}
4648
4649
/**
4650
 * Always makes progress if the first char isn't '<' or '&'.
4651
 *
4652
 * parse a CharData section.this is the fallback function
4653
 * of #xmlParseCharData when the parsing requires handling
4654
 * of non-ASCII characters.
4655
 *
4656
 * @param ctxt  an XML parser context
4657
 * @param partial  whether the input can end with truncated UTF-8
4658
 */
4659
static void
4660
470k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4661
470k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4662
470k
    int nbchar = 0;
4663
470k
    int cur, l;
4664
4665
470k
    cur = xmlCurrentCharRecover(ctxt, &l);
4666
44.3M
    while ((cur != '<') && /* checked */
4667
43.8M
           (cur != '&') &&
4668
43.8M
     (IS_CHAR(cur))) {
4669
43.8M
        if (cur == ']') {
4670
22.9k
            size_t avail = ctxt->input->end - ctxt->input->cur;
4671
4672
22.9k
            if (partial && avail < 2)
4673
1
                break;
4674
22.9k
            if (NXT(1) == ']') {
4675
10.9k
                if (partial && avail < 3)
4676
1
                    break;
4677
10.9k
                if (NXT(2) == '>')
4678
49
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4679
10.9k
            }
4680
22.9k
        }
4681
4682
43.8M
  COPY_BUF(buf, nbchar, cur);
4683
  /* move current position before possible calling of ctxt->sax->characters */
4684
43.8M
  NEXTL(l);
4685
43.8M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4686
143k
      buf[nbchar] = 0;
4687
4688
143k
            xmlCharacters(ctxt, buf, nbchar, 0);
4689
143k
      nbchar = 0;
4690
143k
            SHRINK;
4691
143k
  }
4692
43.8M
  cur = xmlCurrentCharRecover(ctxt, &l);
4693
43.8M
    }
4694
470k
    if (nbchar != 0) {
4695
467k
        buf[nbchar] = 0;
4696
4697
467k
        xmlCharacters(ctxt, buf, nbchar, 0);
4698
467k
    }
4699
    /*
4700
     * cur == 0 can mean
4701
     *
4702
     * - End of buffer.
4703
     * - An actual 0 character.
4704
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4705
     */
4706
470k
    if (ctxt->input->cur < ctxt->input->end) {
4707
467k
        if ((cur == 0) && (CUR != 0)) {
4708
50
            if (partial == 0) {
4709
46
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4710
46
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4711
46
                NEXTL(1);
4712
46
            }
4713
467k
        } else if ((cur != '<') && (cur != '&') && (cur != ']')) {
4714
            /* Generate the error and skip the offending character */
4715
1.33k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4716
1.33k
                              "PCDATA invalid Char value %d\n", cur);
4717
1.33k
            NEXTL(l);
4718
1.33k
        }
4719
467k
    }
4720
470k
}
4721
4722
/**
4723
 * @deprecated Internal function, don't use.
4724
 * @param ctxt  an XML parser context
4725
 * @param cdata  unused
4726
 */
4727
void
4728
0
xmlParseCharData(xmlParserCtxt *ctxt, ATTRIBUTE_UNUSED int cdata) {
4729
0
    xmlParseCharDataInternal(ctxt, 0);
4730
0
}
4731
4732
/**
4733
 * Parse an External ID or a Public ID
4734
 *
4735
 * @deprecated Internal function, don't use.
4736
 *
4737
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4738
 * `'PUBLIC' S PubidLiteral S SystemLiteral`
4739
 *
4740
 *     [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4741
 *                       | 'PUBLIC' S PubidLiteral S SystemLiteral
4742
 *
4743
 *     [83] PublicID ::= 'PUBLIC' S PubidLiteral
4744
 *
4745
 * @param ctxt  an XML parser context
4746
 * @param publicId  a xmlChar** receiving PubidLiteral
4747
 * @param strict  indicate whether we should restrict parsing to only
4748
 *          production [75], see NOTE below
4749
 * @returns the function returns SystemLiteral and in the second
4750
 *                case publicID receives PubidLiteral, is strict is off
4751
 *                it is possible to return NULL and have publicID set.
4752
 */
4753
4754
xmlChar *
4755
6.14k
xmlParseExternalID(xmlParserCtxt *ctxt, xmlChar **publicId, int strict) {
4756
6.14k
    xmlChar *URI = NULL;
4757
4758
6.14k
    *publicId = NULL;
4759
6.14k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4760
2.46k
        SKIP(6);
4761
2.46k
  if (SKIP_BLANKS == 0) {
4762
182
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4763
182
                     "Space required after 'SYSTEM'\n");
4764
182
  }
4765
2.46k
  URI = xmlParseSystemLiteral(ctxt);
4766
2.46k
  if (URI == NULL) {
4767
5
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4768
5
        }
4769
3.68k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4770
1.86k
        SKIP(6);
4771
1.86k
  if (SKIP_BLANKS == 0) {
4772
3
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4773
3
        "Space required after 'PUBLIC'\n");
4774
3
  }
4775
1.86k
  *publicId = xmlParsePubidLiteral(ctxt);
4776
1.86k
  if (*publicId == NULL) {
4777
196
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4778
196
  }
4779
1.86k
  if (strict) {
4780
      /*
4781
       * We don't handle [83] so "S SystemLiteral" is required.
4782
       */
4783
1.29k
      if (SKIP_BLANKS == 0) {
4784
236
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4785
236
      "Space required after the Public Identifier\n");
4786
236
      }
4787
1.29k
  } else {
4788
      /*
4789
       * We handle [83] so we return immediately, if
4790
       * "S SystemLiteral" is not detected. We skip blanks if no
4791
             * system literal was found, but this is harmless since we must
4792
             * be at the end of a NotationDecl.
4793
       */
4794
565
      if (SKIP_BLANKS == 0) return(NULL);
4795
82
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4796
82
  }
4797
1.37k
  URI = xmlParseSystemLiteral(ctxt);
4798
1.37k
  if (URI == NULL) {
4799
231
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4800
231
        }
4801
1.37k
    }
4802
5.65k
    return(URI);
4803
6.14k
}
4804
4805
/**
4806
 * Skip an XML (SGML) comment <!-- .... -->
4807
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4808
 *  must not occur within comments. "
4809
 * This is the slow routine in case the accelerator for ascii didn't work
4810
 *
4811
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4812
 * @param ctxt  an XML parser context
4813
 * @param buf  the already parsed part of the buffer
4814
 * @param len  number of bytes in the buffer
4815
 * @param size  allocated size of the buffer
4816
 */
4817
static void
4818
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4819
17.5k
                       size_t len, size_t size) {
4820
17.5k
    int q, ql;
4821
17.5k
    int r, rl;
4822
17.5k
    int cur, l;
4823
17.5k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4824
17.5k
                    XML_MAX_HUGE_LENGTH :
4825
17.5k
                    XML_MAX_TEXT_LENGTH;
4826
4827
17.5k
    if (buf == NULL) {
4828
6.75k
        len = 0;
4829
6.75k
  size = XML_PARSER_BUFFER_SIZE;
4830
6.75k
  buf = xmlMalloc(size);
4831
6.75k
  if (buf == NULL) {
4832
0
      xmlErrMemory(ctxt);
4833
0
      return;
4834
0
  }
4835
6.75k
    }
4836
17.5k
    q = xmlCurrentCharRecover(ctxt, &ql);
4837
17.5k
    if (q == 0)
4838
118
        goto not_terminated;
4839
17.4k
    if (!IS_CHAR(q)) {
4840
28
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4841
28
                          "xmlParseComment: invalid xmlChar value %d\n",
4842
28
                    q);
4843
28
  xmlFree (buf);
4844
28
  return;
4845
28
    }
4846
17.3k
    NEXTL(ql);
4847
17.3k
    r = xmlCurrentCharRecover(ctxt, &rl);
4848
17.3k
    if (r == 0)
4849
13
        goto not_terminated;
4850
17.3k
    if (!IS_CHAR(r)) {
4851
9
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4852
9
                          "xmlParseComment: invalid xmlChar value %d\n",
4853
9
                    r);
4854
9
  xmlFree (buf);
4855
9
  return;
4856
9
    }
4857
17.3k
    NEXTL(rl);
4858
17.3k
    cur = xmlCurrentCharRecover(ctxt, &l);
4859
17.3k
    if (cur == 0)
4860
8
        goto not_terminated;
4861
1.40M
    while (IS_CHAR(cur) && /* checked */
4862
1.40M
           ((cur != '>') ||
4863
1.38M
      (r != '-') || (q != '-'))) {
4864
1.38M
  if ((r == '-') && (q == '-')) {
4865
13.5k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4866
13.5k
  }
4867
1.38M
  if (len + 5 >= size) {
4868
8.56k
      xmlChar *tmp;
4869
8.56k
            int newSize;
4870
4871
8.56k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4872
8.56k
            if (newSize < 0) {
4873
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4874
0
                             "Comment too big found", NULL);
4875
0
                xmlFree (buf);
4876
0
                return;
4877
0
            }
4878
8.56k
      tmp = xmlRealloc(buf, newSize);
4879
8.56k
      if (tmp == NULL) {
4880
0
    xmlErrMemory(ctxt);
4881
0
    xmlFree(buf);
4882
0
    return;
4883
0
      }
4884
8.56k
      buf = tmp;
4885
8.56k
            size = newSize;
4886
8.56k
  }
4887
1.38M
  COPY_BUF(buf, len, q);
4888
4889
1.38M
  q = r;
4890
1.38M
  ql = rl;
4891
1.38M
  r = cur;
4892
1.38M
  rl = l;
4893
4894
1.38M
  NEXTL(l);
4895
1.38M
  cur = xmlCurrentCharRecover(ctxt, &l);
4896
4897
1.38M
    }
4898
17.3k
    buf[len] = 0;
4899
17.3k
    if (cur == 0) {
4900
195
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4901
195
                       "Comment not terminated \n<!--%.50s\n", buf);
4902
17.1k
    } else if (!IS_CHAR(cur)) {
4903
80
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4904
80
                          "xmlParseComment: invalid xmlChar value %d\n",
4905
80
                    cur);
4906
17.0k
    } else {
4907
17.0k
        NEXT;
4908
17.0k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4909
0
      (!ctxt->disableSAX))
4910
0
      ctxt->sax->comment(ctxt->userData, buf);
4911
17.0k
    }
4912
17.3k
    xmlFree(buf);
4913
17.3k
    return;
4914
139
not_terminated:
4915
139
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4916
139
       "Comment not terminated\n", NULL);
4917
139
    xmlFree(buf);
4918
139
}
4919
4920
/**
4921
 * Parse an XML (SGML) comment. Always consumes '<!'.
4922
 *
4923
 * @deprecated Internal function, don't use.
4924
 *
4925
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4926
 *  must not occur within comments. "
4927
 *
4928
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4929
 * @param ctxt  an XML parser context
4930
 */
4931
void
4932
49.1k
xmlParseComment(xmlParserCtxt *ctxt) {
4933
49.1k
    xmlChar *buf = NULL;
4934
49.1k
    size_t size = XML_PARSER_BUFFER_SIZE;
4935
49.1k
    size_t len = 0;
4936
49.1k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4937
49.1k
                       XML_MAX_HUGE_LENGTH :
4938
49.1k
                       XML_MAX_TEXT_LENGTH;
4939
49.1k
    const xmlChar *in;
4940
49.1k
    size_t nbchar = 0;
4941
49.1k
    int ccol;
4942
4943
    /*
4944
     * Check that there is a comment right here.
4945
     */
4946
49.1k
    if ((RAW != '<') || (NXT(1) != '!'))
4947
0
        return;
4948
49.1k
    SKIP(2);
4949
49.1k
    if ((RAW != '-') || (NXT(1) != '-'))
4950
6
        return;
4951
49.1k
    SKIP(2);
4952
49.1k
    GROW;
4953
4954
    /*
4955
     * Accelerated common case where input don't need to be
4956
     * modified before passing it to the handler.
4957
     */
4958
49.1k
    in = ctxt->input->cur;
4959
49.1k
    do {
4960
49.1k
  if (*in == 0xA) {
4961
4.85k
      do {
4962
4.85k
    ctxt->input->line++; ctxt->input->col = 1;
4963
4.85k
    in++;
4964
4.85k
      } while (*in == 0xA);
4965
2.36k
  }
4966
103k
get_more:
4967
103k
        ccol = ctxt->input->col;
4968
1.29M
  while (((*in > '-') && (*in <= 0x7F)) ||
4969
297k
         ((*in >= 0x20) && (*in < '-')) ||
4970
1.19M
         (*in == 0x09)) {
4971
1.19M
        in++;
4972
1.19M
        ccol++;
4973
1.19M
  }
4974
103k
  ctxt->input->col = ccol;
4975
103k
  if (*in == 0xA) {
4976
14.9k
      do {
4977
14.9k
    ctxt->input->line++; ctxt->input->col = 1;
4978
14.9k
    in++;
4979
14.9k
      } while (*in == 0xA);
4980
11.1k
      goto get_more;
4981
11.1k
  }
4982
92.2k
  nbchar = in - ctxt->input->cur;
4983
  /*
4984
   * save current set of data
4985
   */
4986
92.2k
  if (nbchar > 0) {
4987
76.0k
            if (nbchar > maxLength - len) {
4988
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4989
0
                                  "Comment too big found", NULL);
4990
0
                xmlFree(buf);
4991
0
                return;
4992
0
            }
4993
76.0k
            if (buf == NULL) {
4994
39.8k
                if ((*in == '-') && (in[1] == '-'))
4995
26.6k
                    size = nbchar + 1;
4996
13.1k
                else
4997
13.1k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
4998
39.8k
                buf = xmlMalloc(size);
4999
39.8k
                if (buf == NULL) {
5000
0
                    xmlErrMemory(ctxt);
5001
0
                    return;
5002
0
                }
5003
39.8k
                len = 0;
5004
39.8k
            } else if (len + nbchar + 1 >= size) {
5005
1.94k
                xmlChar *new_buf;
5006
1.94k
                size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5007
1.94k
                new_buf = xmlRealloc(buf, size);
5008
1.94k
                if (new_buf == NULL) {
5009
0
                    xmlErrMemory(ctxt);
5010
0
                    xmlFree(buf);
5011
0
                    return;
5012
0
                }
5013
1.94k
                buf = new_buf;
5014
1.94k
            }
5015
76.0k
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5016
76.0k
            len += nbchar;
5017
76.0k
            buf[len] = 0;
5018
76.0k
  }
5019
92.2k
  ctxt->input->cur = in;
5020
92.2k
  if (*in == 0xA) {
5021
0
      in++;
5022
0
      ctxt->input->line++; ctxt->input->col = 1;
5023
0
  }
5024
92.2k
  if (*in == 0xD) {
5025
5.31k
      in++;
5026
5.31k
      if (*in == 0xA) {
5027
2.75k
    ctxt->input->cur = in;
5028
2.75k
    in++;
5029
2.75k
    ctxt->input->line++; ctxt->input->col = 1;
5030
2.75k
    goto get_more;
5031
2.75k
      }
5032
2.55k
      in--;
5033
2.55k
  }
5034
89.4k
  SHRINK;
5035
89.4k
  GROW;
5036
89.4k
  in = ctxt->input->cur;
5037
89.4k
  if (*in == '-') {
5038
71.9k
      if (in[1] == '-') {
5039
51.8k
          if (in[2] == '>') {
5040
31.6k
        SKIP(3);
5041
31.6k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5042
0
            (!ctxt->disableSAX)) {
5043
0
      if (buf != NULL)
5044
0
          ctxt->sax->comment(ctxt->userData, buf);
5045
0
      else
5046
0
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5047
0
        }
5048
31.6k
        if (buf != NULL)
5049
29.0k
            xmlFree(buf);
5050
31.6k
        return;
5051
31.6k
    }
5052
20.1k
    if (buf != NULL) {
5053
17.9k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5054
17.9k
                          "Double hyphen within comment: "
5055
17.9k
                                      "<!--%.50s\n",
5056
17.9k
              buf);
5057
17.9k
    } else
5058
2.21k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5059
2.21k
                          "Double hyphen within comment\n", NULL);
5060
20.1k
    in++;
5061
20.1k
    ctxt->input->col++;
5062
20.1k
      }
5063
40.2k
      in++;
5064
40.2k
      ctxt->input->col++;
5065
40.2k
      goto get_more;
5066
71.9k
  }
5067
89.4k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5068
17.5k
    xmlParseCommentComplex(ctxt, buf, len, size);
5069
17.5k
}
5070
5071
5072
/**
5073
 * Parse the name of a PI
5074
 *
5075
 * @deprecated Internal function, don't use.
5076
 *
5077
 *     [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5078
 *
5079
 * @param ctxt  an XML parser context
5080
 * @returns the PITarget name or NULL
5081
 */
5082
5083
const xmlChar *
5084
35.0k
xmlParsePITarget(xmlParserCtxt *ctxt) {
5085
35.0k
    const xmlChar *name;
5086
5087
35.0k
    name = xmlParseName(ctxt);
5088
35.0k
    if ((name != NULL) &&
5089
33.2k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5090
15.6k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5091
12.9k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5092
10.0k
  int i;
5093
10.0k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5094
8.87k
      (name[2] == 'l') && (name[3] == 0)) {
5095
315
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5096
315
     "XML declaration allowed only at the start of the document\n");
5097
315
      return(name);
5098
9.74k
  } else if (name[3] == 0) {
5099
899
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5100
899
      return(name);
5101
899
  }
5102
26.1k
  for (i = 0;;i++) {
5103
26.1k
      if (xmlW3CPIs[i] == NULL) break;
5104
17.5k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5105
232
          return(name);
5106
17.5k
  }
5107
8.61k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5108
8.61k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5109
8.61k
          NULL, NULL);
5110
8.61k
    }
5111
33.5k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5112
2.35k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5113
2.35k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5114
2.35k
    }
5115
33.5k
    return(name);
5116
35.0k
}
5117
5118
#ifdef LIBXML_CATALOG_ENABLED
/**
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The expected content is `catalog`, `=`, then a value quoted with ' or ",
 * with optional blanks around each part and nothing but blanks after the
 * closing quote. Other content produces a warning, except for a missing
 * '=' which is ignored silently.
 *
 * @param ctxt  an XML parser context
 * @param catalog  the PI value string
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p, *start;
    xmlChar quote;

    for (p = catalog; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7))
        goto error;
    for (p += 7; IS_BLANK_CH(*p); p++)
        ;

    /*
     * NOTE(review): a missing '=' returns without the warning issued for
     * the other syntax errors - confirm whether this is intended.
     */
    if (*p != '=')
        return;
    for (p++; IS_BLANK_CH(*p); p++)
        ;

    /* The value must be quoted with ' or ". */
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);

    /* Only blanks may follow the closing quote. */
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        /*
         * Unfortunately, the catalog API doesn't report OOM errors.
         * xmlGetLastError isn't very helpful since we don't know
         * where the last error came from. We'd have to reset it
         * before this call and restore it afterwards.
         */
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5184
5185
/**
5186
 * Parse an XML Processing Instruction.
5187
 *
5188
 * @deprecated Internal function, don't use.
5189
 *
5190
 *     [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5191
 *
5192
 * The processing is transferred to SAX once parsed.
5193
 *
5194
 * @param ctxt  an XML parser context
5195
 */
5196
5197
void
5198
35.0k
xmlParsePI(xmlParserCtxt *ctxt) {
5199
35.0k
    xmlChar *buf = NULL;
5200
35.0k
    size_t len = 0;
5201
35.0k
    size_t size = XML_PARSER_BUFFER_SIZE;
5202
35.0k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5203
33.7k
                       XML_MAX_HUGE_LENGTH :
5204
35.0k
                       XML_MAX_TEXT_LENGTH;
5205
35.0k
    int cur, l;
5206
35.0k
    const xmlChar *target;
5207
5208
35.0k
    if ((RAW == '<') && (NXT(1) == '?')) {
5209
  /*
5210
   * this is a Processing Instruction.
5211
   */
5212
35.0k
  SKIP(2);
5213
5214
  /*
5215
   * Parse the target name and check for special support like
5216
   * namespace.
5217
   */
5218
35.0k
        target = xmlParsePITarget(ctxt);
5219
35.0k
  if (target != NULL) {
5220
33.2k
      if ((RAW == '?') && (NXT(1) == '>')) {
5221
6.59k
    SKIP(2);
5222
5223
    /*
5224
     * SAX: PI detected.
5225
     */
5226
6.59k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5227
4.43k
        (ctxt->sax->processingInstruction != NULL))
5228
4.43k
        ctxt->sax->processingInstruction(ctxt->userData,
5229
4.43k
                                         target, NULL);
5230
6.59k
    return;
5231
6.59k
      }
5232
26.6k
      buf = xmlMalloc(size);
5233
26.6k
      if (buf == NULL) {
5234
0
    xmlErrMemory(ctxt);
5235
0
    return;
5236
0
      }
5237
26.6k
      if (SKIP_BLANKS == 0) {
5238
3.09k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5239
3.09k
        "ParsePI: PI %s space expected\n", target);
5240
3.09k
      }
5241
26.6k
      cur = xmlCurrentCharRecover(ctxt, &l);
5242
2.93M
      while (IS_CHAR(cur) && /* checked */
5243
2.93M
       ((cur != '?') || (NXT(1) != '>'))) {
5244
2.91M
    if (len + 5 >= size) {
5245
16.5k
        xmlChar *tmp;
5246
16.5k
                    int newSize;
5247
5248
16.5k
                    newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5249
16.5k
                    if (newSize < 0) {
5250
0
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5251
0
                                          "PI %s too big found", target);
5252
0
                        xmlFree(buf);
5253
0
                        return;
5254
0
                    }
5255
16.5k
        tmp = xmlRealloc(buf, newSize);
5256
16.5k
        if (tmp == NULL) {
5257
0
      xmlErrMemory(ctxt);
5258
0
      xmlFree(buf);
5259
0
      return;
5260
0
        }
5261
16.5k
        buf = tmp;
5262
16.5k
                    size = newSize;
5263
16.5k
    }
5264
2.91M
    COPY_BUF(buf, len, cur);
5265
2.91M
    NEXTL(l);
5266
2.91M
    cur = xmlCurrentCharRecover(ctxt, &l);
5267
2.91M
      }
5268
26.6k
      buf[len] = 0;
5269
26.6k
      if (cur != '?') {
5270
605
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5271
605
          "ParsePI: PI %s never end ...\n", target);
5272
26.0k
      } else {
5273
26.0k
    SKIP(2);
5274
5275
26.0k
#ifdef LIBXML_CATALOG_ENABLED
5276
26.0k
    if ((ctxt->inSubset == 0) &&
5277
22.1k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5278
7.02k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5279
5280
7.02k
        if ((ctxt->options & XML_PARSE_CATALOG_PI) &&
5281
0
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5282
0
       (allow == XML_CATA_ALLOW_ALL)))
5283
0
      xmlParseCatalogPI(ctxt, buf);
5284
7.02k
    }
5285
26.0k
#endif
5286
5287
    /*
5288
     * SAX: PI detected.
5289
     */
5290
26.0k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5291
22.9k
        (ctxt->sax->processingInstruction != NULL))
5292
22.9k
        ctxt->sax->processingInstruction(ctxt->userData,
5293
22.9k
                                         target, buf);
5294
26.0k
      }
5295
26.6k
      xmlFree(buf);
5296
26.6k
  } else {
5297
1.80k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5298
1.80k
  }
5299
35.0k
    }
5300
35.0k
}
5301
5302
/**
5303
 * Parse a notation declaration. Always consumes '<!'.
5304
 *
5305
 * @deprecated Internal function, don't use.
5306
 *
5307
 *     [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID)
5308
 *                           S? '>'
5309
 *
5310
 * Hence there is actually 3 choices:
5311
 *
5312
 *     'PUBLIC' S PubidLiteral
5313
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5314
 *     'SYSTEM' S SystemLiteral
5315
 *
5316
 * See the NOTE on #xmlParseExternalID.
5317
 *
5318
 * @param ctxt  an XML parser context
5319
 */
5320
5321
void
5322
630
xmlParseNotationDecl(xmlParserCtxt *ctxt) {
5323
630
    const xmlChar *name;
5324
630
    xmlChar *Pubid;
5325
630
    xmlChar *Systemid;
5326
5327
630
    if ((CUR != '<') || (NXT(1) != '!'))
5328
0
        return;
5329
630
    SKIP(2);
5330
5331
630
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5332
614
#ifdef LIBXML_VALID_ENABLED
5333
614
  int oldInputNr = ctxt->inputNr;
5334
614
#endif
5335
5336
614
  SKIP(8);
5337
614
  if (SKIP_BLANKS_PE == 0) {
5338
1
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5339
1
         "Space required after '<!NOTATION'\n");
5340
1
      return;
5341
1
  }
5342
5343
613
        name = xmlParseName(ctxt);
5344
613
  if (name == NULL) {
5345
7
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5346
7
      return;
5347
7
  }
5348
606
  if (xmlStrchr(name, ':') != NULL) {
5349
1
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5350
1
         "colons are forbidden from notation names '%s'\n",
5351
1
         name, NULL, NULL);
5352
1
  }
5353
606
  if (SKIP_BLANKS_PE == 0) {
5354
4
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5355
4
         "Space required after the NOTATION name'\n");
5356
4
      return;
5357
4
  }
5358
5359
  /*
5360
   * Parse the IDs.
5361
   */
5362
602
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5363
602
  SKIP_BLANKS_PE;
5364
5365
602
  if (RAW == '>') {
5366
574
#ifdef LIBXML_VALID_ENABLED
5367
574
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5368
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5369
0
                           "Notation declaration doesn't start and stop"
5370
0
                                 " in the same entity\n",
5371
0
                                 NULL, NULL);
5372
0
      }
5373
574
#endif
5374
574
      NEXT;
5375
574
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5376
442
    (ctxt->sax->notationDecl != NULL))
5377
0
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5378
574
  } else {
5379
28
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5380
28
  }
5381
602
  if (Systemid != NULL) xmlFree(Systemid);
5382
602
  if (Pubid != NULL) xmlFree(Pubid);
5383
602
    }
5384
630
}
5385
5386
/**
5387
 * Parse an entity declaration. Always consumes '<!'.
5388
 *
5389
 * @deprecated Internal function, don't use.
5390
 *
5391
 *     [70] EntityDecl ::= GEDecl | PEDecl
5392
 *
5393
 *     [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5394
 *
5395
 *     [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5396
 *
5397
 *     [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5398
 *
5399
 *     [74] PEDef ::= EntityValue | ExternalID
5400
 *
5401
 *     [76] NDataDecl ::= S 'NDATA' S Name
5402
 *
5403
 * [ VC: Notation Declared ]
5404
 * The Name must match the declared name of a notation.
5405
 *
5406
 * @param ctxt  an XML parser context
5407
 */
5408
5409
void
5410
22.7k
xmlParseEntityDecl(xmlParserCtxt *ctxt) {
5411
22.7k
    const xmlChar *name = NULL;
5412
22.7k
    xmlChar *value = NULL;
5413
22.7k
    xmlChar *URI = NULL, *literal = NULL;
5414
22.7k
    const xmlChar *ndata = NULL;
5415
22.7k
    int isParameter = 0;
5416
22.7k
    xmlChar *orig = NULL;
5417
5418
22.7k
    if ((CUR != '<') || (NXT(1) != '!'))
5419
0
        return;
5420
22.7k
    SKIP(2);
5421
5422
    /* GROW; done in the caller */
5423
22.7k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5424
22.7k
#ifdef LIBXML_VALID_ENABLED
5425
22.7k
  int oldInputNr = ctxt->inputNr;
5426
22.7k
#endif
5427
5428
22.7k
  SKIP(6);
5429
22.7k
  if (SKIP_BLANKS_PE == 0) {
5430
2.83k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5431
2.83k
         "Space required after '<!ENTITY'\n");
5432
2.83k
  }
5433
5434
22.7k
  if (RAW == '%') {
5435
4.70k
      NEXT;
5436
4.70k
      if (SKIP_BLANKS_PE == 0) {
5437
2.53k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5438
2.53k
             "Space required after '%%'\n");
5439
2.53k
      }
5440
4.70k
      isParameter = 1;
5441
4.70k
  }
5442
5443
22.7k
        name = xmlParseName(ctxt);
5444
22.7k
  if (name == NULL) {
5445
75
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5446
75
                     "xmlParseEntityDecl: no name\n");
5447
75
            return;
5448
75
  }
5449
22.6k
  if (xmlStrchr(name, ':') != NULL) {
5450
1.06k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5451
1.06k
         "colons are forbidden from entities names '%s'\n",
5452
1.06k
         name, NULL, NULL);
5453
1.06k
  }
5454
22.6k
  if (SKIP_BLANKS_PE == 0) {
5455
10.1k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5456
10.1k
         "Space required after the entity name\n");
5457
10.1k
  }
5458
5459
  /*
5460
   * handle the various case of definitions...
5461
   */
5462
22.6k
  if (isParameter) {
5463
4.69k
      if ((RAW == '"') || (RAW == '\'')) {
5464
4.20k
          value = xmlParseEntityValue(ctxt, &orig);
5465
4.20k
    if (value) {
5466
4.17k
        if ((ctxt->sax != NULL) &&
5467
4.17k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5468
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5469
0
                        XML_INTERNAL_PARAMETER_ENTITY,
5470
0
            NULL, NULL, value);
5471
4.17k
    }
5472
4.20k
      } else {
5473
488
          URI = xmlParseExternalID(ctxt, &literal, 1);
5474
488
    if ((URI == NULL) && (literal == NULL)) {
5475
17
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5476
17
    }
5477
488
    if (URI) {
5478
456
                    if (xmlStrchr(URI, '#')) {
5479
2
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5480
454
                    } else {
5481
454
                        if ((ctxt->sax != NULL) &&
5482
454
                            (!ctxt->disableSAX) &&
5483
308
                            (ctxt->sax->entityDecl != NULL))
5484
0
                            ctxt->sax->entityDecl(ctxt->userData, name,
5485
0
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5486
0
                                        literal, URI, NULL);
5487
454
                    }
5488
456
    }
5489
488
      }
5490
17.9k
  } else {
5491
17.9k
      if ((RAW == '"') || (RAW == '\'')) {
5492
14.6k
          value = xmlParseEntityValue(ctxt, &orig);
5493
14.6k
    if ((ctxt->sax != NULL) &&
5494
14.6k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5495
0
        ctxt->sax->entityDecl(ctxt->userData, name,
5496
0
        XML_INTERNAL_GENERAL_ENTITY,
5497
0
        NULL, NULL, value);
5498
    /*
5499
     * For expat compatibility in SAX mode.
5500
     */
5501
14.6k
    if ((ctxt->myDoc == NULL) ||
5502
14.6k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5503
14.6k
        if (ctxt->myDoc == NULL) {
5504
402
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5505
402
      if (ctxt->myDoc == NULL) {
5506
0
          xmlErrMemory(ctxt);
5507
0
          goto done;
5508
0
      }
5509
402
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5510
402
        }
5511
14.6k
        if (ctxt->myDoc->intSubset == NULL) {
5512
402
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5513
402
              BAD_CAST "fake", NULL, NULL);
5514
402
                        if (ctxt->myDoc->intSubset == NULL) {
5515
0
                            xmlErrMemory(ctxt);
5516
0
                            goto done;
5517
0
                        }
5518
402
                    }
5519
5520
14.6k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5521
14.6k
                    NULL, NULL, value);
5522
14.6k
    }
5523
14.6k
      } else {
5524
3.25k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5525
3.25k
    if ((URI == NULL) && (literal == NULL)) {
5526
257
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5527
257
    }
5528
3.25k
    if (URI) {
5529
2.98k
                    if (xmlStrchr(URI, '#')) {
5530
319
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5531
319
                    }
5532
2.98k
    }
5533
3.25k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5534
789
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5535
789
           "Space required before 'NDATA'\n");
5536
789
    }
5537
3.25k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5538
1.58k
        SKIP(5);
5539
1.58k
        if (SKIP_BLANKS_PE == 0) {
5540
67
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5541
67
               "Space required after 'NDATA'\n");
5542
67
        }
5543
1.58k
        ndata = xmlParseName(ctxt);
5544
1.58k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5545
529
            (ctxt->sax->unparsedEntityDecl != NULL))
5546
0
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5547
0
            literal, URI, ndata);
5548
1.66k
    } else {
5549
1.66k
        if ((ctxt->sax != NULL) &&
5550
1.66k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5551
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5552
0
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5553
0
            literal, URI, NULL);
5554
        /*
5555
         * For expat compatibility in SAX mode.
5556
         * assuming the entity replacement was asked for
5557
         */
5558
1.66k
        if ((ctxt->replaceEntities != 0) &&
5559
1.66k
      ((ctxt->myDoc == NULL) ||
5560
1.66k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5561
1.66k
      if (ctxt->myDoc == NULL) {
5562
57
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5563
57
          if (ctxt->myDoc == NULL) {
5564
0
              xmlErrMemory(ctxt);
5565
0
        goto done;
5566
0
          }
5567
57
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5568
57
      }
5569
5570
1.66k
      if (ctxt->myDoc->intSubset == NULL) {
5571
57
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5572
57
            BAD_CAST "fake", NULL, NULL);
5573
57
                            if (ctxt->myDoc->intSubset == NULL) {
5574
0
                                xmlErrMemory(ctxt);
5575
0
                                goto done;
5576
0
                            }
5577
57
                        }
5578
1.66k
      xmlSAX2EntityDecl(ctxt, name,
5579
1.66k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5580
1.66k
                  literal, URI, NULL);
5581
1.66k
        }
5582
1.66k
    }
5583
3.25k
      }
5584
17.9k
  }
5585
22.6k
  SKIP_BLANKS_PE;
5586
22.6k
  if (RAW != '>') {
5587
394
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5588
394
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5589
22.2k
  } else {
5590
22.2k
#ifdef LIBXML_VALID_ENABLED
5591
22.2k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5592
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5593
0
                           "Entity declaration doesn't start and stop in"
5594
0
                                 " the same entity\n",
5595
0
                                 NULL, NULL);
5596
0
      }
5597
22.2k
#endif
5598
22.2k
      NEXT;
5599
22.2k
  }
5600
22.6k
  if (orig != NULL) {
5601
      /*
5602
       * Ugly mechanism to save the raw entity value.
5603
       */
5604
18.7k
      xmlEntityPtr cur = NULL;
5605
5606
18.7k
      if (isParameter) {
5607
4.17k
          if ((ctxt->sax != NULL) &&
5608
4.17k
        (ctxt->sax->getParameterEntity != NULL))
5609
0
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5610
14.5k
      } else {
5611
14.5k
          if ((ctxt->sax != NULL) &&
5612
14.5k
        (ctxt->sax->getEntity != NULL))
5613
14.5k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5614
14.5k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5615
0
        cur = xmlSAX2GetEntity(ctxt, name);
5616
0
    }
5617
14.5k
      }
5618
18.7k
            if ((cur != NULL) && (cur->orig == NULL)) {
5619
0
    cur->orig = orig;
5620
0
                orig = NULL;
5621
0
      }
5622
18.7k
  }
5623
5624
22.6k
done:
5625
22.6k
  if (value != NULL) xmlFree(value);
5626
22.6k
  if (URI != NULL) xmlFree(URI);
5627
22.6k
  if (literal != NULL) xmlFree(literal);
5628
22.6k
        if (orig != NULL) xmlFree(orig);
5629
22.6k
    }
5630
22.7k
}
5631
5632
/**
5633
 * Parse an attribute default declaration
5634
 *
5635
 * @deprecated Internal function, don't use.
5636
 *
5637
 *     [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5638
 *
5639
 * [ VC: Required Attribute ]
5640
 * if the default declaration is the keyword \#REQUIRED, then the
5641
 * attribute must be specified for all elements of the type in the
5642
 * attribute-list declaration.
5643
 *
5644
 * [ VC: Attribute Default Legal ]
5645
 * The declared default value must meet the lexical constraints of
5646
 * the declared attribute type c.f. #xmlValidateAttributeDecl
5647
 *
5648
 * [ VC: Fixed Attribute Default ]
5649
 * if an attribute has a default value declared with the \#FIXED
5650
 * keyword, instances of that attribute must match the default value.
5651
 *
5652
 * [ WFC: No < in Attribute Values ]
5653
 * handled in #xmlParseAttValue
5654
 *
5655
 * @param ctxt  an XML parser context
5656
 * @param value  Receive a possible fixed default value for the attribute
5657
 * @returns XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5658
 *          or XML_ATTRIBUTE_FIXED.
5659
 */
5660
5661
int
5662
12.6k
xmlParseDefaultDecl(xmlParserCtxt *ctxt, xmlChar **value) {
5663
12.6k
    int val;
5664
12.6k
    xmlChar *ret;
5665
5666
12.6k
    *value = NULL;
5667
12.6k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5668
391
  SKIP(9);
5669
391
  return(XML_ATTRIBUTE_REQUIRED);
5670
391
    }
5671
12.2k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5672
3.92k
  SKIP(8);
5673
3.92k
  return(XML_ATTRIBUTE_IMPLIED);
5674
3.92k
    }
5675
8.36k
    val = XML_ATTRIBUTE_NONE;
5676
8.36k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5677
97
  SKIP(6);
5678
97
  val = XML_ATTRIBUTE_FIXED;
5679
97
  if (SKIP_BLANKS_PE == 0) {
5680
1
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5681
1
         "Space required after '#FIXED'\n");
5682
1
  }
5683
97
    }
5684
8.36k
    ret = xmlParseAttValue(ctxt);
5685
8.36k
    if (ret == NULL) {
5686
197
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5687
197
           "Attribute default value declaration error\n");
5688
197
    } else
5689
8.16k
        *value = ret;
5690
8.36k
    return(val);
5691
12.2k
}
5692
5693
/**
5694
 * Parse a Notation attribute type.
5695
 *
5696
 * @deprecated Internal function, don't use.
5697
 *
5698
 * Note: the leading 'NOTATION' S part has already been parsed...
5699
 *
5700
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5701
 *
5702
 * [ VC: Notation Attributes ]
5703
 * Values of this type must match one of the notation names included
5704
 * in the declaration; all notation names in the declaration must be declared.
5705
 *
5706
 * @param ctxt  an XML parser context
5707
 * @returns the notation attribute tree built while parsing
5708
 */
5709
5710
xmlEnumeration *
5711
203
xmlParseNotationType(xmlParserCtxt *ctxt) {
5712
203
    const xmlChar *name;
5713
203
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5714
5715
203
    if (RAW != '(') {
5716
2
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5717
2
  return(NULL);
5718
2
    }
5719
251
    do {
5720
251
        NEXT;
5721
251
  SKIP_BLANKS_PE;
5722
251
        name = xmlParseName(ctxt);
5723
251
  if (name == NULL) {
5724
5
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5725
5
         "Name expected in NOTATION declaration\n");
5726
5
            xmlFreeEnumeration(ret);
5727
5
      return(NULL);
5728
5
  }
5729
246
        tmp = NULL;
5730
246
#ifdef LIBXML_VALID_ENABLED
5731
246
        if (ctxt->validate) {
5732
0
            tmp = ret;
5733
0
            while (tmp != NULL) {
5734
0
                if (xmlStrEqual(name, tmp->name)) {
5735
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5736
0
              "standalone: attribute notation value token %s duplicated\n",
5737
0
                                     name, NULL);
5738
0
                    if (!xmlDictOwns(ctxt->dict, name))
5739
0
                        xmlFree((xmlChar *) name);
5740
0
                    break;
5741
0
                }
5742
0
                tmp = tmp->next;
5743
0
            }
5744
0
        }
5745
246
#endif /* LIBXML_VALID_ENABLED */
5746
246
  if (tmp == NULL) {
5747
246
      cur = xmlCreateEnumeration(name);
5748
246
      if (cur == NULL) {
5749
0
                xmlErrMemory(ctxt);
5750
0
                xmlFreeEnumeration(ret);
5751
0
                return(NULL);
5752
0
            }
5753
246
      if (last == NULL) ret = last = cur;
5754
46
      else {
5755
46
    last->next = cur;
5756
46
    last = cur;
5757
46
      }
5758
246
  }
5759
246
  SKIP_BLANKS_PE;
5760
246
    } while (RAW == '|');
5761
196
    if (RAW != ')') {
5762
2
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5763
2
        xmlFreeEnumeration(ret);
5764
2
  return(NULL);
5765
2
    }
5766
194
    NEXT;
5767
194
    return(ret);
5768
196
}
5769
5770
/**
5771
 * Parse an Enumeration attribute type.
5772
 *
5773
 * @deprecated Internal function, don't use.
5774
 *
5775
 *     [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5776
 *
5777
 * [ VC: Enumeration ]
5778
 * Values of this type must match one of the Nmtoken tokens in
5779
 * the declaration
5780
 *
5781
 * @param ctxt  an XML parser context
5782
 * @returns the enumeration attribute tree built while parsing
5783
 */
5784
5785
xmlEnumeration *
5786
5.13k
xmlParseEnumerationType(xmlParserCtxt *ctxt) {
5787
5.13k
    xmlChar *name;
5788
5.13k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5789
5790
5.13k
    if (RAW != '(') {
5791
71
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5792
71
  return(NULL);
5793
71
    }
5794
5.32k
    do {
5795
5.32k
        NEXT;
5796
5.32k
  SKIP_BLANKS_PE;
5797
5.32k
        name = xmlParseNmtoken(ctxt);
5798
5.32k
  if (name == NULL) {
5799
9
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5800
9
      return(ret);
5801
9
  }
5802
5.31k
        tmp = NULL;
5803
5.31k
#ifdef LIBXML_VALID_ENABLED
5804
5.31k
        if (ctxt->validate) {
5805
0
            tmp = ret;
5806
0
            while (tmp != NULL) {
5807
0
                if (xmlStrEqual(name, tmp->name)) {
5808
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5809
0
              "standalone: attribute enumeration value token %s duplicated\n",
5810
0
                                     name, NULL);
5811
0
                    if (!xmlDictOwns(ctxt->dict, name))
5812
0
                        xmlFree(name);
5813
0
                    break;
5814
0
                }
5815
0
                tmp = tmp->next;
5816
0
            }
5817
0
        }
5818
5.31k
#endif /* LIBXML_VALID_ENABLED */
5819
5.31k
  if (tmp == NULL) {
5820
5.31k
      cur = xmlCreateEnumeration(name);
5821
5.31k
      if (!xmlDictOwns(ctxt->dict, name))
5822
5.31k
    xmlFree(name);
5823
5.31k
      if (cur == NULL) {
5824
0
                xmlErrMemory(ctxt);
5825
0
                xmlFreeEnumeration(ret);
5826
0
                return(NULL);
5827
0
            }
5828
5.31k
      if (last == NULL) ret = last = cur;
5829
252
      else {
5830
252
    last->next = cur;
5831
252
    last = cur;
5832
252
      }
5833
5.31k
  }
5834
5.31k
  SKIP_BLANKS_PE;
5835
5.31k
    } while (RAW == '|');
5836
5.05k
    if (RAW != ')') {
5837
11
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5838
11
  return(ret);
5839
11
    }
5840
5.04k
    NEXT;
5841
5.04k
    return(ret);
5842
5.05k
}
5843
5844
/**
5845
 * Parse an Enumerated attribute type.
5846
 *
5847
 * @deprecated Internal function, don't use.
5848
 *
5849
 *     [57] EnumeratedType ::= NotationType | Enumeration
5850
 *
5851
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5852
 *
5853
 * @param ctxt  an XML parser context
5854
 * @param tree  the enumeration tree built while parsing
5855
 * @returns XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5856
 */
5857
5858
int
5859
5.34k
xmlParseEnumeratedType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5860
5.34k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5861
205
  SKIP(8);
5862
205
  if (SKIP_BLANKS_PE == 0) {
5863
2
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5864
2
         "Space required after 'NOTATION'\n");
5865
2
      return(0);
5866
2
  }
5867
203
  *tree = xmlParseNotationType(ctxt);
5868
203
  if (*tree == NULL) return(0);
5869
194
  return(XML_ATTRIBUTE_NOTATION);
5870
203
    }
5871
5.13k
    *tree = xmlParseEnumerationType(ctxt);
5872
5.13k
    if (*tree == NULL) return(0);
5873
5.06k
    return(XML_ATTRIBUTE_ENUMERATION);
5874
5.13k
}
5875
5876
/**
5877
 * Parse the attribute type (AttType) of an attribute definition
5878
 *
5879
 * @deprecated Internal function, don't use.
5880
 *
5881
 *     [54] AttType ::= StringType | TokenizedType | EnumeratedType
5882
 *
5883
 *     [55] StringType ::= 'CDATA'
5884
 *
5885
 *     [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5886
 *                            'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5887
 *
5888
 * Validity constraints for attribute values syntax are checked in
5889
 * #xmlValidateAttributeValue
5890
 *
5891
 * [ VC: ID ]
5892
 * Values of type ID must match the Name production. A name must not
5893
 * appear more than once in an XML document as a value of this type;
5894
 * i.e., ID values must uniquely identify the elements which bear them.
5895
 *
5896
 * [ VC: One ID per Element Type ]
5897
 * No element type may have more than one ID attribute specified.
5898
 *
5899
 * [ VC: ID Attribute Default ]
5900
 * An ID attribute must have a declared default of \#IMPLIED or \#REQUIRED.
5901
 *
5902
 * [ VC: IDREF ]
5903
 * Values of type IDREF must match the Name production, and values
5904
 * of type IDREFS must match Names; each IDREF Name must match the value
5905
 * of an ID attribute on some element in the XML document; i.e. IDREF
5906
 * values must match the value of some ID attribute.
5907
 *
5908
 * [ VC: Entity Name ]
5909
 * Values of type ENTITY must match the Name production, values
5910
 * of type ENTITIES must match Names; each Entity Name must match the
5911
 * name of an unparsed entity declared in the DTD.
5912
 *
5913
 * [ VC: Name Token ]
5914
 * Values of type NMTOKEN must match the Nmtoken production; values
5915
 * of type NMTOKENS must match Nmtokens.
5916
 *
5917
 * @param ctxt  an XML parser context
5918
 * @param tree  the enumeration tree built while parsing
5919
 * @returns the attribute type
5920
 */
5921
int
5922
12.7k
xmlParseAttributeType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5923
12.7k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5924
1.65k
  SKIP(5);
5925
1.65k
  return(XML_ATTRIBUTE_CDATA);
5926
11.1k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5927
37
  SKIP(6);
5928
37
  return(XML_ATTRIBUTE_IDREFS);
5929
11.1k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5930
48
  SKIP(5);
5931
48
  return(XML_ATTRIBUTE_IDREF);
5932
11.0k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5933
4.27k
        SKIP(2);
5934
4.27k
  return(XML_ATTRIBUTE_ID);
5935
6.78k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5936
52
  SKIP(6);
5937
52
  return(XML_ATTRIBUTE_ENTITY);
5938
6.72k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5939
1.36k
  SKIP(8);
5940
1.36k
  return(XML_ATTRIBUTE_ENTITIES);
5941
5.36k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5942
18
  SKIP(8);
5943
18
  return(XML_ATTRIBUTE_NMTOKENS);
5944
5.35k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5945
11
  SKIP(7);
5946
11
  return(XML_ATTRIBUTE_NMTOKEN);
5947
11
     }
5948
5.34k
     return(xmlParseEnumeratedType(ctxt, tree));
5949
12.7k
}
5950
5951
/**
5952
 * Parse an attribute list declaration for an element. Always consumes '<!'.
5953
 *
5954
 * @deprecated Internal function, don't use.
5955
 *
5956
 *     [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5957
 *
5958
 *     [53] AttDef ::= S Name S AttType S DefaultDecl
5959
 * @param ctxt  an XML parser context
5960
 */
5961
void
5962
3.47k
xmlParseAttributeListDecl(xmlParserCtxt *ctxt) {
5963
3.47k
    const xmlChar *elemName;
5964
3.47k
    const xmlChar *attrName;
5965
3.47k
    xmlEnumerationPtr tree;
5966
5967
3.47k
    if ((CUR != '<') || (NXT(1) != '!'))
5968
0
        return;
5969
3.47k
    SKIP(2);
5970
5971
3.47k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5972
3.45k
#ifdef LIBXML_VALID_ENABLED
5973
3.45k
  int oldInputNr = ctxt->inputNr;
5974
3.45k
#endif
5975
5976
3.45k
  SKIP(7);
5977
3.45k
  if (SKIP_BLANKS_PE == 0) {
5978
20
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5979
20
                     "Space required after '<!ATTLIST'\n");
5980
20
  }
5981
3.45k
        elemName = xmlParseName(ctxt);
5982
3.45k
  if (elemName == NULL) {
5983
13
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5984
13
         "ATTLIST: no name for Element\n");
5985
13
      return;
5986
13
  }
5987
3.44k
  SKIP_BLANKS_PE;
5988
3.44k
  GROW;
5989
15.8k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
5990
12.9k
      int type;
5991
12.9k
      int def;
5992
12.9k
      xmlChar *defaultValue = NULL;
5993
5994
12.9k
      GROW;
5995
12.9k
            tree = NULL;
5996
12.9k
      attrName = xmlParseName(ctxt);
5997
12.9k
      if (attrName == NULL) {
5998
31
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5999
31
             "ATTLIST: no name for Attribute\n");
6000
31
    break;
6001
31
      }
6002
12.8k
      GROW;
6003
12.8k
      if (SKIP_BLANKS_PE == 0) {
6004
88
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6005
88
            "Space required after the attribute name\n");
6006
88
    break;
6007
88
      }
6008
6009
12.7k
      type = xmlParseAttributeType(ctxt, &tree);
6010
12.7k
      if (type <= 0) {
6011
84
          break;
6012
84
      }
6013
6014
12.7k
      GROW;
6015
12.7k
      if (SKIP_BLANKS_PE == 0) {
6016
35
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6017
35
             "Space required after the attribute type\n");
6018
35
          if (tree != NULL)
6019
24
        xmlFreeEnumeration(tree);
6020
35
    break;
6021
35
      }
6022
6023
12.6k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6024
12.6k
      if (def <= 0) {
6025
0
                if (defaultValue != NULL)
6026
0
        xmlFree(defaultValue);
6027
0
          if (tree != NULL)
6028
0
        xmlFreeEnumeration(tree);
6029
0
          break;
6030
0
      }
6031
12.6k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6032
6.52k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6033
6034
12.6k
      GROW;
6035
12.6k
            if (RAW != '>') {
6036
10.6k
    if (SKIP_BLANKS_PE == 0) {
6037
243
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6038
243
      "Space required after the attribute default value\n");
6039
243
        if (defaultValue != NULL)
6040
46
      xmlFree(defaultValue);
6041
243
        if (tree != NULL)
6042
36
      xmlFreeEnumeration(tree);
6043
243
        break;
6044
243
    }
6045
10.6k
      }
6046
12.4k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6047
11.3k
    (ctxt->sax->attributeDecl != NULL))
6048
0
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6049
0
                          type, def, defaultValue, tree);
6050
12.4k
      else if (tree != NULL)
6051
5.19k
    xmlFreeEnumeration(tree);
6052
6053
12.4k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6054
8.12k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6055
8.12k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6056
8.12k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6057
8.12k
      }
6058
12.4k
      if (ctxt->sax2) {
6059
12.4k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6060
12.4k
      }
6061
12.4k
      if (defaultValue != NULL)
6062
8.12k
          xmlFree(defaultValue);
6063
12.4k
      GROW;
6064
12.4k
  }
6065
3.44k
  if (RAW == '>') {
6066
2.97k
#ifdef LIBXML_VALID_ENABLED
6067
2.97k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6068
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6069
0
                                 "Attribute list declaration doesn't start and"
6070
0
                                 " stop in the same entity\n",
6071
0
                                 NULL, NULL);
6072
0
      }
6073
2.97k
#endif
6074
2.97k
      NEXT;
6075
2.97k
  }
6076
3.44k
    }
6077
3.47k
}
6078
6079
/**
6080
 * Handle PEs and check that we don't pop the entity that started
6081
 * a balanced group.
6082
 *
6083
 * @param ctxt  parser context
6084
 * @param openInputNr  input nr of the entity with opening '('
6085
 */
6086
static void
6087
199k
xmlSkipBlankCharsPEBalanced(xmlParserCtxt *ctxt, int openInputNr) {
6088
199k
    SKIP_BLANKS;
6089
199k
    GROW;
6090
6091
199k
    (void) openInputNr;
6092
6093
199k
    if (!PARSER_EXTERNAL(ctxt) && !PARSER_IN_PE(ctxt))
6094
199k
        return;
6095
6096
0
    while (!PARSER_STOPPED(ctxt)) {
6097
0
        if (ctxt->input->cur >= ctxt->input->end) {
6098
0
#ifdef LIBXML_VALID_ENABLED
6099
0
            if ((ctxt->validate) && (ctxt->inputNr <= openInputNr)) {
6100
0
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6101
0
                                 "Element content declaration doesn't start "
6102
0
                                 "and stop in the same entity\n",
6103
0
                                 NULL, NULL);
6104
0
            }
6105
0
#endif
6106
0
            if (PARSER_IN_PE(ctxt))
6107
0
                xmlPopPE(ctxt);
6108
0
            else
6109
0
                break;
6110
0
        } else if (RAW == '%') {
6111
0
            xmlParsePERefInternal(ctxt, 0);
6112
0
        } else {
6113
0
            break;
6114
0
        }
6115
6116
0
        SKIP_BLANKS;
6117
0
        GROW;
6118
0
    }
6119
0
}
6120
6121
/**
6122
 * Parse the declaration for a Mixed Element content
6123
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6124
 *
6125
 * @deprecated Internal function, don't use.
6126
 *
6127
 *     [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6128
 *                    '(' S? '#PCDATA' S? ')'
6129
 *
6130
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6131
 *
6132
 * [ VC: No Duplicate Types ]
6133
 * The same name must not appear more than once in a single
6134
 * mixed-content declaration.
6135
 *
6136
 * @param ctxt  an XML parser context
6137
 * @param openInputNr  the input used for the current entity, needed for
6138
 * boundary checks
6139
 * @returns the list of the xmlElementContent describing the element choices
6140
 */
6141
xmlElementContent *
6142
159
xmlParseElementMixedContentDecl(xmlParserCtxt *ctxt, int openInputNr) {
6143
159
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6144
159
    const xmlChar *elem = NULL;
6145
6146
159
    GROW;
6147
159
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6148
159
  SKIP(7);
6149
159
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6150
159
  if (RAW == ')') {
6151
85
#ifdef LIBXML_VALID_ENABLED
6152
85
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6153
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6154
0
                                 "Element content declaration doesn't start "
6155
0
                                 "and stop in the same entity\n",
6156
0
                                 NULL, NULL);
6157
0
      }
6158
85
#endif
6159
85
      NEXT;
6160
85
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6161
85
      if (ret == NULL)
6162
0
                goto mem_error;
6163
85
      if (RAW == '*') {
6164
1
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6165
1
    NEXT;
6166
1
      }
6167
85
      return(ret);
6168
85
  }
6169
74
  if ((RAW == '(') || (RAW == '|')) {
6170
73
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6171
73
      if (ret == NULL)
6172
0
                goto mem_error;
6173
73
  }
6174
289
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6175
272
      NEXT;
6176
272
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6177
272
            if (n == NULL)
6178
0
                goto mem_error;
6179
272
      if (elem == NULL) {
6180
73
    n->c1 = cur;
6181
73
    if (cur != NULL)
6182
73
        cur->parent = n;
6183
73
    ret = cur = n;
6184
199
      } else {
6185
199
          cur->c2 = n;
6186
199
    n->parent = cur;
6187
199
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6188
199
                if (n->c1 == NULL)
6189
0
                    goto mem_error;
6190
199
    n->c1->parent = n;
6191
199
    cur = n;
6192
199
      }
6193
272
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6194
272
      elem = xmlParseName(ctxt);
6195
272
      if (elem == NULL) {
6196
57
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6197
57
      "xmlParseElementMixedContentDecl : Name expected\n");
6198
57
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6199
57
    return(NULL);
6200
57
      }
6201
215
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6202
215
  }
6203
17
  if ((RAW == ')') && (NXT(1) == '*')) {
6204
0
      if (elem != NULL) {
6205
0
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6206
0
                                   XML_ELEMENT_CONTENT_ELEMENT);
6207
0
    if (cur->c2 == NULL)
6208
0
                    goto mem_error;
6209
0
    cur->c2->parent = cur;
6210
0
            }
6211
0
            if (ret != NULL)
6212
0
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6213
0
#ifdef LIBXML_VALID_ENABLED
6214
0
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6215
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6216
0
                                 "Element content declaration doesn't start "
6217
0
                                 "and stop in the same entity\n",
6218
0
                                 NULL, NULL);
6219
0
      }
6220
0
#endif
6221
0
      SKIP(2);
6222
17
  } else {
6223
17
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6224
17
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6225
17
      return(NULL);
6226
17
  }
6227
6228
17
    } else {
6229
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6230
0
    }
6231
0
    return(ret);
6232
6233
0
mem_error:
6234
0
    xmlErrMemory(ctxt);
6235
0
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6236
0
    return(NULL);
6237
159
}
6238
6239
/**
6240
 * Parse the declaration for children (element-only) element content
6241
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6242
 *
6243
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6244
 *
6245
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6246
 *
6247
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6248
 *
6249
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6250
 *
6251
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6252
 * TODO Parameter-entity replacement text must be properly nested
6253
 *  with parenthesized groups. That is to say, if either of the
6254
 *  opening or closing parentheses in a choice, seq, or Mixed
6255
 *  construct is contained in the replacement text for a parameter
6256
 *  entity, both must be contained in the same replacement text. For
6257
 *  interoperability, if a parameter-entity reference appears in a
6258
 *  choice, seq, or Mixed construct, its replacement text should not
6259
 *  be empty, and neither the first nor last non-blank character of
6260
 *  the replacement text should be a connector (| or ,).
6261
 *
6262
 * @param ctxt  an XML parser context
6263
 * @param openInputNr  the input used for the current entity, needed for
6264
 * boundary checks
6265
 * @param depth  the level of recursion
6266
 * @returns the tree of xmlElementContent describing the element
6267
 *          hierarchy.
6268
 */
6269
static xmlElementContentPtr
6270
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int openInputNr,
6271
36.0k
                                       int depth) {
6272
36.0k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6273
36.0k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6274
36.0k
    const xmlChar *elem;
6275
36.0k
    xmlChar type = 0;
6276
6277
36.0k
    if (depth > maxDepth) {
6278
1
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6279
1
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6280
1
                "use XML_PARSE_HUGE\n", depth);
6281
1
  return(NULL);
6282
1
    }
6283
36.0k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6284
36.0k
    if (RAW == '(') {
6285
32.1k
        int newInputNr = ctxt->inputNr;
6286
6287
        /* Recurse on first child */
6288
32.1k
  NEXT;
6289
32.1k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6290
32.1k
                                                           depth + 1);
6291
32.1k
        if (cur == NULL)
6292
25.4k
            return(NULL);
6293
32.1k
    } else {
6294
3.88k
  elem = xmlParseName(ctxt);
6295
3.88k
  if (elem == NULL) {
6296
51
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6297
51
      return(NULL);
6298
51
  }
6299
3.83k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6300
3.83k
  if (cur == NULL) {
6301
0
      xmlErrMemory(ctxt);
6302
0
      return(NULL);
6303
0
  }
6304
3.83k
  GROW;
6305
3.83k
  if (RAW == '?') {
6306
60
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6307
60
      NEXT;
6308
3.77k
  } else if (RAW == '*') {
6309
346
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6310
346
      NEXT;
6311
3.42k
  } else if (RAW == '+') {
6312
508
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6313
508
      NEXT;
6314
2.92k
  } else {
6315
2.92k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6316
2.92k
  }
6317
3.83k
  GROW;
6318
3.83k
    }
6319
85.6k
    while (!PARSER_STOPPED(ctxt)) {
6320
85.6k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6321
85.6k
        if (RAW == ')')
6322
8.70k
            break;
6323
        /*
6324
   * Each loop we parse one separator and one element.
6325
   */
6326
76.9k
        if (RAW == ',') {
6327
71.4k
      if (type == 0) type = CUR;
6328
6329
      /*
6330
       * Detect "Name | Name , Name" error
6331
       */
6332
69.3k
      else if (type != CUR) {
6333
1
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6334
1
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6335
1
                      type);
6336
1
    if ((last != NULL) && (last != ret))
6337
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6338
1
    if (ret != NULL)
6339
1
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6340
1
    return(NULL);
6341
1
      }
6342
71.4k
      NEXT;
6343
6344
71.4k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6345
71.4k
      if (op == NULL) {
6346
0
                xmlErrMemory(ctxt);
6347
0
    if ((last != NULL) && (last != ret))
6348
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6349
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6350
0
    return(NULL);
6351
0
      }
6352
71.4k
      if (last == NULL) {
6353
2.08k
    op->c1 = ret;
6354
2.08k
    if (ret != NULL)
6355
2.08k
        ret->parent = op;
6356
2.08k
    ret = cur = op;
6357
69.3k
      } else {
6358
69.3k
          cur->c2 = op;
6359
69.3k
    if (op != NULL)
6360
69.3k
        op->parent = cur;
6361
69.3k
    op->c1 = last;
6362
69.3k
    if (last != NULL)
6363
69.3k
        last->parent = op;
6364
69.3k
    cur =op;
6365
69.3k
    last = NULL;
6366
69.3k
      }
6367
71.4k
  } else if (RAW == '|') {
6368
5.44k
      if (type == 0) type = CUR;
6369
6370
      /*
6371
       * Detect "Name , Name | Name" error
6372
       */
6373
3.64k
      else if (type != CUR) {
6374
1
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6375
1
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6376
1
          type);
6377
1
    if ((last != NULL) && (last != ret))
6378
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6379
1
    if (ret != NULL)
6380
1
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6381
1
    return(NULL);
6382
1
      }
6383
5.44k
      NEXT;
6384
6385
5.44k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6386
5.44k
      if (op == NULL) {
6387
0
                xmlErrMemory(ctxt);
6388
0
    if ((last != NULL) && (last != ret))
6389
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
0
    if (ret != NULL)
6391
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
0
    return(NULL);
6393
0
      }
6394
5.44k
      if (last == NULL) {
6395
1.79k
    op->c1 = ret;
6396
1.79k
    if (ret != NULL)
6397
1.79k
        ret->parent = op;
6398
1.79k
    ret = cur = op;
6399
3.64k
      } else {
6400
3.64k
          cur->c2 = op;
6401
3.64k
    if (op != NULL)
6402
3.64k
        op->parent = cur;
6403
3.64k
    op->c1 = last;
6404
3.64k
    if (last != NULL)
6405
3.64k
        last->parent = op;
6406
3.64k
    cur =op;
6407
3.64k
    last = NULL;
6408
3.64k
      }
6409
5.44k
  } else {
6410
58
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6411
58
      if ((last != NULL) && (last != ret))
6412
22
          xmlFreeDocElementContent(ctxt->myDoc, last);
6413
58
      if (ret != NULL)
6414
58
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6415
58
      return(NULL);
6416
58
  }
6417
76.8k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6418
76.8k
        if (RAW == '(') {
6419
3.60k
            int newInputNr = ctxt->inputNr;
6420
6421
      /* Recurse on second child */
6422
3.60k
      NEXT;
6423
3.60k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6424
3.60k
                                                          depth + 1);
6425
3.60k
            if (last == NULL) {
6426
1.77k
    if (ret != NULL)
6427
1.77k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6428
1.77k
    return(NULL);
6429
1.77k
            }
6430
73.2k
  } else {
6431
73.2k
      elem = xmlParseName(ctxt);
6432
73.2k
      if (elem == NULL) {
6433
28
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6434
28
    if (ret != NULL)
6435
28
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6436
28
    return(NULL);
6437
28
      }
6438
73.2k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6439
73.2k
      if (last == NULL) {
6440
0
                xmlErrMemory(ctxt);
6441
0
    if (ret != NULL)
6442
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6443
0
    return(NULL);
6444
0
      }
6445
73.2k
      if (RAW == '?') {
6446
246
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6447
246
    NEXT;
6448
73.0k
      } else if (RAW == '*') {
6449
50
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6450
50
    NEXT;
6451
72.9k
      } else if (RAW == '+') {
6452
2.77k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6453
2.77k
    NEXT;
6454
70.1k
      } else {
6455
70.1k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6456
70.1k
      }
6457
73.2k
  }
6458
76.8k
    }
6459
8.70k
    if ((cur != NULL) && (last != NULL)) {
6460
2.04k
        cur->c2 = last;
6461
2.04k
  if (last != NULL)
6462
2.04k
      last->parent = cur;
6463
2.04k
    }
6464
8.70k
#ifdef LIBXML_VALID_ENABLED
6465
8.70k
    if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6466
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6467
0
                         "Element content declaration doesn't start "
6468
0
                         "and stop in the same entity\n",
6469
0
                         NULL, NULL);
6470
0
    }
6471
8.70k
#endif
6472
8.70k
    NEXT;
6473
8.70k
    if (RAW == '?') {
6474
915
  if (ret != NULL) {
6475
915
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6476
886
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6477
312
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6478
603
      else
6479
603
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6480
915
  }
6481
915
  NEXT;
6482
7.79k
    } else if (RAW == '*') {
6483
676
  if (ret != NULL) {
6484
676
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6485
676
      cur = ret;
6486
      /*
6487
       * Some normalization:
6488
       * (a | b* | c?)* == (a | b | c)*
6489
       */
6490
1.54k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6491
864
    if ((cur->c1 != NULL) &&
6492
864
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6493
860
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6494
226
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6495
864
    if ((cur->c2 != NULL) &&
6496
864
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6497
864
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6498
23
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6499
864
    cur = cur->c2;
6500
864
      }
6501
676
  }
6502
676
  NEXT;
6503
7.11k
    } else if (RAW == '+') {
6504
2.21k
  if (ret != NULL) {
6505
2.21k
      int found = 0;
6506
6507
2.21k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6508
2.05k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6509
672
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6510
1.53k
      else
6511
1.53k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6512
      /*
6513
       * Some normalization:
6514
       * (a | b*)+ == (a | b)*
6515
       * (a | b?)+ == (a | b)*
6516
       */
6517
2.74k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6518
535
    if ((cur->c1 != NULL) &&
6519
535
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6520
467
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6521
138
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6522
138
        found = 1;
6523
138
    }
6524
535
    if ((cur->c2 != NULL) &&
6525
535
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6526
525
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6527
28
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6528
28
        found = 1;
6529
28
    }
6530
535
    cur = cur->c2;
6531
535
      }
6532
2.21k
      if (found)
6533
136
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6534
2.21k
  }
6535
2.21k
  NEXT;
6536
2.21k
    }
6537
8.70k
    return(ret);
6538
10.5k
}
6539
6540
/**
6541
 * Parse the declaration for a Mixed Element content
6542
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6543
 *
6544
 * @deprecated Internal function, don't use.
6545
 *
6546
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6547
 *
6548
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6549
 *
6550
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6551
 *
6552
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6553
 *
6554
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6555
 * TODO Parameter-entity replacement text must be properly nested
6556
 *  with parenthesized groups. That is to say, if either of the
6557
 *  opening or closing parentheses in a choice, seq, or Mixed
6558
 *  construct is contained in the replacement text for a parameter
6559
 *  entity, both must be contained in the same replacement text. For
6560
 *  interoperability, if a parameter-entity reference appears in a
6561
 *  choice, seq, or Mixed construct, its replacement text should not
6562
 *  be empty, and neither the first nor last non-blank character of
6563
 *  the replacement text should be a connector (| or ,).
6564
 *
6565
 * @param ctxt  an XML parser context
6566
 * @param inputchk  the input used for the current entity, needed for boundary checks
6567
 * @returns the tree of xmlElementContent describing the element
6568
 *          hierarchy.
6569
 */
6570
xmlElementContent *
6571
0
xmlParseElementChildrenContentDecl(xmlParserCtxt *ctxt, int inputchk) {
6572
    /* stub left for API/ABI compat */
6573
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6574
0
}
6575
6576
/**
6577
 * Parse the declaration for an Element content either Mixed or Children,
6578
 * the cases EMPTY and ANY are handled directly in #xmlParseElementDecl
6579
 *
6580
 * @deprecated Internal function, don't use.
6581
 *
6582
 *     [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6583
 *
6584
 * @param ctxt  an XML parser context
6585
 * @param name  the name of the element being defined.
6586
 * @param result  the Element Content pointer will be stored here if any
6587
 * @returns an xmlElementTypeVal value or -1 on error
6588
 */
6589
6590
int
6591
xmlParseElementContentDecl(xmlParserCtxt *ctxt, const xmlChar *name,
6592
441
                           xmlElementContent **result) {
6593
6594
441
    xmlElementContentPtr tree = NULL;
6595
441
    int openInputNr = ctxt->inputNr;
6596
441
    int res;
6597
6598
441
    *result = NULL;
6599
6600
441
    if (RAW != '(') {
6601
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6602
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6603
0
  return(-1);
6604
0
    }
6605
441
    NEXT;
6606
441
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6607
441
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6608
159
        tree = xmlParseElementMixedContentDecl(ctxt, openInputNr);
6609
159
  res = XML_ELEMENT_TYPE_MIXED;
6610
282
    } else {
6611
282
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, openInputNr, 1);
6612
282
  res = XML_ELEMENT_TYPE_ELEMENT;
6613
282
    }
6614
441
    if (tree == NULL)
6615
214
        return(-1);
6616
227
    SKIP_BLANKS_PE;
6617
227
    *result = tree;
6618
227
    return(res);
6619
441
}
6620
6621
/**
6622
 * Parse an element declaration. Always consumes '<!'.
6623
 *
6624
 * @deprecated Internal function, don't use.
6625
 *
6626
 *     [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6627
 *
6628
 * [ VC: Unique Element Type Declaration ]
6629
 * No element type may be declared more than once
6630
 *
6631
 * @param ctxt  an XML parser context
6632
 * @returns the type of the element, or -1 in case of error
6633
 */
6634
int
6635
540
xmlParseElementDecl(xmlParserCtxt *ctxt) {
6636
540
    const xmlChar *name;
6637
540
    int ret = -1;
6638
540
    xmlElementContentPtr content  = NULL;
6639
6640
540
    if ((CUR != '<') || (NXT(1) != '!'))
6641
0
        return(ret);
6642
540
    SKIP(2);
6643
6644
    /* GROW; done in the caller */
6645
540
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6646
520
#ifdef LIBXML_VALID_ENABLED
6647
520
  int oldInputNr = ctxt->inputNr;
6648
520
#endif
6649
6650
520
  SKIP(7);
6651
520
  if (SKIP_BLANKS_PE == 0) {
6652
13
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6653
13
               "Space required after 'ELEMENT'\n");
6654
13
      return(-1);
6655
13
  }
6656
507
        name = xmlParseName(ctxt);
6657
507
  if (name == NULL) {
6658
5
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6659
5
         "xmlParseElementDecl: no name for Element\n");
6660
5
      return(-1);
6661
5
  }
6662
502
  if (SKIP_BLANKS_PE == 0) {
6663
4
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6664
4
         "Space required after the element name\n");
6665
4
  }
6666
502
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6667
5
      SKIP(5);
6668
      /*
6669
       * Element must always be empty.
6670
       */
6671
5
      ret = XML_ELEMENT_TYPE_EMPTY;
6672
497
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6673
12
             (NXT(2) == 'Y')) {
6674
11
      SKIP(3);
6675
      /*
6676
       * Element is a generic container.
6677
       */
6678
11
      ret = XML_ELEMENT_TYPE_ANY;
6679
486
  } else if (RAW == '(') {
6680
441
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6681
441
            if (ret <= 0)
6682
214
                return(-1);
6683
441
  } else {
6684
      /*
6685
       * [ WFC: PEs in Internal Subset ] error handling.
6686
       */
6687
45
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6688
45
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6689
45
      return(-1);
6690
45
  }
6691
6692
243
  SKIP_BLANKS_PE;
6693
6694
243
  if (RAW != '>') {
6695
9
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6696
9
      if (content != NULL) {
6697
8
    xmlFreeDocElementContent(ctxt->myDoc, content);
6698
8
      }
6699
234
  } else {
6700
234
#ifdef LIBXML_VALID_ENABLED
6701
234
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6702
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6703
0
                                 "Element declaration doesn't start and stop in"
6704
0
                                 " the same entity\n",
6705
0
                                 NULL, NULL);
6706
0
      }
6707
234
#endif
6708
6709
234
      NEXT;
6710
234
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6711
116
    (ctxt->sax->elementDecl != NULL)) {
6712
0
    if (content != NULL)
6713
0
        content->parent = NULL;
6714
0
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6715
0
                           content);
6716
0
    if ((content != NULL) && (content->parent == NULL)) {
6717
        /*
6718
         * this is a trick: if xmlAddElementDecl is called,
6719
         * instead of copying the full tree it is plugged directly
6720
         * if called from the parser. Avoid duplicating the
6721
         * interfaces or change the API/ABI
6722
         */
6723
0
        xmlFreeDocElementContent(ctxt->myDoc, content);
6724
0
    }
6725
234
      } else if (content != NULL) {
6726
219
    xmlFreeDocElementContent(ctxt->myDoc, content);
6727
219
      }
6728
234
  }
6729
243
    }
6730
263
    return(ret);
6731
540
}
6732
6733
/**
6734
 * Parse a conditional section. Always consumes '<!['.
6735
 *
6736
 *     [61] conditionalSect ::= includeSect | ignoreSect
6737
 *     [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6738
 *     [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6739
 *     [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>'
6740
 *                                 Ignore)*
6741
 *     [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6742
 * @param ctxt  an XML parser context
6743
 */
6744
6745
static void
6746
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6747
0
    size_t depth = 0;
6748
0
    int isFreshPE = 0;
6749
0
    int oldInputNr = ctxt->inputNr;
6750
0
    int declInputNr = ctxt->inputNr;
6751
6752
0
    while (!PARSER_STOPPED(ctxt)) {
6753
0
        if (ctxt->input->cur >= ctxt->input->end) {
6754
0
            if (ctxt->inputNr <= oldInputNr) {
6755
0
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6756
0
                return;
6757
0
            }
6758
6759
0
            xmlPopPE(ctxt);
6760
0
            declInputNr = ctxt->inputNr;
6761
0
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6762
0
            SKIP(3);
6763
0
            SKIP_BLANKS_PE;
6764
6765
0
            isFreshPE = 0;
6766
6767
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6768
0
                SKIP(7);
6769
0
                SKIP_BLANKS_PE;
6770
0
                if (RAW != '[') {
6771
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6772
0
                    return;
6773
0
                }
6774
0
#ifdef LIBXML_VALID_ENABLED
6775
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6776
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6777
0
                                     "All markup of the conditional section is"
6778
0
                                     " not in the same entity\n",
6779
0
                                     NULL, NULL);
6780
0
                }
6781
0
#endif
6782
0
                NEXT;
6783
6784
0
                depth++;
6785
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6786
0
                size_t ignoreDepth = 0;
6787
6788
0
                SKIP(6);
6789
0
                SKIP_BLANKS_PE;
6790
0
                if (RAW != '[') {
6791
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6792
0
                    return;
6793
0
                }
6794
0
#ifdef LIBXML_VALID_ENABLED
6795
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6796
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6797
0
                                     "All markup of the conditional section is"
6798
0
                                     " not in the same entity\n",
6799
0
                                     NULL, NULL);
6800
0
                }
6801
0
#endif
6802
0
                NEXT;
6803
6804
0
                while (PARSER_STOPPED(ctxt) == 0) {
6805
0
                    if (RAW == 0) {
6806
0
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6807
0
                        return;
6808
0
                    }
6809
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6810
0
                        SKIP(3);
6811
0
                        ignoreDepth++;
6812
                        /* Check for integer overflow */
6813
0
                        if (ignoreDepth == 0) {
6814
0
                            xmlErrMemory(ctxt);
6815
0
                            return;
6816
0
                        }
6817
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6818
0
                               (NXT(2) == '>')) {
6819
0
                        SKIP(3);
6820
0
                        if (ignoreDepth == 0)
6821
0
                            break;
6822
0
                        ignoreDepth--;
6823
0
                    } else {
6824
0
                        NEXT;
6825
0
                    }
6826
0
                }
6827
6828
0
#ifdef LIBXML_VALID_ENABLED
6829
0
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6830
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6831
0
                                     "All markup of the conditional section is"
6832
0
                                     " not in the same entity\n",
6833
0
                                     NULL, NULL);
6834
0
                }
6835
0
#endif
6836
0
            } else {
6837
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6838
0
                return;
6839
0
            }
6840
0
        } else if ((depth > 0) &&
6841
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6842
0
            if (isFreshPE) {
6843
0
                xmlFatalErrMsg(ctxt, XML_ERR_CONDSEC_INVALID,
6844
0
                               "Parameter entity must match "
6845
0
                               "extSubsetDecl\n");
6846
0
                return;
6847
0
            }
6848
6849
0
            depth--;
6850
0
#ifdef LIBXML_VALID_ENABLED
6851
0
            if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6852
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6853
0
                                 "All markup of the conditional section is not"
6854
0
                                 " in the same entity\n",
6855
0
                                 NULL, NULL);
6856
0
            }
6857
0
#endif
6858
0
            SKIP(3);
6859
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6860
0
            isFreshPE = 0;
6861
0
            xmlParseMarkupDecl(ctxt);
6862
0
        } else if (RAW == '%') {
6863
0
            xmlParsePERefInternal(ctxt, 1);
6864
0
            if (ctxt->inputNr > declInputNr) {
6865
0
                isFreshPE = 1;
6866
0
                declInputNr = ctxt->inputNr;
6867
0
            }
6868
0
        } else {
6869
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6870
0
            return;
6871
0
        }
6872
6873
0
        if (depth == 0)
6874
0
            break;
6875
6876
0
        SKIP_BLANKS;
6877
0
        SHRINK;
6878
0
        GROW;
6879
0
    }
6880
0
}
6881
6882
/**
6883
 * Parse markup declarations. Always consumes '<!' or '<?'.
6884
 *
6885
 * @deprecated Internal function, don't use.
6886
 *
6887
 *     [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6888
 *                         NotationDecl | PI | Comment
6889
 *
6890
 * [ VC: Proper Declaration/PE Nesting ]
6891
 * Parameter-entity replacement text must be properly nested with
6892
 * markup declarations. That is to say, if either the first character
6893
 * or the last character of a markup declaration (markupdecl above) is
6894
 * contained in the replacement text for a parameter-entity reference,
6895
 * both must be contained in the same replacement text.
6896
 *
6897
 * [ WFC: PEs in Internal Subset ]
6898
 * In the internal DTD subset, parameter-entity references can occur
6899
 * only where markup declarations can occur, not within markup declarations.
6900
 * (This does not apply to references that occur in external parameter
6901
 * entities or to the external subset.)
6902
 *
6903
 * @param ctxt  an XML parser context
6904
 */
6905
void
6906
48.4k
xmlParseMarkupDecl(xmlParserCtxt *ctxt) {
6907
48.4k
    GROW;
6908
48.4k
    if (CUR == '<') {
6909
48.4k
        if (NXT(1) == '!') {
6910
41.3k
      switch (NXT(2)) {
6911
23.2k
          case 'E':
6912
23.2k
        if (NXT(3) == 'L')
6913
540
      xmlParseElementDecl(ctxt);
6914
22.7k
        else if (NXT(3) == 'N')
6915
22.7k
      xmlParseEntityDecl(ctxt);
6916
10
                    else
6917
10
                        SKIP(2);
6918
23.2k
        break;
6919
3.47k
          case 'A':
6920
3.47k
        xmlParseAttributeListDecl(ctxt);
6921
3.47k
        break;
6922
630
          case 'N':
6923
630
        xmlParseNotationDecl(ctxt);
6924
630
        break;
6925
13.9k
          case '-':
6926
13.9k
        xmlParseComment(ctxt);
6927
13.9k
        break;
6928
42
    default:
6929
42
                    xmlFatalErr(ctxt,
6930
42
                                ctxt->inSubset == 2 ?
6931
0
                                    XML_ERR_EXT_SUBSET_NOT_FINISHED :
6932
42
                                    XML_ERR_INT_SUBSET_NOT_FINISHED,
6933
42
                                NULL);
6934
42
                    SKIP(2);
6935
42
        break;
6936
41.3k
      }
6937
41.3k
  } else if (NXT(1) == '?') {
6938
7.08k
      xmlParsePI(ctxt);
6939
7.08k
  }
6940
48.4k
    }
6941
48.4k
}
6942
6943
/**
6944
 * Parse an XML declaration header for external entities
6945
 *
6946
 * @deprecated Internal function, don't use.
6947
 *
6948
 *     [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6949
 * @param ctxt  an XML parser context
6950
 */
6951
6952
void
6953
0
xmlParseTextDecl(xmlParserCtxt *ctxt) {
6954
0
    xmlChar *version;
6955
6956
    /*
6957
     * We know that '<?xml' is here.
6958
     */
6959
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6960
0
  SKIP(5);
6961
0
    } else {
6962
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6963
0
  return;
6964
0
    }
6965
6966
0
    if (SKIP_BLANKS == 0) {
6967
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6968
0
           "Space needed after '<?xml'\n");
6969
0
    }
6970
6971
    /*
6972
     * We may have the VersionInfo here.
6973
     */
6974
0
    version = xmlParseVersionInfo(ctxt);
6975
0
    if (version == NULL) {
6976
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6977
0
        if (version == NULL) {
6978
0
            xmlErrMemory(ctxt);
6979
0
            return;
6980
0
        }
6981
0
    } else {
6982
0
  if (SKIP_BLANKS == 0) {
6983
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6984
0
               "Space needed here\n");
6985
0
  }
6986
0
    }
6987
0
    ctxt->input->version = version;
6988
6989
    /*
6990
     * We must have the encoding declaration
6991
     */
6992
0
    xmlParseEncodingDecl(ctxt);
6993
6994
0
    SKIP_BLANKS;
6995
0
    if ((RAW == '?') && (NXT(1) == '>')) {
6996
0
        SKIP(2);
6997
0
    } else if (RAW == '>') {
6998
        /* Deprecated old WD ... */
6999
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7000
0
  NEXT;
7001
0
    } else {
7002
0
        int c;
7003
7004
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7005
0
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7006
0
            NEXT;
7007
0
            if (c == '>')
7008
0
                break;
7009
0
        }
7010
0
    }
7011
0
}
7012
7013
/**
7014
 * Parse Markup declarations from an external subset
7015
 *
7016
 * @deprecated Internal function, don't use.
7017
 *
7018
 *     [30] extSubset ::= textDecl? extSubsetDecl
7019
 *
7020
 *     [31] extSubsetDecl ::= (markupdecl | conditionalSect |
7021
 *                             PEReference | S) *
7022
 * @param ctxt  an XML parser context
7023
 * @param publicId  the public identifier
7024
 * @param systemId  the system identifier (URL)
7025
 */
7026
void
7027
xmlParseExternalSubset(xmlParserCtxt *ctxt, const xmlChar *publicId,
7028
0
                       const xmlChar *systemId) {
7029
0
    int oldInputNr;
7030
7031
0
    xmlCtxtInitializeLate(ctxt);
7032
7033
0
    xmlDetectEncoding(ctxt);
7034
7035
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7036
0
  xmlParseTextDecl(ctxt);
7037
0
    }
7038
0
    if (ctxt->myDoc == NULL) {
7039
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7040
0
  if (ctxt->myDoc == NULL) {
7041
0
      xmlErrMemory(ctxt);
7042
0
      return;
7043
0
  }
7044
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7045
0
    }
7046
0
    if ((ctxt->myDoc->intSubset == NULL) &&
7047
0
        (xmlCreateIntSubset(ctxt->myDoc, NULL, publicId, systemId) == NULL)) {
7048
0
        xmlErrMemory(ctxt);
7049
0
    }
7050
7051
0
    ctxt->inSubset = 2;
7052
0
    oldInputNr = ctxt->inputNr;
7053
7054
0
    SKIP_BLANKS;
7055
0
    while (!PARSER_STOPPED(ctxt)) {
7056
0
        if (ctxt->input->cur >= ctxt->input->end) {
7057
0
            if (ctxt->inputNr <= oldInputNr) {
7058
0
                xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
7059
0
                break;
7060
0
            }
7061
7062
0
            xmlPopPE(ctxt);
7063
0
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7064
0
            xmlParseConditionalSections(ctxt);
7065
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7066
0
            xmlParseMarkupDecl(ctxt);
7067
0
        } else if (RAW == '%') {
7068
0
            xmlParsePERefInternal(ctxt, 1);
7069
0
        } else {
7070
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7071
7072
0
            while (ctxt->inputNr > oldInputNr)
7073
0
                xmlPopPE(ctxt);
7074
0
            break;
7075
0
        }
7076
0
        SKIP_BLANKS;
7077
0
        SHRINK;
7078
0
        GROW;
7079
0
    }
7080
0
}
7081
7082
/**
7083
 * Parse and handle entity references in content, depending on the SAX
7084
 * interface, this may end-up in a call to character() if this is a
7085
 * CharRef, a predefined entity, if there is no reference() callback.
7086
 * or if the parser was asked to switch to that mode.
7087
 *
7088
 * @deprecated Internal function, don't use.
7089
 *
7090
 * Always consumes '&'.
7091
 *
7092
 *     [67] Reference ::= EntityRef | CharRef
7093
 * @param ctxt  an XML parser context
7094
 */
7095
void
7096
261k
xmlParseReference(xmlParserCtxt *ctxt) {
7097
261k
    xmlEntityPtr ent = NULL;
7098
261k
    const xmlChar *name;
7099
261k
    xmlChar *val;
7100
7101
261k
    if (RAW != '&')
7102
0
        return;
7103
7104
    /*
7105
     * Simple case of a CharRef
7106
     */
7107
261k
    if (NXT(1) == '#') {
7108
50.2k
  int i = 0;
7109
50.2k
  xmlChar out[16];
7110
50.2k
  int value = xmlParseCharRef(ctxt);
7111
7112
50.2k
  if (value == 0)
7113
29
      return;
7114
7115
        /*
7116
         * Just encode the value in UTF-8
7117
         */
7118
50.2k
        COPY_BUF(out, i, value);
7119
50.2k
        out[i] = 0;
7120
50.2k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7121
50.2k
            (!ctxt->disableSAX))
7122
50.2k
            ctxt->sax->characters(ctxt->userData, out, i);
7123
50.2k
  return;
7124
50.2k
    }
7125
7126
    /*
7127
     * We are seeing an entity reference
7128
     */
7129
210k
    name = xmlParseEntityRefInternal(ctxt);
7130
210k
    if (name == NULL)
7131
132
        return;
7132
210k
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7133
210k
    if (ent == NULL) {
7134
        /*
7135
         * Create a reference for undeclared entities.
7136
         */
7137
71
        if ((ctxt->replaceEntities == 0) &&
7138
0
            (ctxt->sax != NULL) &&
7139
0
            (ctxt->disableSAX == 0) &&
7140
0
            (ctxt->sax->reference != NULL)) {
7141
0
            ctxt->sax->reference(ctxt->userData, name);
7142
0
        }
7143
71
        return;
7144
71
    }
7145
210k
    if (!ctxt->wellFormed)
7146
0
  return;
7147
7148
    /* special case of predefined entities */
7149
210k
    if ((ent->name == NULL) ||
7150
210k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7151
210k
  val = ent->content;
7152
210k
  if (val == NULL) return;
7153
  /*
7154
   * inline the entity.
7155
   */
7156
210k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7157
210k
      (!ctxt->disableSAX))
7158
210k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7159
210k
  return;
7160
210k
    }
7161
7162
    /*
7163
     * Some users try to parse entities on their own and used to set
7164
     * the renamed "checked" member. Fix the flags to cover this
7165
     * case.
7166
     */
7167
1
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7168
0
        ent->flags |= XML_ENT_PARSED;
7169
7170
    /*
7171
     * The first reference to the entity triggers a parsing phase
7172
     * where the ent->children is filled with the result from
7173
     * the parsing.
7174
     * Note: external parsed entities will not be loaded, it is not
7175
     * required for a non-validating parser, unless the parsing option
7176
     * of validating, or substituting entities were given. Doing so is
7177
     * far more secure as the parser will only process data coming from
7178
     * the document entity by default.
7179
     *
7180
     * FIXME: This doesn't work correctly since entities can be
7181
     * expanded with different namespace declarations in scope.
7182
     * For example:
7183
     *
7184
     * <!DOCTYPE doc [
7185
     *   <!ENTITY ent "<ns:elem/>">
7186
     * ]>
7187
     * <doc>
7188
     *   <decl1 xmlns:ns="urn:ns1">
7189
     *     &ent;
7190
     *   </decl1>
7191
     *   <decl2 xmlns:ns="urn:ns2">
7192
     *     &ent;
7193
     *   </decl2>
7194
     * </doc>
7195
     *
7196
     * Proposed fix:
7197
     *
7198
     * - Ignore current namespace declarations when parsing the
7199
     *   entity. If a prefix can't be resolved, don't report an error
7200
     *   but mark it as unresolved.
7201
     * - Try to resolve these prefixes when expanding the entity.
7202
     *   This will require a specialized version of xmlStaticCopyNode
7203
     *   which can also make use of the namespace hash table to avoid
7204
     *   quadratic behavior.
7205
     *
7206
     * Alternatively, we could simply reparse the entity on each
7207
     * expansion like we already do with custom SAX callbacks.
7208
     * External entity content should be cached in this case.
7209
     */
7210
1
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7211
0
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7212
0
         ((ctxt->replaceEntities) ||
7213
0
          (ctxt->validate)))) {
7214
0
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7215
0
            xmlCtxtParseEntity(ctxt, ent);
7216
0
        } else if (ent->children == NULL) {
7217
            /*
7218
             * Probably running in SAX mode and the callbacks don't
7219
             * build the entity content. Parse the entity again.
7220
             *
7221
             * This will also be triggered in normal tree builder mode
7222
             * if an entity happens to be empty, causing unnecessary
7223
             * reloads. It's hard to come up with a reliable check in
7224
             * which mode we're running.
7225
             */
7226
0
            xmlCtxtParseEntity(ctxt, ent);
7227
0
        }
7228
0
    }
7229
7230
    /*
7231
     * We also check for amplification if entities aren't substituted.
7232
     * They might be expanded later.
7233
     */
7234
1
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7235
0
        return;
7236
7237
1
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7238
0
        return;
7239
7240
1
    if (ctxt->replaceEntities == 0) {
7241
  /*
7242
   * Create a reference
7243
   */
7244
0
        if (ctxt->sax->reference != NULL)
7245
0
      ctxt->sax->reference(ctxt->userData, ent->name);
7246
1
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7247
0
        xmlNodePtr copy, cur;
7248
7249
        /*
7250
         * Seems we are generating the DOM content, copy the tree
7251
   */
7252
0
        cur = ent->children;
7253
7254
        /*
7255
         * Handle first text node with SAX to coalesce text efficiently
7256
         */
7257
0
        if ((cur->type == XML_TEXT_NODE) ||
7258
0
            (cur->type == XML_CDATA_SECTION_NODE)) {
7259
0
            int len = xmlStrlen(cur->content);
7260
7261
0
            if ((cur->type == XML_TEXT_NODE) ||
7262
0
                (ctxt->options & XML_PARSE_NOCDATA)) {
7263
0
                if (ctxt->sax->characters != NULL)
7264
0
                    ctxt->sax->characters(ctxt->userData, cur->content, len);
7265
0
            } else {
7266
0
                if (ctxt->sax->cdataBlock != NULL)
7267
0
                    ctxt->sax->cdataBlock(ctxt->userData, cur->content, len);
7268
0
            }
7269
7270
0
            cur = cur->next;
7271
0
        }
7272
7273
0
        while (cur != NULL) {
7274
0
            xmlNodePtr last;
7275
7276
            /*
7277
             * Handle last text node with SAX to coalesce text efficiently
7278
             */
7279
0
            if ((cur->next == NULL) &&
7280
0
                ((cur->type == XML_TEXT_NODE) ||
7281
0
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7282
0
                int len = xmlStrlen(cur->content);
7283
7284
0
                if ((cur->type == XML_TEXT_NODE) ||
7285
0
                    (ctxt->options & XML_PARSE_NOCDATA)) {
7286
0
                    if (ctxt->sax->characters != NULL)
7287
0
                        ctxt->sax->characters(ctxt->userData, cur->content,
7288
0
                                              len);
7289
0
                } else {
7290
0
                    if (ctxt->sax->cdataBlock != NULL)
7291
0
                        ctxt->sax->cdataBlock(ctxt->userData, cur->content,
7292
0
                                              len);
7293
0
                }
7294
7295
0
                break;
7296
0
            }
7297
7298
            /*
7299
             * Reset coalesce buffer stats only for non-text nodes.
7300
             */
7301
0
            ctxt->nodemem = 0;
7302
0
            ctxt->nodelen = 0;
7303
7304
0
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7305
7306
0
            if (copy == NULL) {
7307
0
                xmlErrMemory(ctxt);
7308
0
                break;
7309
0
            }
7310
7311
0
            if (ctxt->parseMode == XML_PARSE_READER) {
7312
                /* Needed for reader */
7313
0
                copy->extra = cur->extra;
7314
                /* Maybe needed for reader */
7315
0
                copy->_private = cur->_private;
7316
0
            }
7317
7318
0
            copy->parent = ctxt->node;
7319
0
            last = ctxt->node->last;
7320
0
            if (last == NULL) {
7321
0
                ctxt->node->children = copy;
7322
0
            } else {
7323
0
                last->next = copy;
7324
0
                copy->prev = last;
7325
0
            }
7326
0
            ctxt->node->last = copy;
7327
7328
0
            cur = cur->next;
7329
0
        }
7330
0
    }
7331
1
}
7332
7333
static void
7334
16.2k
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7335
    /*
7336
     * [ WFC: Entity Declared ]
7337
     * In a document without any DTD, a document with only an
7338
     * internal DTD subset which contains no parameter entity
7339
     * references, or a document with "standalone='yes'", the
7340
     * Name given in the entity reference must match that in an
7341
     * entity declaration, except that well-formed documents
7342
     * need not declare any of the following entities: amp, lt,
7343
     * gt, apos, quot.
7344
     * The declaration of a parameter entity must precede any
7345
     * reference to it.
7346
     * Similarly, the declaration of a general entity must
7347
     * precede any reference to it which appears in a default
7348
     * value in an attribute-list declaration. Note that if
7349
     * entities are declared in the external subset or in
7350
     * external parameter entities, a non-validating processor
7351
     * is not obligated to read and process their declarations;
7352
     * for such documents, the rule that an entity must be
7353
     * declared is a well-formedness constraint only if
7354
     * standalone='yes'.
7355
     */
7356
16.2k
    if ((ctxt->standalone == 1) ||
7357
15.5k
        ((ctxt->hasExternalSubset == 0) &&
7358
15.5k
         (ctxt->hasPErefs == 0))) {
7359
8.63k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7360
8.63k
                          "Entity '%s' not defined\n", name);
7361
8.63k
#ifdef LIBXML_VALID_ENABLED
7362
8.63k
    } else if (ctxt->validate) {
7363
        /*
7364
         * [ VC: Entity Declared ]
7365
         * In a document with an external subset or external
7366
         * parameter entities with "standalone='no'", ...
7367
         * ... The declaration of a parameter entity must
7368
         * precede any reference to it...
7369
         */
7370
0
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7371
0
                         "Entity '%s' not defined\n", name, NULL);
7372
0
#endif
7373
7.57k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7374
7.57k
               ((ctxt->replaceEntities) &&
7375
7.57k
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7376
        /*
7377
         * Also raise a non-fatal error
7378
         *
7379
         * - if the external subset is loaded and all entity declarations
7380
         *   should be available, or
7381
         * - entity substitution was requested without restricting
7382
         *   external entity access.
7383
         */
7384
7.57k
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7385
7.57k
                     "Entity '%s' not defined\n", name);
7386
7.57k
    } else {
7387
0
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7388
0
                      "Entity '%s' not defined\n", name, NULL);
7389
0
    }
7390
7391
16.2k
    ctxt->valid = 0;
7392
16.2k
}
7393
7394
static xmlEntityPtr
7395
309k
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7396
309k
    xmlEntityPtr ent = NULL;
7397
7398
    /*
7399
     * Predefined entities override any extra definition
7400
     */
7401
309k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7402
309k
        ent = xmlGetPredefinedEntity(name);
7403
309k
        if (ent != NULL)
7404
300k
            return(ent);
7405
309k
    }
7406
7407
    /*
7408
     * Ask first SAX for entity resolution, otherwise try the
7409
     * entities which may have been stored in the parser context.
7410
     */
7411
8.88k
    if (ctxt->sax != NULL) {
7412
8.88k
  if (ctxt->sax->getEntity != NULL)
7413
8.88k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7414
8.88k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7415
287
      (ctxt->options & XML_PARSE_OLDSAX))
7416
0
      ent = xmlGetPredefinedEntity(name);
7417
8.88k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7418
287
      (ctxt->userData==ctxt)) {
7419
0
      ent = xmlSAX2GetEntity(ctxt, name);
7420
0
  }
7421
8.88k
    }
7422
7423
8.88k
    if (ent == NULL) {
7424
8.88k
        xmlHandleUndeclaredEntity(ctxt, name);
7425
8.88k
    }
7426
7427
    /*
7428
     * [ WFC: Parsed Entity ]
7429
     * An entity reference must not contain the name of an
7430
     * unparsed entity
7431
     */
7432
1
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7433
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7434
0
     "Entity reference to unparsed entity %s\n", name);
7435
0
        ent = NULL;
7436
0
    }
7437
7438
    /*
7439
     * [ WFC: No External Entity References ]
7440
     * Attribute values cannot contain direct or indirect
7441
     * entity references to external entities.
7442
     */
7443
1
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7444
0
        if (inAttr) {
7445
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7446
0
                 "Attribute references external entity '%s'\n", name);
7447
0
            ent = NULL;
7448
0
        }
7449
0
    }
7450
7451
8.88k
    return(ent);
7452
309k
}
7453
7454
/**
7455
 * Parse an entity reference. Always consumes '&'.
7456
 *
7457
 *     [68] EntityRef ::= '&' Name ';'
7458
 *
7459
 * @param ctxt  an XML parser context
7460
 * @returns the name, or NULL in case of error.
7461
 */
7462
static const xmlChar *
7463
478k
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7464
478k
    const xmlChar *name;
7465
7466
478k
    GROW;
7467
7468
478k
    if (RAW != '&')
7469
0
        return(NULL);
7470
478k
    NEXT;
7471
478k
    name = xmlParseName(ctxt);
7472
478k
    if (name == NULL) {
7473
92.4k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7474
92.4k
           "xmlParseEntityRef: no name\n");
7475
92.4k
        return(NULL);
7476
92.4k
    }
7477
386k
    if (RAW != ';') {
7478
76.6k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7479
76.6k
  return(NULL);
7480
76.6k
    }
7481
309k
    NEXT;
7482
7483
309k
    return(name);
7484
386k
}
7485
7486
/**
7487
 * @deprecated Internal function, don't use.
7488
 *
7489
 * @param ctxt  an XML parser context
7490
 * @returns the xmlEntity if found, or NULL otherwise.
7491
 */
7492
xmlEntity *
7493
0
xmlParseEntityRef(xmlParserCtxt *ctxt) {
7494
0
    const xmlChar *name;
7495
7496
0
    if (ctxt == NULL)
7497
0
        return(NULL);
7498
7499
0
    name = xmlParseEntityRefInternal(ctxt);
7500
0
    if (name == NULL)
7501
0
        return(NULL);
7502
7503
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7504
0
}
7505
7506
/**
7507
 * Parse an entity reference, but this version parses it from
7508
 * a string value.
7509
 *
7510
 *     [68] EntityRef ::= '&' Name ';'
7511
 *
7512
 * [ WFC: Entity Declared ]
7513
 * In a document without any DTD, a document with only an internal DTD
7514
 * subset which contains no parameter entity references, or a document
7515
 * with "standalone='yes'", the Name given in the entity reference
7516
 * must match that in an entity declaration, except that well-formed
7517
 * documents need not declare any of the following entities: amp, lt,
7518
 * gt, apos, quot.  The declaration of a parameter entity must precede
7519
 * any reference to it.  Similarly, the declaration of a general entity
7520
 * must precede any reference to it which appears in a default value in an
7521
 * attribute-list declaration. Note that if entities are declared in the
7522
 * external subset or in external parameter entities, a non-validating
7523
 * processor is not obligated to read and process their declarations;
7524
 * for such documents, the rule that an entity must be declared is a
7525
 * well-formedness constraint only if standalone='yes'.
7526
 *
7527
 * [ WFC: Parsed Entity ]
7528
 * An entity reference must not contain the name of an unparsed entity
7529
 *
7530
 * @param ctxt  an XML parser context
7531
 * @param str  a pointer to an index in the string
7532
 * @returns the entity name, or NULL in case of error. The str pointer
7533
 * is updated to the current location in the string.
7534
 */
7535
static xmlChar *
7536
0
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7537
0
    xmlChar *name;
7538
0
    const xmlChar *ptr;
7539
0
    xmlChar cur;
7540
7541
0
    if ((str == NULL) || (*str == NULL))
7542
0
        return(NULL);
7543
0
    ptr = *str;
7544
0
    cur = *ptr;
7545
0
    if (cur != '&')
7546
0
  return(NULL);
7547
7548
0
    ptr++;
7549
0
    name = xmlParseStringName(ctxt, &ptr);
7550
0
    if (name == NULL) {
7551
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7552
0
           "xmlParseStringEntityRef: no name\n");
7553
0
  *str = ptr;
7554
0
  return(NULL);
7555
0
    }
7556
0
    if (*ptr != ';') {
7557
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7558
0
        xmlFree(name);
7559
0
  *str = ptr;
7560
0
  return(NULL);
7561
0
    }
7562
0
    ptr++;
7563
7564
0
    *str = ptr;
7565
0
    return(name);
7566
0
}
7567
7568
/**
7569
 * Parse a parameter entity reference. Always consumes '%'.
7570
 *
7571
 * The entity content is handled directly by pushing its content as
7572
 * a new input stream.
7573
 *
7574
 *     [69] PEReference ::= '%' Name ';'
7575
 *
7576
 * [ WFC: No Recursion ]
7577
 * A parsed entity must not contain a recursive
7578
 * reference to itself, either directly or indirectly.
7579
 *
7580
 * [ WFC: Entity Declared ]
7581
 * In a document without any DTD, a document with only an internal DTD
7582
 * subset which contains no parameter entity references, or a document
7583
 * with "standalone='yes'", ...  ... The declaration of a parameter
7584
 * entity must precede any reference to it...
7585
 *
7586
 * [ VC: Entity Declared ]
7587
 * In a document with an external subset or external parameter entities
7588
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7589
 * must precede any reference to it...
7590
 *
7591
 * [ WFC: In DTD ]
7592
 * Parameter-entity references may only appear in the DTD.
7593
 * NOTE: misleading but this is handled.
7594
 *
7595
 * @param ctxt  an XML parser context
7596
 * @param markupDecl  whether the PERef starts a markup declaration
7597
 */
7598
static void
7599
7.68k
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl) {
7600
7.68k
    const xmlChar *name;
7601
7.68k
    xmlEntityPtr entity = NULL;
7602
7.68k
    xmlParserInputPtr input;
7603
7604
7.68k
    if (RAW != '%')
7605
0
        return;
7606
7.68k
    NEXT;
7607
7.68k
    name = xmlParseName(ctxt);
7608
7.68k
    if (name == NULL) {
7609
941
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7610
941
  return;
7611
941
    }
7612
6.74k
    if (RAW != ';') {
7613
14
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7614
14
        return;
7615
14
    }
7616
7617
6.72k
    NEXT;
7618
7619
    /* Must be set before xmlHandleUndeclaredEntity */
7620
6.72k
    ctxt->hasPErefs = 1;
7621
7622
    /*
7623
     * Request the entity from SAX
7624
     */
7625
6.72k
    if ((ctxt->sax != NULL) &&
7626
6.72k
  (ctxt->sax->getParameterEntity != NULL))
7627
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7628
7629
6.72k
    if (entity == NULL) {
7630
6.72k
        xmlHandleUndeclaredEntity(ctxt, name);
7631
6.72k
    } else {
7632
  /*
7633
   * Internal checking in case the entity quest barfed
7634
   */
7635
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7636
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7637
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7638
0
      "Internal: %%%s; is not a parameter entity\n",
7639
0
        name, NULL);
7640
0
  } else {
7641
0
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7642
0
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7643
0
     (((ctxt->loadsubset & ~XML_SKIP_IDS) == 0) &&
7644
0
      (ctxt->replaceEntities == 0) &&
7645
0
      (ctxt->validate == 0))))
7646
0
    return;
7647
7648
0
            if (entity->flags & XML_ENT_EXPANDING) {
7649
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7650
0
                return;
7651
0
            }
7652
7653
0
      input = xmlNewEntityInputStream(ctxt, entity);
7654
0
      if (xmlCtxtPushInput(ctxt, input) < 0) {
7655
0
                xmlFreeInputStream(input);
7656
0
    return;
7657
0
            }
7658
7659
0
            entity->flags |= XML_ENT_EXPANDING;
7660
7661
0
            if (markupDecl)
7662
0
                input->flags |= XML_INPUT_MARKUP_DECL;
7663
7664
0
            GROW;
7665
7666
0
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7667
0
                xmlDetectEncoding(ctxt);
7668
7669
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7670
0
                    (IS_BLANK_CH(NXT(5)))) {
7671
0
                    xmlParseTextDecl(ctxt);
7672
0
                }
7673
0
            }
7674
0
  }
7675
0
    }
7676
6.72k
}
7677
7678
/**
7679
 * Parse a parameter entity reference.
7680
 *
7681
 * @deprecated Internal function, don't use.
7682
 *
7683
 * @param ctxt  an XML parser context
7684
 */
7685
void
7686
0
xmlParsePEReference(xmlParserCtxt *ctxt) {
7687
0
    xmlParsePERefInternal(ctxt, 0);
7688
0
}
7689
7690
/**
7691
 * Load the content of an entity.
7692
 *
7693
 * @param ctxt  an XML parser context
7694
 * @param entity  an unloaded system entity
7695
 * @returns 0 in case of success and -1 in case of failure
7696
 */
7697
static int
7698
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7699
0
    xmlParserInputPtr oldinput, input = NULL;
7700
0
    xmlParserInputPtr *oldinputTab;
7701
0
    xmlChar *oldencoding;
7702
0
    xmlChar *content = NULL;
7703
0
    xmlResourceType rtype;
7704
0
    size_t length, i;
7705
0
    int oldinputNr, oldinputMax;
7706
0
    int ret = -1;
7707
0
    int res;
7708
7709
0
    if ((ctxt == NULL) || (entity == NULL) ||
7710
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7711
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7712
0
  (entity->content != NULL)) {
7713
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7714
0
              "xmlLoadEntityContent parameter error");
7715
0
        return(-1);
7716
0
    }
7717
7718
0
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7719
0
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7720
0
    else
7721
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7722
7723
0
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7724
0
                            (char *) entity->ExternalID, rtype);
7725
0
    if (input == NULL)
7726
0
        return(-1);
7727
7728
0
    oldinput = ctxt->input;
7729
0
    oldinputNr = ctxt->inputNr;
7730
0
    oldinputMax = ctxt->inputMax;
7731
0
    oldinputTab = ctxt->inputTab;
7732
0
    oldencoding = ctxt->encoding;
7733
7734
0
    ctxt->input = NULL;
7735
0
    ctxt->inputNr = 0;
7736
0
    ctxt->inputMax = 1;
7737
0
    ctxt->encoding = NULL;
7738
0
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7739
0
    if (ctxt->inputTab == NULL) {
7740
0
        xmlErrMemory(ctxt);
7741
0
        xmlFreeInputStream(input);
7742
0
        goto error;
7743
0
    }
7744
7745
0
    xmlBufResetInput(input->buf->buffer, input);
7746
7747
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
7748
0
        xmlFreeInputStream(input);
7749
0
        goto error;
7750
0
    }
7751
7752
0
    xmlDetectEncoding(ctxt);
7753
7754
    /*
7755
     * Parse a possible text declaration first
7756
     */
7757
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7758
0
  xmlParseTextDecl(ctxt);
7759
        /*
7760
         * An XML-1.0 document can't reference an entity not XML-1.0
7761
         */
7762
0
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7763
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7764
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7765
0
                           "Version mismatch between document and entity\n");
7766
0
        }
7767
0
    }
7768
7769
0
    length = input->cur - input->base;
7770
0
    xmlBufShrink(input->buf->buffer, length);
7771
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7772
7773
0
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7774
0
        ;
7775
7776
0
    xmlBufResetInput(input->buf->buffer, input);
7777
7778
0
    if (res < 0) {
7779
0
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7780
0
        goto error;
7781
0
    }
7782
7783
0
    length = xmlBufUse(input->buf->buffer);
7784
0
    if (length > INT_MAX) {
7785
0
        xmlErrMemory(ctxt);
7786
0
        goto error;
7787
0
    }
7788
7789
0
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
7790
0
    if (content == NULL) {
7791
0
        xmlErrMemory(ctxt);
7792
0
        goto error;
7793
0
    }
7794
7795
0
    for (i = 0; i < length; ) {
7796
0
        int clen = length - i;
7797
0
        int c = xmlGetUTF8Char(content + i, &clen);
7798
7799
0
        if ((c < 0) || (!IS_CHAR(c))) {
7800
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7801
0
                              "xmlLoadEntityContent: invalid char value %d\n",
7802
0
                              content[i]);
7803
0
            goto error;
7804
0
        }
7805
0
        i += clen;
7806
0
    }
7807
7808
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7809
0
    entity->content = content;
7810
0
    entity->length = length;
7811
0
    content = NULL;
7812
0
    ret = 0;
7813
7814
0
error:
7815
0
    while (ctxt->inputNr > 0)
7816
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
7817
0
    xmlFree(ctxt->inputTab);
7818
0
    xmlFree(ctxt->encoding);
7819
7820
0
    ctxt->input = oldinput;
7821
0
    ctxt->inputNr = oldinputNr;
7822
0
    ctxt->inputMax = oldinputMax;
7823
0
    ctxt->inputTab = oldinputTab;
7824
0
    ctxt->encoding = oldencoding;
7825
7826
0
    xmlFree(content);
7827
7828
0
    return(ret);
7829
0
}
7830
7831
/**
7832
 * Parse PEReference declarations
7833
 *
7834
 *     [69] PEReference ::= '%' Name ';'
7835
 *
7836
 * [ WFC: No Recursion ]
7837
 * A parsed entity must not contain a recursive
7838
 * reference to itself, either directly or indirectly.
7839
 *
7840
 * [ WFC: Entity Declared ]
7841
 * In a document without any DTD, a document with only an internal DTD
7842
 * subset which contains no parameter entity references, or a document
7843
 * with "standalone='yes'", ...  ... The declaration of a parameter
7844
 * entity must precede any reference to it...
7845
 *
7846
 * [ VC: Entity Declared ]
7847
 * In a document with an external subset or external parameter entities
7848
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7849
 * must precede any reference to it...
7850
 *
7851
 * [ WFC: In DTD ]
7852
 * Parameter-entity references may only appear in the DTD.
7853
 * NOTE: misleading but this is handled.
7854
 *
7855
 * @param ctxt  an XML parser context
7856
 * @param str  a pointer to an index in the string
7857
 * @returns the referenced entity if found, or NULL otherwise.
7858
 *         str is updated to the current value of the index
7859
 */
7860
static xmlEntityPtr
7861
1.64k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7862
1.64k
    const xmlChar *ptr;
7863
1.64k
    xmlChar cur;
7864
1.64k
    xmlChar *name;
7865
1.64k
    xmlEntityPtr entity = NULL;
7866
7867
1.64k
    if ((str == NULL) || (*str == NULL)) return(NULL);
7868
1.64k
    ptr = *str;
7869
1.64k
    cur = *ptr;
7870
1.64k
    if (cur != '%')
7871
0
        return(NULL);
7872
1.64k
    ptr++;
7873
1.64k
    name = xmlParseStringName(ctxt, &ptr);
7874
1.64k
    if (name == NULL) {
7875
587
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7876
587
           "xmlParseStringPEReference: no name\n");
7877
587
  *str = ptr;
7878
587
  return(NULL);
7879
587
    }
7880
1.05k
    cur = *ptr;
7881
1.05k
    if (cur != ';') {
7882
459
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7883
459
  xmlFree(name);
7884
459
  *str = ptr;
7885
459
  return(NULL);
7886
459
    }
7887
598
    ptr++;
7888
7889
    /* Must be set before xmlHandleUndeclaredEntity */
7890
598
    ctxt->hasPErefs = 1;
7891
7892
    /*
7893
     * Request the entity from SAX
7894
     */
7895
598
    if ((ctxt->sax != NULL) &&
7896
598
  (ctxt->sax->getParameterEntity != NULL))
7897
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7898
7899
598
    if (entity == NULL) {
7900
598
        xmlHandleUndeclaredEntity(ctxt, name);
7901
598
    } else {
7902
  /*
7903
   * Internal checking in case the entity quest barfed
7904
   */
7905
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7906
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7907
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7908
0
        "%%%s; is not a parameter entity\n",
7909
0
        name, NULL);
7910
0
  }
7911
0
    }
7912
7913
598
    xmlFree(name);
7914
598
    *str = ptr;
7915
598
    return(entity);
7916
1.05k
}
7917
7918
/**
7919
 * Parse a DOCTYPE declaration
7920
 *
7921
 * @deprecated Internal function, don't use.
7922
 *
7923
 *     [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7924
 *                          ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7925
 *
7926
 * [ VC: Root Element Type ]
7927
 * The Name in the document type declaration must match the element
7928
 * type of the root element.
7929
 *
7930
 * @param ctxt  an XML parser context
7931
 */
7932
7933
void
7934
1.80k
xmlParseDocTypeDecl(xmlParserCtxt *ctxt) {
7935
1.80k
    const xmlChar *name = NULL;
7936
1.80k
    xmlChar *publicId = NULL;
7937
1.80k
    xmlChar *URI = NULL;
7938
7939
    /*
7940
     * We know that '<!DOCTYPE' has been detected.
7941
     */
7942
1.80k
    SKIP(9);
7943
7944
1.80k
    if (SKIP_BLANKS == 0) {
7945
36
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7946
36
                       "Space required after 'DOCTYPE'\n");
7947
36
    }
7948
7949
    /*
7950
     * Parse the DOCTYPE name.
7951
     */
7952
1.80k
    name = xmlParseName(ctxt);
7953
1.80k
    if (name == NULL) {
7954
3
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7955
3
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7956
3
    }
7957
1.80k
    ctxt->intSubName = name;
7958
7959
1.80k
    SKIP_BLANKS;
7960
7961
    /*
7962
     * Check for public and system identifier (URI)
7963
     */
7964
1.80k
    URI = xmlParseExternalID(ctxt, &publicId, 1);
7965
7966
1.80k
    if ((URI != NULL) || (publicId != NULL)) {
7967
77
        ctxt->hasExternalSubset = 1;
7968
77
    }
7969
1.80k
    ctxt->extSubURI = URI;
7970
1.80k
    ctxt->extSubSystem = publicId;
7971
7972
1.80k
    SKIP_BLANKS;
7973
7974
    /*
7975
     * Create and update the internal subset.
7976
     */
7977
1.80k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7978
0
  (!ctxt->disableSAX))
7979
0
  ctxt->sax->internalSubset(ctxt->userData, name, publicId, URI);
7980
7981
1.80k
    if ((RAW != '[') && (RAW != '>')) {
7982
50
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7983
50
    }
7984
1.80k
}
7985
7986
/**
7987
 * Parse the internal subset declaration
7988
 *
7989
 *     [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7990
 * @param ctxt  an XML parser context
7991
 */
7992
7993
static void
7994
1.67k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7995
    /*
7996
     * Is there any DTD definition ?
7997
     */
7998
1.67k
    if (RAW == '[') {
7999
1.67k
        int oldInputNr = ctxt->inputNr;
8000
8001
1.67k
        NEXT;
8002
  /*
8003
   * Parse the succession of Markup declarations and
8004
   * PEReferences.
8005
   * Subsequence (markupdecl | PEReference | S)*
8006
   */
8007
1.67k
  SKIP_BLANKS;
8008
57.8k
        while (1) {
8009
57.8k
            if (PARSER_STOPPED(ctxt)) {
8010
4
                return;
8011
57.8k
            } else if (ctxt->input->cur >= ctxt->input->end) {
8012
423
                if (ctxt->inputNr <= oldInputNr) {
8013
423
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8014
423
                    return;
8015
423
                }
8016
0
                xmlPopPE(ctxt);
8017
57.4k
            } else if ((RAW == ']') && (ctxt->inputNr <= oldInputNr)) {
8018
515
                NEXT;
8019
515
                SKIP_BLANKS;
8020
515
                break;
8021
56.8k
            } else if ((PARSER_EXTERNAL(ctxt)) &&
8022
0
                       (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8023
                /*
8024
                 * Conditional sections are allowed in external entities
8025
                 * included by PE References in the internal subset.
8026
                 */
8027
0
                xmlParseConditionalSections(ctxt);
8028
56.8k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8029
48.4k
                xmlParseMarkupDecl(ctxt);
8030
48.4k
            } else if (RAW == '%') {
8031
7.68k
                xmlParsePERefInternal(ctxt, 1);
8032
7.68k
            } else {
8033
735
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8034
8035
735
                while (ctxt->inputNr > oldInputNr)
8036
0
                    xmlPopPE(ctxt);
8037
735
                return;
8038
735
            }
8039
56.1k
            SKIP_BLANKS;
8040
56.1k
            SHRINK;
8041
56.1k
            GROW;
8042
56.1k
        }
8043
1.67k
    }
8044
8045
    /*
8046
     * We should be at the end of the DOCTYPE declaration.
8047
     */
8048
515
    if (RAW != '>') {
8049
19
        xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8050
19
        return;
8051
19
    }
8052
496
    NEXT;
8053
496
}
8054
8055
#ifdef LIBXML_SAX1_ENABLED
8056
/**
8057
 * Parse an attribute
8058
 *
8059
 * @deprecated Internal function, don't use.
8060
 *
8061
 *     [41] Attribute ::= Name Eq AttValue
8062
 *
8063
 * [ WFC: No External Entity References ]
8064
 * Attribute values cannot contain direct or indirect entity references
8065
 * to external entities.
8066
 *
8067
 * [ WFC: No < in Attribute Values ]
8068
 * The replacement text of any entity referred to directly or indirectly in
8069
 * an attribute value (other than "&lt;") must not contain a <.
8070
 *
8071
 * [ VC: Attribute Value Type ]
8072
 * The attribute must have been declared; the value must be of the type
8073
 * declared for it.
8074
 *
8075
 *     [25] Eq ::= S? '=' S?
8076
 *
8077
 * With namespace:
8078
 *
8079
 *     [NS 11] Attribute ::= QName Eq AttValue
8080
 *
8081
 * Also the case QName == xmlns:??? is handled independently as a namespace
8082
 * definition.
8083
 *
8084
 * @param ctxt  an XML parser context
8085
 * @param value  a xmlChar ** used to store the value of the attribute
8086
 * @returns the attribute name, and the value in *value.
8087
 */
8088
8089
const xmlChar *
8090
0
xmlParseAttribute(xmlParserCtxt *ctxt, xmlChar **value) {
8091
0
    const xmlChar *name;
8092
0
    xmlChar *val;
8093
8094
0
    *value = NULL;
8095
0
    GROW;
8096
0
    name = xmlParseName(ctxt);
8097
0
    if (name == NULL) {
8098
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8099
0
                 "error parsing attribute name\n");
8100
0
        return(NULL);
8101
0
    }
8102
8103
    /*
8104
     * read the value
8105
     */
8106
0
    SKIP_BLANKS;
8107
0
    if (RAW == '=') {
8108
0
        NEXT;
8109
0
  SKIP_BLANKS;
8110
0
  val = xmlParseAttValue(ctxt);
8111
0
    } else {
8112
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8113
0
         "Specification mandates value for attribute %s\n", name);
8114
0
  return(name);
8115
0
    }
8116
8117
    /*
8118
     * Check that xml:lang conforms to the specification
8119
     * No more registered as an error, just generate a warning now
8120
     * since this was deprecated in XML second edition
8121
     */
8122
0
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8123
0
  if (!xmlCheckLanguageID(val)) {
8124
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8125
0
              "Malformed value for xml:lang : %s\n",
8126
0
        val, NULL);
8127
0
  }
8128
0
    }
8129
8130
    /*
8131
     * Check that xml:space conforms to the specification
8132
     */
8133
0
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8134
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8135
0
      *(ctxt->space) = 0;
8136
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8137
0
      *(ctxt->space) = 1;
8138
0
  else {
8139
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8140
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8141
0
                                 val, NULL);
8142
0
  }
8143
0
    }
8144
8145
0
    *value = val;
8146
0
    return(name);
8147
0
}
8148
8149
/**
8150
 * Parse a start tag. Always consumes '<'.
8151
 *
8152
 * @deprecated Internal function, don't use.
8153
 *
8154
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8155
 *
8156
 * [ WFC: Unique Att Spec ]
8157
 * No attribute name may appear more than once in the same start-tag or
8158
 * empty-element tag.
8159
 *
8160
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8161
 *
8162
 * [ WFC: Unique Att Spec ]
8163
 * No attribute name may appear more than once in the same start-tag or
8164
 * empty-element tag.
8165
 *
8166
 * With namespace:
8167
 *
8168
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8169
 *
8170
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8171
 *
8172
 * @param ctxt  an XML parser context
8173
 * @returns the element name parsed
8174
 */
8175
8176
const xmlChar *
8177
0
xmlParseStartTag(xmlParserCtxt *ctxt) {
8178
0
    const xmlChar *name;
8179
0
    const xmlChar *attname;
8180
0
    xmlChar *attvalue;
8181
0
    const xmlChar **atts = ctxt->atts;
8182
0
    int nbatts = 0;
8183
0
    int maxatts = ctxt->maxatts;
8184
0
    int i;
8185
8186
0
    if (RAW != '<') return(NULL);
8187
0
    NEXT1;
8188
8189
0
    name = xmlParseName(ctxt);
8190
0
    if (name == NULL) {
8191
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8192
0
       "xmlParseStartTag: invalid element name\n");
8193
0
        return(NULL);
8194
0
    }
8195
8196
    /*
8197
     * Now parse the attributes, it ends up with the ending
8198
     *
8199
     * (S Attribute)* S?
8200
     */
8201
0
    SKIP_BLANKS;
8202
0
    GROW;
8203
8204
0
    while (((RAW != '>') &&
8205
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8206
0
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8207
0
  attname = xmlParseAttribute(ctxt, &attvalue);
8208
0
        if (attname == NULL)
8209
0
      break;
8210
0
        if (attvalue != NULL) {
8211
      /*
8212
       * [ WFC: Unique Att Spec ]
8213
       * No attribute name may appear more than once in the same
8214
       * start-tag or empty-element tag.
8215
       */
8216
0
      for (i = 0; i < nbatts;i += 2) {
8217
0
          if (xmlStrEqual(atts[i], attname)) {
8218
0
        xmlErrAttributeDup(ctxt, NULL, attname);
8219
0
        goto failed;
8220
0
    }
8221
0
      }
8222
      /*
8223
       * Add the pair to atts
8224
       */
8225
0
      if (nbatts + 4 > maxatts) {
8226
0
          const xmlChar **n;
8227
0
                int newSize;
8228
8229
0
                newSize = xmlGrowCapacity(maxatts, sizeof(n[0]) * 2,
8230
0
                                          11, XML_MAX_ATTRS);
8231
0
                if (newSize < 0) {
8232
0
        xmlErrMemory(ctxt);
8233
0
        goto failed;
8234
0
    }
8235
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
8236
0
                if (newSize < 2)
8237
0
                    newSize = 2;
8238
0
#endif
8239
0
          n = xmlRealloc(atts, newSize * sizeof(n[0]) * 2);
8240
0
    if (n == NULL) {
8241
0
        xmlErrMemory(ctxt);
8242
0
        goto failed;
8243
0
    }
8244
0
    atts = n;
8245
0
                maxatts = newSize * 2;
8246
0
    ctxt->atts = atts;
8247
0
    ctxt->maxatts = maxatts;
8248
0
      }
8249
8250
0
      atts[nbatts++] = attname;
8251
0
      atts[nbatts++] = attvalue;
8252
0
      atts[nbatts] = NULL;
8253
0
      atts[nbatts + 1] = NULL;
8254
8255
0
            attvalue = NULL;
8256
0
  }
8257
8258
0
failed:
8259
8260
0
        if (attvalue != NULL)
8261
0
            xmlFree(attvalue);
8262
8263
0
  GROW
8264
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8265
0
      break;
8266
0
  if (SKIP_BLANKS == 0) {
8267
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8268
0
         "attributes construct error\n");
8269
0
  }
8270
0
  SHRINK;
8271
0
        GROW;
8272
0
    }
8273
8274
    /*
8275
     * SAX: Start of Element !
8276
     */
8277
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8278
0
  (!ctxt->disableSAX)) {
8279
0
  if (nbatts > 0)
8280
0
      ctxt->sax->startElement(ctxt->userData, name, atts);
8281
0
  else
8282
0
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8283
0
    }
8284
8285
0
    if (atts != NULL) {
8286
        /* Free only the content strings */
8287
0
        for (i = 1;i < nbatts;i+=2)
8288
0
      if (atts[i] != NULL)
8289
0
         xmlFree((xmlChar *) atts[i]);
8290
0
    }
8291
0
    return(name);
8292
0
}
8293
8294
/**
8295
 * Parse an end tag. Always consumes '</'.
8296
 *
8297
 *     [42] ETag ::= '</' Name S? '>'
8298
 *
8299
 * With namespace
8300
 *
8301
 *     [NS 9] ETag ::= '</' QName S? '>'
8302
 * @param ctxt  an XML parser context
8303
 * @param line  line of the start tag
8304
 */
8305
8306
static void
8307
0
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8308
0
    const xmlChar *name;
8309
8310
0
    GROW;
8311
0
    if ((RAW != '<') || (NXT(1) != '/')) {
8312
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8313
0
           "xmlParseEndTag: '</' not found\n");
8314
0
  return;
8315
0
    }
8316
0
    SKIP(2);
8317
8318
0
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8319
8320
    /*
8321
     * We should definitely be at the ending "S? '>'" part
8322
     */
8323
0
    GROW;
8324
0
    SKIP_BLANKS;
8325
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8326
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8327
0
    } else
8328
0
  NEXT1;
8329
8330
    /*
8331
     * [ WFC: Element Type Match ]
8332
     * The Name in an element's end-tag must match the element type in the
8333
     * start-tag.
8334
     *
8335
     */
8336
0
    if (name != (xmlChar*)1) {
8337
0
        if (name == NULL) name = BAD_CAST "unparsable";
8338
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8339
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
8340
0
                    ctxt->name, line, name);
8341
0
    }
8342
8343
    /*
8344
     * SAX: End of Tag
8345
     */
8346
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8347
0
  (!ctxt->disableSAX))
8348
0
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8349
8350
0
    namePop(ctxt);
8351
0
    spacePop(ctxt);
8352
0
}
8353
8354
/**
8355
 * Parse an end of tag
8356
 *
8357
 * @deprecated Internal function, don't use.
8358
 *
8359
 *     [42] ETag ::= '</' Name S? '>'
8360
 *
8361
 * With namespace
8362
 *
8363
 *     [NS 9] ETag ::= '</' QName S? '>'
8364
 * @param ctxt  an XML parser context
8365
 */
8366
8367
void
8368
0
xmlParseEndTag(xmlParserCtxt *ctxt) {
8369
0
    xmlParseEndTag1(ctxt, 0);
8370
0
}
8371
#endif /* LIBXML_SAX1_ENABLED */
8372
8373
/************************************************************************
8374
 *                  *
8375
 *          SAX 2 specific operations       *
8376
 *                  *
8377
 ************************************************************************/
8378
8379
/**
8380
 * Parse an XML Namespace QName
8381
 *
8382
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8383
 *     [7]  Prefix  ::= NCName
8384
 *     [8]  LocalPart  ::= NCName
8385
 *
8386
 * @param ctxt  an XML parser context
8387
 * @param prefix  pointer to store the prefix part
8388
 * @returns the Name parsed or NULL
8389
 */
8390
8391
static xmlHashedString
8392
43.9M
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8393
43.9M
    xmlHashedString l, p;
8394
43.9M
    int start, isNCName = 0;
8395
8396
43.9M
    l.name = NULL;
8397
43.9M
    p.name = NULL;
8398
8399
43.9M
    GROW;
8400
43.9M
    start = CUR_PTR - BASE_PTR;
8401
8402
43.9M
    l = xmlParseNCName(ctxt);
8403
43.9M
    if (l.name != NULL) {
8404
43.8M
        isNCName = 1;
8405
43.8M
        if (CUR == ':') {
8406
17.0M
            NEXT;
8407
17.0M
            p = l;
8408
17.0M
            l = xmlParseNCName(ctxt);
8409
17.0M
        }
8410
43.8M
    }
8411
43.9M
    if ((l.name == NULL) || (CUR == ':')) {
8412
295k
        xmlChar *tmp;
8413
8414
295k
        l.name = NULL;
8415
295k
        p.name = NULL;
8416
295k
        if ((isNCName == 0) && (CUR != ':'))
8417
47.6k
            return(l);
8418
247k
        tmp = xmlParseNmtoken(ctxt);
8419
247k
        if (tmp != NULL)
8420
219k
            xmlFree(tmp);
8421
247k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8422
247k
                                CUR_PTR - (BASE_PTR + start));
8423
247k
        if (l.name == NULL) {
8424
0
            xmlErrMemory(ctxt);
8425
0
            return(l);
8426
0
        }
8427
247k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8428
247k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8429
247k
    }
8430
8431
43.9M
    *prefix = p;
8432
43.9M
    return(l);
8433
43.9M
}
8434
8435
/**
8436
 * Parse an XML Namespace QName
8437
 *
8438
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8439
 *     [7]  Prefix  ::= NCName
8440
 *     [8]  LocalPart  ::= NCName
8441
 *
8442
 * @param ctxt  an XML parser context
8443
 * @param prefix  pointer to store the prefix part
8444
 * @returns the Name parsed or NULL
8445
 */
8446
8447
static const xmlChar *
8448
8.54k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8449
8.54k
    xmlHashedString n, p;
8450
8451
8.54k
    n = xmlParseQNameHashed(ctxt, &p);
8452
8.54k
    if (n.name == NULL)
8453
486
        return(NULL);
8454
8.05k
    *prefix = p.name;
8455
8.05k
    return(n.name);
8456
8.54k
}
8457
8458
/**
8459
 * Parse an XML name and compares for match
8460
 * (specialized for endtag parsing)
8461
 *
8462
 * @param ctxt  an XML parser context
8463
 * @param name  the localname
8464
 * @param prefix  the prefix, if any.
8465
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
8466
 * and the name for mismatch
8467
 */
8468
8469
static const xmlChar *
8470
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8471
4.86M
                        xmlChar const *prefix) {
8472
4.86M
    const xmlChar *cmp;
8473
4.86M
    const xmlChar *in;
8474
4.86M
    const xmlChar *ret;
8475
4.86M
    const xmlChar *prefix2;
8476
8477
4.86M
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8478
8479
4.86M
    GROW;
8480
4.86M
    in = ctxt->input->cur;
8481
8482
4.86M
    cmp = prefix;
8483
13.8M
    while (*in != 0 && *in == *cmp) {
8484
8.94M
  ++in;
8485
8.94M
  ++cmp;
8486
8.94M
    }
8487
4.86M
    if ((*cmp == 0) && (*in == ':')) {
8488
4.85M
        in++;
8489
4.85M
  cmp = name;
8490
32.0M
  while (*in != 0 && *in == *cmp) {
8491
27.2M
      ++in;
8492
27.2M
      ++cmp;
8493
27.2M
  }
8494
4.85M
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8495
      /* success */
8496
4.85M
            ctxt->input->col += in - ctxt->input->cur;
8497
4.85M
      ctxt->input->cur = in;
8498
4.85M
      return((const xmlChar*) 1);
8499
4.85M
  }
8500
4.85M
    }
8501
    /*
8502
     * all strings coms from the dictionary, equality can be done directly
8503
     */
8504
8.55k
    ret = xmlParseQName (ctxt, &prefix2);
8505
8.55k
    if (ret == NULL)
8506
486
        return(NULL);
8507
8.06k
    if ((ret == name) && (prefix == prefix2))
8508
215
  return((const xmlChar*) 1);
8509
7.84k
    return ret;
8510
8.06k
}
8511
8512
/**
8513
 * Parse an attribute in the new SAX2 framework.
8514
 *
8515
 * @param ctxt  an XML parser context
8516
 * @param pref  the element prefix
8517
 * @param elem  the element name
8518
 * @param hprefix  resulting attribute prefix
8519
 * @param value  resulting value of the attribute
8520
 * @param len  resulting length of the attribute
8521
 * @param alloc  resulting indicator if the attribute was allocated
8522
 * @returns the attribute name, and the value in *value, .
8523
 */
8524
8525
static xmlHashedString
8526
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8527
                   const xmlChar * pref, const xmlChar * elem,
8528
                   xmlHashedString * hprefix, xmlChar ** value,
8529
                   int *len, int *alloc)
8530
19.8M
{
8531
19.8M
    xmlHashedString hname;
8532
19.8M
    const xmlChar *prefix, *name;
8533
19.8M
    xmlChar *val = NULL, *internal_val = NULL;
8534
19.8M
    int special = 0;
8535
19.8M
    int isNamespace;
8536
19.8M
    int flags;
8537
8538
19.8M
    *value = NULL;
8539
19.8M
    GROW;
8540
19.8M
    hname = xmlParseQNameHashed(ctxt, hprefix);
8541
19.8M
    if (hname.name == NULL) {
8542
38.7k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8543
38.7k
                       "error parsing attribute name\n");
8544
38.7k
        return(hname);
8545
38.7k
    }
8546
19.7M
    name = hname.name;
8547
19.7M
    prefix = hprefix->name;
8548
8549
    /*
8550
     * get the type if needed
8551
     */
8552
19.7M
    if (ctxt->attsSpecial != NULL) {
8553
8.73k
        special = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
8554
8.73k
                                              prefix, name));
8555
8.73k
    }
8556
8557
    /*
8558
     * read the value
8559
     */
8560
19.7M
    SKIP_BLANKS;
8561
19.7M
    if (RAW != '=') {
8562
12.0k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8563
12.0k
                          "Specification mandates value for attribute %s\n",
8564
12.0k
                          name);
8565
12.0k
        goto error;
8566
12.0k
    }
8567
8568
8569
19.7M
    NEXT;
8570
19.7M
    SKIP_BLANKS;
8571
19.7M
    flags = 0;
8572
19.7M
    isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8573
19.5M
                   (prefix == ctxt->str_xmlns));
8574
19.7M
    val = xmlParseAttValueInternal(ctxt, len, &flags, special,
8575
19.7M
                                   isNamespace);
8576
19.7M
    if (val == NULL)
8577
5.32k
        goto error;
8578
8579
19.7M
    *alloc = (flags & XML_ATTVAL_ALLOC) != 0;
8580
8581
19.7M
#ifdef LIBXML_VALID_ENABLED
8582
19.7M
    if ((ctxt->validate) &&
8583
0
        (ctxt->standalone == 1) &&
8584
0
        (special & XML_SPECIAL_EXTERNAL) &&
8585
0
        (flags & XML_ATTVAL_NORM_CHANGE)) {
8586
0
        xmlValidityError(ctxt, XML_DTD_NOT_STANDALONE,
8587
0
                         "standalone: normalization of attribute %s on %s "
8588
0
                         "by external subset declaration\n",
8589
0
                         name, elem);
8590
0
    }
8591
19.7M
#endif
8592
8593
19.7M
    if (prefix == ctxt->str_xml) {
8594
        /*
8595
         * Check that xml:lang conforms to the specification
8596
         * No more registered as an error, just generate a warning now
8597
         * since this was deprecated in XML second edition
8598
         */
8599
83.3k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8600
0
            internal_val = xmlStrndup(val, *len);
8601
0
            if (internal_val == NULL)
8602
0
                goto mem_error;
8603
0
            if (!xmlCheckLanguageID(internal_val)) {
8604
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8605
0
                              "Malformed value for xml:lang : %s\n",
8606
0
                              internal_val, NULL);
8607
0
            }
8608
0
        }
8609
8610
        /*
8611
         * Check that xml:space conforms to the specification
8612
         */
8613
83.3k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8614
71.3k
            internal_val = xmlStrndup(val, *len);
8615
71.3k
            if (internal_val == NULL)
8616
0
                goto mem_error;
8617
71.3k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8618
0
                *(ctxt->space) = 0;
8619
71.3k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8620
70.2k
                *(ctxt->space) = 1;
8621
1.06k
            else {
8622
1.06k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8623
1.06k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8624
1.06k
                              internal_val, NULL);
8625
1.06k
            }
8626
71.3k
        }
8627
83.3k
        if (internal_val) {
8628
71.3k
            xmlFree(internal_val);
8629
71.3k
        }
8630
83.3k
    }
8631
8632
19.7M
    *value = val;
8633
19.7M
    return (hname);
8634
8635
0
mem_error:
8636
0
    xmlErrMemory(ctxt);
8637
17.4k
error:
8638
17.4k
    if ((val != NULL) && (*alloc != 0))
8639
0
        xmlFree(val);
8640
17.4k
    return(hname);
8641
0
}
8642
8643
/**
8644
 * Inserts a new attribute into the hash table.
8645
 *
8646
 * @param ctxt  parser context
8647
 * @param size  size of the hash table
8648
 * @param name  attribute name
8649
 * @param uri  namespace uri
8650
 * @param hashValue  combined hash value of name and uri
8651
 * @param aindex  attribute index (this is a multiple of 5)
8652
 * @returns INT_MAX if no existing attribute was found, the attribute
8653
 * index if an attribute was found, -1 if a memory allocation failed.
8654
 */
8655
static int
8656
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8657
14.7M
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8658
14.7M
    xmlAttrHashBucket *table = ctxt->attrHash;
8659
14.7M
    xmlAttrHashBucket *bucket;
8660
14.7M
    unsigned hindex;
8661
8662
14.7M
    hindex = hashValue & (size - 1);
8663
14.7M
    bucket = &table[hindex];
8664
8665
17.3M
    while (bucket->index >= 0) {
8666
2.62M
        const xmlChar **atts = &ctxt->atts[bucket->index];
8667
8668
2.62M
        if (name == atts[0]) {
8669
151k
            int nsIndex = XML_PTR_TO_INT(atts[2]);
8670
8671
151k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8672
151k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8673
2.05k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8674
47.8k
                return(bucket->index);
8675
151k
        }
8676
8677
2.57M
        hindex++;
8678
2.57M
        bucket++;
8679
2.57M
        if (hindex >= size) {
8680
950k
            hindex = 0;
8681
950k
            bucket = table;
8682
950k
        }
8683
2.57M
    }
8684
8685
14.7M
    bucket->index = aindex;
8686
8687
14.7M
    return(INT_MAX);
8688
14.7M
}
8689
8690
static int
8691
xmlAttrHashInsertQName(xmlParserCtxtPtr ctxt, unsigned size,
8692
                       const xmlChar *name, const xmlChar *prefix,
8693
14
                       unsigned hashValue, int aindex) {
8694
14
    xmlAttrHashBucket *table = ctxt->attrHash;
8695
14
    xmlAttrHashBucket *bucket;
8696
14
    unsigned hindex;
8697
8698
14
    hindex = hashValue & (size - 1);
8699
14
    bucket = &table[hindex];
8700
8701
15
    while (bucket->index >= 0) {
8702
5
        const xmlChar **atts = &ctxt->atts[bucket->index];
8703
8704
5
        if ((name == atts[0]) && (prefix == atts[1]))
8705
4
            return(bucket->index);
8706
8707
1
        hindex++;
8708
1
        bucket++;
8709
1
        if (hindex >= size) {
8710
0
            hindex = 0;
8711
0
            bucket = table;
8712
0
        }
8713
1
    }
8714
8715
10
    bucket->index = aindex;
8716
8717
10
    return(INT_MAX);
8718
14
}
8719
/**
8720
 * Parse a start tag. Always consumes '<'.
8721
 *
8722
 * This routine is called when running SAX2 parsing
8723
 *
8724
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8725
 *
8726
 * [ WFC: Unique Att Spec ]
8727
 * No attribute name may appear more than once in the same start-tag or
8728
 * empty-element tag.
8729
 *
8730
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8731
 *
8732
 * [ WFC: Unique Att Spec ]
8733
 * No attribute name may appear more than once in the same start-tag or
8734
 * empty-element tag.
8735
 *
8736
 * With namespace:
8737
 *
8738
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8739
 *
8740
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8741
 *
8742
 * @param ctxt  an XML parser context
8743
 * @param pref  resulting namespace prefix
8744
 * @param URI  resulting namespace URI
8745
 * @param nbNsPtr  resulting number of namespace declarations
8746
 * @returns the element name parsed
8747
 */
8748
8749
static const xmlChar *
8750
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8751
24.1M
                  const xmlChar **URI, int *nbNsPtr) {
8752
24.1M
    xmlHashedString hlocalname;
8753
24.1M
    xmlHashedString hprefix;
8754
24.1M
    xmlHashedString hattname;
8755
24.1M
    xmlHashedString haprefix;
8756
24.1M
    const xmlChar *localname;
8757
24.1M
    const xmlChar *prefix;
8758
24.1M
    const xmlChar *attname;
8759
24.1M
    const xmlChar *aprefix;
8760
24.1M
    const xmlChar *uri;
8761
24.1M
    xmlChar *attvalue = NULL;
8762
24.1M
    const xmlChar **atts = ctxt->atts;
8763
24.1M
    unsigned attrHashSize = 0;
8764
24.1M
    int maxatts = ctxt->maxatts;
8765
24.1M
    int nratts, nbatts, nbdef;
8766
24.1M
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8767
24.1M
    int alloc = 0;
8768
24.1M
    int numNsErr = 0;
8769
24.1M
    int numDupErr = 0;
8770
8771
24.1M
    if (RAW != '<') return(NULL);
8772
24.1M
    NEXT1;
8773
8774
24.1M
    nbatts = 0;
8775
24.1M
    nratts = 0;
8776
24.1M
    nbdef = 0;
8777
24.1M
    nbNs = 0;
8778
24.1M
    nbTotalDef = 0;
8779
24.1M
    attval = 0;
8780
8781
24.1M
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8782
0
        xmlErrMemory(ctxt);
8783
0
        return(NULL);
8784
0
    }
8785
8786
24.1M
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8787
24.1M
    if (hlocalname.name == NULL) {
8788
8.36k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8789
8.36k
           "StartTag: invalid element name\n");
8790
8.36k
        return(NULL);
8791
8.36k
    }
8792
24.1M
    localname = hlocalname.name;
8793
24.1M
    prefix = hprefix.name;
8794
8795
    /*
8796
     * Now parse the attributes, it ends up with the ending
8797
     *
8798
     * (S Attribute)* S?
8799
     */
8800
24.1M
    SKIP_BLANKS;
8801
24.1M
    GROW;
8802
8803
    /*
8804
     * The ctxt->atts array will be ultimately passed to the SAX callback
8805
     * containing five xmlChar pointers for each attribute:
8806
     *
8807
     * [0] attribute name
8808
     * [1] attribute prefix
8809
     * [2] namespace URI
8810
     * [3] attribute value
8811
     * [4] end of attribute value
8812
     *
8813
     * To save memory, we reuse this array temporarily and store integers
8814
     * in these pointer variables.
8815
     *
8816
     * [0] attribute name
8817
     * [1] attribute prefix
8818
     * [2] hash value of attribute prefix, and later namespace index
8819
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
8820
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
8821
     *
8822
     * The ctxt->attallocs array contains an additional unsigned int for
8823
     * each attribute, containing the hash value of the attribute name
8824
     * and the alloc flag in bit 31.
8825
     */
8826
8827
34.9M
    while (((RAW != '>') &&
8828
21.6M
     ((RAW != '/') || (NXT(1) != '>')) &&
8829
19.8M
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8830
19.8M
  int len = -1;
8831
8832
19.8M
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
8833
19.8M
                                          &haprefix, &attvalue, &len,
8834
19.8M
                                          &alloc);
8835
19.8M
        if (hattname.name == NULL)
8836
38.7k
      break;
8837
19.7M
        if (attvalue == NULL)
8838
17.4k
            goto next_attr;
8839
19.7M
        attname = hattname.name;
8840
19.7M
        aprefix = haprefix.name;
8841
19.7M
  if (len < 0) len = xmlStrlen(attvalue);
8842
8843
19.7M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8844
206k
            xmlHashedString huri;
8845
206k
            xmlURIPtr parsedUri;
8846
8847
206k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8848
206k
            uri = huri.name;
8849
206k
            if (uri == NULL) {
8850
0
                xmlErrMemory(ctxt);
8851
0
                goto next_attr;
8852
0
            }
8853
206k
            if (*uri != 0) {
8854
205k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8855
0
                    xmlErrMemory(ctxt);
8856
0
                    goto next_attr;
8857
0
                }
8858
205k
                if (parsedUri == NULL) {
8859
74.5k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8860
74.5k
                             "xmlns: '%s' is not a valid URI\n",
8861
74.5k
                                       uri, NULL, NULL);
8862
131k
                } else {
8863
131k
                    if (parsedUri->scheme == NULL) {
8864
35.9k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8865
35.9k
                                  "xmlns: URI %s is not absolute\n",
8866
35.9k
                                  uri, NULL, NULL);
8867
35.9k
                    }
8868
131k
                    xmlFreeURI(parsedUri);
8869
131k
                }
8870
205k
                if (uri == ctxt->str_xml_ns) {
8871
10
                    if (attname != ctxt->str_xml) {
8872
10
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8873
10
                     "xml namespace URI cannot be the default namespace\n",
8874
10
                                 NULL, NULL, NULL);
8875
10
                    }
8876
10
                    goto next_attr;
8877
10
                }
8878
205k
                if ((len == 29) &&
8879
3.10k
                    (xmlStrEqual(uri,
8880
3.10k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8881
180
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8882
180
                         "reuse of the xmlns namespace name is forbidden\n",
8883
180
                             NULL, NULL, NULL);
8884
180
                    goto next_attr;
8885
180
                }
8886
205k
            }
8887
8888
206k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
8889
131k
                nbNs++;
8890
19.5M
        } else if (aprefix == ctxt->str_xmlns) {
8891
627k
            xmlHashedString huri;
8892
627k
            xmlURIPtr parsedUri;
8893
8894
627k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8895
627k
            uri = huri.name;
8896
627k
            if (uri == NULL) {
8897
0
                xmlErrMemory(ctxt);
8898
0
                goto next_attr;
8899
0
            }
8900
8901
627k
            if (attname == ctxt->str_xml) {
8902
1.01k
                if (uri != ctxt->str_xml_ns) {
8903
998
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8904
998
                             "xml namespace prefix mapped to wrong URI\n",
8905
998
                             NULL, NULL, NULL);
8906
998
                }
8907
                /*
8908
                 * Do not keep a namespace definition node
8909
                 */
8910
1.01k
                goto next_attr;
8911
1.01k
            }
8912
626k
            if (uri == ctxt->str_xml_ns) {
8913
145
                if (attname != ctxt->str_xml) {
8914
145
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8915
145
                             "xml namespace URI mapped to wrong prefix\n",
8916
145
                             NULL, NULL, NULL);
8917
145
                }
8918
145
                goto next_attr;
8919
145
            }
8920
626k
            if (attname == ctxt->str_xmlns) {
8921
20
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8922
20
                         "redefinition of the xmlns prefix is forbidden\n",
8923
20
                         NULL, NULL, NULL);
8924
20
                goto next_attr;
8925
20
            }
8926
626k
            if ((len == 29) &&
8927
12.6k
                (xmlStrEqual(uri,
8928
12.6k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8929
90
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8930
90
                         "reuse of the xmlns namespace name is forbidden\n",
8931
90
                         NULL, NULL, NULL);
8932
90
                goto next_attr;
8933
90
            }
8934
626k
            if ((uri == NULL) || (uri[0] == 0)) {
8935
224
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8936
224
                         "xmlns:%s: Empty XML namespace is not allowed\n",
8937
224
                              attname, NULL, NULL);
8938
224
                goto next_attr;
8939
626k
            } else {
8940
626k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8941
0
                    xmlErrMemory(ctxt);
8942
0
                    goto next_attr;
8943
0
                }
8944
626k
                if (parsedUri == NULL) {
8945
56.0k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8946
56.0k
                         "xmlns:%s: '%s' is not a valid URI\n",
8947
56.0k
                                       attname, uri, NULL);
8948
570k
                } else {
8949
570k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
8950
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8951
0
                                  "xmlns:%s: URI %s is not absolute\n",
8952
0
                                  attname, uri, NULL);
8953
0
                    }
8954
570k
                    xmlFreeURI(parsedUri);
8955
570k
                }
8956
626k
            }
8957
8958
626k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
8959
592k
                nbNs++;
8960
18.9M
        } else {
8961
            /*
8962
             * Populate attributes array, see above for repurposing
8963
             * of xmlChar pointers.
8964
             */
8965
18.9M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8966
487k
                int res = xmlCtxtGrowAttrs(ctxt);
8967
8968
487k
                maxatts = ctxt->maxatts;
8969
487k
                atts = ctxt->atts;
8970
8971
487k
                if (res < 0)
8972
0
                    goto next_attr;
8973
487k
            }
8974
18.9M
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
8975
18.9M
                                        ((unsigned) alloc << 31);
8976
18.9M
            atts[nbatts++] = attname;
8977
18.9M
            atts[nbatts++] = aprefix;
8978
18.9M
            atts[nbatts++] = XML_INT_TO_PTR(haprefix.hashValue);
8979
18.9M
            if (alloc) {
8980
55.4k
                atts[nbatts++] = attvalue;
8981
55.4k
                attvalue += len;
8982
55.4k
                atts[nbatts++] = attvalue;
8983
18.8M
            } else {
8984
                /*
8985
                 * attvalue points into the input buffer which can be
8986
                 * reallocated. Store differences to input->base instead.
8987
                 * The pointers will be reconstructed later.
8988
                 */
8989
18.8M
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
8990
18.8M
                attvalue += len;
8991
18.8M
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
8992
18.8M
            }
8993
            /*
8994
             * tag if some deallocation is needed
8995
             */
8996
18.9M
            if (alloc != 0) attval = 1;
8997
18.9M
            attvalue = NULL; /* moved into atts */
8998
18.9M
        }
8999
9000
19.7M
next_attr:
9001
19.7M
        if ((attvalue != NULL) && (alloc != 0)) {
9002
65.6k
            xmlFree(attvalue);
9003
65.6k
            attvalue = NULL;
9004
65.6k
        }
9005
9006
19.7M
  GROW
9007
19.7M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9008
8.89M
      break;
9009
10.8M
  if (SKIP_BLANKS == 0) {
9010
23.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9011
23.9k
         "attributes construct error\n");
9012
23.9k
      break;
9013
23.9k
  }
9014
10.8M
        GROW;
9015
10.8M
    }
9016
9017
    /*
9018
     * Namespaces from default attributes
9019
     */
9020
24.1M
    if (ctxt->attsDefault != NULL) {
9021
50.9k
        xmlDefAttrsPtr defaults;
9022
9023
50.9k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9024
50.9k
  if (defaults != NULL) {
9025
282k
      for (i = 0; i < defaults->nbAttrs; i++) {
9026
252k
                xmlDefAttr *attr = &defaults->attrs[i];
9027
9028
252k
          attname = attr->name.name;
9029
252k
    aprefix = attr->prefix.name;
9030
9031
252k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9032
617
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9033
9034
617
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9035
617
                        nbNs++;
9036
252k
    } else if (aprefix == ctxt->str_xmlns) {
9037
136k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9038
9039
136k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9040
136k
                                      NULL, 1) > 0)
9041
136k
                        nbNs++;
9042
136k
    } else {
9043
115k
                    if (nratts + nbTotalDef >= XML_MAX_ATTRS) {
9044
0
                        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
9045
0
                                    "Maximum number of attributes exceeded");
9046
0
                        break;
9047
0
                    }
9048
115k
                    nbTotalDef += 1;
9049
115k
                }
9050
252k
      }
9051
29.9k
  }
9052
50.9k
    }
9053
9054
    /*
9055
     * Resolve attribute namespaces
9056
     */
9057
43.0M
    for (i = 0; i < nbatts; i += 5) {
9058
18.9M
        attname = atts[i];
9059
18.9M
        aprefix = atts[i+1];
9060
9061
        /*
9062
  * The default namespace does not apply to attribute names.
9063
  */
9064
18.9M
  if (aprefix == NULL) {
9065
13.2M
            nsIndex = NS_INDEX_EMPTY;
9066
13.2M
        } else if (aprefix == ctxt->str_xml) {
9067
83.3k
            nsIndex = NS_INDEX_XML;
9068
5.57M
        } else {
9069
5.57M
            haprefix.name = aprefix;
9070
5.57M
            haprefix.hashValue = (size_t) atts[i+2];
9071
5.57M
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9072
9073
5.57M
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9074
254k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9075
254k
        "Namespace prefix %s for %s on %s is not defined\n",
9076
254k
        aprefix, attname, localname);
9077
254k
                nsIndex = NS_INDEX_EMPTY;
9078
254k
            }
9079
5.57M
        }
9080
9081
18.9M
        atts[i+2] = XML_INT_TO_PTR(nsIndex);
9082
18.9M
    }
9083
9084
    /*
9085
     * Maximum number of attributes including default attributes.
9086
     */
9087
24.1M
    maxAtts = nratts + nbTotalDef;
9088
9089
    /*
9090
     * Verify that attribute names are unique.
9091
     */
9092
24.1M
    if (maxAtts > 1) {
9093
4.94M
        attrHashSize = 4;
9094
7.84M
        while (attrHashSize / 2 < (unsigned) maxAtts)
9095
2.90M
            attrHashSize *= 2;
9096
9097
4.94M
        if (attrHashSize > ctxt->attrHashMax) {
9098
200k
            xmlAttrHashBucket *tmp;
9099
9100
200k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9101
200k
            if (tmp == NULL) {
9102
0
                xmlErrMemory(ctxt);
9103
0
                goto done;
9104
0
            }
9105
9106
200k
            ctxt->attrHash = tmp;
9107
200k
            ctxt->attrHashMax = attrHashSize;
9108
200k
        }
9109
9110
4.94M
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9111
9112
19.8M
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9113
14.9M
            const xmlChar *nsuri;
9114
14.9M
            unsigned hashValue, nameHashValue, uriHashValue;
9115
14.9M
            int res;
9116
9117
14.9M
            attname = atts[i];
9118
14.9M
            aprefix = atts[i+1];
9119
14.9M
            nsIndex = XML_PTR_TO_INT(atts[i+2]);
9120
            /* Hash values always have bit 31 set, see dict.c */
9121
14.9M
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9122
9123
14.9M
            if (nsIndex == NS_INDEX_EMPTY) {
9124
                /*
9125
                 * Prefix with empty namespace means an undeclared
9126
                 * prefix which was already reported above.
9127
                 */
9128
10.7M
                if (aprefix != NULL)
9129
231k
                    continue;
9130
10.5M
                nsuri = NULL;
9131
10.5M
                uriHashValue = URI_HASH_EMPTY;
9132
10.5M
            } else if (nsIndex == NS_INDEX_XML) {
9133
707
                nsuri = ctxt->str_xml_ns;
9134
707
                uriHashValue = URI_HASH_XML;
9135
4.13M
            } else {
9136
4.13M
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9137
4.13M
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9138
4.13M
            }
9139
9140
14.6M
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9141
14.6M
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9142
14.6M
                                    hashValue, i);
9143
14.6M
            if (res < 0)
9144
0
                continue;
9145
9146
            /*
9147
             * [ WFC: Unique Att Spec ]
9148
             * No attribute name may appear more than once in the same
9149
             * start-tag or empty-element tag.
9150
             * As extended by the Namespace in XML REC.
9151
             */
9152
14.6M
            if (res < INT_MAX) {
9153
45.1k
                if (aprefix == atts[res+1]) {
9154
45.1k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9155
45.1k
                    numDupErr += 1;
9156
45.1k
                } else {
9157
16
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9158
16
                             "Namespaced Attribute %s in '%s' redefined\n",
9159
16
                             attname, nsuri, NULL);
9160
16
                    numNsErr += 1;
9161
16
                }
9162
45.1k
            }
9163
14.6M
        }
9164
4.94M
    }
9165
9166
    /*
9167
     * Default attributes
9168
     */
9169
24.1M
    if (ctxt->attsDefault != NULL) {
9170
50.9k
        xmlDefAttrsPtr defaults;
9171
9172
50.9k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9173
50.9k
  if (defaults != NULL) {
9174
282k
      for (i = 0; i < defaults->nbAttrs; i++) {
9175
252k
                xmlDefAttr *attr = &defaults->attrs[i];
9176
252k
                const xmlChar *nsuri = NULL;
9177
252k
                unsigned hashValue, uriHashValue = 0;
9178
252k
                int res;
9179
9180
252k
          attname = attr->name.name;
9181
252k
    aprefix = attr->prefix.name;
9182
9183
252k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9184
617
                    continue;
9185
252k
    if (aprefix == ctxt->str_xmlns)
9186
136k
                    continue;
9187
9188
115k
                if (aprefix == NULL) {
9189
33.8k
                    nsIndex = NS_INDEX_EMPTY;
9190
33.8k
                    nsuri = NULL;
9191
33.8k
                    uriHashValue = URI_HASH_EMPTY;
9192
81.2k
                } else if (aprefix == ctxt->str_xml) {
9193
36.6k
                    nsIndex = NS_INDEX_XML;
9194
36.6k
                    nsuri = ctxt->str_xml_ns;
9195
36.6k
                    uriHashValue = URI_HASH_XML;
9196
44.6k
                } else {
9197
44.6k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9198
44.6k
                    if ((nsIndex == INT_MAX) ||
9199
39.6k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9200
39.6k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9201
39.6k
                                 "Namespace prefix %s for %s on %s is not "
9202
39.6k
                                 "defined\n",
9203
39.6k
                                 aprefix, attname, localname);
9204
39.6k
                        nsIndex = NS_INDEX_EMPTY;
9205
39.6k
                        nsuri = NULL;
9206
39.6k
                        uriHashValue = URI_HASH_EMPTY;
9207
39.6k
                    } else {
9208
4.99k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9209
4.99k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9210
4.99k
                    }
9211
44.6k
                }
9212
9213
                /*
9214
                 * Check whether the attribute exists
9215
                 */
9216
115k
                if (maxAtts > 1) {
9217
101k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9218
101k
                                                   uriHashValue);
9219
101k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9220
101k
                                            hashValue, nbatts);
9221
101k
                    if (res < 0)
9222
0
                        continue;
9223
101k
                    if (res < INT_MAX) {
9224
2.70k
                        if (aprefix == atts[res+1])
9225
194
                            continue;
9226
2.51k
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9227
2.51k
                                 "Namespaced Attribute %s in '%s' redefined\n",
9228
2.51k
                                 attname, nsuri, NULL);
9229
2.51k
                    }
9230
101k
                }
9231
9232
114k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9233
9234
114k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9235
1.13k
                    res = xmlCtxtGrowAttrs(ctxt);
9236
9237
1.13k
                    maxatts = ctxt->maxatts;
9238
1.13k
                    atts = ctxt->atts;
9239
9240
1.13k
                    if (res < 0) {
9241
0
                        localname = NULL;
9242
0
                        goto done;
9243
0
                    }
9244
1.13k
                }
9245
9246
114k
                atts[nbatts++] = attname;
9247
114k
                atts[nbatts++] = aprefix;
9248
114k
                atts[nbatts++] = XML_INT_TO_PTR(nsIndex);
9249
114k
                atts[nbatts++] = attr->value.name;
9250
114k
                atts[nbatts++] = attr->valueEnd;
9251
9252
114k
#ifdef LIBXML_VALID_ENABLED
9253
                /*
9254
                 * This should be moved to valid.c, but we don't keep track
9255
                 * whether an attribute was defaulted.
9256
                 */
9257
114k
                if ((ctxt->validate) &&
9258
0
                    (ctxt->standalone == 1) &&
9259
0
                    (attr->external != 0)) {
9260
0
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9261
0
                            "standalone: attribute %s on %s defaulted "
9262
0
                            "from external subset\n",
9263
0
                            attname, localname);
9264
0
                }
9265
114k
#endif
9266
114k
                nbdef++;
9267
114k
      }
9268
29.9k
  }
9269
50.9k
    }
9270
9271
    /*
9272
     * Using a single hash table for nsUri/localName pairs cannot
9273
     * detect duplicate QNames reliably. The following example will
9274
     * only result in two namespace errors.
9275
     *
9276
     * <doc xmlns:a="a" xmlns:b="a">
9277
     *   <elem a:a="" b:a="" b:a=""/>
9278
     * </doc>
9279
     *
9280
     * If we saw more than one namespace error but no duplicate QNames
9281
     * were found, we have to scan for duplicate QNames.
9282
     */
9283
24.1M
    if ((numDupErr == 0) && (numNsErr > 1)) {
9284
4
        memset(ctxt->attrHash, -1,
9285
4
               attrHashSize * sizeof(ctxt->attrHash[0]));
9286
9287
24
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9288
20
            unsigned hashValue, nameHashValue, prefixHashValue;
9289
20
            int res;
9290
9291
20
            aprefix = atts[i+1];
9292
20
            if (aprefix == NULL)
9293
6
                continue;
9294
9295
14
            attname = atts[i];
9296
            /* Hash values always have bit 31 set, see dict.c */
9297
14
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9298
14
            prefixHashValue = xmlDictComputeHash(ctxt->dict, aprefix);
9299
9300
14
            hashValue = xmlDictCombineHash(nameHashValue, prefixHashValue);
9301
14
            res = xmlAttrHashInsertQName(ctxt, attrHashSize, attname,
9302
14
                                         aprefix, hashValue, i);
9303
14
            if (res < INT_MAX)
9304
4
                xmlErrAttributeDup(ctxt, aprefix, attname);
9305
14
        }
9306
4
    }
9307
9308
    /*
9309
     * Reconstruct attribute pointers
9310
     */
9311
43.1M
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9312
        /* namespace URI */
9313
19.0M
        nsIndex = XML_PTR_TO_INT(atts[i+2]);
9314
19.0M
        if (nsIndex == INT_MAX)
9315
13.6M
            atts[i+2] = NULL;
9316
5.44M
        else if (nsIndex == INT_MAX - 1)
9317
120k
            atts[i+2] = ctxt->str_xml_ns;
9318
5.32M
        else
9319
5.32M
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9320
9321
19.0M
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9322
18.8M
            atts[i+3] = BASE_PTR + XML_PTR_TO_INT(atts[i+3]);  /* value */
9323
18.8M
            atts[i+4] = BASE_PTR + XML_PTR_TO_INT(atts[i+4]);  /* valuend */
9324
18.8M
        }
9325
19.0M
    }
9326
9327
24.1M
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9328
24.1M
    if ((prefix != NULL) && (uri == NULL)) {
9329
383k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9330
383k
           "Namespace prefix %s on %s is not defined\n",
9331
383k
     prefix, localname, NULL);
9332
383k
    }
9333
24.1M
    *pref = prefix;
9334
24.1M
    *URI = uri;
9335
9336
    /*
9337
     * SAX callback
9338
     */
9339
24.1M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9340
24.1M
  (!ctxt->disableSAX)) {
9341
24.0M
  if (nbNs > 0)
9342
286k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9343
286k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9344
286k
        nbatts / 5, nbdef, atts);
9345
23.7M
  else
9346
23.7M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9347
23.7M
                          0, NULL, nbatts / 5, nbdef, atts);
9348
24.0M
    }
9349
9350
24.1M
done:
9351
    /*
9352
     * Free allocated attribute values
9353
     */
9354
24.1M
    if (attval != 0) {
9355
248k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9356
198k
      if (ctxt->attallocs[j] & 0x80000000)
9357
55.4k
          xmlFree((xmlChar *) atts[i+3]);
9358
49.9k
    }
9359
9360
24.1M
    *nbNsPtr = nbNs;
9361
24.1M
    return(localname);
9362
24.1M
}
9363
9364
/**
9365
 * Parse an end tag. Always consumes '</'.
9366
 *
9367
 *     [42] ETag ::= '</' Name S? '>'
9368
 *
9369
 * With namespace
9370
 *
9371
 *     [NS 9] ETag ::= '</' QName S? '>'
9372
 * @param ctxt  an XML parser context
9373
 * @param tag  the corresponding start tag
9374
 */
9375
9376
static void
9377
7.48M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9378
7.48M
    const xmlChar *name;
9379
9380
7.48M
    GROW;
9381
7.48M
    if ((RAW != '<') || (NXT(1) != '/')) {
9382
5
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9383
5
  return;
9384
5
    }
9385
7.48M
    SKIP(2);
9386
9387
7.48M
    if (tag->prefix == NULL)
9388
2.62M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9389
4.86M
    else
9390
4.86M
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9391
9392
    /*
9393
     * We should definitely be at the ending "S? '>'" part
9394
     */
9395
7.48M
    GROW;
9396
7.48M
    SKIP_BLANKS;
9397
7.48M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9398
6.35k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9399
6.35k
    } else
9400
7.48M
  NEXT1;
9401
9402
    /*
9403
     * [ WFC: Element Type Match ]
9404
     * The Name in an element's end-tag must match the element type in the
9405
     * start-tag.
9406
     *
9407
     */
9408
7.48M
    if (name != (xmlChar*)1) {
9409
12.8k
        if (name == NULL) name = BAD_CAST "unparsable";
9410
12.8k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9411
12.8k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9412
12.8k
                    ctxt->name, tag->line, name);
9413
12.8k
    }
9414
9415
    /*
9416
     * SAX: End of Tag
9417
     */
9418
7.48M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9419
7.48M
  (!ctxt->disableSAX))
9420
7.46M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9421
7.46M
                                tag->URI);
9422
9423
7.48M
    spacePop(ctxt);
9424
7.48M
    if (tag->nsNr != 0)
9425
147k
  xmlParserNsPop(ctxt, tag->nsNr);
9426
7.48M
}
9427
9428
/**
9429
 * Parse escaped pure raw content. Always consumes '<!['.
9430
 *
9431
 * @deprecated Internal function, don't use.
9432
 *
9433
 *     [18] CDSect ::= CDStart CData CDEnd
9434
 *
9435
 *     [19] CDStart ::= '<![CDATA['
9436
 *
9437
 *     [20] Data ::= (Char* - (Char* ']]>' Char*))
9438
 *
9439
 *     [21] CDEnd ::= ']]>'
9440
 * @param ctxt  an XML parser context
9441
 */
9442
void
9443
5.56k
xmlParseCDSect(xmlParserCtxt *ctxt) {
9444
5.56k
    xmlChar *buf = NULL;
9445
5.56k
    int len = 0;
9446
5.56k
    int size = XML_PARSER_BUFFER_SIZE;
9447
5.56k
    int r, rl;
9448
5.56k
    int s, sl;
9449
5.56k
    int cur, l;
9450
5.56k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9451
5.56k
                    XML_MAX_HUGE_LENGTH :
9452
5.56k
                    XML_MAX_TEXT_LENGTH;
9453
9454
5.56k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9455
0
        return;
9456
5.56k
    SKIP(3);
9457
9458
5.56k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9459
0
        return;
9460
5.56k
    SKIP(6);
9461
9462
5.56k
    r = xmlCurrentCharRecover(ctxt, &rl);
9463
5.56k
    if (!IS_CHAR(r)) {
9464
4
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9465
4
        goto out;
9466
4
    }
9467
5.56k
    NEXTL(rl);
9468
5.56k
    s = xmlCurrentCharRecover(ctxt, &sl);
9469
5.56k
    if (!IS_CHAR(s)) {
9470
9
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9471
9
        goto out;
9472
9
    }
9473
5.55k
    NEXTL(sl);
9474
5.55k
    cur = xmlCurrentCharRecover(ctxt, &l);
9475
5.55k
    buf = xmlMalloc(size);
9476
5.55k
    if (buf == NULL) {
9477
0
  xmlErrMemory(ctxt);
9478
0
        goto out;
9479
0
    }
9480
21.5M
    while (IS_CHAR(cur) &&
9481
21.5M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9482
21.5M
  if (len + 5 >= size) {
9483
17.5k
      xmlChar *tmp;
9484
17.5k
            int newSize;
9485
9486
17.5k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9487
17.5k
            if (newSize < 0) {
9488
0
                xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9489
0
                               "CData section too big found\n");
9490
0
                goto out;
9491
0
            }
9492
17.5k
      tmp = xmlRealloc(buf, newSize);
9493
17.5k
      if (tmp == NULL) {
9494
0
    xmlErrMemory(ctxt);
9495
0
                goto out;
9496
0
      }
9497
17.5k
      buf = tmp;
9498
17.5k
      size = newSize;
9499
17.5k
  }
9500
21.5M
  COPY_BUF(buf, len, r);
9501
21.5M
  r = s;
9502
21.5M
  rl = sl;
9503
21.5M
  s = cur;
9504
21.5M
  sl = l;
9505
21.5M
  NEXTL(l);
9506
21.5M
  cur = xmlCurrentCharRecover(ctxt, &l);
9507
21.5M
    }
9508
5.55k
    buf[len] = 0;
9509
5.55k
    if (cur != '>') {
9510
100
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9511
100
                       "CData section not finished\n%.50s\n", buf);
9512
100
        goto out;
9513
100
    }
9514
5.45k
    NEXTL(l);
9515
9516
    /*
9517
     * OK the buffer is to be consumed as cdata.
9518
     */
9519
5.45k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9520
5.44k
        if ((ctxt->sax->cdataBlock != NULL) &&
9521
0
            ((ctxt->options & XML_PARSE_NOCDATA) == 0)) {
9522
0
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9523
5.44k
        } else if (ctxt->sax->characters != NULL) {
9524
5.44k
            ctxt->sax->characters(ctxt->userData, buf, len);
9525
5.44k
        }
9526
5.44k
    }
9527
9528
5.56k
out:
9529
5.56k
    xmlFree(buf);
9530
5.56k
}
9531
9532
/**
9533
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9534
 * unexpected EOF to the caller.
9535
 *
9536
 * @param ctxt  an XML parser context
9537
 */
9538
9539
static void
9540
4.87k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9541
4.87k
    int oldNameNr = ctxt->nameNr;
9542
4.87k
    int oldSpaceNr = ctxt->spaceNr;
9543
4.87k
    int oldNodeNr = ctxt->nodeNr;
9544
9545
4.87k
    GROW;
9546
1.62M
    while ((ctxt->input->cur < ctxt->input->end) &&
9547
1.62M
     (PARSER_STOPPED(ctxt) == 0)) {
9548
1.62M
  const xmlChar *cur = ctxt->input->cur;
9549
9550
  /*
9551
   * First case : a Processing Instruction.
9552
   */
9553
1.62M
  if ((*cur == '<') && (cur[1] == '?')) {
9554
1.25k
      xmlParsePI(ctxt);
9555
1.25k
  }
9556
9557
  /*
9558
   * Second case : a CDSection
9559
   */
9560
  /* 2.6.0 test was *cur not RAW */
9561
1.61M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9562
0
      xmlParseCDSect(ctxt);
9563
0
  }
9564
9565
  /*
9566
   * Third case :  a comment
9567
   */
9568
1.61M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9569
0
     (NXT(2) == '-') && (NXT(3) == '-')) {
9570
0
      xmlParseComment(ctxt);
9571
0
  }
9572
9573
  /*
9574
   * Fourth case :  a sub-element.
9575
   */
9576
1.61M
  else if (*cur == '<') {
9577
1.27M
            if (NXT(1) == '/') {
9578
368k
                if (ctxt->nameNr <= oldNameNr)
9579
4.66k
                    break;
9580
363k
          xmlParseElementEnd(ctxt);
9581
903k
            } else {
9582
903k
          xmlParseElementStart(ctxt);
9583
903k
            }
9584
1.27M
  }
9585
9586
  /*
9587
   * Fifth case : a reference. If if has not been resolved,
9588
   *    parsing returns it's Name, create the node
9589
   */
9590
9591
348k
  else if (*cur == '&') {
9592
10
      xmlParseReference(ctxt);
9593
10
  }
9594
9595
  /*
9596
   * Last case, text. Note that References are handled directly.
9597
   */
9598
348k
  else {
9599
348k
      xmlParseCharDataInternal(ctxt, 0);
9600
348k
  }
9601
9602
1.61M
  SHRINK;
9603
1.61M
  GROW;
9604
1.61M
    }
9605
9606
4.87k
    if ((ctxt->nameNr > oldNameNr) &&
9607
206
        (ctxt->input->cur >= ctxt->input->end) &&
9608
201
        (ctxt->wellFormed)) {
9609
0
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9610
0
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9611
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9612
0
                "Premature end of data in tag %s line %d\n",
9613
0
                name, line, NULL);
9614
0
    }
9615
9616
    /*
9617
     * Clean up in error case
9618
     */
9619
9620
5.00k
    while (ctxt->nodeNr > oldNodeNr)
9621
125
        nodePop(ctxt);
9622
9623
16.7k
    while (ctxt->nameNr > oldNameNr) {
9624
11.8k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9625
9626
11.8k
        if (tag->nsNr != 0)
9627
12
            xmlParserNsPop(ctxt, tag->nsNr);
9628
9629
11.8k
        namePop(ctxt);
9630
11.8k
    }
9631
9632
16.7k
    while (ctxt->spaceNr > oldSpaceNr)
9633
11.8k
        spacePop(ctxt);
9634
4.87k
}
9635
9636
/**
9637
 * Parse XML element content. This is useful if you're only interested
9638
 * in custom SAX callbacks. If you want a node list, use
9639
 * #xmlCtxtParseContent.
9640
 *
9641
 * @param ctxt  an XML parser context
9642
 */
9643
void
9644
0
xmlParseContent(xmlParserCtxt *ctxt) {
9645
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9646
0
        return;
9647
9648
0
    xmlCtxtInitializeLate(ctxt);
9649
9650
0
    xmlParseContentInternal(ctxt);
9651
9652
0
    xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
9653
0
}
9654
9655
/**
9656
 * Parse an XML element
9657
 *
9658
 * @deprecated Internal function, don't use.
9659
 *
9660
 *     [39] element ::= EmptyElemTag | STag content ETag
9661
 *
9662
 * [ WFC: Element Type Match ]
9663
 * The Name in an element's end-tag must match the element type in the
9664
 * start-tag.
9665
 *
9666
 * @param ctxt  an XML parser context
9667
 */
9668
9669
void
9670
5.07k
xmlParseElement(xmlParserCtxt *ctxt) {
9671
5.07k
    if (xmlParseElementStart(ctxt) != 0)
9672
198
        return;
9673
9674
4.87k
    xmlParseContentInternal(ctxt);
9675
9676
4.87k
    if (ctxt->input->cur >= ctxt->input->end) {
9677
206
        if (ctxt->wellFormed) {
9678
0
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9679
0
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9680
0
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9681
0
                    "Premature end of data in tag %s line %d\n",
9682
0
                    name, line, NULL);
9683
0
        }
9684
206
        return;
9685
206
    }
9686
9687
4.67k
    xmlParseElementEnd(ctxt);
9688
4.67k
}
9689
9690
/**
9691
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9692
 * opening tag was parsed, 1 if an empty element was parsed.
9693
 *
9694
 * Always consumes '<'.
9695
 *
9696
 * @param ctxt  an XML parser context
9697
 */
9698
static int
9699
908k
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9700
908k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9701
908k
    const xmlChar *name;
9702
908k
    const xmlChar *prefix = NULL;
9703
908k
    const xmlChar *URI = NULL;
9704
908k
    xmlParserNodeInfo node_info;
9705
908k
    int line;
9706
908k
    xmlNodePtr cur;
9707
908k
    int nbNs = 0;
9708
9709
908k
    if (ctxt->nameNr > maxDepth) {
9710
5
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9711
5
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9712
5
                ctxt->nameNr);
9713
5
  return(-1);
9714
5
    }
9715
9716
    /* Capture start position */
9717
908k
    if (ctxt->record_info) {
9718
0
        node_info.begin_pos = ctxt->input->consumed +
9719
0
                          (CUR_PTR - ctxt->input->base);
9720
0
  node_info.begin_line = ctxt->input->line;
9721
0
    }
9722
9723
908k
    if (ctxt->spaceNr == 0)
9724
5.07k
  spacePush(ctxt, -1);
9725
903k
    else if (*ctxt->space == -2)
9726
0
  spacePush(ctxt, -1);
9727
903k
    else
9728
903k
  spacePush(ctxt, *ctxt->space);
9729
9730
908k
    line = ctxt->input->line;
9731
908k
#ifdef LIBXML_SAX1_ENABLED
9732
908k
    if (ctxt->sax2)
9733
908k
#endif /* LIBXML_SAX1_ENABLED */
9734
908k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9735
0
#ifdef LIBXML_SAX1_ENABLED
9736
0
    else
9737
0
  name = xmlParseStartTag(ctxt);
9738
908k
#endif /* LIBXML_SAX1_ENABLED */
9739
908k
    if (name == NULL) {
9740
6.92k
  spacePop(ctxt);
9741
6.92k
        return(-1);
9742
6.92k
    }
9743
901k
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9744
901k
    cur = ctxt->node;
9745
9746
901k
#ifdef LIBXML_VALID_ENABLED
9747
    /*
9748
     * [ VC: Root Element Type ]
9749
     * The Name in the document type declaration must match the element
9750
     * type of the root element.
9751
     */
9752
901k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9753
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9754
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9755
901k
#endif /* LIBXML_VALID_ENABLED */
9756
9757
    /*
9758
     * Check for an Empty Element.
9759
     */
9760
901k
    if ((RAW == '/') && (NXT(1) == '>')) {
9761
471k
        SKIP(2);
9762
471k
  if (ctxt->sax2) {
9763
471k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9764
471k
    (!ctxt->disableSAX))
9765
457k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9766
471k
#ifdef LIBXML_SAX1_ENABLED
9767
471k
  } else {
9768
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9769
0
    (!ctxt->disableSAX))
9770
0
    ctxt->sax->endElement(ctxt->userData, name);
9771
0
#endif /* LIBXML_SAX1_ENABLED */
9772
0
  }
9773
471k
  namePop(ctxt);
9774
471k
  spacePop(ctxt);
9775
471k
  if (nbNs > 0)
9776
1.06k
      xmlParserNsPop(ctxt, nbNs);
9777
471k
  if (cur != NULL && ctxt->record_info) {
9778
0
            node_info.node = cur;
9779
0
            node_info.end_pos = ctxt->input->consumed +
9780
0
                                (CUR_PTR - ctxt->input->base);
9781
0
            node_info.end_line = ctxt->input->line;
9782
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9783
0
  }
9784
471k
  return(1);
9785
471k
    }
9786
429k
    if (RAW == '>') {
9787
380k
        NEXT1;
9788
380k
        if (cur != NULL && ctxt->record_info) {
9789
0
            node_info.node = cur;
9790
0
            node_info.end_pos = 0;
9791
0
            node_info.end_line = 0;
9792
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9793
0
        }
9794
380k
    } else {
9795
49.0k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9796
49.0k
         "Couldn't find end of Start Tag %s line %d\n",
9797
49.0k
                    name, line, NULL);
9798
9799
  /*
9800
   * end of parsing of this node.
9801
   */
9802
49.0k
  nodePop(ctxt);
9803
49.0k
  namePop(ctxt);
9804
49.0k
  spacePop(ctxt);
9805
49.0k
  if (nbNs > 0)
9806
249
      xmlParserNsPop(ctxt, nbNs);
9807
49.0k
  return(-1);
9808
49.0k
    }
9809
9810
380k
    return(0);
9811
429k
}
9812
9813
/**
9814
 * Parse the end of an XML element. Always consumes '</'.
9815
 *
9816
 * @param ctxt  an XML parser context
9817
 */
9818
static void
9819
368k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9820
368k
    xmlNodePtr cur = ctxt->node;
9821
9822
368k
    if (ctxt->nameNr <= 0) {
9823
0
        if ((RAW == '<') && (NXT(1) == '/'))
9824
0
            SKIP(2);
9825
0
        return;
9826
0
    }
9827
9828
    /*
9829
     * parse the end of tag: '</' should be here.
9830
     */
9831
368k
    if (ctxt->sax2) {
9832
368k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9833
368k
  namePop(ctxt);
9834
368k
    }
9835
0
#ifdef LIBXML_SAX1_ENABLED
9836
0
    else
9837
0
  xmlParseEndTag1(ctxt, 0);
9838
368k
#endif /* LIBXML_SAX1_ENABLED */
9839
9840
    /*
9841
     * Capture end position
9842
     */
9843
368k
    if (cur != NULL && ctxt->record_info) {
9844
0
        xmlParserNodeInfoPtr node_info;
9845
9846
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9847
0
        if (node_info != NULL) {
9848
0
            node_info->end_pos = ctxt->input->consumed +
9849
0
                                 (CUR_PTR - ctxt->input->base);
9850
0
            node_info->end_line = ctxt->input->line;
9851
0
        }
9852
0
    }
9853
368k
}
9854
9855
/**
9856
 * Parse the XML version value.
9857
 *
9858
 * @deprecated Internal function, don't use.
9859
 *
9860
 *     [26] VersionNum ::= '1.' [0-9]+
9861
 *
9862
 * In practice allow [0-9].[0-9]+ at that level
9863
 *
9864
 * @param ctxt  an XML parser context
9865
 * @returns the string giving the XML version number, or NULL
9866
 */
9867
xmlChar *
9868
133k
xmlParseVersionNum(xmlParserCtxt *ctxt) {
9869
133k
    xmlChar *buf = NULL;
9870
133k
    int len = 0;
9871
133k
    int size = 10;
9872
133k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9873
128k
                    XML_MAX_TEXT_LENGTH :
9874
133k
                    XML_MAX_NAME_LENGTH;
9875
133k
    xmlChar cur;
9876
9877
133k
    buf = xmlMalloc(size);
9878
133k
    if (buf == NULL) {
9879
0
  xmlErrMemory(ctxt);
9880
0
  return(NULL);
9881
0
    }
9882
133k
    cur = CUR;
9883
133k
    if (!((cur >= '0') && (cur <= '9'))) {
9884
38
  xmlFree(buf);
9885
38
  return(NULL);
9886
38
    }
9887
133k
    buf[len++] = cur;
9888
133k
    NEXT;
9889
133k
    cur=CUR;
9890
133k
    if (cur != '.') {
9891
18
  xmlFree(buf);
9892
18
  return(NULL);
9893
18
    }
9894
133k
    buf[len++] = cur;
9895
133k
    NEXT;
9896
133k
    cur=CUR;
9897
269k
    while ((cur >= '0') && (cur <= '9')) {
9898
135k
  if (len + 1 >= size) {
9899
149
      xmlChar *tmp;
9900
149
            int newSize;
9901
9902
149
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9903
149
            if (newSize < 0) {
9904
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "VersionNum");
9905
0
                xmlFree(buf);
9906
0
                return(NULL);
9907
0
            }
9908
149
      tmp = xmlRealloc(buf, newSize);
9909
149
      if (tmp == NULL) {
9910
0
    xmlErrMemory(ctxt);
9911
0
          xmlFree(buf);
9912
0
    return(NULL);
9913
0
      }
9914
149
      buf = tmp;
9915
149
            size = newSize;
9916
149
  }
9917
135k
  buf[len++] = cur;
9918
135k
  NEXT;
9919
135k
  cur=CUR;
9920
135k
    }
9921
133k
    buf[len] = 0;
9922
133k
    return(buf);
9923
133k
}
9924
9925
/**
9926
 * Parse the XML version.
9927
 *
9928
 * @deprecated Internal function, don't use.
9929
 *
9930
 *     [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9931
 *
9932
 *     [25] Eq ::= S? '=' S?
9933
 *
9934
 * @param ctxt  an XML parser context
9935
 * @returns the version string, e.g. "1.0"
9936
 */
9937
9938
xmlChar *
9939
133k
xmlParseVersionInfo(xmlParserCtxt *ctxt) {
9940
133k
    xmlChar *version = NULL;
9941
9942
133k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9943
133k
  SKIP(7);
9944
133k
  SKIP_BLANKS;
9945
133k
  if (RAW != '=') {
9946
15
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9947
15
      return(NULL);
9948
15
        }
9949
133k
  NEXT;
9950
133k
  SKIP_BLANKS;
9951
133k
  if (RAW == '"') {
9952
133k
      NEXT;
9953
133k
      version = xmlParseVersionNum(ctxt);
9954
133k
      if (RAW != '"') {
9955
106
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9956
106
      } else
9957
133k
          NEXT;
9958
133k
  } else if (RAW == '\''){
9959
33
      NEXT;
9960
33
      version = xmlParseVersionNum(ctxt);
9961
33
      if (RAW != '\'') {
9962
5
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9963
5
      } else
9964
28
          NEXT;
9965
33
  } else {
9966
19
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9967
19
  }
9968
133k
    }
9969
133k
    return(version);
9970
133k
}
9971
9972
/**
9973
 * Parse the XML encoding name
9974
 *
9975
 * @deprecated Internal function, don't use.
9976
 *
9977
 *     [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9978
 *
9979
 * @param ctxt  an XML parser context
9980
 * @returns the encoding name value or NULL
9981
 */
9982
xmlChar *
9983
124k
xmlParseEncName(xmlParserCtxt *ctxt) {
9984
124k
    xmlChar *buf = NULL;
9985
124k
    int len = 0;
9986
124k
    int size = 10;
9987
124k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9988
119k
                    XML_MAX_TEXT_LENGTH :
9989
124k
                    XML_MAX_NAME_LENGTH;
9990
124k
    xmlChar cur;
9991
9992
124k
    cur = CUR;
9993
124k
    if (((cur >= 'a') && (cur <= 'z')) ||
9994
123k
        ((cur >= 'A') && (cur <= 'Z'))) {
9995
123k
  buf = xmlMalloc(size);
9996
123k
  if (buf == NULL) {
9997
0
      xmlErrMemory(ctxt);
9998
0
      return(NULL);
9999
0
  }
10000
10001
123k
  buf[len++] = cur;
10002
123k
  NEXT;
10003
123k
  cur = CUR;
10004
624k
  while (((cur >= 'a') && (cur <= 'z')) ||
10005
617k
         ((cur >= 'A') && (cur <= 'Z')) ||
10006
374k
         ((cur >= '0') && (cur <= '9')) ||
10007
249k
         (cur == '.') || (cur == '_') ||
10008
500k
         (cur == '-')) {
10009
500k
      if (len + 1 >= size) {
10010
524
          xmlChar *tmp;
10011
524
                int newSize;
10012
10013
524
                newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10014
524
                if (newSize < 0) {
10015
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10016
0
                    xmlFree(buf);
10017
0
                    return(NULL);
10018
0
                }
10019
524
    tmp = xmlRealloc(buf, newSize);
10020
524
    if (tmp == NULL) {
10021
0
        xmlErrMemory(ctxt);
10022
0
        xmlFree(buf);
10023
0
        return(NULL);
10024
0
    }
10025
524
    buf = tmp;
10026
524
                size = newSize;
10027
524
      }
10028
500k
      buf[len++] = cur;
10029
500k
      NEXT;
10030
500k
      cur = CUR;
10031
500k
        }
10032
123k
  buf[len] = 0;
10033
123k
    } else {
10034
27
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10035
27
    }
10036
124k
    return(buf);
10037
124k
}
10038
10039
/**
10040
 * Parse the XML encoding declaration
10041
 *
10042
 * @deprecated Internal function, don't use.
10043
 *
10044
 *     [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | 
10045
 *                           "'" EncName "'")
10046
 *
10047
 * this setups the conversion filters.
10048
 *
10049
 * @param ctxt  an XML parser context
10050
 * @returns the encoding value or NULL
10051
 */
10052
10053
const xmlChar *
10054
125k
xmlParseEncodingDecl(xmlParserCtxt *ctxt) {
10055
125k
    xmlChar *encoding = NULL;
10056
10057
125k
    SKIP_BLANKS;
10058
125k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10059
1.38k
        return(NULL);
10060
10061
124k
    SKIP(8);
10062
124k
    SKIP_BLANKS;
10063
124k
    if (RAW != '=') {
10064
12
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10065
12
        return(NULL);
10066
12
    }
10067
124k
    NEXT;
10068
124k
    SKIP_BLANKS;
10069
124k
    if (RAW == '"') {
10070
124k
        NEXT;
10071
124k
        encoding = xmlParseEncName(ctxt);
10072
124k
        if (RAW != '"') {
10073
88
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10074
88
            xmlFree(encoding);
10075
88
            return(NULL);
10076
88
        } else
10077
123k
            NEXT;
10078
124k
    } else if (RAW == '\''){
10079
12
        NEXT;
10080
12
        encoding = xmlParseEncName(ctxt);
10081
12
        if (RAW != '\'') {
10082
4
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10083
4
            xmlFree(encoding);
10084
4
            return(NULL);
10085
4
        } else
10086
8
            NEXT;
10087
12
    } else {
10088
8
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10089
8
    }
10090
10091
123k
    if (encoding == NULL)
10092
15
        return(NULL);
10093
10094
123k
    xmlSetDeclaredEncoding(ctxt, encoding);
10095
10096
123k
    return(ctxt->encoding);
10097
123k
}
10098
10099
/**
10100
 * Parse the XML standalone declaration
10101
 *
10102
 * @deprecated Internal function, don't use.
10103
 *
10104
 *     [32] SDDecl ::= S 'standalone' Eq
10105
 *                     (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10106
 *
10107
 * [ VC: Standalone Document Declaration ]
10108
 * TODO The standalone document declaration must have the value "no"
10109
 * if any external markup declarations contain declarations of:
10110
 *  - attributes with default values, if elements to which these
10111
 *    attributes apply appear in the document without specifications
10112
 *    of values for these attributes, or
10113
 *  - entities (other than amp, lt, gt, apos, quot), if references
10114
 *    to those entities appear in the document, or
10115
 *  - attributes with values subject to normalization, where the
10116
 *    attribute appears in the document with a value which will change
10117
 *    as a result of normalization, or
10118
 *  - element types with element content, if white space occurs directly
10119
 *    within any instance of those types.
10120
 *
10121
 * @param ctxt  an XML parser context
10122
 * @returns
10123
 *   1 if standalone="yes"
10124
 *   0 if standalone="no"
10125
 *  -2 if standalone attribute is missing or invalid
10126
 *    (A standalone value of -2 means that the XML declaration was found,
10127
 *     but no value was specified for the standalone attribute).
10128
 */
10129
10130
int
10131
90.7k
xmlParseSDDecl(xmlParserCtxt *ctxt) {
10132
90.7k
    int standalone = -2;
10133
10134
90.7k
    SKIP_BLANKS;
10135
90.7k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10136
89.9k
  SKIP(10);
10137
89.9k
        SKIP_BLANKS;
10138
89.9k
  if (RAW != '=') {
10139
8
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10140
8
      return(standalone);
10141
8
        }
10142
89.9k
  NEXT;
10143
89.9k
  SKIP_BLANKS;
10144
89.9k
        if (RAW == '\''){
10145
24
      NEXT;
10146
24
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10147
14
          standalone = 0;
10148
14
                SKIP(2);
10149
14
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10150
3
                 (NXT(2) == 's')) {
10151
2
          standalone = 1;
10152
2
    SKIP(3);
10153
8
            } else {
10154
8
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10155
8
      }
10156
24
      if (RAW != '\'') {
10157
10
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10158
10
      } else
10159
14
          NEXT;
10160
89.8k
  } else if (RAW == '"'){
10161
89.8k
      NEXT;
10162
89.8k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10163
364
          standalone = 0;
10164
364
    SKIP(2);
10165
89.5k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10166
89.4k
                 (NXT(2) == 's')) {
10167
89.4k
          standalone = 1;
10168
89.4k
                SKIP(3);
10169
89.4k
            } else {
10170
36
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10171
36
      }
10172
89.8k
      if (RAW != '"') {
10173
45
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10174
45
      } else
10175
89.8k
          NEXT;
10176
89.8k
  } else {
10177
10
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10178
10
        }
10179
89.9k
    }
10180
90.7k
    return(standalone);
10181
90.7k
}
10182
10183
/**
10184
 * Parse an XML declaration header
10185
 *
10186
 * @deprecated Internal function, don't use.
10187
 *
10188
 *     [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10189
 * @param ctxt  an XML parser context
10190
 */
10191
10192
void
10193
133k
xmlParseXMLDecl(xmlParserCtxt *ctxt) {
10194
133k
    xmlChar *version;
10195
10196
    /*
10197
     * This value for standalone indicates that the document has an
10198
     * XML declaration but it does not have a standalone attribute.
10199
     * It will be overwritten later if a standalone attribute is found.
10200
     */
10201
10202
133k
    ctxt->standalone = -2;
10203
10204
    /*
10205
     * We know that '<?xml' is here.
10206
     */
10207
133k
    SKIP(5);
10208
10209
133k
    if (!IS_BLANK_CH(RAW)) {
10210
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10211
0
                 "Blank needed after '<?xml'\n");
10212
0
    }
10213
133k
    SKIP_BLANKS;
10214
10215
    /*
10216
     * We must have the VersionInfo here.
10217
     */
10218
133k
    version = xmlParseVersionInfo(ctxt);
10219
133k
    if (version == NULL) {
10220
388
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10221
133k
    } else {
10222
133k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10223
      /*
10224
       * Changed here for XML-1.0 5th edition
10225
       */
10226
1.80k
      if (ctxt->options & XML_PARSE_OLD10) {
10227
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10228
0
                "Unsupported version '%s'\n",
10229
0
                version);
10230
1.80k
      } else {
10231
1.80k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10232
1.67k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10233
1.67k
                      "Unsupported version '%s'\n",
10234
1.67k
          version, NULL);
10235
1.67k
    } else {
10236
124
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10237
124
              "Unsupported version '%s'\n",
10238
124
              version);
10239
124
    }
10240
1.80k
      }
10241
1.80k
  }
10242
133k
  if (ctxt->version != NULL)
10243
0
      xmlFree(ctxt->version);
10244
133k
  ctxt->version = version;
10245
133k
    }
10246
10247
    /*
10248
     * We may have the encoding declaration
10249
     */
10250
133k
    if (!IS_BLANK_CH(RAW)) {
10251
8.85k
        if ((RAW == '?') && (NXT(1) == '>')) {
10252
8.38k
      SKIP(2);
10253
8.38k
      return;
10254
8.38k
  }
10255
474
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10256
474
    }
10257
125k
    xmlParseEncodingDecl(ctxt);
10258
10259
    /*
10260
     * We may have the standalone status.
10261
     */
10262
125k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10263
34.6k
        if ((RAW == '?') && (NXT(1) == '>')) {
10264
34.6k
      SKIP(2);
10265
34.6k
      return;
10266
34.6k
  }
10267
21
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10268
21
    }
10269
10270
    /*
10271
     * We can grow the input buffer freely at that point
10272
     */
10273
90.7k
    GROW;
10274
10275
90.7k
    SKIP_BLANKS;
10276
90.7k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10277
10278
90.7k
    SKIP_BLANKS;
10279
90.7k
    if ((RAW == '?') && (NXT(1) == '>')) {
10280
89.9k
        SKIP(2);
10281
89.9k
    } else if (RAW == '>') {
10282
        /* Deprecated old WD ... */
10283
26
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10284
26
  NEXT;
10285
833
    } else {
10286
833
        int c;
10287
10288
833
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10289
343k
        while ((PARSER_STOPPED(ctxt) == 0) &&
10290
343k
               ((c = CUR) != 0)) {
10291
342k
            NEXT;
10292
342k
            if (c == '>')
10293
544
                break;
10294
342k
        }
10295
833
    }
10296
90.7k
}
10297
10298
/**
10299
 * @since 2.14.0
10300
 *
10301
 * @param ctxt  parser context
10302
 * @returns the version from the XML declaration.
10303
 */
10304
const xmlChar *
10305
0
xmlCtxtGetVersion(xmlParserCtxt *ctxt) {
10306
0
    if (ctxt == NULL)
10307
0
        return(NULL);
10308
10309
0
    return(ctxt->version);
10310
0
}
10311
10312
/**
10313
 * @since 2.14.0
10314
 *
10315
 * @param ctxt  parser context
10316
 * @returns the value from the standalone document declaration.
10317
 */
10318
int
10319
0
xmlCtxtGetStandalone(xmlParserCtxt *ctxt) {
10320
0
    if (ctxt == NULL)
10321
0
        return(0);
10322
10323
0
    return(ctxt->standalone);
10324
0
}
10325
10326
/**
10327
 * Parse an XML Misc* optional field.
10328
 *
10329
 * @deprecated Internal function, don't use.
10330
 *
10331
 *     [27] Misc ::= Comment | PI |  S
10332
 * @param ctxt  an XML parser context
10333
 */
10334
10335
void
10336
10.1k
xmlParseMisc(xmlParserCtxt *ctxt) {
10337
10.2k
    while (PARSER_STOPPED(ctxt) == 0) {
10338
10.2k
        SKIP_BLANKS;
10339
10.2k
        GROW;
10340
10.2k
        if ((RAW == '<') && (NXT(1) == '?')) {
10341
64
      xmlParsePI(ctxt);
10342
10.1k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10343
0
      xmlParseComment(ctxt);
10344
10.1k
        } else {
10345
10.1k
            break;
10346
10.1k
        }
10347
10.2k
    }
10348
10.1k
}
10349
10350
static void
10351
120k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10352
120k
    xmlDocPtr doc;
10353
10354
    /*
10355
     * SAX: end of the document processing.
10356
     */
10357
120k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10358
5.07k
        ctxt->sax->endDocument(ctxt->userData);
10359
10360
    /*
10361
     * Remove locally kept entity definitions if the tree was not built
10362
     */
10363
120k
    doc = ctxt->myDoc;
10364
120k
    if ((doc != NULL) &&
10365
5.07k
        (xmlStrEqual(doc->version, SAX_COMPAT_MODE))) {
10366
4
        xmlFreeDoc(doc);
10367
4
        ctxt->myDoc = NULL;
10368
4
    }
10369
120k
}
10370
10371
/**
10372
 * Parse an XML document and invoke the SAX handlers. This is useful
10373
 * if you're only interested in custom SAX callbacks. If you want a
10374
 * document tree, use #xmlCtxtParseDocument.
10375
 *
10376
 * @param ctxt  an XML parser context
10377
 * @returns 0, -1 in case of error.
10378
 */
10379
10380
int
10381
5.12k
xmlParseDocument(xmlParserCtxt *ctxt) {
10382
5.12k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10383
0
        return(-1);
10384
10385
5.12k
    GROW;
10386
10387
    /*
10388
     * SAX: detecting the level.
10389
     */
10390
5.12k
    xmlCtxtInitializeLate(ctxt);
10391
10392
5.12k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10393
5.12k
        ctxt->sax->setDocumentLocator(ctxt->userData,
10394
5.12k
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10395
5.12k
    }
10396
10397
5.12k
    xmlDetectEncoding(ctxt);
10398
10399
5.12k
    if (CUR == 0) {
10400
44
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10401
44
  return(-1);
10402
44
    }
10403
10404
5.07k
    GROW;
10405
5.07k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10406
10407
  /*
10408
   * Note that we will switch encoding on the fly.
10409
   */
10410
4.82k
  xmlParseXMLDecl(ctxt);
10411
4.82k
  SKIP_BLANKS;
10412
4.82k
    } else {
10413
253
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10414
253
        if (ctxt->version == NULL) {
10415
0
            xmlErrMemory(ctxt);
10416
0
            return(-1);
10417
0
        }
10418
253
    }
10419
5.07k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10420
5.06k
        ctxt->sax->startDocument(ctxt->userData);
10421
5.07k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10422
5.06k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10423
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10424
0
    }
10425
10426
    /*
10427
     * The Misc part of the Prolog
10428
     */
10429
5.07k
    xmlParseMisc(ctxt);
10430
10431
    /*
10432
     * Then possibly doc type declaration(s) and more Misc
10433
     * (doctypedecl Misc*)?
10434
     */
10435
5.07k
    GROW;
10436
5.07k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10437
10438
0
  ctxt->inSubset = 1;
10439
0
  xmlParseDocTypeDecl(ctxt);
10440
0
  if (RAW == '[') {
10441
0
      xmlParseInternalSubset(ctxt);
10442
0
  } else if (RAW == '>') {
10443
0
            NEXT;
10444
0
        }
10445
10446
  /*
10447
   * Create and update the external subset.
10448
   */
10449
0
  ctxt->inSubset = 2;
10450
0
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10451
0
      (!ctxt->disableSAX))
10452
0
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10453
0
                                ctxt->extSubSystem, ctxt->extSubURI);
10454
0
  ctxt->inSubset = 0;
10455
10456
0
        xmlCleanSpecialAttr(ctxt);
10457
10458
0
  xmlParseMisc(ctxt);
10459
0
    }
10460
10461
    /*
10462
     * Time to start parsing the tree itself
10463
     */
10464
5.07k
    GROW;
10465
5.07k
    if (RAW != '<') {
10466
4
        if (ctxt->wellFormed)
10467
1
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10468
1
                           "Start tag expected, '<' not found\n");
10469
5.07k
    } else {
10470
5.07k
  xmlParseElement(ctxt);
10471
10472
  /*
10473
   * The Misc part at the end
10474
   */
10475
5.07k
  xmlParseMisc(ctxt);
10476
10477
5.07k
        xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
10478
5.07k
    }
10479
10480
5.07k
    ctxt->instate = XML_PARSER_EOF;
10481
5.07k
    xmlFinishDocument(ctxt);
10482
10483
5.07k
    if (! ctxt->wellFormed) {
10484
312
  ctxt->valid = 0;
10485
312
  return(-1);
10486
312
    }
10487
10488
4.76k
    return(0);
10489
5.07k
}
10490
10491
/**
10492
 * Parse a general parsed entity
10493
 * An external general parsed entity is well-formed if it matches the
10494
 * production labeled extParsedEnt.
10495
 *
10496
 * @deprecated Internal function, don't use.
10497
 *
10498
 *     [78] extParsedEnt ::= TextDecl? content
10499
 *
10500
 * @param ctxt  an XML parser context
10501
 * @returns 0, -1 in case of error. the parser context is augmented
10502
 *                as a result of the parsing.
10503
 */
10504
10505
int
10506
0
xmlParseExtParsedEnt(xmlParserCtxt *ctxt) {
10507
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10508
0
        return(-1);
10509
10510
0
    xmlCtxtInitializeLate(ctxt);
10511
10512
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10513
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10514
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10515
0
    }
10516
10517
0
    xmlDetectEncoding(ctxt);
10518
10519
0
    if (CUR == 0) {
10520
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10521
0
    }
10522
10523
    /*
10524
     * Check for the XMLDecl in the Prolog.
10525
     */
10526
0
    GROW;
10527
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10528
10529
  /*
10530
   * Note that we will switch encoding on the fly.
10531
   */
10532
0
  xmlParseXMLDecl(ctxt);
10533
0
  SKIP_BLANKS;
10534
0
    } else {
10535
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10536
0
    }
10537
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10538
0
        ctxt->sax->startDocument(ctxt->userData);
10539
10540
    /*
10541
     * Doing validity checking on chunk doesn't make sense
10542
     */
10543
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10544
0
    ctxt->validate = 0;
10545
0
    ctxt->depth = 0;
10546
10547
0
    xmlParseContentInternal(ctxt);
10548
10549
0
    if (ctxt->input->cur < ctxt->input->end)
10550
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10551
10552
    /*
10553
     * SAX: end of the document processing.
10554
     */
10555
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10556
0
        ctxt->sax->endDocument(ctxt->userData);
10557
10558
0
    if (! ctxt->wellFormed) return(-1);
10559
0
    return(0);
10560
0
}
10561
10562
#ifdef LIBXML_PUSH_ENABLED
10563
/************************************************************************
10564
 *                  *
10565
 *    Progressive parsing interfaces        *
10566
 *                  *
10567
 ************************************************************************/
10568
10569
/**
10570
 * Check whether the input buffer contains a character.
10571
 *
10572
 * @param ctxt  an XML parser context
10573
 * @param c  character
10574
 */
10575
static int
10576
3.61M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10577
3.61M
    const xmlChar *cur;
10578
10579
3.61M
    if (ctxt->checkIndex == 0) {
10580
3.61M
        cur = ctxt->input->cur + 1;
10581
3.61M
    } else {
10582
555
        cur = ctxt->input->cur + ctxt->checkIndex;
10583
555
    }
10584
10585
3.61M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10586
673
        size_t index = ctxt->input->end - ctxt->input->cur;
10587
10588
673
        if (index > LONG_MAX) {
10589
0
            ctxt->checkIndex = 0;
10590
0
            return(1);
10591
0
        }
10592
673
        ctxt->checkIndex = index;
10593
673
        return(0);
10594
3.61M
    } else {
10595
3.61M
        ctxt->checkIndex = 0;
10596
3.61M
        return(1);
10597
3.61M
    }
10598
3.61M
}
10599
10600
/**
10601
 * Check whether the input buffer contains a string.
10602
 *
10603
 * @param ctxt  an XML parser context
10604
 * @param startDelta  delta to apply at the start
10605
 * @param str  string
10606
 * @param strLen  length of string
10607
 */
10608
static const xmlChar *
10609
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10610
61.8k
                     const char *str, size_t strLen) {
10611
61.8k
    const xmlChar *cur, *term;
10612
10613
61.8k
    if (ctxt->checkIndex == 0) {
10614
61.2k
        cur = ctxt->input->cur + startDelta;
10615
61.2k
    } else {
10616
677
        cur = ctxt->input->cur + ctxt->checkIndex;
10617
677
    }
10618
10619
61.8k
    term = BAD_CAST strstr((const char *) cur, str);
10620
61.8k
    if (term == NULL) {
10621
865
        const xmlChar *end = ctxt->input->end;
10622
865
        size_t index;
10623
10624
        /* Rescan (strLen - 1) characters. */
10625
865
        if ((size_t) (end - cur) < strLen)
10626
12
            end = cur;
10627
853
        else
10628
853
            end -= strLen - 1;
10629
865
        index = end - ctxt->input->cur;
10630
865
        if (index > LONG_MAX) {
10631
0
            ctxt->checkIndex = 0;
10632
0
            return(ctxt->input->end - strLen);
10633
0
        }
10634
865
        ctxt->checkIndex = index;
10635
61.0k
    } else {
10636
61.0k
        ctxt->checkIndex = 0;
10637
61.0k
    }
10638
10639
61.8k
    return(term);
10640
61.8k
}
10641
10642
/**
10643
 * Check whether the input buffer contains terminated char data.
10644
 *
10645
 * @param ctxt  an XML parser context
10646
 */
10647
static int
10648
57.3k
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10649
57.3k
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10650
57.3k
    const xmlChar *end = ctxt->input->end;
10651
57.3k
    size_t index;
10652
10653
715k
    while (cur < end) {
10654
714k
        if ((*cur == '<') || (*cur == '&')) {
10655
55.6k
            ctxt->checkIndex = 0;
10656
55.6k
            return(1);
10657
55.6k
        }
10658
658k
        cur++;
10659
658k
    }
10660
10661
1.75k
    index = cur - ctxt->input->cur;
10662
1.75k
    if (index > LONG_MAX) {
10663
0
        ctxt->checkIndex = 0;
10664
0
        return(1);
10665
0
    }
10666
1.75k
    ctxt->checkIndex = index;
10667
1.75k
    return(0);
10668
1.75k
}
10669
10670
/**
10671
 * Check whether there's enough data in the input buffer to finish parsing
10672
 * a start tag. This has to take quotes into account.
10673
 *
10674
 * @param ctxt  an XML parser context
10675
 */
10676
static int
10677
13.8M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10678
13.8M
    const xmlChar *cur;
10679
13.8M
    const xmlChar *end = ctxt->input->end;
10680
13.8M
    int state = ctxt->endCheckState;
10681
13.8M
    size_t index;
10682
10683
13.8M
    if (ctxt->checkIndex == 0)
10684
13.8M
        cur = ctxt->input->cur + 1;
10685
6.03k
    else
10686
6.03k
        cur = ctxt->input->cur + ctxt->checkIndex;
10687
10688
364M
    while (cur < end) {
10689
364M
        if (state) {
10690
104M
            if (*cur == state)
10691
9.85M
                state = 0;
10692
260M
        } else if (*cur == '\'' || *cur == '"') {
10693
9.85M
            state = *cur;
10694
250M
        } else if (*cur == '>') {
10695
13.8M
            ctxt->checkIndex = 0;
10696
13.8M
            ctxt->endCheckState = 0;
10697
13.8M
            return(1);
10698
13.8M
        }
10699
350M
        cur++;
10700
350M
    }
10701
10702
8.10k
    index = cur - ctxt->input->cur;
10703
8.10k
    if (index > LONG_MAX) {
10704
0
        ctxt->checkIndex = 0;
10705
0
        ctxt->endCheckState = 0;
10706
0
        return(1);
10707
0
    }
10708
8.10k
    ctxt->checkIndex = index;
10709
8.10k
    ctxt->endCheckState = state;
10710
8.10k
    return(0);
10711
8.10k
}
10712
10713
/**
10714
 * Check whether there's enough data in the input buffer to finish parsing
10715
 * the internal subset.
10716
 *
10717
 * @param ctxt  an XML parser context
10718
 */
10719
static int
10720
488
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10721
    /*
10722
     * Sorry, but progressive parsing of the internal subset is not
10723
     * supported. We first check that the full content of the internal
10724
     * subset is available and parsing is launched only at that point.
10725
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10726
     * not in a ']]>' sequence which are conditional sections.
10727
     */
10728
488
    const xmlChar *cur, *start;
10729
488
    const xmlChar *end = ctxt->input->end;
10730
488
    int state = ctxt->endCheckState;
10731
488
    size_t index;
10732
10733
488
    if (ctxt->checkIndex == 0) {
10734
340
        cur = ctxt->input->cur + 1;
10735
340
    } else {
10736
148
        cur = ctxt->input->cur + ctxt->checkIndex;
10737
148
    }
10738
488
    start = cur;
10739
10740
9.70M
    while (cur < end) {
10741
9.70M
        if (state == '-') {
10742
1.08M
            if ((*cur == '-') &&
10743
135k
                (cur[1] == '-') &&
10744
74.6k
                (cur[2] == '>')) {
10745
30.4k
                state = 0;
10746
30.4k
                cur += 3;
10747
30.4k
                start = cur;
10748
30.4k
                continue;
10749
30.4k
            }
10750
1.08M
        }
10751
8.62M
        else if (state == ']') {
10752
86.4k
            if (*cur == '>') {
10753
82
                ctxt->checkIndex = 0;
10754
82
                ctxt->endCheckState = 0;
10755
82
                return(1);
10756
82
            }
10757
86.4k
            if (IS_BLANK_CH(*cur)) {
10758
4.01k
                state = ' ';
10759
82.3k
            } else if (*cur != ']') {
10760
2.27k
                state = 0;
10761
2.27k
                start = cur;
10762
2.27k
                continue;
10763
2.27k
            }
10764
86.4k
        }
10765
8.53M
        else if (state == ' ') {
10766
13.4k
            if (*cur == '>') {
10767
3
                ctxt->checkIndex = 0;
10768
3
                ctxt->endCheckState = 0;
10769
3
                return(1);
10770
3
            }
10771
13.4k
            if (!IS_BLANK_CH(*cur)) {
10772
4.01k
                state = 0;
10773
4.01k
                start = cur;
10774
4.01k
                continue;
10775
4.01k
            }
10776
13.4k
        }
10777
8.52M
        else if (state != 0) {
10778
5.94M
            if (*cur == state) {
10779
40.1k
                state = 0;
10780
40.1k
                start = cur + 1;
10781
40.1k
            }
10782
5.94M
        }
10783
2.57M
        else if (*cur == '<') {
10784
98.1k
            if ((cur[1] == '!') &&
10785
54.2k
                (cur[2] == '-') &&
10786
31.1k
                (cur[3] == '-')) {
10787
30.5k
                state = '-';
10788
30.5k
                cur += 4;
10789
                /* Don't treat <!--> as comment */
10790
30.5k
                start = cur;
10791
30.5k
                continue;
10792
30.5k
            }
10793
98.1k
        }
10794
2.48M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10795
46.6k
            state = *cur;
10796
46.6k
        }
10797
10798
9.64M
        cur++;
10799
9.64M
    }
10800
10801
    /*
10802
     * Rescan the three last characters to detect "<!--" and "-->"
10803
     * split across chunks.
10804
     */
10805
403
    if ((state == 0) || (state == '-')) {
10806
181
        if (cur - start < 3)
10807
22
            cur = start;
10808
159
        else
10809
159
            cur -= 3;
10810
181
    }
10811
403
    index = cur - ctxt->input->cur;
10812
403
    if (index > LONG_MAX) {
10813
0
        ctxt->checkIndex = 0;
10814
0
        ctxt->endCheckState = 0;
10815
0
        return(1);
10816
0
    }
10817
403
    ctxt->checkIndex = index;
10818
403
    ctxt->endCheckState = state;
10819
403
    return(0);
10820
403
}
10821
10822
/**
10823
 * Try to progress on parsing
10824
 *
10825
 * @param ctxt  an XML parser context
10826
 * @param terminate  last chunk indicator
10827
 * @returns zero if no parsing was possible
10828
 */
10829
static int
10830
164k
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10831
164k
    int ret = 0;
10832
164k
    size_t avail;
10833
164k
    xmlChar cur, next;
10834
10835
164k
    if (ctxt->input == NULL)
10836
0
        return(0);
10837
10838
164k
    if ((ctxt->input != NULL) &&
10839
164k
        (ctxt->input->cur - ctxt->input->base > 4096)) {
10840
16.1k
        xmlParserShrink(ctxt);
10841
16.1k
    }
10842
10843
68.6M
    while (ctxt->disableSAX == 0) {
10844
68.5M
        avail = ctxt->input->end - ctxt->input->cur;
10845
68.5M
        if (avail < 1)
10846
116k
      goto done;
10847
68.4M
        switch (ctxt->instate) {
10848
615
            case XML_PARSER_EOF:
10849
          /*
10850
     * Document parsing is done !
10851
     */
10852
615
          goto done;
10853
146k
            case XML_PARSER_START:
10854
                /*
10855
                 * Very first chars read from the document flow.
10856
                 */
10857
146k
                if ((!terminate) && (avail < 4))
10858
0
                    goto done;
10859
10860
                /*
10861
                 * We need more bytes to detect EBCDIC code pages.
10862
                 * See xmlDetectEBCDIC.
10863
                 */
10864
146k
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
10865
3
                    (!terminate) && (avail < 200))
10866
0
                    goto done;
10867
10868
146k
                xmlDetectEncoding(ctxt);
10869
146k
                ctxt->instate = XML_PARSER_XML_DECL;
10870
146k
    break;
10871
10872
146k
            case XML_PARSER_XML_DECL:
10873
146k
    if ((!terminate) && (avail < 2))
10874
0
        goto done;
10875
146k
    cur = ctxt->input->cur[0];
10876
146k
    next = ctxt->input->cur[1];
10877
146k
          if ((cur == '<') && (next == '?')) {
10878
        /* PI or XML decl */
10879
139k
        if ((!terminate) &&
10880
14.9k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
10881
98
      goto done;
10882
139k
        if ((ctxt->input->cur[2] == 'x') &&
10883
136k
      (ctxt->input->cur[3] == 'm') &&
10884
136k
      (ctxt->input->cur[4] == 'l') &&
10885
135k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
10886
128k
      ret += 5;
10887
128k
      xmlParseXMLDecl(ctxt);
10888
128k
        } else {
10889
10.4k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10890
10.4k
                        if (ctxt->version == NULL) {
10891
0
                            xmlErrMemory(ctxt);
10892
0
                            break;
10893
0
                        }
10894
10.4k
        }
10895
139k
    } else {
10896
6.79k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10897
6.79k
        if (ctxt->version == NULL) {
10898
0
            xmlErrMemory(ctxt);
10899
0
      break;
10900
0
        }
10901
6.79k
    }
10902
146k
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10903
0
                    ctxt->sax->setDocumentLocator(ctxt->userData,
10904
0
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
10905
0
                }
10906
146k
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10907
0
                    (!ctxt->disableSAX))
10908
0
                    ctxt->sax->startDocument(ctxt->userData);
10909
146k
                ctxt->instate = XML_PARSER_MISC;
10910
146k
    break;
10911
23.2M
            case XML_PARSER_START_TAG: {
10912
23.2M
          const xmlChar *name;
10913
23.2M
    const xmlChar *prefix = NULL;
10914
23.2M
    const xmlChar *URI = NULL;
10915
23.2M
                int line = ctxt->input->line;
10916
23.2M
    int nbNs = 0;
10917
10918
23.2M
    if ((!terminate) && (avail < 2))
10919
2
        goto done;
10920
23.2M
    cur = ctxt->input->cur[0];
10921
23.2M
          if (cur != '<') {
10922
229
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10923
229
                                   "Start tag expected, '<' not found");
10924
229
                    ctxt->instate = XML_PARSER_EOF;
10925
229
                    xmlFinishDocument(ctxt);
10926
229
        goto done;
10927
229
    }
10928
23.2M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
10929
8.37k
                    goto done;
10930
23.2M
    if (ctxt->spaceNr == 0)
10931
0
        spacePush(ctxt, -1);
10932
23.2M
    else if (*ctxt->space == -2)
10933
4.84M
        spacePush(ctxt, -1);
10934
18.3M
    else
10935
18.3M
        spacePush(ctxt, *ctxt->space);
10936
23.2M
#ifdef LIBXML_SAX1_ENABLED
10937
23.2M
    if (ctxt->sax2)
10938
23.2M
#endif /* LIBXML_SAX1_ENABLED */
10939
23.2M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
10940
210
#ifdef LIBXML_SAX1_ENABLED
10941
210
    else
10942
210
        name = xmlParseStartTag(ctxt);
10943
23.2M
#endif /* LIBXML_SAX1_ENABLED */
10944
23.2M
    if (name == NULL) {
10945
1.44k
        spacePop(ctxt);
10946
1.44k
                    ctxt->instate = XML_PARSER_EOF;
10947
1.44k
                    xmlFinishDocument(ctxt);
10948
1.44k
        goto done;
10949
1.44k
    }
10950
23.2M
#ifdef LIBXML_VALID_ENABLED
10951
    /*
10952
     * [ VC: Root Element Type ]
10953
     * The Name in the document type declaration must match
10954
     * the element type of the root element.
10955
     */
10956
23.2M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10957
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10958
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10959
23.2M
#endif /* LIBXML_VALID_ENABLED */
10960
10961
    /*
10962
     * Check for an Empty Element.
10963
     */
10964
23.2M
    if ((RAW == '/') && (NXT(1) == '>')) {
10965
6.97M
        SKIP(2);
10966
10967
6.97M
        if (ctxt->sax2) {
10968
6.97M
      if ((ctxt->sax != NULL) &&
10969
6.97M
          (ctxt->sax->endElementNs != NULL) &&
10970
6.97M
          (!ctxt->disableSAX))
10971
6.97M
          ctxt->sax->endElementNs(ctxt->userData, name,
10972
6.97M
                                  prefix, URI);
10973
6.97M
      if (nbNs > 0)
10974
75.0k
          xmlParserNsPop(ctxt, nbNs);
10975
6.97M
#ifdef LIBXML_SAX1_ENABLED
10976
6.97M
        } else {
10977
0
      if ((ctxt->sax != NULL) &&
10978
0
          (ctxt->sax->endElement != NULL) &&
10979
0
          (!ctxt->disableSAX))
10980
0
          ctxt->sax->endElement(ctxt->userData, name);
10981
0
#endif /* LIBXML_SAX1_ENABLED */
10982
0
        }
10983
6.97M
        spacePop(ctxt);
10984
16.2M
    } else if (RAW == '>') {
10985
16.2M
        NEXT;
10986
16.2M
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
10987
16.2M
    } else {
10988
19.7k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
10989
19.7k
           "Couldn't find end of Start Tag %s\n",
10990
19.7k
           name);
10991
19.7k
        nodePop(ctxt);
10992
19.7k
        spacePop(ctxt);
10993
19.7k
                    if (nbNs > 0)
10994
2.45k
                        xmlParserNsPop(ctxt, nbNs);
10995
19.7k
    }
10996
10997
23.2M
                if (ctxt->nameNr == 0)
10998
3.89k
                    ctxt->instate = XML_PARSER_EPILOG;
10999
23.2M
                else
11000
23.2M
                    ctxt->instate = XML_PARSER_CONTENT;
11001
23.2M
                break;
11002
23.2M
      }
11003
37.6M
            case XML_PARSER_CONTENT: {
11004
37.6M
    cur = ctxt->input->cur[0];
11005
11006
37.6M
    if (cur == '<') {
11007
30.2M
                    if ((!terminate) && (avail < 2))
11008
396
                        goto done;
11009
30.2M
        next = ctxt->input->cur[1];
11010
11011
30.2M
                    if (next == '/') {
11012
7.12M
                        ctxt->instate = XML_PARSER_END_TAG;
11013
7.12M
                        break;
11014
23.1M
                    } else if (next == '?') {
11015
5.30k
                        if ((!terminate) &&
11016
3.40k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11017
48
                            goto done;
11018
5.25k
                        xmlParsePI(ctxt);
11019
5.25k
                        ctxt->instate = XML_PARSER_CONTENT;
11020
5.25k
                        break;
11021
23.1M
                    } else if (next == '!') {
11022
39.7k
                        if ((!terminate) && (avail < 3))
11023
5
                            goto done;
11024
39.7k
                        next = ctxt->input->cur[2];
11025
11026
39.7k
                        if (next == '-') {
11027
33.5k
                            if ((!terminate) && (avail < 4))
11028
4
                                goto done;
11029
33.5k
                            if (ctxt->input->cur[3] == '-') {
11030
33.5k
                                if ((!terminate) &&
11031
24.8k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11032
68
                                    goto done;
11033
33.4k
                                xmlParseComment(ctxt);
11034
33.4k
                                ctxt->instate = XML_PARSER_CONTENT;
11035
33.4k
                                break;
11036
33.5k
                            }
11037
33.5k
                        } else if (next == '[') {
11038
6.15k
                            if ((!terminate) && (avail < 9))
11039
3
                                goto done;
11040
6.15k
                            if ((ctxt->input->cur[2] == '[') &&
11041
6.15k
                                (ctxt->input->cur[3] == 'C') &&
11042
6.15k
                                (ctxt->input->cur[4] == 'D') &&
11043
6.14k
                                (ctxt->input->cur[5] == 'A') &&
11044
6.13k
                                (ctxt->input->cur[6] == 'T') &&
11045
6.13k
                                (ctxt->input->cur[7] == 'A') &&
11046
6.13k
                                (ctxt->input->cur[8] == '[')) {
11047
6.13k
                                if ((!terminate) &&
11048
4.73k
                                    (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11049
565
                                    goto done;
11050
5.56k
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11051
5.56k
                                xmlParseCDSect(ctxt);
11052
5.56k
                                ctxt->instate = XML_PARSER_CONTENT;
11053
5.56k
                                break;
11054
6.13k
                            }
11055
6.15k
                        }
11056
39.7k
                    }
11057
30.2M
    } else if (cur == '&') {
11058
261k
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11059
85
      goto done;
11060
261k
        xmlParseReference(ctxt);
11061
261k
                    break;
11062
7.13M
    } else {
11063
        /* TODO Avoid the extra copy, handle directly !!! */
11064
        /*
11065
         * Goal of the following test is:
11066
         *  - minimize calls to the SAX 'character' callback
11067
         *    when they are mergeable
11068
         *  - handle an problem for isBlank when we only parse
11069
         *    a sequence of blank chars and the next one is
11070
         *    not available to check against '<' presence.
11071
         *  - tries to homogenize the differences in SAX
11072
         *    callbacks between the push and pull versions
11073
         *    of the parser.
11074
         */
11075
7.13M
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11076
238k
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11077
1.75k
          goto done;
11078
238k
                    }
11079
7.13M
                    ctxt->checkIndex = 0;
11080
7.13M
        xmlParseCharDataInternal(ctxt, !terminate);
11081
7.13M
                    break;
11082
7.13M
    }
11083
11084
23.0M
                ctxt->instate = XML_PARSER_START_TAG;
11085
23.0M
    break;
11086
37.6M
      }
11087
7.12M
            case XML_PARSER_END_TAG:
11088
7.12M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11089
588
        goto done;
11090
7.12M
    if (ctxt->sax2) {
11091
7.12M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11092
7.12M
        nameNsPop(ctxt);
11093
7.12M
    }
11094
68
#ifdef LIBXML_SAX1_ENABLED
11095
68
      else
11096
68
        xmlParseEndTag1(ctxt, 0);
11097
7.12M
#endif /* LIBXML_SAX1_ENABLED */
11098
7.12M
    if (ctxt->nameNr == 0) {
11099
110k
        ctxt->instate = XML_PARSER_EPILOG;
11100
7.00M
    } else {
11101
7.00M
        ctxt->instate = XML_PARSER_CONTENT;
11102
7.00M
    }
11103
7.12M
    break;
11104
162k
            case XML_PARSER_MISC:
11105
163k
            case XML_PARSER_PROLOG:
11106
172k
            case XML_PARSER_EPILOG:
11107
172k
    SKIP_BLANKS;
11108
172k
                avail = ctxt->input->end - ctxt->input->cur;
11109
172k
    if (avail < 1)
11110
3.98k
        goto done;
11111
168k
    if (ctxt->input->cur[0] == '<') {
11112
168k
                    if ((!terminate) && (avail < 2))
11113
8
                        goto done;
11114
168k
                    next = ctxt->input->cur[1];
11115
168k
                    if (next == '?') {
11116
21.4k
                        if ((!terminate) &&
11117
12.8k
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11118
53
                            goto done;
11119
21.3k
                        xmlParsePI(ctxt);
11120
21.3k
                        break;
11121
146k
                    } else if (next == '!') {
11122
3.73k
                        if ((!terminate) && (avail < 3))
11123
3
                            goto done;
11124
11125
3.73k
                        if (ctxt->input->cur[2] == '-') {
11126
1.83k
                            if ((!terminate) && (avail < 4))
11127
0
                                goto done;
11128
1.83k
                            if (ctxt->input->cur[3] == '-') {
11129
1.82k
                                if ((!terminate) &&
11130
1.09k
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11131
33
                                    goto done;
11132
1.79k
                                xmlParseComment(ctxt);
11133
1.79k
                                break;
11134
1.82k
                            }
11135
1.90k
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11136
1.89k
                            if ((!terminate) && (avail < 9))
11137
0
                                goto done;
11138
1.89k
                            if ((ctxt->input->cur[2] == 'D') &&
11139
1.88k
                                (ctxt->input->cur[3] == 'O') &&
11140
1.88k
                                (ctxt->input->cur[4] == 'C') &&
11141
1.88k
                                (ctxt->input->cur[5] == 'T') &&
11142
1.88k
                                (ctxt->input->cur[6] == 'Y') &&
11143
1.88k
                                (ctxt->input->cur[7] == 'P') &&
11144
1.88k
                                (ctxt->input->cur[8] == 'E')) {
11145
1.88k
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11146
74
                                    goto done;
11147
1.80k
                                ctxt->inSubset = 1;
11148
1.80k
                                xmlParseDocTypeDecl(ctxt);
11149
1.80k
                                if (RAW == '[') {
11150
1.70k
                                    ctxt->instate = XML_PARSER_DTD;
11151
1.70k
                                } else {
11152
105
                                    if (RAW == '>')
11153
55
                                        NEXT;
11154
                                    /*
11155
                                     * Create and update the external subset.
11156
                                     */
11157
105
                                    ctxt->inSubset = 2;
11158
105
                                    if ((ctxt->sax != NULL) &&
11159
105
                                        (!ctxt->disableSAX) &&
11160
54
                                        (ctxt->sax->externalSubset != NULL))
11161
0
                                        ctxt->sax->externalSubset(
11162
0
                                                ctxt->userData,
11163
0
                                                ctxt->intSubName,
11164
0
                                                ctxt->extSubSystem,
11165
0
                                                ctxt->extSubURI);
11166
105
                                    ctxt->inSubset = 0;
11167
105
                                    xmlCleanSpecialAttr(ctxt);
11168
105
                                    ctxt->instate = XML_PARSER_PROLOG;
11169
105
                                }
11170
1.80k
                                break;
11171
1.88k
                            }
11172
1.89k
                        }
11173
3.73k
                    }
11174
168k
                }
11175
11176
143k
                if (ctxt->instate == XML_PARSER_EPILOG) {
11177
426
                    if (ctxt->errNo == XML_ERR_OK)
11178
42
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11179
426
        ctxt->instate = XML_PARSER_EOF;
11180
426
                    xmlFinishDocument(ctxt);
11181
143k
                } else {
11182
143k
        ctxt->instate = XML_PARSER_START_TAG;
11183
143k
    }
11184
143k
    break;
11185
2.08k
            case XML_PARSER_DTD: {
11186
2.08k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11187
403
                    goto done;
11188
1.67k
    xmlParseInternalSubset(ctxt);
11189
1.67k
    ctxt->inSubset = 2;
11190
1.67k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11191
487
        (ctxt->sax->externalSubset != NULL))
11192
0
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11193
0
          ctxt->extSubSystem, ctxt->extSubURI);
11194
1.67k
    ctxt->inSubset = 0;
11195
1.67k
    xmlCleanSpecialAttr(ctxt);
11196
1.67k
    ctxt->instate = XML_PARSER_PROLOG;
11197
1.67k
                break;
11198
2.08k
      }
11199
0
            default:
11200
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11201
0
      "PP: internal error\n");
11202
0
    ctxt->instate = XML_PARSER_EOF;
11203
0
    break;
11204
68.4M
  }
11205
68.4M
    }
11206
164k
done:
11207
164k
    return(ret);
11208
164k
}
11209
11210
/**
11211
 * Parse a chunk of memory in push parser mode.
11212
 *
11213
 * Assumes that the parser context was initialized with
11214
 * #xmlCreatePushParserCtxt.
11215
 *
11216
 * The last chunk, which will often be empty, must be marked with
11217
 * the `terminate` flag. With the default SAX callbacks, the resulting
11218
 * document will be available in ctxt->myDoc. This pointer will not
11219
 * be freed when calling #xmlFreeParserCtxt and must be freed by the
11220
 * caller. If the document isn't well-formed, it will still be returned
11221
 * in ctxt->myDoc.
11222
 *
11223
 * As an exception, #xmlCtxtResetPush will free the document in
11224
 * ctxt->myDoc. So ctxt->myDoc should be set to NULL after extracting
11225
 * the document.
11226
 *
11227
 * Since 2.14.0, #xmlCtxtGetDocument can be used to retrieve the
11228
 * result document.
11229
 *
11230
 * @param ctxt  an XML parser context
11231
 * @param chunk  chunk of memory
11232
 * @param size  size of chunk in bytes
11233
 * @param terminate  last chunk indicator
11234
 * @returns an xmlParserErrors code (0 on success).
11235
 */
11236
int
11237
xmlParseChunk(xmlParserCtxt *ctxt, const char *chunk, int size,
11238
164k
              int terminate) {
11239
164k
    size_t curBase;
11240
164k
    size_t maxLength;
11241
164k
    size_t pos;
11242
164k
    int end_in_lf = 0;
11243
164k
    int res;
11244
11245
164k
    if ((ctxt == NULL) || (size < 0))
11246
0
        return(XML_ERR_ARGUMENT);
11247
164k
    if ((chunk == NULL) && (size > 0))
11248
0
        return(XML_ERR_ARGUMENT);
11249
164k
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11250
0
        return(XML_ERR_ARGUMENT);
11251
164k
    if (ctxt->disableSAX != 0)
11252
0
        return(ctxt->errNo);
11253
11254
164k
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11255
164k
    if (ctxt->instate == XML_PARSER_START)
11256
146k
        xmlCtxtInitializeLate(ctxt);
11257
164k
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11258
26.1k
        (chunk[size - 1] == '\r')) {
11259
78
  end_in_lf = 1;
11260
78
  size--;
11261
78
    }
11262
11263
    /*
11264
     * Also push an empty chunk to make sure that the raw buffer
11265
     * will be flushed if there is an encoder.
11266
     */
11267
164k
    pos = ctxt->input->cur - ctxt->input->base;
11268
164k
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11269
164k
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11270
164k
    if (res < 0) {
11271
3
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11272
3
        return(ctxt->errNo);
11273
3
    }
11274
11275
164k
    xmlParseTryOrFinish(ctxt, terminate);
11276
11277
164k
    curBase = ctxt->input->cur - ctxt->input->base;
11278
164k
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11279
164k
                XML_MAX_HUGE_LENGTH :
11280
164k
                XML_MAX_LOOKUP_LIMIT;
11281
164k
    if (curBase > maxLength) {
11282
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11283
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11284
0
    }
11285
11286
164k
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11287
31.6k
        return(ctxt->errNo);
11288
11289
133k
    if (end_in_lf == 1) {
11290
72
  pos = ctxt->input->cur - ctxt->input->base;
11291
72
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11292
72
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11293
72
        if (res < 0) {
11294
0
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11295
0
            return(ctxt->errNo);
11296
0
        }
11297
72
    }
11298
133k
    if (terminate) {
11299
  /*
11300
   * Check for termination
11301
   */
11302
114k
        if ((ctxt->instate != XML_PARSER_EOF) &&
11303
113k
            (ctxt->instate != XML_PARSER_EPILOG)) {
11304
2.95k
            if (ctxt->nameNr > 0) {
11305
2.91k
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11306
2.91k
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11307
2.91k
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11308
2.91k
                        "Premature end of data in tag %s line %d\n",
11309
2.91k
                        name, line, NULL);
11310
2.91k
            } else if (ctxt->instate == XML_PARSER_START) {
11311
0
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11312
46
            } else {
11313
46
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11314
46
                               "Start tag expected, '<' not found\n");
11315
46
            }
11316
111k
        } else {
11317
111k
            xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
11318
111k
        }
11319
114k
  if (ctxt->instate != XML_PARSER_EOF) {
11320
113k
            ctxt->instate = XML_PARSER_EOF;
11321
113k
            xmlFinishDocument(ctxt);
11322
113k
  }
11323
114k
    }
11324
133k
    if (ctxt->wellFormed == 0)
11325
2.95k
  return((xmlParserErrors) ctxt->errNo);
11326
130k
    else
11327
130k
        return(0);
11328
133k
}
11329
11330
/************************************************************************
11331
 *                  *
11332
 *    I/O front end functions to the parser     *
11333
 *                  *
11334
 ************************************************************************/
11335
11336
/**
11337
 * Create a parser context for using the XML parser in push mode.
11338
 * See #xmlParseChunk.
11339
 *
11340
 * Passing an initial chunk is useless and deprecated.
11341
 *
11342
 * The push parser doesn't support recovery mode or the
11343
 * XML_PARSE_NOBLANKS option.
11344
 *
11345
 * `filename` is used as base URI to fetch external entities and for
11346
 * error reports.
11347
 *
11348
 * @param sax  a SAX handler (optional)
11349
 * @param user_data  user data for SAX callbacks (optional)
11350
 * @param chunk  initial chunk (optional, deprecated)
11351
 * @param size  size of initial chunk in bytes
11352
 * @param filename  file name or URI (optional)
11353
 * @returns the new parser context or NULL if a memory allocation
11354
 * failed.
11355
 */
11356
11357
xmlParserCtxt *
11358
xmlCreatePushParserCtxt(xmlSAXHandler *sax, void *user_data,
11359
146k
                        const char *chunk, int size, const char *filename) {
11360
146k
    xmlParserCtxtPtr ctxt;
11361
146k
    xmlParserInputPtr input;
11362
11363
146k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11364
146k
    if (ctxt == NULL)
11365
0
  return(NULL);
11366
11367
146k
    ctxt->options &= ~XML_PARSE_NODICT;
11368
146k
    ctxt->dictNames = 1;
11369
11370
146k
    input = xmlNewPushInput(filename, chunk, size);
11371
146k
    if (input == NULL) {
11372
0
  xmlFreeParserCtxt(ctxt);
11373
0
  return(NULL);
11374
0
    }
11375
146k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11376
0
        xmlFreeInputStream(input);
11377
0
        xmlFreeParserCtxt(ctxt);
11378
0
        return(NULL);
11379
0
    }
11380
11381
146k
    return(ctxt);
11382
146k
}
11383
#endif /* LIBXML_PUSH_ENABLED */
11384
11385
/**
11386
 * Blocks further parser processing
11387
 *
11388
 * @param ctxt  an XML parser context
11389
 */
11390
void
11391
0
xmlStopParser(xmlParserCtxt *ctxt) {
11392
0
    if (ctxt == NULL)
11393
0
        return;
11394
11395
    /* This stops the parser */
11396
0
    ctxt->disableSAX = 2;
11397
11398
    /*
11399
     * xmlStopParser is often called from error handlers,
11400
     * so we can't raise an error here to avoid infinite
11401
     * loops. Just make sure that an error condition is
11402
     * reported.
11403
     */
11404
0
    if (ctxt->errNo == XML_ERR_OK) {
11405
0
        ctxt->errNo = XML_ERR_USER_STOP;
11406
0
        ctxt->lastError.code = XML_ERR_USER_STOP;
11407
0
        ctxt->wellFormed = 0;
11408
0
    }
11409
0
}
11410
11411
/**
11412
 * Create a parser context for using the XML parser with an existing
11413
 * I/O stream
11414
 *
11415
 * @param sax  a SAX handler (optional)
11416
 * @param user_data  user data for SAX callbacks (optional)
11417
 * @param ioread  an I/O read function
11418
 * @param ioclose  an I/O close function (optional)
11419
 * @param ioctx  an I/O handler
11420
 * @param enc  the charset encoding if known (deprecated)
11421
 * @returns the new parser context or NULL
11422
 */
11423
xmlParserCtxt *
11424
xmlCreateIOParserCtxt(xmlSAXHandler *sax, void *user_data,
11425
                      xmlInputReadCallback ioread,
11426
                      xmlInputCloseCallback ioclose,
11427
0
                      void *ioctx, xmlCharEncoding enc) {
11428
0
    xmlParserCtxtPtr ctxt;
11429
0
    xmlParserInputPtr input;
11430
0
    const char *encoding;
11431
11432
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11433
0
    if (ctxt == NULL)
11434
0
  return(NULL);
11435
11436
0
    encoding = xmlGetCharEncodingName(enc);
11437
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11438
0
                                  encoding, 0);
11439
0
    if (input == NULL) {
11440
0
  xmlFreeParserCtxt(ctxt);
11441
0
        return (NULL);
11442
0
    }
11443
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11444
0
        xmlFreeInputStream(input);
11445
0
        xmlFreeParserCtxt(ctxt);
11446
0
        return(NULL);
11447
0
    }
11448
11449
0
    return(ctxt);
11450
0
}
11451
11452
#ifdef LIBXML_VALID_ENABLED
11453
/************************************************************************
11454
 *                  *
11455
 *    Front ends when parsing a DTD       *
11456
 *                  *
11457
 ************************************************************************/
11458
11459
/**
11460
 * Parse a DTD.
11461
 *
11462
 * Option XML_PARSE_DTDLOAD should be enabled in the parser context
11463
 * to make external entities work.
11464
 *
11465
 * @since 2.14.0
11466
 *
11467
 * @param ctxt  a parser context
11468
 * @param input  a parser input
11469
 * @param publicId  public ID of the DTD (optional)
11470
 * @param systemId  system ID of the DTD (optional)
11471
 * @returns the resulting xmlDtd or NULL in case of error.
11472
 * `input` will be freed by the function in any case.
11473
 */
11474
xmlDtd *
11475
xmlCtxtParseDtd(xmlParserCtxt *ctxt, xmlParserInput *input,
11476
0
                const xmlChar *publicId, const xmlChar *systemId) {
11477
0
    xmlDtdPtr ret = NULL;
11478
11479
0
    if ((ctxt == NULL) || (input == NULL)) {
11480
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
11481
0
        xmlFreeInputStream(input);
11482
0
        return(NULL);
11483
0
    }
11484
11485
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11486
0
        xmlFreeInputStream(input);
11487
0
        return(NULL);
11488
0
    }
11489
11490
0
    if (publicId == NULL)
11491
0
        publicId = BAD_CAST "none";
11492
0
    if (systemId == NULL)
11493
0
        systemId = BAD_CAST "none";
11494
11495
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11496
0
    if (ctxt->myDoc == NULL) {
11497
0
        xmlErrMemory(ctxt);
11498
0
        goto error;
11499
0
    }
11500
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11501
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11502
0
                                       publicId, systemId);
11503
0
    if (ctxt->myDoc->extSubset == NULL) {
11504
0
        xmlErrMemory(ctxt);
11505
0
        xmlFreeDoc(ctxt->myDoc);
11506
0
        goto error;
11507
0
    }
11508
11509
0
    xmlParseExternalSubset(ctxt, publicId, systemId);
11510
11511
0
    if (ctxt->wellFormed) {
11512
0
        ret = ctxt->myDoc->extSubset;
11513
0
        ctxt->myDoc->extSubset = NULL;
11514
0
        if (ret != NULL) {
11515
0
            xmlNodePtr tmp;
11516
11517
0
            ret->doc = NULL;
11518
0
            tmp = ret->children;
11519
0
            while (tmp != NULL) {
11520
0
                tmp->doc = NULL;
11521
0
                tmp = tmp->next;
11522
0
            }
11523
0
        }
11524
0
    } else {
11525
0
        ret = NULL;
11526
0
    }
11527
0
    xmlFreeDoc(ctxt->myDoc);
11528
0
    ctxt->myDoc = NULL;
11529
11530
0
error:
11531
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
11532
11533
0
    return(ret);
11534
0
}
11535
11536
/**
11537
 * Load and parse a DTD
11538
 *
11539
 * @deprecated Use #xmlCtxtParseDtd.
11540
 *
11541
 * @param sax  the SAX handler block or NULL
11542
 * @param input  an Input Buffer
11543
 * @param enc  the charset encoding if known
11544
 * @returns the resulting xmlDtd or NULL in case of error.
11545
 * `input` will be freed by the function in any case.
11546
 */
11547
11548
xmlDtd *
11549
xmlIOParseDTD(xmlSAXHandler *sax, xmlParserInputBuffer *input,
11550
0
        xmlCharEncoding enc) {
11551
0
    xmlDtdPtr ret = NULL;
11552
0
    xmlParserCtxtPtr ctxt;
11553
0
    xmlParserInputPtr pinput = NULL;
11554
11555
0
    if (input == NULL)
11556
0
  return(NULL);
11557
11558
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11559
0
    if (ctxt == NULL) {
11560
0
        xmlFreeParserInputBuffer(input);
11561
0
  return(NULL);
11562
0
    }
11563
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11564
11565
    /*
11566
     * generate a parser input from the I/O handler
11567
     */
11568
11569
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11570
0
    if (pinput == NULL) {
11571
0
        xmlFreeParserInputBuffer(input);
11572
0
  xmlFreeParserCtxt(ctxt);
11573
0
  return(NULL);
11574
0
    }
11575
11576
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11577
0
        xmlSwitchEncoding(ctxt, enc);
11578
0
    }
11579
11580
0
    ret = xmlCtxtParseDtd(ctxt, pinput, NULL, NULL);
11581
11582
0
    xmlFreeParserCtxt(ctxt);
11583
0
    return(ret);
11584
0
}
11585
11586
/**
11587
 * Load and parse an external subset.
11588
 *
11589
 * @deprecated Use #xmlCtxtParseDtd.
11590
 *
11591
 * @param sax  the SAX handler block
11592
 * @param publicId  public identifier of the DTD (optional)
11593
 * @param systemId  system identifier (URL) of the DTD
11594
 * @returns the resulting xmlDtd or NULL in case of error.
11595
 */
11596
11597
xmlDtd *
11598
xmlSAXParseDTD(xmlSAXHandler *sax, const xmlChar *publicId,
11599
0
               const xmlChar *systemId) {
11600
0
    xmlDtdPtr ret = NULL;
11601
0
    xmlParserCtxtPtr ctxt;
11602
0
    xmlParserInputPtr input = NULL;
11603
0
    xmlChar* systemIdCanonic;
11604
11605
0
    if ((publicId == NULL) && (systemId == NULL)) return(NULL);
11606
11607
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11608
0
    if (ctxt == NULL) {
11609
0
  return(NULL);
11610
0
    }
11611
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11612
11613
    /*
11614
     * Canonicalise the system ID
11615
     */
11616
0
    systemIdCanonic = xmlCanonicPath(systemId);
11617
0
    if ((systemId != NULL) && (systemIdCanonic == NULL)) {
11618
0
  xmlFreeParserCtxt(ctxt);
11619
0
  return(NULL);
11620
0
    }
11621
11622
    /*
11623
     * Ask the entity resolver to load the external subset
11624
     */
11625
11626
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11627
0
  input = ctxt->sax->resolveEntity(ctxt->userData, publicId,
11628
0
                                   systemIdCanonic);
11629
0
    if (input == NULL) {
11630
0
  xmlFreeParserCtxt(ctxt);
11631
0
  if (systemIdCanonic != NULL)
11632
0
      xmlFree(systemIdCanonic);
11633
0
  return(NULL);
11634
0
    }
11635
11636
0
    if (input->filename == NULL)
11637
0
  input->filename = (char *) systemIdCanonic;
11638
0
    else
11639
0
  xmlFree(systemIdCanonic);
11640
11641
0
    ret = xmlCtxtParseDtd(ctxt, input, publicId, systemId);
11642
11643
0
    xmlFreeParserCtxt(ctxt);
11644
0
    return(ret);
11645
0
}
11646
11647
11648
/**
11649
 * Load and parse an external subset.
11650
 *
11651
 * @param publicId  public identifier of the DTD (optional)
11652
 * @param systemId  system identifier (URL) of the DTD
11653
 * @returns the resulting xmlDtd or NULL in case of error.
11654
 */
11655
11656
xmlDtd *
11657
0
xmlParseDTD(const xmlChar *publicId, const xmlChar *systemId) {
11658
0
    return(xmlSAXParseDTD(NULL, publicId, systemId));
11659
0
}
11660
#endif /* LIBXML_VALID_ENABLED */
11661
11662
/************************************************************************
11663
 *                  *
11664
 *    Front ends when parsing an Entity     *
11665
 *                  *
11666
 ************************************************************************/
11667
11668
static xmlNodePtr
11669
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11670
0
                            int hasTextDecl, int buildTree) {
11671
0
    xmlNodePtr root = NULL;
11672
0
    xmlNodePtr list = NULL;
11673
0
    xmlChar *rootName = BAD_CAST "#root";
11674
0
    int result;
11675
11676
0
    if (buildTree) {
11677
0
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11678
0
        if (root == NULL) {
11679
0
            xmlErrMemory(ctxt);
11680
0
            goto error;
11681
0
        }
11682
0
    }
11683
11684
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
11685
0
        goto error;
11686
11687
0
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11688
0
    spacePush(ctxt, -1);
11689
11690
0
    if (buildTree)
11691
0
        nodePush(ctxt, root);
11692
11693
0
    if (hasTextDecl) {
11694
0
        xmlDetectEncoding(ctxt);
11695
11696
        /*
11697
         * Parse a possible text declaration first
11698
         */
11699
0
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11700
0
            (IS_BLANK_CH(NXT(5)))) {
11701
0
            xmlParseTextDecl(ctxt);
11702
            /*
11703
             * An XML-1.0 document can't reference a non-XML-1.0 entity
11704
             */
11705
0
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11706
0
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11707
0
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11708
0
                               "Version mismatch between document and "
11709
0
                               "entity\n");
11710
0
            }
11711
0
        }
11712
0
    }
11713
11714
0
    xmlParseContentInternal(ctxt);
11715
11716
0
    if (ctxt->input->cur < ctxt->input->end)
11717
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11718
11719
0
    if ((ctxt->wellFormed) ||
11720
0
        ((ctxt->recovery) && (!xmlCtxtIsCatastrophicError(ctxt)))) {
11721
0
        if (root != NULL) {
11722
0
            xmlNodePtr cur;
11723
11724
            /*
11725
             * Unlink newly created node list.
11726
             */
11727
0
            list = root->children;
11728
0
            root->children = NULL;
11729
0
            root->last = NULL;
11730
0
            for (cur = list; cur != NULL; cur = cur->next)
11731
0
                cur->parent = NULL;
11732
0
        }
11733
0
    }
11734
11735
    /*
11736
     * Read the rest of the stream in case of errors. We want
11737
     * to account for the whole entity size.
11738
     */
11739
0
    do {
11740
0
        ctxt->input->cur = ctxt->input->end;
11741
0
        xmlParserShrink(ctxt);
11742
0
        result = xmlParserGrow(ctxt);
11743
0
    } while (result > 0);
11744
11745
0
    if (buildTree)
11746
0
        nodePop(ctxt);
11747
11748
0
    namePop(ctxt);
11749
0
    spacePop(ctxt);
11750
11751
0
    xmlCtxtPopInput(ctxt);
11752
11753
0
error:
11754
0
    xmlFreeNode(root);
11755
11756
0
    return(list);
11757
0
}
11758
11759
static void
11760
0
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
11761
0
    xmlParserInputPtr input;
11762
0
    xmlNodePtr list;
11763
0
    unsigned long consumed;
11764
0
    int isExternal;
11765
0
    int buildTree;
11766
0
    int oldMinNsIndex;
11767
0
    int oldNodelen, oldNodemem;
11768
11769
0
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
11770
0
    buildTree = (ctxt->node != NULL);
11771
11772
    /*
11773
     * Recursion check
11774
     */
11775
0
    if (ent->flags & XML_ENT_EXPANDING) {
11776
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
11777
0
        goto error;
11778
0
    }
11779
11780
    /*
11781
     * Load entity
11782
     */
11783
0
    input = xmlNewEntityInputStream(ctxt, ent);
11784
0
    if (input == NULL)
11785
0
        goto error;
11786
11787
    /*
11788
     * When building a tree, we need to limit the scope of namespace
11789
     * declarations, so that entities don't reference xmlNs structs
11790
     * from the parent of a reference.
11791
     */
11792
0
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
11793
0
    if (buildTree)
11794
0
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
11795
11796
0
    oldNodelen = ctxt->nodelen;
11797
0
    oldNodemem = ctxt->nodemem;
11798
0
    ctxt->nodelen = 0;
11799
0
    ctxt->nodemem = 0;
11800
11801
    /*
11802
     * Parse content
11803
     *
11804
     * This initiates a recursive call chain:
11805
     *
11806
     * - xmlCtxtParseContentInternal
11807
     * - xmlParseContentInternal
11808
     * - xmlParseReference
11809
     * - xmlCtxtParseEntity
11810
     *
11811
     * The nesting depth is limited by the maximum number of inputs,
11812
     * see xmlCtxtPushInput.
11813
     *
11814
     * It's possible to make this non-recursive (minNsIndex must be
11815
     * stored in the input struct) at the expense of code readability.
11816
     */
11817
11818
0
    ent->flags |= XML_ENT_EXPANDING;
11819
11820
0
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
11821
11822
0
    ent->flags &= ~XML_ENT_EXPANDING;
11823
11824
0
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
11825
0
    ctxt->nodelen = oldNodelen;
11826
0
    ctxt->nodemem = oldNodemem;
11827
11828
    /*
11829
     * Entity size accounting
11830
     */
11831
0
    consumed = input->consumed;
11832
0
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
11833
11834
0
    if ((ent->flags & XML_ENT_CHECKED) == 0)
11835
0
        xmlSaturatedAdd(&ent->expandedSize, consumed);
11836
11837
0
    if ((ent->flags & XML_ENT_PARSED) == 0) {
11838
0
        if (isExternal)
11839
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
11840
11841
0
        ent->children = list;
11842
11843
0
        while (list != NULL) {
11844
0
            list->parent = (xmlNodePtr) ent;
11845
11846
            /*
11847
             * Downstream code like the nginx xslt module can set
11848
             * ctxt->myDoc->extSubset to a separate DTD, so the entity
11849
             * might have a different or a NULL document.
11850
             */
11851
0
            if (list->doc != ent->doc)
11852
0
                xmlSetTreeDoc(list, ent->doc);
11853
11854
0
            if (list->next == NULL)
11855
0
                ent->last = list;
11856
0
            list = list->next;
11857
0
        }
11858
0
    } else {
11859
0
        xmlFreeNodeList(list);
11860
0
    }
11861
11862
0
    xmlFreeInputStream(input);
11863
11864
0
error:
11865
0
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
11866
0
}
11867
11868
/**
11869
 * Parse an external general entity within an existing parsing context
11870
 * An external general parsed entity is well-formed if it matches the
11871
 * production labeled extParsedEnt.
11872
 *
11873
 *     [78] extParsedEnt ::= TextDecl? content
11874
 *
11875
 * @param ctxt  the existing parsing context
11876
 * @param URL  the URL for the entity to load
11877
 * @param ID  the System ID for the entity to load
11878
 * @param listOut  the return value for the set of parsed nodes
11879
 * @returns 0 if the entity is well formed, XML_ERR_ARGUMENT if ctxt is NULL and
11880
 *    the parser error code otherwise
11881
 */
11882
11883
int
11884
xmlParseCtxtExternalEntity(xmlParserCtxt *ctxt, const xmlChar *URL,
11885
0
                           const xmlChar *ID, xmlNode **listOut) {
11886
0
    xmlParserInputPtr input;
11887
0
    xmlNodePtr list;
11888
11889
0
    if (listOut != NULL)
11890
0
        *listOut = NULL;
11891
11892
0
    if (ctxt == NULL)
11893
0
        return(XML_ERR_ARGUMENT);
11894
11895
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
11896
0
                            XML_RESOURCE_GENERAL_ENTITY);
11897
0
    if (input == NULL)
11898
0
        return(ctxt->errNo);
11899
11900
0
    xmlCtxtInitializeLate(ctxt);
11901
11902
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
11903
0
    if (listOut != NULL)
11904
0
        *listOut = list;
11905
0
    else
11906
0
        xmlFreeNodeList(list);
11907
11908
0
    xmlFreeInputStream(input);
11909
0
    return(ctxt->errNo);
11910
0
}
11911
11912
#ifdef LIBXML_SAX1_ENABLED
11913
/**
11914
 * Parse an external general entity
11915
 * An external general parsed entity is well-formed if it matches the
11916
 * production labeled extParsedEnt.
11917
 *
11918
 * This function uses deprecated global variables to set parser options
11919
 * which default to XML_PARSE_NODICT.
11920
 *
11921
 * @deprecated Use #xmlParseCtxtExternalEntity.
11922
 *
11923
 *     [78] extParsedEnt ::= TextDecl? content
11924
 *
11925
 * @param doc  the document the chunk pertains to
11926
 * @param sax  the SAX handler block (possibly NULL)
11927
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11928
 * @param depth  Used for loop detection, use 0
11929
 * @param URL  the URL for the entity to load
11930
 * @param ID  the System ID for the entity to load
11931
 * @param list  the return value for the set of parsed nodes
11932
 * @returns 0 if the entity is well formed, XML_ERR_ARGUMENT if doc is NULL and
11933
 *    the parser error code otherwise
11934
 */
11935
11936
int
11937
xmlParseExternalEntity(xmlDoc *doc, xmlSAXHandler *sax, void *user_data,
11938
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNode **list) {
11939
0
    xmlParserCtxtPtr ctxt;
11940
0
    int ret;
11941
11942
0
    if (list != NULL)
11943
0
        *list = NULL;
11944
11945
0
    if (doc == NULL)
11946
0
        return(XML_ERR_ARGUMENT);
11947
11948
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11949
0
    if (ctxt == NULL)
11950
0
        return(XML_ERR_NO_MEMORY);
11951
11952
0
    ctxt->depth = depth;
11953
0
    ctxt->myDoc = doc;
11954
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
11955
11956
0
    xmlFreeParserCtxt(ctxt);
11957
0
    return(ret);
11958
0
}
11959
11960
/**
11961
 * Parse a well-balanced chunk of an XML document
11962
 * called by the parser
11963
 * The allowed sequence for the Well Balanced Chunk is the one defined by
11964
 * the content production in the XML grammar:
11965
 *
11966
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
11967
 *                       Comment)*
11968
 *
11969
 * This function uses deprecated global variables to set parser options
11970
 * which default to XML_PARSE_NODICT.
11971
 *
11972
 * @param doc  the document the chunk pertains to (must not be NULL)
11973
 * @param sax  the SAX handler block (possibly NULL)
11974
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11975
 * @param depth  Used for loop detection, use 0
11976
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
11977
 * @param lst  the return value for the set of parsed nodes
11978
 * @returns 0 if the chunk is well balanced, XML_ERR_ARGUMENT if string is NULL and
11979
 *    the parser error code otherwise
11980
 */
11981
11982
int
11983
xmlParseBalancedChunkMemory(xmlDoc *doc, xmlSAXHandler *sax,
11984
0
     void *user_data, int depth, const xmlChar *string, xmlNode **lst) {
11985
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11986
0
                                                depth, string, lst, 0 );
11987
0
}
11988
#endif /* LIBXML_SAX1_ENABLED */
11989
11990
/**
11991
 * Parse a well-balanced chunk of XML matching the 'content' production.
11992
 *
11993
 * Namespaces in scope of `node` and entities of `node`'s document are
11994
 * recognized. When validating, the DTD of `node`'s document is used.
11995
 *
11996
 * Always consumes `input` even in error case.
11997
 *
11998
 * @since 2.14.0
11999
 *
12000
 * @param ctxt  parser context
12001
 * @param input  parser input
12002
 * @param node  target node or document
12003
 * @param hasTextDecl  whether to parse text declaration
12004
 * @returns a node list or NULL in case of error.
12005
 */
12006
xmlNode *
12007
xmlCtxtParseContent(xmlParserCtxt *ctxt, xmlParserInput *input,
12008
0
                    xmlNode *node, int hasTextDecl) {
12009
0
    xmlDocPtr doc;
12010
0
    xmlNodePtr cur, list = NULL;
12011
0
    int nsnr = 0;
12012
0
    xmlDictPtr oldDict;
12013
0
    int oldOptions, oldDictNames, oldLoadSubset;
12014
12015
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12016
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12017
0
        goto exit;
12018
0
    }
12019
12020
0
    doc = node->doc;
12021
0
    if (doc == NULL) {
12022
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12023
0
        goto exit;
12024
0
    }
12025
12026
0
    switch (node->type) {
12027
0
        case XML_ELEMENT_NODE:
12028
0
        case XML_DOCUMENT_NODE:
12029
0
        case XML_HTML_DOCUMENT_NODE:
12030
0
            break;
12031
12032
0
        case XML_ATTRIBUTE_NODE:
12033
0
        case XML_TEXT_NODE:
12034
0
        case XML_CDATA_SECTION_NODE:
12035
0
        case XML_ENTITY_REF_NODE:
12036
0
        case XML_PI_NODE:
12037
0
        case XML_COMMENT_NODE:
12038
0
            for (cur = node->parent; cur != NULL; cur = cur->parent) {
12039
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12040
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12041
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12042
0
                    node = cur;
12043
0
                    break;
12044
0
                }
12045
0
            }
12046
0
            break;
12047
12048
0
        default:
12049
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12050
0
            goto exit;
12051
0
    }
12052
12053
0
    xmlCtxtReset(ctxt);
12054
12055
0
    oldDict = ctxt->dict;
12056
0
    oldOptions = ctxt->options;
12057
0
    oldDictNames = ctxt->dictNames;
12058
0
    oldLoadSubset = ctxt->loadsubset;
12059
12060
    /*
12061
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12062
     */
12063
0
    if (doc->dict != NULL) {
12064
0
        ctxt->dict = doc->dict;
12065
0
    } else {
12066
0
        ctxt->options |= XML_PARSE_NODICT;
12067
0
        ctxt->dictNames = 0;
12068
0
    }
12069
12070
    /*
12071
     * Disable IDs
12072
     */
12073
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12074
0
    ctxt->options |= XML_PARSE_SKIP_IDS;
12075
12076
0
    ctxt->myDoc = doc;
12077
12078
0
#ifdef LIBXML_HTML_ENABLED
12079
0
    if (ctxt->html) {
12080
        /*
12081
         * When parsing in context, it makes no sense to add implied
12082
         * elements like html/body/etc...
12083
         */
12084
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12085
12086
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12087
0
    } else
12088
0
#endif
12089
0
    {
12090
0
        xmlCtxtInitializeLate(ctxt);
12091
12092
        /*
12093
         * initialize the SAX2 namespaces stack
12094
         */
12095
0
        cur = node;
12096
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12097
0
            xmlNsPtr ns = cur->nsDef;
12098
0
            xmlHashedString hprefix, huri;
12099
12100
0
            while (ns != NULL) {
12101
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12102
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12103
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12104
0
                    nsnr++;
12105
0
                ns = ns->next;
12106
0
            }
12107
0
            cur = cur->parent;
12108
0
        }
12109
12110
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12111
12112
0
        if (nsnr > 0)
12113
0
            xmlParserNsPop(ctxt, nsnr);
12114
0
    }
12115
12116
0
    ctxt->dict = oldDict;
12117
0
    ctxt->options = oldOptions;
12118
0
    ctxt->dictNames = oldDictNames;
12119
0
    ctxt->loadsubset = oldLoadSubset;
12120
0
    ctxt->myDoc = NULL;
12121
0
    ctxt->node = NULL;
12122
12123
0
exit:
12124
0
    xmlFreeInputStream(input);
12125
0
    return(list);
12126
0
}
12127
12128
/**
12129
 * Parse a well-balanced chunk of an XML document
12130
 * within the context (DTD, namespaces, etc ...) of the given node.
12131
 *
12132
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12133
 * the content production in the XML grammar:
12134
 *
12135
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12136
 *                       Comment)*
12137
 *
12138
 * This function assumes the encoding of `node`'s document which is
12139
 * typically not what you want. A better alternative is
12140
 * #xmlCtxtParseContent.
12141
 *
12142
 * @param node  the context node
12143
 * @param data  the input string
12144
 * @param datalen  the input string length in bytes
12145
 * @param options  a combination of xmlParserOption
12146
 * @param listOut  the return value for the set of parsed nodes
12147
 * @returns XML_ERR_OK if the chunk is well balanced, and the parser
12148
 * error code otherwise
12149
 */
12150
xmlParserErrors
12151
xmlParseInNodeContext(xmlNode *node, const char *data, int datalen,
12152
0
                      int options, xmlNode **listOut) {
12153
0
    xmlParserCtxtPtr ctxt;
12154
0
    xmlParserInputPtr input;
12155
0
    xmlDocPtr doc;
12156
0
    xmlNodePtr list;
12157
0
    xmlParserErrors ret;
12158
12159
0
    if (listOut == NULL)
12160
0
        return(XML_ERR_INTERNAL_ERROR);
12161
0
    *listOut = NULL;
12162
12163
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12164
0
        return(XML_ERR_INTERNAL_ERROR);
12165
12166
0
    doc = node->doc;
12167
0
    if (doc == NULL)
12168
0
        return(XML_ERR_INTERNAL_ERROR);
12169
12170
0
#ifdef LIBXML_HTML_ENABLED
12171
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12172
0
        ctxt = htmlNewParserCtxt();
12173
0
    }
12174
0
    else
12175
0
#endif
12176
0
        ctxt = xmlNewParserCtxt();
12177
12178
0
    if (ctxt == NULL)
12179
0
        return(XML_ERR_NO_MEMORY);
12180
12181
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12182
0
                                      (const char *) doc->encoding,
12183
0
                                      XML_INPUT_BUF_STATIC);
12184
0
    if (input == NULL) {
12185
0
        xmlFreeParserCtxt(ctxt);
12186
0
        return(XML_ERR_NO_MEMORY);
12187
0
    }
12188
12189
0
    xmlCtxtUseOptions(ctxt, options);
12190
12191
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12192
12193
0
    if (list == NULL) {
12194
0
        ret = ctxt->errNo;
12195
0
        if (ret == XML_ERR_ARGUMENT)
12196
0
            ret = XML_ERR_INTERNAL_ERROR;
12197
0
    } else {
12198
0
        ret = XML_ERR_OK;
12199
0
        *listOut = list;
12200
0
    }
12201
12202
0
    xmlFreeParserCtxt(ctxt);
12203
12204
0
    return(ret);
12205
0
}
12206
12207
#ifdef LIBXML_SAX1_ENABLED
12208
/**
12209
 * Parse a well-balanced chunk of an XML document
12210
 *
12211
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12212
 * the content production in the XML grammar:
12213
 *
12214
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12215
 *                       Comment)*
12216
 *
12217
 * In case recover is set to 1, the nodelist will not be empty even if
12218
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12219
 * some extent.
12220
 *
12221
 * This function uses deprecated global variables to set parser options
12222
 * which default to XML_PARSE_NODICT.
12223
 *
12224
 * @param doc  the document the chunk pertains to (must not be NULL)
12225
 * @param sax  the SAX handler block (possibly NULL)
12226
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
12227
 * @param depth  Used for loop detection, use 0
12228
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
12229
 * @param listOut  the return value for the set of parsed nodes
12230
 * @param recover  return nodes even if the data is broken (use 0)
12231
 * @returns 0 if the chunk is well balanced, or the parser error code
12232
 * otherwise.
12233
 */
12234
int
12235
xmlParseBalancedChunkMemoryRecover(xmlDoc *doc, xmlSAXHandler *sax,
12236
     void *user_data, int depth, const xmlChar *string, xmlNode **listOut,
12237
0
     int recover) {
12238
0
    xmlParserCtxtPtr ctxt;
12239
0
    xmlParserInputPtr input;
12240
0
    xmlNodePtr list;
12241
0
    int ret;
12242
12243
0
    if (listOut != NULL)
12244
0
        *listOut = NULL;
12245
12246
0
    if (string == NULL)
12247
0
        return(XML_ERR_ARGUMENT);
12248
12249
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12250
0
    if (ctxt == NULL)
12251
0
        return(XML_ERR_NO_MEMORY);
12252
12253
0
    xmlCtxtInitializeLate(ctxt);
12254
12255
0
    ctxt->depth = depth;
12256
0
    ctxt->myDoc = doc;
12257
0
    if (recover) {
12258
0
        ctxt->options |= XML_PARSE_RECOVER;
12259
0
        ctxt->recovery = 1;
12260
0
    }
12261
12262
0
    input = xmlNewStringInputStream(ctxt, string);
12263
0
    if (input == NULL) {
12264
0
        ret = ctxt->errNo;
12265
0
        goto error;
12266
0
    }
12267
12268
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12269
0
    if (listOut != NULL)
12270
0
        *listOut = list;
12271
0
    else
12272
0
        xmlFreeNodeList(list);
12273
12274
0
    if (!ctxt->wellFormed)
12275
0
        ret = ctxt->errNo;
12276
0
    else
12277
0
        ret = XML_ERR_OK;
12278
12279
0
error:
12280
0
    xmlFreeInputStream(input);
12281
0
    xmlFreeParserCtxt(ctxt);
12282
0
    return(ret);
12283
0
}
12284
12285
/**
12286
 * Parse an XML external entity out of context and build a tree.
12287
 * It uses the given SAX function block to handle the parsing callbacks.
12288
 * If sax is NULL, fall back to the default DOM tree building routines.
12289
 *
12290
 * @deprecated Don't use.
12291
 *
12292
 *     [78] extParsedEnt ::= TextDecl? content
12293
 *
12294
 * This corresponds to a "Well Balanced" chunk
12295
 *
12296
 * This function uses deprecated global variables to set parser options
12297
 * which default to XML_PARSE_NODICT.
12298
 *
12299
 * @param sax  the SAX handler block
12300
 * @param filename  the filename
12301
 * @returns the resulting document tree
12302
 */
12303
12304
xmlDoc *
12305
0
xmlSAXParseEntity(xmlSAXHandler *sax, const char *filename) {
12306
0
    xmlDocPtr ret;
12307
0
    xmlParserCtxtPtr ctxt;
12308
12309
0
    ctxt = xmlCreateFileParserCtxt(filename);
12310
0
    if (ctxt == NULL) {
12311
0
  return(NULL);
12312
0
    }
12313
0
    if (sax != NULL) {
12314
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12315
0
            *ctxt->sax = *sax;
12316
0
        } else {
12317
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12318
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12319
0
        }
12320
0
        ctxt->userData = NULL;
12321
0
    }
12322
12323
0
    xmlParseExtParsedEnt(ctxt);
12324
12325
0
    if (ctxt->wellFormed) {
12326
0
  ret = ctxt->myDoc;
12327
0
    } else {
12328
0
        ret = NULL;
12329
0
        xmlFreeDoc(ctxt->myDoc);
12330
0
    }
12331
12332
0
    xmlFreeParserCtxt(ctxt);
12333
12334
0
    return(ret);
12335
0
}
12336
12337
/**
12338
 * Parse an XML external entity out of context and build a tree.
12339
 *
12340
 *     [78] extParsedEnt ::= TextDecl? content
12341
 *
12342
 * This corresponds to a "Well Balanced" chunk
12343
 *
12344
 * This function uses deprecated global variables to set parser options
12345
 * which default to XML_PARSE_NODICT.
12346
 *
12347
 * @deprecated Don't use.
12348
 *
12349
 * @param filename  the filename
12350
 * @returns the resulting document tree
12351
 */
12352
12353
xmlDoc *
12354
0
xmlParseEntity(const char *filename) {
12355
0
    return(xmlSAXParseEntity(NULL, filename));
12356
0
}
12357
#endif /* LIBXML_SAX1_ENABLED */
12358
12359
/**
12360
 * Create a parser context for an external entity
12361
 * Automatic support for ZLIB/Compress compressed document is provided
12362
 * by default if found at compile-time.
12363
 *
12364
 * @deprecated Don't use.
12365
 *
12366
 * @param URL  the entity URL
12367
 * @param ID  the entity PUBLIC ID
12368
 * @param base  a possible base for the target URI
12369
 * @returns the new parser context or NULL
12370
 */
12371
xmlParserCtxt *
12372
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12373
0
                    const xmlChar *base) {
12374
0
    xmlParserCtxtPtr ctxt;
12375
0
    xmlParserInputPtr input;
12376
0
    xmlChar *uri = NULL;
12377
12378
0
    ctxt = xmlNewParserCtxt();
12379
0
    if (ctxt == NULL)
12380
0
  return(NULL);
12381
12382
0
    if (base != NULL) {
12383
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12384
0
            goto error;
12385
0
        if (uri != NULL)
12386
0
            URL = uri;
12387
0
    }
12388
12389
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12390
0
                            XML_RESOURCE_UNKNOWN);
12391
0
    if (input == NULL)
12392
0
        goto error;
12393
12394
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12395
0
        xmlFreeInputStream(input);
12396
0
        goto error;
12397
0
    }
12398
12399
0
    xmlFree(uri);
12400
0
    return(ctxt);
12401
12402
0
error:
12403
0
    xmlFree(uri);
12404
0
    xmlFreeParserCtxt(ctxt);
12405
0
    return(NULL);
12406
0
}
12407
12408
/************************************************************************
12409
 *                  *
12410
 *    Front ends when parsing from a file     *
12411
 *                  *
12412
 ************************************************************************/
12413
12414
/**
12415
 * Create a parser context for a file or URL content.
12416
 * Automatic support for ZLIB/Compress compressed document is provided
12417
 * by default if found at compile-time and for file accesses
12418
 *
12419
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12420
 *
12421
 * @param filename  the filename or URL
12422
 * @param options  a combination of xmlParserOption
12423
 * @returns the new parser context or NULL
12424
 */
12425
xmlParserCtxt *
12426
xmlCreateURLParserCtxt(const char *filename, int options)
12427
0
{
12428
0
    xmlParserCtxtPtr ctxt;
12429
0
    xmlParserInputPtr input;
12430
12431
0
    ctxt = xmlNewParserCtxt();
12432
0
    if (ctxt == NULL)
12433
0
  return(NULL);
12434
12435
0
    xmlCtxtUseOptions(ctxt, options);
12436
12437
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12438
0
    if (input == NULL) {
12439
0
  xmlFreeParserCtxt(ctxt);
12440
0
  return(NULL);
12441
0
    }
12442
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12443
0
        xmlFreeInputStream(input);
12444
0
        xmlFreeParserCtxt(ctxt);
12445
0
        return(NULL);
12446
0
    }
12447
12448
0
    return(ctxt);
12449
0
}
12450
12451
/**
12452
 * Create a parser context for a file content.
12453
 * Automatic support for ZLIB/Compress compressed document is provided
12454
 * by default if found at compile-time.
12455
 *
12456
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12457
 *
12458
 * @param filename  the filename
12459
 * @returns the new parser context or NULL
12460
 */
12461
xmlParserCtxt *
xmlCreateFileParserCtxt(const char *filename)
{
    /* Same as the URL variant, with an empty option set. */
    const int noOptions = 0;

    return xmlCreateURLParserCtxt(filename, noOptions);
}
12466
12467
#ifdef LIBXML_SAX1_ENABLED
12468
/**
12469
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12470
 * compressed document is provided by default if found at compile-time.
12471
 * It uses the given SAX function block to handle the parsing callback.
12472
 * If sax is NULL, fallback to the default DOM tree building routines.
12473
 *
12474
 * This function uses deprecated global variables to set parser options
12475
 * which default to XML_PARSE_NODICT.
12476
 *
12477
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12478
 *
12479
 * User data (void *) is stored within the parser context in the
12480
 * context's _private member, so it is available nearly everywhere in libxml
12481
 *
12482
 * @param sax  the SAX handler block
12483
 * @param filename  the filename
12484
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12485
 *             documents
12486
 * @param data  the userdata
12487
 * @returns the resulting document tree
12488
 */
12489
12490
xmlDoc *
12491
xmlSAXParseFileWithData(xmlSAXHandler *sax, const char *filename,
12492
0
                        int recovery, void *data) {
12493
0
    xmlDocPtr ret = NULL;
12494
0
    xmlParserCtxtPtr ctxt;
12495
0
    xmlParserInputPtr input;
12496
12497
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12498
0
    if (ctxt == NULL)
12499
0
  return(NULL);
12500
12501
0
    if (data != NULL)
12502
0
  ctxt->_private = data;
12503
12504
0
    if (recovery) {
12505
0
        ctxt->options |= XML_PARSE_RECOVER;
12506
0
        ctxt->recovery = 1;
12507
0
    }
12508
12509
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12510
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12511
0
    else
12512
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12513
12514
0
    if (input != NULL)
12515
0
        ret = xmlCtxtParseDocument(ctxt, input);
12516
12517
0
    xmlFreeParserCtxt(ctxt);
12518
0
    return(ret);
12519
0
}
12520
12521
/**
12522
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12523
 * compressed document is provided by default if found at compile-time.
12524
 * It uses the given SAX function block to handle the parsing callback.
12525
 * If sax is NULL, fallback to the default DOM tree building routines.
12526
 *
12527
 * This function uses deprecated global variables to set parser options
12528
 * which default to XML_PARSE_NODICT.
12529
 *
12530
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12531
 *
12532
 * @param sax  the SAX handler block
12533
 * @param filename  the filename
12534
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12535
 *             documents
12536
 * @returns the resulting document tree
12537
 */
12538
12539
xmlDoc *
12540
xmlSAXParseFile(xmlSAXHandler *sax, const char *filename,
12541
0
                          int recovery) {
12542
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12543
0
}
12544
12545
/**
12546
 * Parse an XML in-memory document and build a tree.
12547
 * In the case the document is not Well Formed, an attempt to build a
12548
 * tree is tried anyway
12549
 *
12550
 * This function uses deprecated global variables to set parser options
12551
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12552
 *
12553
 * @deprecated Use #xmlReadDoc with XML_PARSE_RECOVER.
12554
 *
12555
 * @param cur  a pointer to an array of xmlChar
12556
 * @returns the resulting document tree or NULL in case of failure
12557
 */
12558
12559
xmlDoc *
12560
0
xmlRecoverDoc(const xmlChar *cur) {
12561
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12562
0
}
12563
12564
/**
12565
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12566
 * compressed document is provided by default if found at compile-time.
12567
 *
12568
 * This function uses deprecated global variables to set parser options
12569
 * which default to XML_PARSE_NODICT.
12570
 *
12571
 * @deprecated Use #xmlReadFile.
12572
 *
12573
 * @param filename  the filename
12574
 * @returns the resulting document tree if the file was wellformed,
12575
 * NULL otherwise.
12576
 */
12577
12578
xmlDoc *
12579
0
xmlParseFile(const char *filename) {
12580
0
    return(xmlSAXParseFile(NULL, filename, 0));
12581
0
}
12582
12583
/**
12584
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12585
 * compressed document is provided by default if found at compile-time.
12586
 * In the case the document is not Well Formed, it attempts to build
12587
 * a tree anyway
12588
 *
12589
 * This function uses deprecated global variables to set parser options
12590
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12591
 *
12592
 * @deprecated Use #xmlReadFile with XML_PARSE_RECOVER.
12593
 *
12594
 * @param filename  the filename
12595
 * @returns the resulting document tree or NULL in case of failure
12596
 */
12597
12598
xmlDoc *
12599
0
xmlRecoverFile(const char *filename) {
12600
0
    return(xmlSAXParseFile(NULL, filename, 1));
12601
0
}
12602
12603
12604
/**
12605
 * Setup the parser context to parse a new buffer; Clears any prior
12606
 * contents from the parser context. The buffer parameter must not be
12607
 * NULL, but the filename parameter can be
12608
 *
12609
 * @deprecated Don't use.
12610
 *
12611
 * @param ctxt  an XML parser context
12612
 * @param buffer  a xmlChar * buffer
12613
 * @param filename  a file name
12614
 */
12615
void
12616
xmlSetupParserForBuffer(xmlParserCtxt *ctxt, const xmlChar* buffer,
12617
                             const char* filename)
12618
0
{
12619
0
    xmlParserInputPtr input;
12620
12621
0
    if ((ctxt == NULL) || (buffer == NULL))
12622
0
        return;
12623
12624
0
    xmlCtxtReset(ctxt);
12625
12626
0
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12627
0
                                      NULL, 0);
12628
0
    if (input == NULL)
12629
0
        return;
12630
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
12631
0
        xmlFreeInputStream(input);
12632
0
}
12633
12634
/**
12635
 * Parse an XML file and call the given SAX handler routines.
12636
 * Automatic support for ZLIB/Compress compressed document is provided
12637
 *
12638
 * This function uses deprecated global variables to set parser options
12639
 * which default to XML_PARSE_NODICT.
12640
 *
12641
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12642
 *
12643
 * @param sax  a SAX handler
12644
 * @param user_data  The user data returned on SAX callbacks
12645
 * @param filename  a file name
12646
 * @returns 0 in case of success or an error number otherwise
12647
 */
12648
int
12649
xmlSAXUserParseFile(xmlSAXHandler *sax, void *user_data,
12650
0
                    const char *filename) {
12651
0
    int ret = 0;
12652
0
    xmlParserCtxtPtr ctxt;
12653
12654
0
    ctxt = xmlCreateFileParserCtxt(filename);
12655
0
    if (ctxt == NULL) return -1;
12656
0
    if (sax != NULL) {
12657
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12658
0
            *ctxt->sax = *sax;
12659
0
        } else {
12660
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12661
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12662
0
        }
12663
0
  ctxt->userData = user_data;
12664
0
    }
12665
12666
0
    xmlParseDocument(ctxt);
12667
12668
0
    if (ctxt->wellFormed)
12669
0
  ret = 0;
12670
0
    else {
12671
0
        if (ctxt->errNo != 0)
12672
0
      ret = ctxt->errNo;
12673
0
  else
12674
0
      ret = -1;
12675
0
    }
12676
0
    if (ctxt->myDoc != NULL) {
12677
0
        xmlFreeDoc(ctxt->myDoc);
12678
0
  ctxt->myDoc = NULL;
12679
0
    }
12680
0
    xmlFreeParserCtxt(ctxt);
12681
12682
0
    return ret;
12683
0
}
12684
#endif /* LIBXML_SAX1_ENABLED */
12685
12686
/************************************************************************
12687
 *                  *
12688
 *    Front ends when parsing from memory     *
12689
 *                  *
12690
 ************************************************************************/
12691
12692
/**
12693
 * Create a parser context for an XML in-memory document. The input buffer
12694
 * must not contain a terminating null byte.
12695
 *
12696
 * @param buffer  a pointer to a char array
12697
 * @param size  the size of the array
12698
 * @returns the new parser context or NULL
12699
 */
12700
xmlParserCtxt *
12701
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12702
0
    xmlParserCtxtPtr ctxt;
12703
0
    xmlParserInputPtr input;
12704
12705
0
    if (size < 0)
12706
0
  return(NULL);
12707
12708
0
    ctxt = xmlNewParserCtxt();
12709
0
    if (ctxt == NULL)
12710
0
  return(NULL);
12711
12712
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
12713
0
    if (input == NULL) {
12714
0
  xmlFreeParserCtxt(ctxt);
12715
0
  return(NULL);
12716
0
    }
12717
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12718
0
        xmlFreeInputStream(input);
12719
0
        xmlFreeParserCtxt(ctxt);
12720
0
        return(NULL);
12721
0
    }
12722
12723
0
    return(ctxt);
12724
0
}
12725
12726
#ifdef LIBXML_SAX1_ENABLED
12727
/**
12728
 * Parse an XML in-memory block and use the given SAX function block
12729
 * to handle the parsing callback. If sax is NULL, fallback to the default
12730
 * DOM tree building routines.
12731
 *
12732
 * This function uses deprecated global variables to set parser options
12733
 * which default to XML_PARSE_NODICT.
12734
 *
12735
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12736
 *
12737
 * User data (void *) is stored within the parser context in the
12738
 * context's _private member, so it is available nearly everywhere in libxml
12739
 *
12740
 * @param sax  the SAX handler block
12741
 * @param buffer  a pointer to a char array
12742
 * @param size  the size of the array
12743
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12744
 *             documents
12745
 * @param data  the userdata
12746
 * @returns the resulting document tree
12747
 */
12748
12749
xmlDoc *
12750
xmlSAXParseMemoryWithData(xmlSAXHandler *sax, const char *buffer,
12751
0
                          int size, int recovery, void *data) {
12752
0
    xmlDocPtr ret = NULL;
12753
0
    xmlParserCtxtPtr ctxt;
12754
0
    xmlParserInputPtr input;
12755
12756
0
    if (size < 0)
12757
0
        return(NULL);
12758
12759
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12760
0
    if (ctxt == NULL)
12761
0
        return(NULL);
12762
12763
0
    if (data != NULL)
12764
0
  ctxt->_private=data;
12765
12766
0
    if (recovery) {
12767
0
        ctxt->options |= XML_PARSE_RECOVER;
12768
0
        ctxt->recovery = 1;
12769
0
    }
12770
12771
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
12772
0
                                      XML_INPUT_BUF_STATIC);
12773
12774
0
    if (input != NULL)
12775
0
        ret = xmlCtxtParseDocument(ctxt, input);
12776
12777
0
    xmlFreeParserCtxt(ctxt);
12778
0
    return(ret);
12779
0
}
12780
12781
/**
12782
 * Parse an XML in-memory block and use the given SAX function block
12783
 * to handle the parsing callback. If sax is NULL, fallback to the default
12784
 * DOM tree building routines.
12785
 *
12786
 * This function uses deprecated global variables to set parser options
12787
 * which default to XML_PARSE_NODICT.
12788
 *
12789
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12790
 *
12791
 * @param sax  the SAX handler block
12792
 * @param buffer  a pointer to a char array
12793
 * @param size  the size of the array
12794
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12795
 *             documents
12796
 * @returns the resulting document tree
12797
 */
12798
xmlDoc *
12799
xmlSAXParseMemory(xmlSAXHandler *sax, const char *buffer,
12800
0
            int size, int recovery) {
12801
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12802
0
}
12803
12804
/**
12805
 * Parse an XML in-memory block and build a tree.
12806
 *
12807
 * This function uses deprecated global variables to set parser options
12808
 * which default to XML_PARSE_NODICT.
12809
 *
12810
 * @deprecated Use #xmlReadMemory.
12811
 *
12812
 * @param buffer  a pointer to a char array
12813
 * @param size  the size of the array
12814
 * @returns the resulting document tree
12815
 */
12816
12817
0
xmlDoc *xmlParseMemory(const char *buffer, int size) {
12818
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
12819
0
}
12820
12821
/**
12822
 * Parse an XML in-memory block and build a tree.
12823
 * In the case the document is not Well Formed, an attempt to
12824
 * build a tree is tried anyway
12825
 *
12826
 * This function uses deprecated global variables to set parser options
12827
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12828
 *
12829
 * @deprecated Use #xmlReadMemory with XML_PARSE_RECOVER.
12830
 *
12831
 * @param buffer  a pointer to a char array
12832
 * @param size  the size of the array
12833
 * @returns the resulting document tree or NULL in case of error
12834
 */
12835
12836
0
xmlDoc *xmlRecoverMemory(const char *buffer, int size) {
12837
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
12838
0
}
12839
12840
/**
12841
 * Parse an XML in-memory buffer and call the given SAX handler routines.
12842
 *
12843
 * This function uses deprecated global variables to set parser options
12844
 * which default to XML_PARSE_NODICT.
12845
 *
12846
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12847
 *
12848
 * @param sax  a SAX handler
12849
 * @param user_data  The user data returned on SAX callbacks
12850
 * @param buffer  an in-memory XML document input
12851
 * @param size  the length of the XML document in bytes
12852
 * @returns 0 in case of success or an error number otherwise
12853
 */
12854
int xmlSAXUserParseMemory(xmlSAXHandler *sax, void *user_data,
                          const char *buffer, int size) {
    xmlParserCtxtPtr pctx;
    int status;

    pctx = xmlCreateMemoryParserCtxt(buffer, size);
    if (pctx == NULL)
        return -1;

    if (sax != NULL) {
        /*
         * Copy the handler into the context's own block. Without the
         * SAX2 magic only the xmlSAXHandlerV1 part is copied and the
         * remainder is zeroed.
         */
        if (sax->initialized != XML_SAX2_MAGIC) {
            memset(pctx->sax, 0, sizeof(*pctx->sax));
            memcpy(pctx->sax, sax, sizeof(xmlSAXHandlerV1));
        } else {
            *pctx->sax = *sax;
        }
        pctx->userData = user_data;
    }

    xmlParseDocument(pctx);

    /* 0 when well-formed, else the recorded error number, else -1. */
    if (pctx->wellFormed)
        status = 0;
    else if (pctx->errNo != 0)
        status = pctx->errNo;
    else
        status = -1;

    /* Any tree that was built is discarded; only the status is returned. */
    if (pctx->myDoc != NULL) {
        xmlFreeDoc(pctx->myDoc);
        pctx->myDoc = NULL;
    }
    xmlFreeParserCtxt(pctx);

    return status;
}
12889
#endif /* LIBXML_SAX1_ENABLED */
12890
12891
/**
12892
 * Creates a parser context for an XML in-memory document.
12893
 *
12894
 * @param str  a pointer to an array of xmlChar
12895
 * @returns the new parser context or NULL
12896
 */
12897
xmlParserCtxt *
12898
0
xmlCreateDocParserCtxt(const xmlChar *str) {
12899
0
    xmlParserCtxtPtr ctxt;
12900
0
    xmlParserInputPtr input;
12901
12902
0
    ctxt = xmlNewParserCtxt();
12903
0
    if (ctxt == NULL)
12904
0
  return(NULL);
12905
12906
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
12907
0
    if (input == NULL) {
12908
0
  xmlFreeParserCtxt(ctxt);
12909
0
  return(NULL);
12910
0
    }
12911
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12912
0
        xmlFreeInputStream(input);
12913
0
        xmlFreeParserCtxt(ctxt);
12914
0
        return(NULL);
12915
0
    }
12916
12917
0
    return(ctxt);
12918
0
}
12919
12920
#ifdef LIBXML_SAX1_ENABLED
12921
/**
12922
 * Parse an XML in-memory document and build a tree.
12923
 * It uses the given SAX function block to handle the parsing callback.
12924
 * If sax is NULL, fallback to the default DOM tree building routines.
12925
 *
12926
 * This function uses deprecated global variables to set parser options
12927
 * which default to XML_PARSE_NODICT.
12928
 *
12929
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadDoc.
12930
 *
12931
 * @param sax  the SAX handler block
12932
 * @param cur  a pointer to an array of xmlChar
12933
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12934
 *             documents
12935
 * @returns the resulting document tree
12936
 */
12937
12938
xmlDoc *
12939
0
xmlSAXParseDoc(xmlSAXHandler *sax, const xmlChar *cur, int recovery) {
12940
0
    xmlDocPtr ret;
12941
0
    xmlParserCtxtPtr ctxt;
12942
0
    xmlSAXHandlerPtr oldsax = NULL;
12943
12944
0
    if (cur == NULL) return(NULL);
12945
12946
12947
0
    ctxt = xmlCreateDocParserCtxt(cur);
12948
0
    if (ctxt == NULL) return(NULL);
12949
0
    if (sax != NULL) {
12950
0
        oldsax = ctxt->sax;
12951
0
        ctxt->sax = sax;
12952
0
        ctxt->userData = NULL;
12953
0
    }
12954
12955
0
    xmlParseDocument(ctxt);
12956
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12957
0
    else {
12958
0
       ret = NULL;
12959
0
       xmlFreeDoc(ctxt->myDoc);
12960
0
       ctxt->myDoc = NULL;
12961
0
    }
12962
0
    if (sax != NULL)
12963
0
  ctxt->sax = oldsax;
12964
0
    xmlFreeParserCtxt(ctxt);
12965
12966
0
    return(ret);
12967
0
}
12968
12969
/**
12970
 * Parse an XML in-memory document and build a tree.
12971
 *
12972
 * This function uses deprecated global variables to set parser options
12973
 * which default to XML_PARSE_NODICT.
12974
 *
12975
 * @deprecated Use #xmlReadDoc.
12976
 *
12977
 * @param cur  a pointer to an array of xmlChar
12978
 * @returns the resulting document tree
12979
 */
12980
12981
xmlDoc *
12982
0
xmlParseDoc(const xmlChar *cur) {
12983
0
    return(xmlSAXParseDoc(NULL, cur, 0));
12984
0
}
12985
#endif /* LIBXML_SAX1_ENABLED */
12986
12987
/************************************************************************
12988
 *                  *
12989
 *  New set (2.6.0) of simpler and more flexible APIs   *
12990
 *                  *
12991
 ************************************************************************/
12992
12993
/**
12994
 * Reset a parser context
12995
 *
12996
 * @param ctxt  an XML parser context
12997
 */
12998
void
12999
xmlCtxtReset(xmlParserCtxt *ctxt)
13000
5.12k
{
13001
5.12k
    xmlParserInputPtr input;
13002
13003
5.12k
    if (ctxt == NULL)
13004
0
        return;
13005
13006
5.12k
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
13007
0
        xmlFreeInputStream(input);
13008
0
    }
13009
5.12k
    ctxt->inputNr = 0;
13010
5.12k
    ctxt->input = NULL;
13011
13012
5.12k
    ctxt->spaceNr = 0;
13013
5.12k
    if (ctxt->spaceTab != NULL) {
13014
5.12k
  ctxt->spaceTab[0] = -1;
13015
5.12k
  ctxt->space = &ctxt->spaceTab[0];
13016
5.12k
    } else {
13017
0
        ctxt->space = NULL;
13018
0
    }
13019
13020
13021
5.12k
    ctxt->nodeNr = 0;
13022
5.12k
    ctxt->node = NULL;
13023
13024
5.12k
    ctxt->nameNr = 0;
13025
5.12k
    ctxt->name = NULL;
13026
13027
5.12k
    ctxt->nsNr = 0;
13028
5.12k
    xmlParserNsReset(ctxt->nsdb);
13029
13030
5.12k
    if (ctxt->version != NULL) {
13031
0
        xmlFree(ctxt->version);
13032
0
        ctxt->version = NULL;
13033
0
    }
13034
5.12k
    if (ctxt->encoding != NULL) {
13035
0
        xmlFree(ctxt->encoding);
13036
0
        ctxt->encoding = NULL;
13037
0
    }
13038
5.12k
    if (ctxt->extSubURI != NULL) {
13039
0
        xmlFree(ctxt->extSubURI);
13040
0
        ctxt->extSubURI = NULL;
13041
0
    }
13042
5.12k
    if (ctxt->extSubSystem != NULL) {
13043
0
        xmlFree(ctxt->extSubSystem);
13044
0
        ctxt->extSubSystem = NULL;
13045
0
    }
13046
5.12k
    if (ctxt->directory != NULL) {
13047
0
        xmlFree(ctxt->directory);
13048
0
        ctxt->directory = NULL;
13049
0
    }
13050
13051
5.12k
    if (ctxt->myDoc != NULL)
13052
0
        xmlFreeDoc(ctxt->myDoc);
13053
5.12k
    ctxt->myDoc = NULL;
13054
13055
5.12k
    ctxt->standalone = -1;
13056
5.12k
    ctxt->hasExternalSubset = 0;
13057
5.12k
    ctxt->hasPErefs = 0;
13058
5.12k
    ctxt->html = ctxt->html ? 1 : 0;
13059
5.12k
    ctxt->instate = XML_PARSER_START;
13060
13061
5.12k
    ctxt->wellFormed = 1;
13062
5.12k
    ctxt->nsWellFormed = 1;
13063
5.12k
    ctxt->disableSAX = 0;
13064
5.12k
    ctxt->valid = 1;
13065
5.12k
    ctxt->record_info = 0;
13066
5.12k
    ctxt->checkIndex = 0;
13067
5.12k
    ctxt->endCheckState = 0;
13068
5.12k
    ctxt->inSubset = 0;
13069
5.12k
    ctxt->errNo = XML_ERR_OK;
13070
5.12k
    ctxt->depth = 0;
13071
5.12k
    ctxt->catalogs = NULL;
13072
5.12k
    ctxt->sizeentities = 0;
13073
5.12k
    ctxt->sizeentcopy = 0;
13074
5.12k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13075
13076
5.12k
    if (ctxt->attsDefault != NULL) {
13077
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13078
0
        ctxt->attsDefault = NULL;
13079
0
    }
13080
5.12k
    if (ctxt->attsSpecial != NULL) {
13081
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13082
0
        ctxt->attsSpecial = NULL;
13083
0
    }
13084
13085
5.12k
#ifdef LIBXML_CATALOG_ENABLED
13086
5.12k
    if (ctxt->catalogs != NULL)
13087
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13088
5.12k
#endif
13089
5.12k
    ctxt->nbErrors = 0;
13090
5.12k
    ctxt->nbWarnings = 0;
13091
5.12k
    if (ctxt->lastError.code != XML_ERR_OK)
13092
0
        xmlResetError(&ctxt->lastError);
13093
5.12k
}
13094
13095
/**
13096
 * Reset a push parser context
13097
 *
13098
 * @param ctxt  an XML parser context
13099
 * @param chunk  a pointer to an array of chars
13100
 * @param size  number of chars in the array
13101
 * @param filename  an optional file name or URI
13102
 * @param encoding  the document encoding, or NULL
13103
 * @returns 0 in case of success and 1 in case of error
13104
 */
13105
int
13106
xmlCtxtResetPush(xmlParserCtxt *ctxt, const char *chunk,
13107
                 int size, const char *filename, const char *encoding)
13108
0
{
13109
0
    xmlParserInputPtr input;
13110
13111
0
    if (ctxt == NULL)
13112
0
        return(1);
13113
13114
0
    xmlCtxtReset(ctxt);
13115
13116
0
    input = xmlNewPushInput(filename, chunk, size);
13117
0
    if (input == NULL)
13118
0
        return(1);
13119
13120
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13121
0
        xmlFreeInputStream(input);
13122
0
        return(1);
13123
0
    }
13124
13125
0
    if (encoding != NULL)
13126
0
        xmlSwitchEncodingName(ctxt, encoding);
13127
13128
0
    return(0);
13129
0
}
13130
13131
static int
13132
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13133
151k
{
13134
151k
    int allMask;
13135
13136
151k
    if (ctxt == NULL)
13137
0
        return(-1);
13138
13139
    /*
13140
     * XInclude options aren't handled by the parser.
13141
     *
13142
     * XML_PARSE_XINCLUDE
13143
     * XML_PARSE_NOXINCNODE
13144
     * XML_PARSE_NOBASEFIX
13145
     */
13146
151k
    allMask = XML_PARSE_RECOVER |
13147
151k
              XML_PARSE_NOENT |
13148
151k
              XML_PARSE_DTDLOAD |
13149
151k
              XML_PARSE_DTDATTR |
13150
151k
              XML_PARSE_DTDVALID |
13151
151k
              XML_PARSE_NOERROR |
13152
151k
              XML_PARSE_NOWARNING |
13153
151k
              XML_PARSE_PEDANTIC |
13154
151k
              XML_PARSE_NOBLANKS |
13155
151k
#ifdef LIBXML_SAX1_ENABLED
13156
151k
              XML_PARSE_SAX1 |
13157
151k
#endif
13158
151k
              XML_PARSE_NONET |
13159
151k
              XML_PARSE_NODICT |
13160
151k
              XML_PARSE_NSCLEAN |
13161
151k
              XML_PARSE_NOCDATA |
13162
151k
              XML_PARSE_COMPACT |
13163
151k
              XML_PARSE_OLD10 |
13164
151k
              XML_PARSE_HUGE |
13165
151k
              XML_PARSE_OLDSAX |
13166
151k
              XML_PARSE_IGNORE_ENC |
13167
151k
              XML_PARSE_BIG_LINES |
13168
151k
              XML_PARSE_NO_XXE |
13169
151k
              XML_PARSE_UNZIP |
13170
151k
              XML_PARSE_NO_SYS_CATALOG |
13171
151k
              XML_PARSE_CATALOG_PI;
13172
13173
151k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13174
13175
    /*
13176
     * For some options, struct members are historically the source
13177
     * of truth. The values are initalized from global variables and
13178
     * old code could also modify them directly. Several older API
13179
     * functions that don't take an options argument rely on these
13180
     * deprecated mechanisms.
13181
     *
13182
     * Once public access to struct members and the globals are
13183
     * disabled, we can use the options bitmask as source of
13184
     * truth, making all these struct members obsolete.
13185
     *
13186
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13187
     * loading of the external subset.
13188
     */
13189
151k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13190
151k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13191
151k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13192
151k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13193
151k
    ctxt->loadsubset |= (options & XML_PARSE_SKIP_IDS) ? XML_SKIP_IDS : 0;
13194
151k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13195
151k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13196
151k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13197
151k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13198
13199
151k
    return(options & ~allMask);
13200
151k
}
13201
13202
/**
13203
 * Applies the options to the parser context. Unset options are
13204
 * cleared.
13205
 *
13206
 * @since 2.13.0
13207
 *
13208
 * With older versions, you can use #xmlCtxtUseOptions.
13209
 *
13210
 * @param ctxt  an XML parser context
13211
 * @param options  a bitmask of xmlParserOption values
13212
 * @returns 0 in case of success, the set of unknown or unimplemented options
13213
 *         in case of error.
13214
 */
13215
int
13216
xmlCtxtSetOptions(xmlParserCtxt *ctxt, int options)
13217
0
{
13218
0
#ifdef LIBXML_HTML_ENABLED
13219
0
    if ((ctxt != NULL) && (ctxt->html))
13220
0
        return(htmlCtxtSetOptions(ctxt, options));
13221
0
#endif
13222
13223
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13224
0
}
13225
13226
/**
13227
 * Get the current options of the parser context.
13228
 *
13229
 * @since 2.14.0
13230
 *
13231
 * @param ctxt  an XML parser context
13232
 * @returns the current options set in the parser context, or -1 if ctxt is NULL.
13233
 */
13234
int
13235
xmlCtxtGetOptions(xmlParserCtxt *ctxt)
13236
0
{
13237
0
    if (ctxt == NULL)
13238
0
        return(-1);
13239
13240
0
    return(ctxt->options);
13241
0
}
13242
13243
/**
13244
 * Applies the options to the parser context. The following options
13245
 * are never cleared and can only be enabled:
13246
 *
13247
 * - XML_PARSE_NOERROR
13248
 * - XML_PARSE_NOWARNING
13249
 * - XML_PARSE_NONET
13250
 * - XML_PARSE_NSCLEAN
13251
 * - XML_PARSE_NOCDATA
13252
 * - XML_PARSE_COMPACT
13253
 * - XML_PARSE_OLD10
13254
 * - XML_PARSE_HUGE
13255
 * - XML_PARSE_OLDSAX
13256
 * - XML_PARSE_IGNORE_ENC
13257
 * - XML_PARSE_BIG_LINES
13258
 *
13259
 * @deprecated Use #xmlCtxtSetOptions.
13260
 *
13261
 * @param ctxt  an XML parser context
13262
 * @param options  a combination of xmlParserOption
13263
 * @returns 0 in case of success, the set of unknown or unimplemented options
13264
 *         in case of error.
13265
 */
13266
int
13267
xmlCtxtUseOptions(xmlParserCtxt *ctxt, int options)
13268
151k
{
13269
151k
    int keepMask;
13270
13271
151k
#ifdef LIBXML_HTML_ENABLED
13272
151k
    if ((ctxt != NULL) && (ctxt->html))
13273
0
        return(htmlCtxtUseOptions(ctxt, options));
13274
151k
#endif
13275
13276
    /*
13277
     * For historic reasons, some options can only be enabled.
13278
     */
13279
151k
    keepMask = XML_PARSE_NOERROR |
13280
151k
               XML_PARSE_NOWARNING |
13281
151k
               XML_PARSE_NONET |
13282
151k
               XML_PARSE_NSCLEAN |
13283
151k
               XML_PARSE_NOCDATA |
13284
151k
               XML_PARSE_COMPACT |
13285
151k
               XML_PARSE_OLD10 |
13286
151k
               XML_PARSE_HUGE |
13287
151k
               XML_PARSE_OLDSAX |
13288
151k
               XML_PARSE_IGNORE_ENC |
13289
151k
               XML_PARSE_BIG_LINES;
13290
13291
151k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13292
151k
}
13293
13294
/**
13295
 * To protect against exponential entity expansion ("billion laughs"), the
13296
 * size of serialized output is (roughly) limited to the input size
13297
 * multiplied by this factor. The default value is 5.
13298
 *
13299
 * When working with documents making heavy use of entity expansion, it can
13300
 * be necessary to increase the value. For security reasons, this should only
13301
 * be considered when processing trusted input.
13302
 *
13303
 * @param ctxt  an XML parser context
13304
 * @param maxAmpl  maximum amplification factor
13305
 */
13306
void
13307
xmlCtxtSetMaxAmplification(xmlParserCtxt *ctxt, unsigned maxAmpl)
13308
0
{
13309
0
    if (ctxt == NULL)
13310
0
        return;
13311
0
    ctxt->maxAmpl = maxAmpl;
13312
0
}
13313
13314
/**
13315
 * Parse an XML document and return the resulting document tree.
13316
 * Takes ownership of the input object.
13317
 *
13318
 * @since 2.13.0
13319
 *
13320
 * @param ctxt  an XML parser context
13321
 * @param input  parser input
13322
 * @returns the resulting document tree or NULL
13323
 */
13324
xmlDoc *
13325
xmlCtxtParseDocument(xmlParserCtxt *ctxt, xmlParserInput *input)
13326
5.12k
{
13327
5.12k
    xmlDocPtr ret = NULL;
13328
13329
5.12k
    if ((ctxt == NULL) || (input == NULL)) {
13330
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
13331
0
        xmlFreeInputStream(input);
13332
0
        return(NULL);
13333
0
    }
13334
13335
    /* assert(ctxt->inputNr == 0); */
13336
5.12k
    while (ctxt->inputNr > 0)
13337
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13338
13339
5.12k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13340
0
        xmlFreeInputStream(input);
13341
0
        return(NULL);
13342
0
    }
13343
13344
5.12k
    xmlParseDocument(ctxt);
13345
13346
5.12k
    ret = xmlCtxtGetDocument(ctxt);
13347
13348
    /* assert(ctxt->inputNr == 1); */
13349
10.2k
    while (ctxt->inputNr > 0)
13350
5.12k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13351
13352
5.12k
    return(ret);
13353
5.12k
}
13354
13355
/**
13356
 * Convenience function to parse an XML document from a
13357
 * zero-terminated string.
13358
 *
13359
 * See #xmlCtxtReadDoc for details.
13360
 *
13361
 * @param cur  a pointer to a zero terminated string
13362
 * @param URL  base URL (optional)
13363
 * @param encoding  the document encoding (optional)
13364
 * @param options  a combination of xmlParserOption
13365
 * @returns the resulting document tree
13366
 */
13367
xmlDoc *
13368
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13369
           int options)
13370
0
{
13371
0
    xmlParserCtxtPtr ctxt;
13372
0
    xmlParserInputPtr input;
13373
0
    xmlDocPtr doc = NULL;
13374
13375
0
    ctxt = xmlNewParserCtxt();
13376
0
    if (ctxt == NULL)
13377
0
        return(NULL);
13378
13379
0
    xmlCtxtUseOptions(ctxt, options);
13380
13381
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13382
0
                                      XML_INPUT_BUF_STATIC);
13383
13384
0
    if (input != NULL)
13385
0
        doc = xmlCtxtParseDocument(ctxt, input);
13386
13387
0
    xmlFreeParserCtxt(ctxt);
13388
0
    return(doc);
13389
0
}
13390
13391
/**
13392
 * Convenience function to parse an XML file from the filesystem
13393
 * or a global, user-defined resource loader.
13394
 *
13395
 * If a "-" filename is passed, the function will read from stdin.
13396
 * This feature is potentially insecure and might be removed from
13397
 * later versions.
13398
 *
13399
 * See #xmlCtxtReadFile for details.
13400
 *
13401
 * @param filename  a file or URL
13402
 * @param encoding  the document encoding (optional)
13403
 * @param options  a combination of xmlParserOption
13404
 * @returns the resulting document tree
13405
 */
13406
xmlDoc *
13407
xmlReadFile(const char *filename, const char *encoding, int options)
13408
0
{
13409
0
    xmlParserCtxtPtr ctxt;
13410
0
    xmlParserInputPtr input;
13411
0
    xmlDocPtr doc = NULL;
13412
13413
0
    ctxt = xmlNewParserCtxt();
13414
0
    if (ctxt == NULL)
13415
0
        return(NULL);
13416
13417
0
    xmlCtxtUseOptions(ctxt, options);
13418
13419
    /*
13420
     * Backward compatibility for users of command line utilities like
13421
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13422
     * should be removed at some point.
13423
     */
13424
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13425
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13426
0
                                      encoding, 0);
13427
0
    else
13428
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13429
13430
0
    if (input != NULL)
13431
0
        doc = xmlCtxtParseDocument(ctxt, input);
13432
13433
0
    xmlFreeParserCtxt(ctxt);
13434
0
    return(doc);
13435
0
}
13436
13437
/**
13438
 * Parse an XML in-memory document and build a tree. The input buffer must
13439
 * not contain a terminating null byte.
13440
 *
13441
 * See #xmlCtxtReadMemory for details.
13442
 *
13443
 * @param buffer  a pointer to a char array
13444
 * @param size  the size of the array
13445
 * @param url  base URL (optional)
13446
 * @param encoding  the document encoding (optional)
13447
 * @param options  a combination of xmlParserOption
13448
 * @returns the resulting document tree
13449
 */
13450
xmlDoc *
13451
xmlReadMemory(const char *buffer, int size, const char *url,
13452
              const char *encoding, int options)
13453
0
{
13454
0
    xmlParserCtxtPtr ctxt;
13455
0
    xmlParserInputPtr input;
13456
0
    xmlDocPtr doc = NULL;
13457
13458
0
    if (size < 0)
13459
0
  return(NULL);
13460
13461
0
    ctxt = xmlNewParserCtxt();
13462
0
    if (ctxt == NULL)
13463
0
        return(NULL);
13464
13465
0
    xmlCtxtUseOptions(ctxt, options);
13466
13467
0
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13468
0
                                      XML_INPUT_BUF_STATIC);
13469
13470
0
    if (input != NULL)
13471
0
        doc = xmlCtxtParseDocument(ctxt, input);
13472
13473
0
    xmlFreeParserCtxt(ctxt);
13474
0
    return(doc);
13475
0
}
13476
13477
/**
13478
 * Parse an XML from a file descriptor and build a tree.
13479
 *
13480
 * See #xmlCtxtReadFd for details.
13481
 *
13482
 * NOTE that the file descriptor will not be closed when the
13483
 * context is freed or reset.
13484
 *
13485
 * @param fd  an open file descriptor
13486
 * @param URL  base URL (optional)
13487
 * @param encoding  the document encoding (optional)
13488
 * @param options  a combination of xmlParserOption
13489
 * @returns the resulting document tree
13490
 */
13491
xmlDoc *
13492
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13493
0
{
13494
0
    xmlParserCtxtPtr ctxt;
13495
0
    xmlParserInputPtr input;
13496
0
    xmlDocPtr doc = NULL;
13497
13498
0
    ctxt = xmlNewParserCtxt();
13499
0
    if (ctxt == NULL)
13500
0
        return(NULL);
13501
13502
0
    xmlCtxtUseOptions(ctxt, options);
13503
13504
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13505
13506
0
    if (input != NULL)
13507
0
        doc = xmlCtxtParseDocument(ctxt, input);
13508
13509
0
    xmlFreeParserCtxt(ctxt);
13510
0
    return(doc);
13511
0
}
13512
13513
/**
13514
 * Parse an XML document from I/O functions and context and build a tree.
13515
 *
13516
 * See #xmlCtxtReadIO for details.
13517
 *
13518
 * @param ioread  an I/O read function
13519
 * @param ioclose  an I/O close function (optional)
13520
 * @param ioctx  an I/O handler
13521
 * @param URL  base URL (optional)
13522
 * @param encoding  the document encoding (optional)
13523
 * @param options  a combination of xmlParserOption
13524
 * @returns the resulting document tree
13525
 */
13526
xmlDoc *
13527
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13528
          void *ioctx, const char *URL, const char *encoding, int options)
13529
0
{
13530
0
    xmlParserCtxtPtr ctxt;
13531
0
    xmlParserInputPtr input;
13532
0
    xmlDocPtr doc = NULL;
13533
13534
0
    ctxt = xmlNewParserCtxt();
13535
0
    if (ctxt == NULL)
13536
0
        return(NULL);
13537
13538
0
    xmlCtxtUseOptions(ctxt, options);
13539
13540
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13541
0
                                  encoding, 0);
13542
13543
0
    if (input != NULL)
13544
0
        doc = xmlCtxtParseDocument(ctxt, input);
13545
13546
0
    xmlFreeParserCtxt(ctxt);
13547
0
    return(doc);
13548
0
}
13549
13550
/**
13551
 * Parse an XML in-memory document and build a tree.
13552
 *
13553
 * `URL` is used as base to resolve external entities and for error
13554
 * reporting.
13555
 *
13556
 * @param ctxt  an XML parser context
13557
 * @param str  a pointer to a zero terminated string
13558
 * @param URL  base URL (optional)
13559
 * @param encoding  the document encoding (optional)
13560
 * @param options  a combination of xmlParserOption
13561
 * @returns the resulting document tree
13562
 */
13563
xmlDoc *
13564
xmlCtxtReadDoc(xmlParserCtxt *ctxt, const xmlChar *str,
13565
               const char *URL, const char *encoding, int options)
13566
0
{
13567
0
    xmlParserInputPtr input;
13568
13569
0
    if (ctxt == NULL)
13570
0
        return(NULL);
13571
13572
0
    xmlCtxtReset(ctxt);
13573
0
    xmlCtxtUseOptions(ctxt, options);
13574
13575
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
13576
0
                                      XML_INPUT_BUF_STATIC);
13577
0
    if (input == NULL)
13578
0
        return(NULL);
13579
13580
0
    return(xmlCtxtParseDocument(ctxt, input));
13581
0
}
13582
13583
/**
13584
 * Parse an XML file from the filesystem or a global, user-defined
13585
 * resource loader.
13586
 *
13587
 * @param ctxt  an XML parser context
13588
 * @param filename  a file or URL
13589
 * @param encoding  the document encoding (optional)
13590
 * @param options  a combination of xmlParserOption
13591
 * @returns the resulting document tree
13592
 */
13593
xmlDoc *
13594
xmlCtxtReadFile(xmlParserCtxt *ctxt, const char *filename,
13595
                const char *encoding, int options)
13596
0
{
13597
0
    xmlParserInputPtr input;
13598
13599
0
    if (ctxt == NULL)
13600
0
        return(NULL);
13601
13602
0
    xmlCtxtReset(ctxt);
13603
0
    xmlCtxtUseOptions(ctxt, options);
13604
13605
0
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13606
0
    if (input == NULL)
13607
0
        return(NULL);
13608
13609
0
    return(xmlCtxtParseDocument(ctxt, input));
13610
0
}
13611
13612
/**
13613
 * Parse an XML in-memory document and build a tree. The input buffer must
13614
 * not contain a terminating null byte.
13615
 *
13616
 * `URL` is used as base to resolve external entities and for error
13617
 * reporting.
13618
 *
13619
 * @param ctxt  an XML parser context
13620
 * @param buffer  a pointer to a char array
13621
 * @param size  the size of the array
13622
 * @param URL  base URL (optional)
13623
 * @param encoding  the document encoding (optional)
13624
 * @param options  a combination of xmlParserOption
13625
 * @returns the resulting document tree
13626
 */
13627
xmlDoc *
13628
xmlCtxtReadMemory(xmlParserCtxt *ctxt, const char *buffer, int size,
13629
                  const char *URL, const char *encoding, int options)
13630
0
{
13631
0
    xmlParserInputPtr input;
13632
13633
0
    if ((ctxt == NULL) || (size < 0))
13634
0
        return(NULL);
13635
13636
0
    xmlCtxtReset(ctxt);
13637
0
    xmlCtxtUseOptions(ctxt, options);
13638
13639
0
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
13640
0
                                      XML_INPUT_BUF_STATIC);
13641
0
    if (input == NULL)
13642
0
        return(NULL);
13643
13644
0
    return(xmlCtxtParseDocument(ctxt, input));
13645
0
}
13646
13647
/**
13648
 * Parse an XML document from a file descriptor and build a tree.
13649
 *
13650
 * NOTE that the file descriptor will not be closed when the
13651
 * context is freed or reset.
13652
 *
13653
 * `URL` is used as base to resolve external entities and for error
13654
 * reporting.
13655
 *
13656
 * @param ctxt  an XML parser context
13657
 * @param fd  an open file descriptor
13658
 * @param URL  base URL (optional)
13659
 * @param encoding  the document encoding (optional)
13660
 * @param options  a combination of xmlParserOption
13661
 * @returns the resulting document tree
13662
 */
13663
xmlDoc *
13664
xmlCtxtReadFd(xmlParserCtxt *ctxt, int fd,
13665
              const char *URL, const char *encoding, int options)
13666
0
{
13667
0
    xmlParserInputPtr input;
13668
13669
0
    if (ctxt == NULL)
13670
0
        return(NULL);
13671
13672
0
    xmlCtxtReset(ctxt);
13673
0
    xmlCtxtUseOptions(ctxt, options);
13674
13675
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13676
0
    if (input == NULL)
13677
0
        return(NULL);
13678
13679
0
    return(xmlCtxtParseDocument(ctxt, input));
13680
0
}
13681
13682
/**
13683
 * Parse an XML document from I/O functions and source and build a tree.
13684
 * This reuses the existing `ctxt` parser context
13685
 *
13686
 * `URL` is used as base to resolve external entities and for error
13687
 * reporting.
13688
 *
13689
 * @param ctxt  an XML parser context
13690
 * @param ioread  an I/O read function
13691
 * @param ioclose  an I/O close function
13692
 * @param ioctx  an I/O handler
13693
 * @param URL  the base URL to use for the document
13694
 * @param encoding  the document encoding, or NULL
13695
 * @param options  a combination of xmlParserOption
13696
 * @returns the resulting document tree
13697
 */
13698
xmlDoc *
13699
xmlCtxtReadIO(xmlParserCtxt *ctxt, xmlInputReadCallback ioread,
13700
              xmlInputCloseCallback ioclose, void *ioctx,
13701
        const char *URL,
13702
              const char *encoding, int options)
13703
5.12k
{
13704
5.12k
    xmlParserInputPtr input;
13705
13706
5.12k
    if (ctxt == NULL)
13707
0
        return(NULL);
13708
13709
5.12k
    xmlCtxtReset(ctxt);
13710
5.12k
    xmlCtxtUseOptions(ctxt, options);
13711
13712
5.12k
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13713
5.12k
                                  encoding, 0);
13714
5.12k
    if (input == NULL)
13715
0
        return(NULL);
13716
13717
5.12k
    return(xmlCtxtParseDocument(ctxt, input));
13718
5.12k
}
13719