Coverage Report

Created: 2026-01-10 06:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxml2/parser.c
Line
Count
Source
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX2.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * Author: Daniel Veillard
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#include <libxml/HTMLparser.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
70
#include "private/buf.h"
71
#include "private/dict.h"
72
#include "private/entities.h"
73
#include "private/error.h"
74
#include "private/html.h"
75
#include "private/io.h"
76
#include "private/memory.h"
77
#include "private/parser.h"
78
#include "private/tree.h"
79
80
1.27M
#define NS_INDEX_EMPTY  INT_MAX
81
84.7k
#define NS_INDEX_XML    (INT_MAX - 1)
82
425k
#define URI_HASH_EMPTY  0xD943A04E
83
39.5k
#define URI_HASH_XML    0xF0451F02
84
85
#ifndef STDIN_FILENO
86
0
  #define STDIN_FILENO 0
87
#endif
88
89
#ifndef SIZE_MAX
90
  #define SIZE_MAX ((size_t) -1)
91
#endif
92
93
115k
#define XML_MAX_ATTRS 100000000 /* 100 million */
94
95
964k
#define XML_SPECIAL_EXTERNAL    (1 << 20)
96
960k
#define XML_SPECIAL_TYPE_MASK   (XML_SPECIAL_EXTERNAL - 1)
97
98
1.06M
#define XML_ATTVAL_ALLOC        (1 << 0)
99
12.8k
#define XML_ATTVAL_NORM_CHANGE  (1 << 1)
100
101
/*
 * Per-start-tag record holding a prefix, a namespace URI, a line number
 * and a namespace count.
 * NOTE(review): the code that fills and reads this struct is outside this
 * chunk; presumably one record is kept per open element - confirm.
 */
struct _xmlStartTag {
102
    const xmlChar *prefix;
103
    const xmlChar *URI;
104
    int line;
105
    int nsNr;
106
};
107
108
/*
 * Additional per-namespace data: an opaque SAX pointer, hash values for
 * the prefix and the URI, an element id and a previous index.
 * NOTE(review): presumably stored in parallel to the namespace stack via
 * xmlParserNsData.extra - confirm against the users outside this chunk.
 */
typedef struct {
109
    void *saxData;
110
    unsigned prefixHashValue;
111
    unsigned uriHashValue;
112
    unsigned elementId;
113
    int oldIndex;
114
} xmlParserNsExtra;
115
116
/*
 * One slot of the namespace hash table (see xmlParserNsData.hash): a hash
 * value paired with an index.
 * NOTE(review): presumably the index refers to the namespace stack and
 * may take the NS_INDEX_* sentinel values - confirm.
 */
typedef struct {
117
    unsigned hashValue;
118
    int index;
119
} xmlParserNsBucket;
120
121
/*
 * Namespace bookkeeping of a parser context: an array of xmlParserNsExtra
 * records, a hash table of xmlParserNsBucket slots with its size and
 * element count, plus an element id counter and two namespace indices.
 * NOTE(review): field semantics are not visible in this chunk; confirm
 * against the namespace lookup/push/pop code.
 */
struct _xmlParserNsData {
122
    xmlParserNsExtra *extra;
123
124
    unsigned hashSize;
125
    unsigned hashElems;
126
    xmlParserNsBucket *hash;
127
128
    unsigned elementId;
129
    int defaultNsIndex;
130
    int minNsIndex;
131
};
132
133
static int
134
xmlParseElementStart(xmlParserCtxtPtr ctxt);
135
136
static void
137
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
138
139
static xmlEntityPtr
140
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
141
142
static const xmlChar *
143
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
144
145
/************************************************************************
146
 *                  *
147
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
148
 *                  *
149
 ************************************************************************/
150
151
#define XML_PARSER_BIG_ENTITY 1000
152
#define XML_PARSER_LOT_ENTITY 5000
153
154
/*
155
 * Constants for protection against abusive entity expansion
156
 * ("billion laughs").
157
 */
158
159
/*
160
 * A certain amount of entity expansion which is always allowed.
161
 */
162
380k
#define XML_PARSER_ALLOWED_EXPANSION 1000000
163
164
/*
165
 * Fixed cost for each entity reference. This crudely models processing time
166
 * as well to protect, for example, against exponential expansion of empty
167
 * or very short entities.
168
 */
169
380k
#define XML_ENT_FIXED_COST 20
170
171
9.04M
#define XML_PARSER_BIG_BUFFER_SIZE 300
172
146k
#define XML_PARSER_BUFFER_SIZE 100
173
56.9k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
174
175
/**
176
 * XML_PARSER_CHUNK_SIZE
177
 *
178
 * When calling GROW that's the minimal amount of data
179
 * the parser expects to have received. It is not a hard
180
 * limit but an optimization when reading strings like Names
181
 * It is not strictly needed as long as the input's available characters
182
 * are followed by 0, which should be provided by the I/O level
183
 */
184
#define XML_PARSER_CHUNK_SIZE 100
185
186
/**
187
 * Constant string describing the version of the library used at
188
 * run-time.
189
 */
190
const char *const
191
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
192
193
/*
194
 * List of XML prefixed PI allowed by W3C specs
195
 */
196
197
static const char* const xmlW3CPIs[] = {
198
    "xml-stylesheet",
199
    "xml-model",
200
    NULL
201
};
202
203
204
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
205
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206
                                              const xmlChar **str);
207
208
static void
209
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
210
211
static int
212
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
213
214
static void
215
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl);
216
217
/************************************************************************
218
 *                  *
219
 *    Some factorized error routines        *
220
 *                  *
221
 ************************************************************************/
222
223
static void
224
278
xmlErrMemory(xmlParserCtxtPtr ctxt) {
225
278
    xmlCtxtErrMemory(ctxt);
226
278
}
227
228
/**
229
 * Handle a redefinition of attribute error
230
 *
231
 * @param ctxt  an XML parser context
232
 * @param prefix  the attribute prefix
233
 * @param localname  the attribute localname
234
 */
235
static void
236
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
237
                   const xmlChar * localname)
238
7.18k
{
239
7.18k
    if (prefix == NULL)
240
4.30k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
4.30k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
242
4.30k
                   "Attribute %s redefined\n", localname);
243
2.87k
    else
244
2.87k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
245
2.87k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
246
2.87k
                   "Attribute %s:%s redefined\n", prefix, localname);
247
7.18k
}
248
249
/**
250
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
251
 *
252
 * @param ctxt  an XML parser context
253
 * @param error  the error number
254
 * @param msg  the error message
255
 */
256
static void LIBXML_ATTR_FORMAT(3,0)
257
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
258
               const char *msg)
259
2.31M
{
260
2.31M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
261
2.31M
               NULL, NULL, NULL, 0, "%s", msg);
262
2.31M
}
263
264
/**
265
 * Handle a warning.
266
 *
267
 * @param ctxt  an XML parser context
268
 * @param error  the error number
269
 * @param msg  the error message
270
 * @param str1  extra data
271
 * @param str2  extra data
272
 */
273
void LIBXML_ATTR_FORMAT(3,0)
274
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
275
              const char *msg, const xmlChar *str1, const xmlChar *str2)
276
4.70k
{
277
4.70k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
278
4.70k
               str1, str2, NULL, 0, msg, str1, str2);
279
4.70k
}
280
281
#ifdef LIBXML_VALID_ENABLED
282
/**
283
 * Handle a validity error.
284
 *
285
 * @param ctxt  an XML parser context
286
 * @param error  the error number
287
 * @param msg  the error message
288
 * @param str1  extra data
289
 * @param str2  extra data
290
 */
291
static void LIBXML_ATTR_FORMAT(3,0)
292
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
293
              const char *msg, const xmlChar *str1, const xmlChar *str2)
294
0
{
295
0
    ctxt->valid = 0;
296
297
0
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
298
0
               str1, str2, NULL, 0, msg, str1, str2);
299
0
}
300
#endif
301
302
/**
303
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
304
 *
305
 * @param ctxt  an XML parser context
306
 * @param error  the error number
307
 * @param msg  the error message
308
 * @param val  an integer value
309
 */
310
static void LIBXML_ATTR_FORMAT(3,0)
311
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
312
                  const char *msg, int val)
313
360k
{
314
360k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
315
360k
               NULL, NULL, NULL, val, msg, val);
316
360k
}
317
318
/**
319
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
320
 *
321
 * @param ctxt  an XML parser context
322
 * @param error  the error number
323
 * @param msg  the error message
324
 * @param str1  an string info
325
 * @param val  an integer value
326
 * @param str2  an string info
327
 */
328
static void LIBXML_ATTR_FORMAT(3,0)
329
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
330
                  const char *msg, const xmlChar *str1, int val,
331
      const xmlChar *str2)
332
79.5k
{
333
79.5k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
334
79.5k
               str1, str2, NULL, val, msg, str1, val, str2);
335
79.5k
}
336
337
/**
338
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
339
 *
340
 * @param ctxt  an XML parser context
341
 * @param error  the error number
342
 * @param msg  the error message
343
 * @param val  a string value
344
 */
345
static void LIBXML_ATTR_FORMAT(3,0)
346
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
347
                  const char *msg, const xmlChar * val)
348
105k
{
349
105k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
350
105k
               val, NULL, NULL, 0, msg, val);
351
105k
}
352
353
/**
354
 * Handle a non fatal parser error
355
 *
356
 * @param ctxt  an XML parser context
357
 * @param error  the error number
358
 * @param msg  the error message
359
 * @param val  a string value
360
 */
361
static void LIBXML_ATTR_FORMAT(3,0)
362
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363
                  const char *msg, const xmlChar * val)
364
117k
{
365
117k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366
117k
               val, NULL, NULL, 0, msg, val);
367
117k
}
368
369
/**
370
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
371
 *
372
 * @param ctxt  an XML parser context
373
 * @param error  the error number
374
 * @param msg  the message
375
 * @param info1  extra information string
376
 * @param info2  extra information string
377
 * @param info3  extra information string
378
 */
379
static void LIBXML_ATTR_FORMAT(3,0)
380
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381
         const char *msg,
382
         const xmlChar * info1, const xmlChar * info2,
383
         const xmlChar * info3)
384
184k
{
385
184k
    ctxt->nsWellFormed = 0;
386
387
184k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388
184k
               info1, info2, info3, 0, msg, info1, info2, info3);
389
184k
}
390
391
/**
392
 * Handle a namespace warning error
393
 *
394
 * @param ctxt  an XML parser context
395
 * @param error  the error number
396
 * @param msg  the message
397
 * @param info1  extra information string
398
 * @param info2  extra information string
399
 * @param info3  extra information string
400
 */
401
static void LIBXML_ATTR_FORMAT(3,0)
402
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403
         const char *msg,
404
         const xmlChar * info1, const xmlChar * info2,
405
         const xmlChar * info3)
406
8.26k
{
407
8.26k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408
8.26k
               info1, info2, info3, 0, msg, info1, info2, info3);
409
8.26k
}
410
411
/**
412
 * Check for non-linear entity expansion behaviour.
413
 *
414
 * In some cases like xmlExpandEntityInAttValue, this function is called
415
 * for each, possibly nested entity and its unexpanded content length.
416
 *
417
 * In other cases like #xmlParseReference, it's only called for each
418
 * top-level entity with its unexpanded content length plus the sum of
419
 * the unexpanded content lengths (plus fixed cost) of all nested
420
 * entities.
421
 *
422
 * Summing the unexpanded lengths also adds the length of the reference.
423
 * This is by design. Taking the length of the entity name into account
424
 * discourages attacks that try to waste CPU time with abusively long
425
 * entity names. See test/recurse/lol6.xml for example. Each call also
426
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
427
 * short entities.
428
 *
429
 * @param ctxt  parser context
430
 * @param extra  sum of unexpanded entity sizes
431
 * @returns 1 on error, 0 on success.
432
 */
433
static int
434
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
435
455k
{
436
455k
    unsigned long consumed;
437
455k
    unsigned long *expandedSize;
438
455k
    xmlParserInputPtr input = ctxt->input;
439
455k
    xmlEntityPtr entity = input->entity;
440
441
455k
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
442
74.9k
        return(0);
443
444
    /*
445
     * Compute total consumed bytes so far, including input streams of
446
     * external entities.
447
     */
448
380k
    consumed = input->consumed;
449
380k
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
450
380k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
451
452
380k
    if (entity)
453
24.4k
        expandedSize = &entity->expandedSize;
454
356k
    else
455
356k
        expandedSize = &ctxt->sizeentcopy;
456
457
    /*
458
     * Add extra cost and some fixed cost.
459
     */
460
380k
    xmlSaturatedAdd(expandedSize, extra);
461
380k
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
462
463
    /*
464
     * It's important to always use saturation arithmetic when tracking
465
     * entity sizes to make the size checks reliable. If "sizeentcopy"
466
     * overflows, we have to abort.
467
     */
468
380k
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
469
803
        ((*expandedSize >= ULONG_MAX) ||
470
803
         (*expandedSize / ctxt->maxAmpl > consumed))) {
471
213
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
472
213
                       "Maximum entity amplification factor exceeded, see "
473
213
                       "xmlCtxtSetMaxAmplification.\n");
474
213
        return(1);
475
213
    }
476
477
380k
    return(0);
478
380k
}
479
480
/************************************************************************
481
 *                  *
482
 *    Library wide options          *
483
 *                  *
484
 ************************************************************************/
485
486
/**
487
 * Examines if the library has been compiled with a given feature.
488
 *
489
 * @param feature  the feature to be examined
490
 * @returns zero (0) if the feature does not exist or an unknown
491
 * feature is requested, non-zero otherwise.
492
 */
493
int
494
xmlHasFeature(xmlFeature feature)
495
0
{
496
0
    switch (feature) {
497
0
  case XML_WITH_THREAD:
498
0
#ifdef LIBXML_THREAD_ENABLED
499
0
      return(1);
500
#else
501
      return(0);
502
#endif
503
0
        case XML_WITH_TREE:
504
0
            return(1);
505
0
        case XML_WITH_OUTPUT:
506
0
#ifdef LIBXML_OUTPUT_ENABLED
507
0
            return(1);
508
#else
509
            return(0);
510
#endif
511
0
        case XML_WITH_PUSH:
512
0
#ifdef LIBXML_PUSH_ENABLED
513
0
            return(1);
514
#else
515
            return(0);
516
#endif
517
0
        case XML_WITH_READER:
518
0
#ifdef LIBXML_READER_ENABLED
519
0
            return(1);
520
#else
521
            return(0);
522
#endif
523
0
        case XML_WITH_PATTERN:
524
0
#ifdef LIBXML_PATTERN_ENABLED
525
0
            return(1);
526
#else
527
            return(0);
528
#endif
529
0
        case XML_WITH_WRITER:
530
0
#ifdef LIBXML_WRITER_ENABLED
531
0
            return(1);
532
#else
533
            return(0);
534
#endif
535
0
        case XML_WITH_SAX1:
536
0
#ifdef LIBXML_SAX1_ENABLED
537
0
            return(1);
538
#else
539
            return(0);
540
#endif
541
0
        case XML_WITH_HTTP:
542
0
            return(0);
543
0
        case XML_WITH_VALID:
544
0
#ifdef LIBXML_VALID_ENABLED
545
0
            return(1);
546
#else
547
            return(0);
548
#endif
549
0
        case XML_WITH_HTML:
550
0
#ifdef LIBXML_HTML_ENABLED
551
0
            return(1);
552
#else
553
            return(0);
554
#endif
555
0
        case XML_WITH_LEGACY:
556
0
            return(0);
557
0
        case XML_WITH_C14N:
558
0
#ifdef LIBXML_C14N_ENABLED
559
0
            return(1);
560
#else
561
            return(0);
562
#endif
563
0
        case XML_WITH_CATALOG:
564
0
#ifdef LIBXML_CATALOG_ENABLED
565
0
            return(1);
566
#else
567
            return(0);
568
#endif
569
0
        case XML_WITH_XPATH:
570
0
#ifdef LIBXML_XPATH_ENABLED
571
0
            return(1);
572
#else
573
            return(0);
574
#endif
575
0
        case XML_WITH_XPTR:
576
0
#ifdef LIBXML_XPTR_ENABLED
577
0
            return(1);
578
#else
579
            return(0);
580
#endif
581
0
        case XML_WITH_XINCLUDE:
582
0
#ifdef LIBXML_XINCLUDE_ENABLED
583
0
            return(1);
584
#else
585
            return(0);
586
#endif
587
0
        case XML_WITH_ICONV:
588
0
#ifdef LIBXML_ICONV_ENABLED
589
0
            return(1);
590
#else
591
            return(0);
592
#endif
593
0
        case XML_WITH_ISO8859X:
594
0
#ifdef LIBXML_ISO8859X_ENABLED
595
0
            return(1);
596
#else
597
            return(0);
598
#endif
599
0
        case XML_WITH_UNICODE:
600
0
            return(0);
601
0
        case XML_WITH_REGEXP:
602
0
#ifdef LIBXML_REGEXP_ENABLED
603
0
            return(1);
604
#else
605
            return(0);
606
#endif
607
0
        case XML_WITH_AUTOMATA:
608
0
#ifdef LIBXML_REGEXP_ENABLED
609
0
            return(1);
610
#else
611
            return(0);
612
#endif
613
0
        case XML_WITH_EXPR:
614
0
            return(0);
615
0
        case XML_WITH_RELAXNG:
616
0
#ifdef LIBXML_RELAXNG_ENABLED
617
0
            return(1);
618
#else
619
            return(0);
620
#endif
621
0
        case XML_WITH_SCHEMAS:
622
0
#ifdef LIBXML_SCHEMAS_ENABLED
623
0
            return(1);
624
#else
625
            return(0);
626
#endif
627
0
        case XML_WITH_SCHEMATRON:
628
#ifdef LIBXML_SCHEMATRON_ENABLED
629
            return(1);
630
#else
631
0
            return(0);
632
0
#endif
633
0
        case XML_WITH_MODULES:
634
0
#ifdef LIBXML_MODULES_ENABLED
635
0
            return(1);
636
#else
637
            return(0);
638
#endif
639
0
        case XML_WITH_DEBUG:
640
#ifdef LIBXML_DEBUG_ENABLED
641
            return(1);
642
#else
643
0
            return(0);
644
0
#endif
645
0
        case XML_WITH_DEBUG_MEM:
646
0
            return(0);
647
0
        case XML_WITH_ZLIB:
648
0
#ifdef LIBXML_ZLIB_ENABLED
649
0
            return(1);
650
#else
651
            return(0);
652
#endif
653
0
        case XML_WITH_LZMA:
654
0
            return(0);
655
0
        case XML_WITH_ICU:
656
#ifdef LIBXML_ICU_ENABLED
657
            return(1);
658
#else
659
0
            return(0);
660
0
#endif
661
0
        default:
662
0
      break;
663
0
     }
664
0
     return(0);
665
0
}
666
667
/************************************************************************
668
 *                  *
669
 *      Simple string buffer        *
670
 *                  *
671
 ************************************************************************/
672
673
/*
 * Simple growable string buffer with a size limit and a sticky error
 * code. The xmlSBufAdd* helpers record a failure in "code" and return
 * without appending; the error is reported later by xmlSBufFinish() or
 * xmlSBufCleanup().
 */
typedef struct {
674
    xmlChar *mem; /* storage, NULL until the first xmlSBufGrow() */
675
    unsigned size; /* number of bytes stored */
676
    unsigned cap; /* allocated bytes, size < cap */
677
    unsigned max; /* limit for size, size <= max */
678
    xmlParserErrors code; /* XML_ERR_OK or the recorded failure */
679
} xmlSBuf;
680
681
static void
682
988k
xmlSBufInit(xmlSBuf *buf, unsigned max) {
683
988k
    buf->mem = NULL;
684
988k
    buf->size = 0;
685
988k
    buf->cap = 0;
686
988k
    buf->max = max;
687
988k
    buf->code = XML_ERR_OK;
688
988k
}
689
690
static int
691
187k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
692
187k
    xmlChar *mem;
693
187k
    unsigned cap;
694
695
187k
    if (len >= UINT_MAX / 2 - buf->size) {
696
0
        if (buf->code == XML_ERR_OK)
697
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
698
0
        return(-1);
699
0
    }
700
701
187k
    cap = (buf->size + len) * 2;
702
187k
    if (cap < 240)
703
149k
        cap = 240;
704
705
187k
    mem = xmlRealloc(buf->mem, cap);
706
187k
    if (mem == NULL) {
707
69
        buf->code = XML_ERR_NO_MEMORY;
708
69
        return(-1);
709
69
    }
710
711
187k
    buf->mem = mem;
712
187k
    buf->cap = cap;
713
714
187k
    return(0);
715
187k
}
716
717
static void
718
9.06M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
719
9.06M
    if (buf->max - buf->size < len) {
720
0
        if (buf->code == XML_ERR_OK)
721
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
722
0
        return;
723
0
    }
724
725
9.06M
    if (buf->cap - buf->size <= len) {
726
180k
        if (xmlSBufGrow(buf, len) < 0)
727
63
            return;
728
180k
    }
729
730
9.06M
    if (len > 0)
731
9.06M
        memcpy(buf->mem + buf->size, str, len);
732
9.06M
    buf->size += len;
733
9.06M
}
734
735
static void
736
7.84M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
737
7.84M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
738
7.84M
}
739
740
static void
741
53.8k
xmlSBufAddChar(xmlSBuf *buf, int c) {
742
53.8k
    xmlChar *end;
743
744
53.8k
    if (buf->max - buf->size < 4) {
745
0
        if (buf->code == XML_ERR_OK)
746
0
            buf->code = XML_ERR_RESOURCE_LIMIT;
747
0
        return;
748
0
    }
749
750
53.8k
    if (buf->cap - buf->size <= 4) {
751
7.19k
        if (xmlSBufGrow(buf, 4) < 0)
752
6
            return;
753
7.19k
    }
754
755
53.8k
    end = buf->mem + buf->size;
756
757
53.8k
    if (c < 0x80) {
758
40.7k
        *end = (xmlChar) c;
759
40.7k
        buf->size += 1;
760
40.7k
    } else {
761
13.0k
        buf->size += xmlCopyCharMultiByte(end, c);
762
13.0k
    }
763
53.8k
}
764
765
static void
766
5.10M
xmlSBufAddReplChar(xmlSBuf *buf) {
767
5.10M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
768
5.10M
}
769
770
static void
771
73
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
772
73
    if (buf->code == XML_ERR_NO_MEMORY)
773
73
        xmlCtxtErrMemory(ctxt);
774
0
    else
775
0
        xmlFatalErr(ctxt, buf->code, errMsg);
776
73
}
777
778
static xmlChar *
779
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
780
156k
              const char *errMsg) {
781
156k
    if (buf->mem == NULL) {
782
10.0k
        buf->mem = xmlMalloc(1);
783
10.0k
        if (buf->mem == NULL) {
784
4
            buf->code = XML_ERR_NO_MEMORY;
785
10.0k
        } else {
786
10.0k
            buf->mem[0] = 0;
787
10.0k
        }
788
146k
    } else {
789
146k
        buf->mem[buf->size] = 0;
790
146k
    }
791
792
156k
    if (buf->code == XML_ERR_OK) {
793
156k
        if (sizeOut != NULL)
794
111k
            *sizeOut = buf->size;
795
156k
        return(buf->mem);
796
156k
    }
797
798
41
    xmlSBufReportError(buf, ctxt, errMsg);
799
800
41
    xmlFree(buf->mem);
801
802
41
    if (sizeOut != NULL)
803
14
        *sizeOut = 0;
804
41
    return(NULL);
805
156k
}
806
807
static void
808
829k
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
809
829k
    if (buf->code != XML_ERR_OK)
810
32
        xmlSBufReportError(buf, ctxt, errMsg);
811
812
829k
    xmlFree(buf->mem);
813
829k
}
814
815
static int
816
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
817
33.4M
                    const char *errMsg) {
818
33.4M
    int c = str[0];
819
33.4M
    int c1 = str[1];
820
821
33.4M
    if ((c1 & 0xC0) != 0x80)
822
1.40M
        goto encoding_error;
823
824
32.0M
    if (c < 0xE0) {
825
        /* 2-byte sequence */
826
26.4M
        if (c < 0xC2)
827
1.64M
            goto encoding_error;
828
829
24.8M
        return(2);
830
26.4M
    } else {
831
5.64M
        int c2 = str[2];
832
833
5.64M
        if ((c2 & 0xC0) != 0x80)
834
8.86k
            goto encoding_error;
835
836
5.63M
        if (c < 0xF0) {
837
            /* 3-byte sequence */
838
5.61M
            if (c == 0xE0) {
839
                /* overlong */
840
882k
                if (c1 < 0xA0)
841
502
                    goto encoding_error;
842
4.73M
            } else if (c == 0xED) {
843
                /* surrogate */
844
1.52k
                if (c1 >= 0xA0)
845
703
                    goto encoding_error;
846
4.73M
            } else if (c == 0xEF) {
847
                /* U+FFFE and U+FFFF are invalid Chars */
848
72.5k
                if ((c1 == 0xBF) && (c2 >= 0xBE))
849
2.39k
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
850
72.5k
            }
851
852
5.61M
            return(3);
853
5.61M
        } else {
854
            /* 4-byte sequence */
855
17.0k
            if ((str[3] & 0xC0) != 0x80)
856
2.57k
                goto encoding_error;
857
14.4k
            if (c == 0xF0) {
858
                /* overlong */
859
1.71k
                if (c1 < 0x90)
860
679
                    goto encoding_error;
861
12.7k
            } else if (c >= 0xF4) {
862
                /* greater than 0x10FFFF */
863
5.65k
                if ((c > 0xF4) || (c1 >= 0x90))
864
4.50k
                    goto encoding_error;
865
5.65k
            }
866
867
9.30k
            return(4);
868
14.4k
        }
869
5.63M
    }
870
871
3.06M
encoding_error:
872
    /* Only report the first error */
873
3.06M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
874
16.0k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
875
16.0k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
876
16.0k
    }
877
878
3.06M
    return(0);
879
32.0M
}
880
881
/************************************************************************
882
 *                  *
883
 *    SAX2 defaulted attributes handling      *
884
 *                  *
885
 ************************************************************************/
886
887
/**
888
 * Final initialization of the parser context before starting to parse.
889
 *
890
 * This accounts for users modifying struct members of parser context
891
 * directly.
892
 *
893
 * @param ctxt  an XML parser context
894
 */
895
static void
896
39.6k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
897
39.6k
    xmlSAXHandlerPtr sax;
898
899
    /* Avoid unused variable warning if features are disabled. */
900
39.6k
    (void) sax;
901
902
    /*
903
     * Changing the SAX struct directly is still widespread practice
904
     * in internal and external code.
905
     */
906
39.6k
    if (ctxt == NULL) return;
907
39.6k
    sax = ctxt->sax;
908
39.6k
#ifdef LIBXML_SAX1_ENABLED
909
    /*
910
     * Only enable SAX2 if there SAX2 element handlers, except when there
911
     * are no element handlers at all.
912
     */
913
39.6k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
914
39.6k
        (sax) &&
915
39.6k
        (sax->initialized == XML_SAX2_MAGIC) &&
916
39.6k
        ((sax->startElementNs != NULL) ||
917
0
         (sax->endElementNs != NULL) ||
918
0
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
919
39.6k
        ctxt->sax2 = 1;
920
#else
921
    ctxt->sax2 = 1;
922
#endif /* LIBXML_SAX1_ENABLED */
923
924
    /*
925
     * Some users replace the dictionary directly in the context struct.
926
     * We really need an API function to do that cleanly.
927
     */
928
39.6k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
929
39.6k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
930
39.6k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
931
39.6k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
932
39.6k
    (ctxt->str_xml_ns == NULL)) {
933
3
        xmlErrMemory(ctxt);
934
3
    }
935
936
39.6k
    xmlDictSetLimit(ctxt->dict,
937
39.6k
                    (ctxt->options & XML_PARSE_HUGE) ?
938
0
                        0 :
939
39.6k
                        XML_MAX_DICTIONARY_LIMIT);
940
941
39.6k
#ifdef LIBXML_VALID_ENABLED
942
39.6k
    if (ctxt->validate)
943
0
        ctxt->vctxt.flags |= XML_VCTXT_VALIDATE;
944
39.6k
    else
945
39.6k
        ctxt->vctxt.flags &= ~XML_VCTXT_VALIDATE;
946
39.6k
#endif /* LIBXML_VALID_ENABLED */
947
39.6k
}
948
949
/*
 * One defaulted attribute: hashed prefix, name and value strings, a
 * pointer to the end of the value, plus "external" and "expandedSize".
 * NOTE(review): the code filling these fields (xmlAddDefAttrs) is cut
 * off in this chunk; presumably "external" marks declarations from the
 * external subset - confirm.
 */
typedef struct {
950
    xmlHashedString prefix;
951
    xmlHashedString name;
952
    xmlHashedString value;
953
    const xmlChar *valueEnd;
954
    int external;
955
    int expandedSize;
956
} xmlDefAttr;
957
958
/*
 * Variable-length list of the defaulted attributes of one element. The
 * attribute records are stored inline at the end of the struct.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
959
typedef xmlDefAttrs *xmlDefAttrsPtr;
960
struct _xmlDefAttrs {
961
    int nbAttrs;  /* number of defaulted attributes on that element */
962
    int maxAttrs;       /* the size of the array */
963
#if __STDC_VERSION__ >= 199901L
964
    /* Using a C99 flexible array member avoids UBSan errors. */
965
    xmlDefAttr attrs[] ATTRIBUTE_COUNTED_BY(maxAttrs);
966
#else
967
    /* Pre-C99 fallback: one-element array used as a trailing array. */
968
    xmlDefAttr attrs[1];
968
#endif
969
};
970
971
/**
972
 * Normalize the space in non CDATA attribute values:
973
 * If the attribute type is not CDATA, then the XML processor MUST further
974
 * process the normalized attribute value by discarding any leading and
975
 * trailing space (\#x20) characters, and by replacing sequences of space
976
 * (\#x20) characters by a single space (\#x20) character.
977
 * Note that the size of dst need to be at least src, and if one doesn't need
978
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
979
 * passing src as dst is just fine.
980
 *
981
 * @param src  the source string
982
 * @param dst  the target string
983
 * @returns a pointer to the normalized value (dst) or NULL if no conversion
984
 *         is needed.
985
 */
986
static xmlChar *
987
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
988
16.3k
{
989
16.3k
    if ((src == NULL) || (dst == NULL))
990
0
        return(NULL);
991
992
16.9k
    while (*src == 0x20) src++;
993
558k
    while (*src != 0) {
994
542k
  if (*src == 0x20) {
995
50.8k
      while (*src == 0x20) src++;
996
14.1k
      if (*src != 0)
997
12.5k
    *dst++ = 0x20;
998
528k
  } else {
999
528k
      *dst++ = *src++;
1000
528k
  }
1001
542k
    }
1002
16.3k
    *dst = 0;
1003
16.3k
    if (dst == src)
1004
14.5k
       return(NULL);
1005
1.72k
    return(dst);
1006
16.3k
}
1007
1008
/**
1009
 * Add a defaulted attribute for an element
1010
 *
1011
 * @param ctxt  an XML parser context
1012
 * @param fullname  the element fullname
1013
 * @param fullattr  the attribute fullname
1014
 * @param value  the attribute value
1015
 */
1016
static void
1017
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1018
               const xmlChar *fullname,
1019
               const xmlChar *fullattr,
1020
18.6k
               const xmlChar *value) {
1021
18.6k
    xmlDefAttrsPtr defaults;
1022
18.6k
    xmlDefAttr *attr;
1023
18.6k
    int len, expandedSize;
1024
18.6k
    xmlHashedString name;
1025
18.6k
    xmlHashedString prefix;
1026
18.6k
    xmlHashedString hvalue;
1027
18.6k
    const xmlChar *localname;
1028
1029
    /*
1030
     * Allows to detect attribute redefinitions
1031
     */
1032
18.6k
    if (ctxt->attsSpecial != NULL) {
1033
17.5k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1034
11.3k
      return;
1035
17.5k
    }
1036
1037
7.35k
    if (ctxt->attsDefault == NULL) {
1038
1.15k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1039
1.15k
  if (ctxt->attsDefault == NULL)
1040
1
      goto mem_error;
1041
1.15k
    }
1042
1043
    /*
1044
     * split the element name into prefix:localname , the string found
1045
     * are within the DTD and then not associated to namespace names.
1046
     */
1047
7.35k
    localname = xmlSplitQName3(fullname, &len);
1048
7.35k
    if (localname == NULL) {
1049
5.37k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1050
5.37k
  prefix.name = NULL;
1051
5.37k
    } else {
1052
1.98k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1053
1.98k
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1054
1.98k
        if (prefix.name == NULL)
1055
1
            goto mem_error;
1056
1.98k
    }
1057
7.35k
    if (name.name == NULL)
1058
1
        goto mem_error;
1059
1060
    /*
1061
     * make sure there is some storage
1062
     */
1063
7.35k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1064
7.35k
    if ((defaults == NULL) ||
1065
5.87k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1066
2.23k
        xmlDefAttrsPtr temp;
1067
2.23k
        int newSize;
1068
1069
2.23k
        if (defaults == NULL) {
1070
1.48k
            newSize = 4;
1071
1.48k
        } else {
1072
757
            if ((defaults->maxAttrs >= XML_MAX_ATTRS) ||
1073
757
                ((size_t) defaults->maxAttrs >
1074
757
                     SIZE_MAX / 2 / sizeof(temp[0]) - sizeof(*defaults)))
1075
0
                goto mem_error;
1076
1077
757
            if (defaults->maxAttrs > XML_MAX_ATTRS / 2)
1078
0
                newSize = XML_MAX_ATTRS;
1079
757
            else
1080
757
                newSize = defaults->maxAttrs * 2;
1081
757
        }
1082
2.23k
        temp = xmlRealloc(defaults,
1083
2.23k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1084
2.23k
  if (temp == NULL)
1085
2
      goto mem_error;
1086
2.23k
        if (defaults == NULL)
1087
1.47k
            temp->nbAttrs = 0;
1088
2.23k
  temp->maxAttrs = newSize;
1089
2.23k
        defaults = temp;
1090
2.23k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1091
2.23k
                          defaults, NULL) < 0) {
1092
1
      xmlFree(defaults);
1093
1
      goto mem_error;
1094
1
  }
1095
2.23k
    }
1096
1097
    /*
1098
     * Split the attribute name into prefix:localname , the string found
1099
     * are within the DTD and hen not associated to namespace names.
1100
     */
1101
7.34k
    localname = xmlSplitQName3(fullattr, &len);
1102
7.34k
    if (localname == NULL) {
1103
4.47k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1104
4.47k
  prefix.name = NULL;
1105
4.47k
    } else {
1106
2.87k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1107
2.87k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1108
2.87k
        if (prefix.name == NULL)
1109
1
            goto mem_error;
1110
2.87k
    }
1111
7.34k
    if (name.name == NULL)
1112
1
        goto mem_error;
1113
1114
    /* intern the string and precompute the end */
1115
7.34k
    len = strlen((const char *) value);
1116
7.34k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1117
7.34k
    if (hvalue.name == NULL)
1118
1
        goto mem_error;
1119
1120
7.34k
    expandedSize = strlen((const char *) name.name);
1121
7.34k
    if (prefix.name != NULL)
1122
2.86k
        expandedSize += strlen((const char *) prefix.name);
1123
7.34k
    expandedSize += len;
1124
1125
7.34k
    attr = &defaults->attrs[defaults->nbAttrs++];
1126
7.34k
    attr->name = name;
1127
7.34k
    attr->prefix = prefix;
1128
7.34k
    attr->value = hvalue;
1129
7.34k
    attr->valueEnd = hvalue.name + len;
1130
7.34k
    attr->external = PARSER_EXTERNAL(ctxt);
1131
7.34k
    attr->expandedSize = expandedSize;
1132
1133
7.34k
    return;
1134
1135
9
mem_error:
1136
9
    xmlErrMemory(ctxt);
1137
9
}
1138
1139
/**
1140
 * Register this attribute type
1141
 *
1142
 * @param ctxt  an XML parser context
1143
 * @param fullname  the element fullname
1144
 * @param fullattr  the attribute fullname
1145
 * @param type  the attribute type
1146
 */
1147
static void
1148
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1149
      const xmlChar *fullname,
1150
      const xmlChar *fullattr,
1151
      int type)
1152
22.8k
{
1153
22.8k
    if (ctxt->attsSpecial == NULL) {
1154
1.53k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1155
1.53k
  if (ctxt->attsSpecial == NULL)
1156
1
      goto mem_error;
1157
1.53k
    }
1158
1159
22.8k
    if (PARSER_EXTERNAL(ctxt))
1160
3.65k
        type |= XML_SPECIAL_EXTERNAL;
1161
1162
22.8k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1163
22.8k
                    XML_INT_TO_PTR(type)) < 0)
1164
1
        goto mem_error;
1165
22.8k
    return;
1166
1167
22.8k
mem_error:
1168
2
    xmlErrMemory(ctxt);
1169
2
}
1170
1171
/**
1172
 * Removes CDATA attributes from the special attribute table
1173
 */
1174
static void
1175
xmlCleanSpecialAttrCallback(void *payload, void *data,
1176
                            const xmlChar *fullname, const xmlChar *fullattr,
1177
9.94k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1178
9.94k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1179
1180
9.94k
    if (XML_PTR_TO_INT(payload) == XML_ATTRIBUTE_CDATA) {
1181
913
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1182
913
    }
1183
9.94k
}
1184
1185
/**
1186
 * Trim the list of attributes defined to remove all those of type
1187
 * CDATA as they are not special. This call should be done when finishing
1188
 * to parse the DTD and before starting to parse the document root.
1189
 *
1190
 * @param ctxt  an XML parser context
1191
 */
1192
static void
1193
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1194
8.16k
{
1195
8.16k
    if (ctxt->attsSpecial == NULL)
1196
6.63k
        return;
1197
1198
1.53k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1199
1200
1.53k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1201
75
        xmlHashFree(ctxt->attsSpecial, NULL);
1202
75
        ctxt->attsSpecial = NULL;
1203
75
    }
1204
1.53k
}
1205
1206
/**
1207
 * Checks that the value conforms to the LanguageID production:
1208
 *
1209
 * @deprecated Internal function, do not use.
1210
 *
1211
 * NOTE: this is somewhat deprecated, those productions were removed from
1212
 * the XML Second edition.
1213
 *
1214
 *     [33] LanguageID ::= Langcode ('-' Subcode)*
1215
 *     [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1216
 *     [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1217
 *     [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1218
 *     [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1219
 *     [38] Subcode ::= ([a-z] | [A-Z])+
1220
 *
1221
 * The current REC reference the successors of RFC 1766, currently 5646
1222
 *
1223
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1224
 *
1225
 *     langtag       = language
1226
 *                     ["-" script]
1227
 *                     ["-" region]
1228
 *                     *("-" variant)
1229
 *                     *("-" extension)
1230
 *                     ["-" privateuse]
1231
 *     language      = 2*3ALPHA            ; shortest ISO 639 code
1232
 *                     ["-" extlang]       ; sometimes followed by
1233
 *                                         ; extended language subtags
1234
 *                   / 4ALPHA              ; or reserved for future use
1235
 *                   / 5*8ALPHA            ; or registered language subtag
1236
 *
1237
 *     extlang       = 3ALPHA              ; selected ISO 639 codes
1238
 *                     *2("-" 3ALPHA)      ; permanently reserved
1239
 *
1240
 *     script        = 4ALPHA              ; ISO 15924 code
1241
 *
1242
 *     region        = 2ALPHA              ; ISO 3166-1 code
1243
 *                   / 3DIGIT              ; UN M.49 code
1244
 *
1245
 *     variant       = 5*8alphanum         ; registered variants
1246
 *                   / (DIGIT 3alphanum)
1247
 *
1248
 *     extension     = singleton 1*("-" (2*8alphanum))
1249
 *
1250
 *                                         ; Single alphanumerics
1251
 *                                         ; "x" reserved for private use
1252
 *     singleton     = DIGIT               ; 0 - 9
1253
 *                   / %x41-57             ; A - W
1254
 *                   / %x59-5A             ; Y - Z
1255
 *                   / %x61-77             ; a - w
1256
 *                   / %x79-7A             ; y - z
1257
 *
1258
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1259
 * The parser below doesn't try to cope with extension or privateuse
1260
 * that could be added but that's not interoperable anyway
1261
 *
1262
 * @param lang  pointer to the string value
1263
 * @returns 1 if correct 0 otherwise
1264
 **/
1265
int
1266
xmlCheckLanguageID(const xmlChar * lang)
1267
0
{
1268
0
    const xmlChar *cur = lang, *nxt;
1269
1270
0
    if (cur == NULL)
1271
0
        return (0);
1272
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1273
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1274
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1275
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1276
        /*
1277
         * Still allow IANA code and user code which were coming
1278
         * from the previous version of the XML-1.0 specification
1279
         * it's deprecated but we should not fail
1280
         */
1281
0
        cur += 2;
1282
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1283
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1284
0
            cur++;
1285
0
        return(cur[0] == 0);
1286
0
    }
1287
0
    nxt = cur;
1288
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1289
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1290
0
           nxt++;
1291
0
    if (nxt - cur >= 4) {
1292
        /*
1293
         * Reserved
1294
         */
1295
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1296
0
            return(0);
1297
0
        return(1);
1298
0
    }
1299
0
    if (nxt - cur < 2)
1300
0
        return(0);
1301
    /* we got an ISO 639 code */
1302
0
    if (nxt[0] == 0)
1303
0
        return(1);
1304
0
    if (nxt[0] != '-')
1305
0
        return(0);
1306
1307
0
    nxt++;
1308
0
    cur = nxt;
1309
    /* now we can have extlang or script or region or variant */
1310
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1311
0
        goto region_m49;
1312
1313
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1314
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1315
0
           nxt++;
1316
0
    if (nxt - cur == 4)
1317
0
        goto script;
1318
0
    if (nxt - cur == 2)
1319
0
        goto region;
1320
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1321
0
        goto variant;
1322
0
    if (nxt - cur != 3)
1323
0
        return(0);
1324
    /* we parsed an extlang */
1325
0
    if (nxt[0] == 0)
1326
0
        return(1);
1327
0
    if (nxt[0] != '-')
1328
0
        return(0);
1329
1330
0
    nxt++;
1331
0
    cur = nxt;
1332
    /* now we can have script or region or variant */
1333
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1334
0
        goto region_m49;
1335
1336
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1337
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1338
0
           nxt++;
1339
0
    if (nxt - cur == 2)
1340
0
        goto region;
1341
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1342
0
        goto variant;
1343
0
    if (nxt - cur != 4)
1344
0
        return(0);
1345
    /* we parsed a script */
1346
0
script:
1347
0
    if (nxt[0] == 0)
1348
0
        return(1);
1349
0
    if (nxt[0] != '-')
1350
0
        return(0);
1351
1352
0
    nxt++;
1353
0
    cur = nxt;
1354
    /* now we can have region or variant */
1355
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1356
0
        goto region_m49;
1357
1358
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1359
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1360
0
           nxt++;
1361
1362
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1363
0
        goto variant;
1364
0
    if (nxt - cur != 2)
1365
0
        return(0);
1366
    /* we parsed a region */
1367
0
region:
1368
0
    if (nxt[0] == 0)
1369
0
        return(1);
1370
0
    if (nxt[0] != '-')
1371
0
        return(0);
1372
1373
0
    nxt++;
1374
0
    cur = nxt;
1375
    /* now we can just have a variant */
1376
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1377
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1378
0
           nxt++;
1379
1380
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1381
0
        return(0);
1382
1383
    /* we parsed a variant */
1384
0
variant:
1385
0
    if (nxt[0] == 0)
1386
0
        return(1);
1387
0
    if (nxt[0] != '-')
1388
0
        return(0);
1389
    /* extensions and private use subtags not checked */
1390
0
    return (1);
1391
1392
0
region_m49:
1393
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1394
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1395
0
        nxt += 3;
1396
0
        goto region;
1397
0
    }
1398
0
    return(0);
1399
0
}
1400
1401
/************************************************************************
1402
 *                  *
1403
 *    Parser stacks related functions and macros    *
1404
 *                  *
1405
 ************************************************************************/
1406
1407
static xmlChar *
1408
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1409
1410
/**
1411
 * Create a new namespace database.
1412
 *
1413
 * @returns the new obejct.
1414
 */
1415
xmlParserNsData *
1416
47.3k
xmlParserNsCreate(void) {
1417
47.3k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1418
1419
47.3k
    if (nsdb == NULL)
1420
11
        return(NULL);
1421
47.2k
    memset(nsdb, 0, sizeof(*nsdb));
1422
47.2k
    nsdb->defaultNsIndex = INT_MAX;
1423
1424
47.2k
    return(nsdb);
1425
47.3k
}
1426
1427
/**
1428
 * Free a namespace database.
1429
 *
1430
 * @param nsdb  namespace database
1431
 */
1432
void
1433
47.2k
xmlParserNsFree(xmlParserNsData *nsdb) {
1434
47.2k
    if (nsdb == NULL)
1435
0
        return;
1436
1437
47.2k
    xmlFree(nsdb->extra);
1438
47.2k
    xmlFree(nsdb->hash);
1439
47.2k
    xmlFree(nsdb);
1440
47.2k
}
1441
1442
/**
1443
 * Reset a namespace database.
1444
 *
1445
 * @param nsdb  namespace database
1446
 */
1447
static void
1448
47.2k
xmlParserNsReset(xmlParserNsData *nsdb) {
1449
47.2k
    if (nsdb == NULL)
1450
0
        return;
1451
1452
47.2k
    nsdb->hashElems = 0;
1453
47.2k
    nsdb->elementId = 0;
1454
47.2k
    nsdb->defaultNsIndex = INT_MAX;
1455
1456
47.2k
    if (nsdb->hash)
1457
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1458
47.2k
}
1459
1460
/**
1461
 * Signal that a new element has started.
1462
 *
1463
 * @param nsdb  namespace database
1464
 * @returns 0 on success, -1 if the element counter overflowed.
1465
 */
1466
static int
1467
1.06M
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1468
1.06M
    if (nsdb->elementId == UINT_MAX)
1469
0
        return(-1);
1470
1.06M
    nsdb->elementId++;
1471
1472
1.06M
    return(0);
1473
1.06M
}
1474
1475
/**
1476
 * Lookup namespace with given prefix. If `bucketPtr` is non-NULL, it will
1477
 * be set to the matching bucket, or the first empty bucket if no match
1478
 * was found.
1479
 *
1480
 * @param ctxt  parser context
1481
 * @param prefix  namespace prefix
1482
 * @param bucketPtr  optional bucket (return value)
1483
 * @returns the namespace index on success, INT_MAX if no namespace was
1484
 * found.
1485
 */
1486
static int
1487
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1488
1.97M
                  xmlParserNsBucket **bucketPtr) {
1489
1.97M
    xmlParserNsBucket *bucket, *tombstone;
1490
1.97M
    unsigned index, hashValue;
1491
1492
1.97M
    if (prefix->name == NULL)
1493
516k
        return(ctxt->nsdb->defaultNsIndex);
1494
1495
1.46M
    if (ctxt->nsdb->hashSize == 0)
1496
45.3k
        return(INT_MAX);
1497
1498
1.41M
    hashValue = prefix->hashValue;
1499
1.41M
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1500
1.41M
    bucket = &ctxt->nsdb->hash[index];
1501
1.41M
    tombstone = NULL;
1502
1503
1.56M
    while (bucket->hashValue) {
1504
1.47M
        if (bucket->index == INT_MAX) {
1505
35.3k
            if (tombstone == NULL)
1506
25.3k
                tombstone = bucket;
1507
1.43M
        } else if (bucket->hashValue == hashValue) {
1508
1.32M
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1509
1.32M
                if (bucketPtr != NULL)
1510
266k
                    *bucketPtr = bucket;
1511
1.32M
                return(bucket->index);
1512
1.32M
            }
1513
1.32M
        }
1514
1515
146k
        index++;
1516
146k
        bucket++;
1517
146k
        if (index == ctxt->nsdb->hashSize) {
1518
26.7k
            index = 0;
1519
26.7k
            bucket = ctxt->nsdb->hash;
1520
26.7k
        }
1521
146k
    }
1522
1523
92.3k
    if (bucketPtr != NULL)
1524
36.8k
        *bucketPtr = tombstone ? tombstone : bucket;
1525
92.3k
    return(INT_MAX);
1526
1.41M
}
1527
1528
/**
1529
 * Lookup namespace URI with given prefix.
1530
 *
1531
 * @param ctxt  parser context
1532
 * @param prefix  namespace prefix
1533
 * @returns the namespace URI on success, NULL if no namespace was found.
1534
 */
1535
static const xmlChar *
1536
1.00M
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1537
1.00M
    const xmlChar *ret;
1538
1.00M
    int nsIndex;
1539
1540
1.00M
    if (prefix->name == ctxt->str_xml)
1541
472
        return(ctxt->str_xml_ns);
1542
1543
    /*
1544
     * minNsIndex is used when building an entity tree. We must
1545
     * ignore namespaces declared outside the entity.
1546
     */
1547
1.00M
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1548
1.00M
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1549
314k
        return(NULL);
1550
1551
691k
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1552
691k
    if (ret[0] == 0)
1553
511
        ret = NULL;
1554
691k
    return(ret);
1555
1.00M
}
1556
1557
/**
1558
 * Lookup extra data for the given prefix. This returns data stored
1559
 * with xmlParserNsUdpateSax.
1560
 *
1561
 * @param ctxt  parser context
1562
 * @param prefix  namespace prefix
1563
 * @returns the data on success, NULL if no namespace was found.
1564
 */
1565
void *
1566
576k
xmlParserNsLookupSax(xmlParserCtxt *ctxt, const xmlChar *prefix) {
1567
576k
    xmlHashedString hprefix;
1568
576k
    int nsIndex;
1569
1570
576k
    if (prefix == ctxt->str_xml)
1571
31.2k
        return(NULL);
1572
1573
545k
    hprefix.name = prefix;
1574
545k
    if (prefix != NULL)
1575
472k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1576
73.1k
    else
1577
73.1k
        hprefix.hashValue = 0;
1578
545k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1579
545k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1580
0
        return(NULL);
1581
1582
545k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1583
545k
}
1584
1585
/**
1586
 * Sets or updates extra data for the given prefix. This value will be
1587
 * returned by xmlParserNsLookupSax as long as the namespace with the
1588
 * given prefix is in scope.
1589
 *
1590
 * @param ctxt  parser context
1591
 * @param prefix  namespace prefix
1592
 * @param saxData  extra data for SAX handler
1593
 * @returns the data on success, NULL if no namespace was found.
1594
 */
1595
int
1596
xmlParserNsUpdateSax(xmlParserCtxt *ctxt, const xmlChar *prefix,
1597
42.6k
                     void *saxData) {
1598
42.6k
    xmlHashedString hprefix;
1599
42.6k
    int nsIndex;
1600
1601
42.6k
    if (prefix == ctxt->str_xml)
1602
0
        return(-1);
1603
1604
42.6k
    hprefix.name = prefix;
1605
42.6k
    if (prefix != NULL)
1606
36.4k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1607
6.12k
    else
1608
6.12k
        hprefix.hashValue = 0;
1609
42.6k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1610
42.6k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1611
0
        return(-1);
1612
1613
42.6k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1614
42.6k
    return(0);
1615
42.6k
}
1616
1617
/**
1618
 * Grows the namespace tables.
1619
 *
1620
 * @param ctxt  parser context
1621
 * @returns 0 on success, -1 if a memory allocation failed.
1622
 */
1623
static int
1624
44.2k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1625
44.2k
    const xmlChar **table;
1626
44.2k
    xmlParserNsExtra *extra;
1627
44.2k
    int newSize;
1628
1629
44.2k
    newSize = xmlGrowCapacity(ctxt->nsMax,
1630
44.2k
                              sizeof(table[0]) + sizeof(extra[0]),
1631
44.2k
                              16, XML_MAX_ITEMS);
1632
44.2k
    if (newSize < 0)
1633
0
        goto error;
1634
1635
44.2k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1636
44.2k
    if (table == NULL)
1637
3
        goto error;
1638
44.2k
    ctxt->nsTab = table;
1639
1640
44.2k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1641
44.2k
    if (extra == NULL)
1642
3
        goto error;
1643
44.2k
    ctxt->nsdb->extra = extra;
1644
1645
44.2k
    ctxt->nsMax = newSize;
1646
44.2k
    return(0);
1647
1648
6
error:
1649
6
    xmlErrMemory(ctxt);
1650
6
    return(-1);
1651
44.2k
}
1652
1653
/**
1654
 * Push a new namespace on the table.
1655
 *
1656
 * @param ctxt  parser context
1657
 * @param prefix  prefix with hash value
1658
 * @param uri  uri with hash value
1659
 * @param saxData  extra data for SAX handler
1660
 * @param defAttr  whether the namespace comes from a default attribute
1661
 * @returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1662
 * -1 if a memory allocation failed.
1663
 */
1664
static int
1665
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1666
214k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1667
214k
    xmlParserNsBucket *bucket = NULL;
1668
214k
    xmlParserNsExtra *extra;
1669
214k
    const xmlChar **ns;
1670
214k
    unsigned hashValue, nsIndex, oldIndex;
1671
1672
214k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1673
216
        return(0);
1674
1675
214k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1676
6
        xmlErrMemory(ctxt);
1677
6
        return(-1);
1678
6
    }
1679
1680
    /*
1681
     * Default namespace and 'xml' namespace
1682
     */
1683
214k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1684
50.0k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1685
1686
50.0k
        if (oldIndex != INT_MAX) {
1687
41.8k
            extra = &ctxt->nsdb->extra[oldIndex];
1688
1689
41.8k
            if (extra->elementId == ctxt->nsdb->elementId) {
1690
2.15k
                if (defAttr == 0)
1691
1.94k
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1692
2.15k
                return(0);
1693
2.15k
            }
1694
1695
39.7k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1696
0
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1697
0
                return(0);
1698
39.7k
        }
1699
1700
47.8k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1701
47.8k
        goto populate_entry;
1702
50.0k
    }
1703
1704
    /*
1705
     * Hash table lookup
1706
     */
1707
164k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1708
164k
    if (oldIndex != INT_MAX) {
1709
106k
        extra = &ctxt->nsdb->extra[oldIndex];
1710
1711
        /*
1712
         * Check for duplicate definitions on the same element.
1713
         */
1714
106k
        if (extra->elementId == ctxt->nsdb->elementId) {
1715
847
            if (defAttr == 0)
1716
390
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1717
847
            return(0);
1718
847
        }
1719
1720
105k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1721
0
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1722
0
            return(0);
1723
1724
105k
        bucket->index = ctxt->nsNr;
1725
105k
        goto populate_entry;
1726
105k
    }
1727
1728
    /*
1729
     * Insert new bucket
1730
     */
1731
1732
57.6k
    hashValue = prefix->hashValue;
1733
1734
    /*
1735
     * Grow hash table, 50% fill factor
1736
     */
1737
57.6k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1738
22.5k
        xmlParserNsBucket *newHash;
1739
22.5k
        unsigned newSize, i, index;
1740
1741
22.5k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1742
0
            xmlErrMemory(ctxt);
1743
0
            return(-1);
1744
0
        }
1745
22.5k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1746
22.5k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1747
22.5k
        if (newHash == NULL) {
1748
1
            xmlErrMemory(ctxt);
1749
1
            return(-1);
1750
1
        }
1751
22.5k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1752
1753
105k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1754
83.1k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1755
83.1k
            unsigned newIndex;
1756
1757
83.1k
            if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1758
75.2k
                continue;
1759
7.95k
            newIndex = hv & (newSize - 1);
1760
1761
11.5k
            while (newHash[newIndex].hashValue != 0) {
1762
3.63k
                newIndex++;
1763
3.63k
                if (newIndex == newSize)
1764
1.59k
                    newIndex = 0;
1765
3.63k
            }
1766
1767
7.95k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1768
7.95k
        }
1769
1770
22.5k
        xmlFree(ctxt->nsdb->hash);
1771
22.5k
        ctxt->nsdb->hash = newHash;
1772
22.5k
        ctxt->nsdb->hashSize = newSize;
1773
1774
        /*
1775
         * Relookup
1776
         */
1777
22.5k
        index = hashValue & (newSize - 1);
1778
1779
25.3k
        while (newHash[index].hashValue != 0) {
1780
2.74k
            index++;
1781
2.74k
            if (index == newSize)
1782
818
                index = 0;
1783
2.74k
        }
1784
1785
22.5k
        bucket = &newHash[index];
1786
22.5k
    }
1787
1788
57.6k
    bucket->hashValue = hashValue;
1789
57.6k
    bucket->index = ctxt->nsNr;
1790
57.6k
    ctxt->nsdb->hashElems++;
1791
57.6k
    oldIndex = INT_MAX;
1792
1793
211k
populate_entry:
1794
211k
    nsIndex = ctxt->nsNr;
1795
1796
211k
    ns = &ctxt->nsTab[nsIndex * 2];
1797
211k
    ns[0] = prefix ? prefix->name : NULL;
1798
211k
    ns[1] = uri->name;
1799
1800
211k
    extra = &ctxt->nsdb->extra[nsIndex];
1801
211k
    extra->saxData = saxData;
1802
211k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1803
211k
    extra->uriHashValue = uri->hashValue;
1804
211k
    extra->elementId = ctxt->nsdb->elementId;
1805
211k
    extra->oldIndex = oldIndex;
1806
1807
211k
    ctxt->nsNr++;
1808
1809
211k
    return(1);
1810
57.6k
}
1811
1812
/**
1813
 * Pops the top `nr` namespaces and restores the hash table.
1814
 *
1815
 * @param ctxt  an XML parser context
1816
 * @param nr  the number to pop
1817
 * @returns the number of namespaces popped.
1818
 */
1819
static int
1820
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1821
86.9k
{
1822
86.9k
    int i;
1823
1824
    /* assert(nr <= ctxt->nsNr); */
1825
1826
294k
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1827
207k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1828
207k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1829
1830
207k
        if (prefix == NULL) {
1831
47.2k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1832
159k
        } else {
1833
159k
            xmlHashedString hprefix;
1834
159k
            xmlParserNsBucket *bucket = NULL;
1835
1836
159k
            hprefix.name = prefix;
1837
159k
            hprefix.hashValue = extra->prefixHashValue;
1838
159k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1839
            /* assert(bucket && bucket->hashValue); */
1840
159k
            bucket->index = extra->oldIndex;
1841
159k
        }
1842
207k
    }
1843
1844
86.9k
    ctxt->nsNr -= nr;
1845
86.9k
    return(nr);
1846
86.9k
}
1847
1848
static int
1849
48.0k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt) {
1850
48.0k
    const xmlChar **atts;
1851
48.0k
    unsigned *attallocs;
1852
48.0k
    int newSize;
1853
1854
48.0k
    newSize = xmlGrowCapacity(ctxt->maxatts / 5,
1855
48.0k
                              sizeof(atts[0]) * 5 + sizeof(attallocs[0]),
1856
48.0k
                              10, XML_MAX_ATTRS);
1857
48.0k
    if (newSize < 0) {
1858
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
1859
0
                    "Maximum number of attributes exceeded");
1860
0
        return(-1);
1861
0
    }
1862
1863
48.0k
    atts = xmlRealloc(ctxt->atts, newSize * sizeof(atts[0]) * 5);
1864
48.0k
    if (atts == NULL)
1865
3
        goto mem_error;
1866
47.9k
    ctxt->atts = atts;
1867
1868
47.9k
    attallocs = xmlRealloc(ctxt->attallocs,
1869
47.9k
                           newSize * sizeof(attallocs[0]));
1870
47.9k
    if (attallocs == NULL)
1871
4
        goto mem_error;
1872
47.9k
    ctxt->attallocs = attallocs;
1873
1874
47.9k
    ctxt->maxatts = newSize * 5;
1875
1876
47.9k
    return(0);
1877
1878
7
mem_error:
1879
7
    xmlErrMemory(ctxt);
1880
7
    return(-1);
1881
47.9k
}
1882
1883
/**
1884
 * Pushes a new parser input on top of the input stack
1885
 *
1886
 * @param ctxt  an XML parser context
1887
 * @param value  the parser input
1888
 * @returns -1 in case of error, the index in the stack otherwise
1889
 */
1890
int
1891
xmlCtxtPushInput(xmlParserCtxt *ctxt, xmlParserInput *value)
1892
152k
{
1893
152k
    char *directory = NULL;
1894
152k
    int maxDepth;
1895
1896
152k
    if ((ctxt == NULL) || (value == NULL))
1897
3.15k
        return(-1);
1898
1899
149k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
1900
1901
149k
    if (ctxt->inputNr >= ctxt->inputMax) {
1902
4.87k
        xmlParserInputPtr *tmp;
1903
4.87k
        int newSize;
1904
1905
4.87k
        newSize = xmlGrowCapacity(ctxt->inputMax, sizeof(tmp[0]),
1906
4.87k
                                  5, maxDepth);
1907
4.87k
        if (newSize < 0) {
1908
1
            xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
1909
1
                           "Maximum entity nesting depth exceeded");
1910
1
            return(-1);
1911
1
        }
1912
4.86k
        tmp = xmlRealloc(ctxt->inputTab, newSize * sizeof(tmp[0]));
1913
4.86k
        if (tmp == NULL) {
1914
2
            xmlErrMemory(ctxt);
1915
2
            return(-1);
1916
2
        }
1917
4.86k
        ctxt->inputTab = tmp;
1918
4.86k
        ctxt->inputMax = newSize;
1919
4.86k
    }
1920
1921
149k
    if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1922
83.2k
        directory = xmlParserGetDirectory(value->filename);
1923
83.2k
        if (directory == NULL) {
1924
7
            xmlErrMemory(ctxt);
1925
7
            return(-1);
1926
7
        }
1927
83.2k
    }
1928
1929
149k
    if (ctxt->input_id >= INT_MAX) {
1930
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1931
0
        return(-1);
1932
0
    }
1933
1934
149k
    ctxt->inputTab[ctxt->inputNr] = value;
1935
149k
    ctxt->input = value;
1936
1937
149k
    if (ctxt->inputNr == 0) {
1938
83.2k
        xmlFree(ctxt->directory);
1939
83.2k
        ctxt->directory = directory;
1940
83.2k
    }
1941
1942
    /*
1943
     * The input ID is unused internally, but there are entity
1944
     * loaders in downstream code that detect the main document
1945
     * by checking for "input_id == 1".
1946
     */
1947
149k
    value->id = ctxt->input_id++;
1948
1949
149k
    return(ctxt->inputNr++);
1950
149k
}
1951
1952
/**
1953
 * Pops the top parser input from the input stack
1954
 *
1955
 * @param ctxt  an XML parser context
1956
 * @returns the input just removed
1957
 */
1958
xmlParserInput *
1959
xmlCtxtPopInput(xmlParserCtxt *ctxt)
1960
291k
{
1961
291k
    xmlParserInputPtr ret;
1962
1963
291k
    if (ctxt == NULL)
1964
0
        return(NULL);
1965
291k
    if (ctxt->inputNr <= 0)
1966
142k
        return (NULL);
1967
149k
    ctxt->inputNr--;
1968
149k
    if (ctxt->inputNr > 0)
1969
66.3k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1970
83.2k
    else
1971
83.2k
        ctxt->input = NULL;
1972
149k
    ret = ctxt->inputTab[ctxt->inputNr];
1973
149k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1974
149k
    return (ret);
1975
291k
}
1976
1977
/**
1978
 * Pushes a new element node on top of the node stack
1979
 *
1980
 * @deprecated Internal function, do not use.
1981
 *
1982
 * @param ctxt  an XML parser context
1983
 * @param value  the element node
1984
 * @returns -1 in case of error, the index in the stack otherwise
1985
 */
1986
int
1987
nodePush(xmlParserCtxt *ctxt, xmlNode *value)
1988
803k
{
1989
803k
    if (ctxt == NULL)
1990
0
        return(0);
1991
1992
803k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1993
61.5k
        int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
1994
61.5k
        xmlNodePtr *tmp;
1995
61.5k
        int newSize;
1996
1997
61.5k
        newSize = xmlGrowCapacity(ctxt->nodeMax, sizeof(tmp[0]),
1998
61.5k
                                  10, maxDepth);
1999
61.5k
        if (newSize < 0) {
2000
3
            xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2001
3
                    "Excessive depth in document: %d,"
2002
3
                    " use XML_PARSE_HUGE option\n",
2003
3
                    ctxt->nodeNr);
2004
3
            return(-1);
2005
3
        }
2006
2007
61.5k
  tmp = xmlRealloc(ctxt->nodeTab, newSize * sizeof(tmp[0]));
2008
61.5k
        if (tmp == NULL) {
2009
9
            xmlErrMemory(ctxt);
2010
9
            return (-1);
2011
9
        }
2012
61.5k
        ctxt->nodeTab = tmp;
2013
61.5k
  ctxt->nodeMax = newSize;
2014
61.5k
    }
2015
2016
803k
    ctxt->nodeTab[ctxt->nodeNr] = value;
2017
803k
    ctxt->node = value;
2018
803k
    return (ctxt->nodeNr++);
2019
803k
}
2020
2021
/**
2022
 * Pops the top element node from the node stack
2023
 *
2024
 * @deprecated Internal function, do not use.
2025
 *
2026
 * @param ctxt  an XML parser context
2027
 * @returns the node just removed
2028
 */
2029
xmlNode *
2030
nodePop(xmlParserCtxt *ctxt)
2031
864k
{
2032
864k
    xmlNodePtr ret;
2033
2034
864k
    if (ctxt == NULL) return(NULL);
2035
864k
    if (ctxt->nodeNr <= 0)
2036
64.2k
        return (NULL);
2037
800k
    ctxt->nodeNr--;
2038
800k
    if (ctxt->nodeNr > 0)
2039
779k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2040
21.1k
    else
2041
21.1k
        ctxt->node = NULL;
2042
800k
    ret = ctxt->nodeTab[ctxt->nodeNr];
2043
800k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2044
800k
    return (ret);
2045
864k
}
2046
2047
/**
2048
 * Pushes a new element name/prefix/URL on top of the name stack
2049
 *
2050
 * @param ctxt  an XML parser context
2051
 * @param value  the element name
2052
 * @param prefix  the element prefix
2053
 * @param URI  the element namespace name
2054
 * @param line  the current line number for error messages
2055
 * @param nsNr  the number of namespaces pushed on the namespace table
2056
 * @returns -1 in case of error, the index in the stack otherwise
2057
 */
2058
static int
2059
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2060
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2061
1.00M
{
2062
1.00M
    xmlStartTag *tag;
2063
2064
1.00M
    if (ctxt->nameNr >= ctxt->nameMax) {
2065
80.9k
        const xmlChar **tmp;
2066
80.9k
        xmlStartTag *tmp2;
2067
80.9k
        int newSize;
2068
2069
80.9k
        newSize = xmlGrowCapacity(ctxt->nameMax,
2070
80.9k
                                  sizeof(tmp[0]) + sizeof(tmp2[0]),
2071
80.9k
                                  10, XML_MAX_ITEMS);
2072
80.9k
        if (newSize < 0)
2073
0
            goto mem_error;
2074
2075
80.9k
        tmp = xmlRealloc(ctxt->nameTab, newSize * sizeof(tmp[0]));
2076
80.9k
        if (tmp == NULL)
2077
2
      goto mem_error;
2078
80.9k
  ctxt->nameTab = tmp;
2079
2080
80.9k
        tmp2 = xmlRealloc(ctxt->pushTab, newSize * sizeof(tmp2[0]));
2081
80.9k
        if (tmp2 == NULL)
2082
4
      goto mem_error;
2083
80.9k
  ctxt->pushTab = tmp2;
2084
2085
80.9k
        ctxt->nameMax = newSize;
2086
927k
    } else if (ctxt->pushTab == NULL) {
2087
31.6k
        ctxt->pushTab = xmlMalloc(ctxt->nameMax * sizeof(ctxt->pushTab[0]));
2088
31.6k
        if (ctxt->pushTab == NULL)
2089
14
            goto mem_error;
2090
31.6k
    }
2091
1.00M
    ctxt->nameTab[ctxt->nameNr] = value;
2092
1.00M
    ctxt->name = value;
2093
1.00M
    tag = &ctxt->pushTab[ctxt->nameNr];
2094
1.00M
    tag->prefix = prefix;
2095
1.00M
    tag->URI = URI;
2096
1.00M
    tag->line = line;
2097
1.00M
    tag->nsNr = nsNr;
2098
1.00M
    return (ctxt->nameNr++);
2099
20
mem_error:
2100
20
    xmlErrMemory(ctxt);
2101
20
    return (-1);
2102
1.00M
}
2103
#ifdef LIBXML_PUSH_ENABLED
/**
 * Removes the topmost element name from the name stack and makes the
 * name below it (if any) the current name.
 *
 * @param ctxt  an XML parser context
 * @returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    /* Clear the vacated slot so no stale pointer stays in the table. */
    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2127
2128
/**
2129
 * Pops the top element name from the name stack
2130
 *
2131
 * @deprecated Internal function, do not use.
2132
 *
2133
 * @param ctxt  an XML parser context
2134
 * @returns the name just removed
2135
 */
2136
static const xmlChar *
2137
namePop(xmlParserCtxtPtr ctxt)
2138
1.00M
{
2139
1.00M
    const xmlChar *ret;
2140
2141
1.00M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2142
8
        return (NULL);
2143
1.00M
    ctxt->nameNr--;
2144
1.00M
    if (ctxt->nameNr > 0)
2145
976k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2146
23.3k
    else
2147
23.3k
        ctxt->name = NULL;
2148
1.00M
    ret = ctxt->nameTab[ctxt->nameNr];
2149
1.00M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2150
1.00M
    return (ret);
2151
1.00M
}
2152
2153
1.06M
/*
 * Pushes `val` on the context's space stack, growing spaceTab when it
 * is full, and points ctxt->space at the new top entry.
 * Returns the index of the new entry, or -1 after raising a memory
 * error on ctxt.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;
        int capacity;

        capacity = xmlGrowCapacity(ctxt->spaceMax, sizeof(grown[0]),
                                   10, XML_MAX_ITEMS);
        grown = NULL;
        if (capacity >= 0)
            grown = xmlRealloc(ctxt->spaceTab, capacity * sizeof(grown[0]));
        if (grown == NULL) {
            /* Either the capacity limit was hit or realloc failed. */
            xmlErrMemory(ctxt);
            return(-1);
        }

        ctxt->spaceTab = grown;
        ctxt->spaceMax = capacity;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
2178
2179
1.06M
/*
 * Pops the top entry of the context's space stack and returns its
 * value; returns 0 when the stack is empty. ctxt->space is moved to the
 * entry below, or to slot 0 when the stack becomes empty, and the
 * vacated slot is overwritten with -1.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2191
2192
/*
2193
 * Macros for accessing the content. Those should be used only by the parser,
2194
 * and not exported.
2195
 *
2196
 * Dirty macros, i.e. one often need to make assumption on the context to
2197
 * use them
2198
 *
2199
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2200
 *           To be used with extreme caution since operations consuming
2201
 *           characters may move the input buffer to a different location !
2202
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2203
 *           This should be used internally by the parser
2204
 *           only to compare to ASCII values otherwise it would break when
2205
 *           running with UTF-8 encoding.
2206
 *   RAW     same as CUR but in the input buffer, bypass any token
2207
 *           extraction that may have been done
2208
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
2209
 *           to compare on ASCII based substring.
2210
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2211
 *           strings without newlines within the parser.
2212
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2213
 *           defined char within the parser.
2214
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2215
 *
2216
 *   NEXT    Skip to the next character, this does the proper decoding
2217
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2218
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2219
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2220
 *            the index
2221
 *   GROW, SHRINK  handling of input buffers
2222
 */
2223
2224
12.5M
/*
 * Accessors for the current input position. All of these expect a
 * variable named `ctxt` to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn) is true when the n bytes starting at `s` equal
 * c1..cn. Every argument is parenthesized so that arbitrary expressions
 * can be passed; note that `s` is still evaluated once per compared
 * byte, so it must be free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/*
 * Advance by `val` bytes and the same number of columns, then grow the
 * input if the terminating 0 was reached. `val` is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * Like SKIP, but advances byte by byte and updates line/col when a
 * newline is crossed.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * SHRINK, GROW and NEXT1 keep their historical statement shape (a bare
 * `if` or block rather than do/while(0)) because call sites outside
 * this section may depend on it; always use them as a full statement
 * inside braces.
 */
#define SHRINK \
    if (!PARSER_PROGRESSIVE(ctxt)) \
	xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);						\
    }

/*
 * Skip the current character, which is `l` bytes long, updating
 * line/col by one character.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)

/*
 * Append code point `v` to buffer `b` at index `i` and advance `i`.
 * `i` must be a side-effect-free lvalue.
 */
#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2298
2299
static int
2300
28.5M
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2301
28.5M
    int c = xmlCurrentChar(ctxt, len);
2302
2303
28.5M
    if (c == XML_INVALID_CHAR)
2304
1.72M
        c = 0xFFFD; /* replacement character */
2305
2306
28.5M
    return(c);
2307
28.5M
}
2308
2309
/**
2310
 * Skip whitespace in the input stream.
2311
 *
2312
 * @deprecated Internal function, do not use.
2313
 *
2314
 * @param ctxt  the XML parser context
2315
 * @returns the number of space chars skipped
2316
 */
2317
int
2318
4.89M
xmlSkipBlankChars(xmlParserCtxt *ctxt) {
2319
4.89M
    const xmlChar *cur;
2320
4.89M
    int res = 0;
2321
2322
4.89M
    cur = ctxt->input->cur;
2323
4.89M
    while (IS_BLANK_CH(*cur)) {
2324
2.10M
        if (*cur == '\n') {
2325
312k
            ctxt->input->line++; ctxt->input->col = 1;
2326
1.79M
        } else {
2327
1.79M
            ctxt->input->col++;
2328
1.79M
        }
2329
2.10M
        cur++;
2330
2.10M
        if (res < INT_MAX)
2331
2.10M
            res++;
2332
2.10M
        if (*cur == 0) {
2333
16.3k
            ctxt->input->cur = cur;
2334
16.3k
            xmlParserGrow(ctxt);
2335
16.3k
            cur = ctxt->input->cur;
2336
16.3k
        }
2337
2.10M
    }
2338
4.89M
    ctxt->input->cur = cur;
2339
2340
4.89M
    if (res > 4)
2341
34.4k
        GROW;
2342
2343
4.89M
    return(res);
2344
4.89M
}
2345
2346
static void
2347
63.9k
xmlPopPE(xmlParserCtxtPtr ctxt) {
2348
63.9k
    unsigned long consumed;
2349
63.9k
    xmlEntityPtr ent;
2350
2351
63.9k
    ent = ctxt->input->entity;
2352
2353
63.9k
    ent->flags &= ~XML_ENT_EXPANDING;
2354
2355
63.9k
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2356
3.46k
        int result;
2357
2358
        /*
2359
         * Read the rest of the stream in case of errors. We want
2360
         * to account for the whole entity size.
2361
         */
2362
3.54k
        do {
2363
3.54k
            ctxt->input->cur = ctxt->input->end;
2364
3.54k
            xmlParserShrink(ctxt);
2365
3.54k
            result = xmlParserGrow(ctxt);
2366
3.54k
        } while (result > 0);
2367
2368
3.46k
        consumed = ctxt->input->consumed;
2369
3.46k
        xmlSaturatedAddSizeT(&consumed,
2370
3.46k
                             ctxt->input->end - ctxt->input->base);
2371
2372
3.46k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2373
2374
        /*
2375
         * Add to sizeentities when parsing an external entity
2376
         * for the first time.
2377
         */
2378
3.46k
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2379
2.96k
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2380
2.96k
        }
2381
2382
3.46k
        ent->flags |= XML_ENT_CHECKED;
2383
3.46k
    }
2384
2385
63.9k
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
2386
2387
63.9k
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2388
2389
63.9k
    GROW;
2390
63.9k
}
2391
2392
/**
2393
 * Skip whitespace in the input stream, also handling parameter
2394
 * entities.
2395
 *
2396
 * @param ctxt  the XML parser context
2397
 * @returns the number of space chars skipped
2398
 */
2399
static int
2400
379k
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2401
379k
    int res = 0;
2402
379k
    int inParam;
2403
379k
    int expandParam;
2404
2405
379k
    inParam = PARSER_IN_PE(ctxt);
2406
379k
    expandParam = PARSER_EXTERNAL(ctxt);
2407
2408
379k
    if (!inParam && !expandParam)
2409
119k
        return(xmlSkipBlankChars(ctxt));
2410
2411
    /*
2412
     * It's Okay to use CUR/NEXT here since all the blanks are on
2413
     * the ASCII range.
2414
     */
2415
526k
    while (PARSER_STOPPED(ctxt) == 0) {
2416
526k
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2417
259k
            NEXT;
2418
266k
        } else if (CUR == '%') {
2419
22.2k
            if ((expandParam == 0) ||
2420
22.2k
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2421
17.3k
                break;
2422
2423
            /*
2424
             * Expand parameter entity. We continue to consume
2425
             * whitespace at the start of the entity and possible
2426
             * even consume the whole entity and pop it. We might
2427
             * even pop multiple PEs in this loop.
2428
             */
2429
4.95k
            xmlParsePERefInternal(ctxt, 0);
2430
2431
4.95k
            inParam = PARSER_IN_PE(ctxt);
2432
4.95k
            expandParam = PARSER_EXTERNAL(ctxt);
2433
244k
        } else if (CUR == 0) {
2434
30.2k
            if (inParam == 0)
2435
1
                break;
2436
2437
            /*
2438
             * Don't pop parameter entities that start a markup
2439
             * declaration to detect Well-formedness constraint:
2440
             * PE Between Declarations.
2441
             */
2442
30.2k
            if (ctxt->input->flags & XML_INPUT_MARKUP_DECL)
2443
28.4k
                break;
2444
2445
1.86k
            xmlPopPE(ctxt);
2446
2447
1.86k
            inParam = PARSER_IN_PE(ctxt);
2448
1.86k
            expandParam = PARSER_EXTERNAL(ctxt);
2449
214k
        } else {
2450
214k
            break;
2451
214k
        }
2452
2453
        /*
2454
         * Also increase the counter when entering or exiting a PERef.
2455
         * The spec says: "When a parameter-entity reference is recognized
2456
         * in the DTD and included, its replacement text MUST be enlarged
2457
         * by the attachment of one leading and one following space (#x20)
2458
         * character."
2459
         */
2460
266k
        if (res < INT_MAX)
2461
266k
            res++;
2462
266k
    }
2463
2464
260k
    return(res);
2465
379k
}
2466
2467
/************************************************************************
2468
 *                  *
2469
 *    Commodity functions to handle entities      *
2470
 *                  *
2471
 ************************************************************************/
2472
2473
/**
2474
 * @deprecated Internal function, don't use.
2475
 *
2476
 * @param ctxt  an XML parser context
2477
 * @returns the current xmlChar in the parser context
2478
 */
2479
xmlChar
2480
0
xmlPopInput(xmlParserCtxt *ctxt) {
2481
0
    xmlParserInputPtr input;
2482
2483
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2484
0
    input = xmlCtxtPopInput(ctxt);
2485
0
    xmlFreeInputStream(input);
2486
0
    if (*ctxt->input->cur == 0)
2487
0
        xmlParserGrow(ctxt);
2488
0
    return(CUR);
2489
0
}
2490
2491
/**
2492
 * Push an input stream onto the stack.
2493
 *
2494
 * @deprecated Internal function, don't use.
2495
 *
2496
 * @param ctxt  an XML parser context
2497
 * @param input  an XML parser input fragment (entity, XML fragment ...).
2498
 * @returns -1 in case of error or the index in the input stack
2499
 */
2500
int
2501
0
xmlPushInput(xmlParserCtxt *ctxt, xmlParserInput *input) {
2502
0
    int ret;
2503
2504
0
    if ((ctxt == NULL) || (input == NULL))
2505
0
        return(-1);
2506
2507
0
    ret = xmlCtxtPushInput(ctxt, input);
2508
0
    if (ret >= 0)
2509
0
        GROW;
2510
0
    return(ret);
2511
0
}
2512
2513
/**
2514
 * Parse a numeric character reference. Always consumes '&'.
2515
 *
2516
 * @deprecated Internal function, don't use.
2517
 *
2518
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2519
 *                      '&#x' [0-9a-fA-F]+ ';'
2520
 *
2521
 * [ WFC: Legal Character ]
2522
 * Characters referred to using character references must match the
2523
 * production for Char.
2524
 *
2525
 * @param ctxt  an XML parser context
2526
 * @returns the value parsed (as an int), 0 in case of error
2527
 */
2528
int
2529
54.2k
xmlParseCharRef(xmlParserCtxt *ctxt) {
2530
54.2k
    int val = 0;
2531
54.2k
    int count = 0;
2532
2533
    /*
2534
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2535
     */
2536
54.2k
    if ((RAW == '&') && (NXT(1) == '#') &&
2537
54.2k
        (NXT(2) == 'x')) {
2538
31.1k
  SKIP(3);
2539
31.1k
  GROW;
2540
102k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2541
74.4k
      if (count++ > 20) {
2542
901
    count = 0;
2543
901
    GROW;
2544
901
      }
2545
74.4k
      if ((RAW >= '0') && (RAW <= '9'))
2546
48.9k
          val = val * 16 + (CUR - '0');
2547
25.5k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2548
6.71k
          val = val * 16 + (CUR - 'a') + 10;
2549
18.8k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2550
15.6k
          val = val * 16 + (CUR - 'A') + 10;
2551
3.16k
      else {
2552
3.16k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2553
3.16k
    val = 0;
2554
3.16k
    break;
2555
3.16k
      }
2556
71.2k
      if (val > 0x110000)
2557
11.2k
          val = 0x110000;
2558
2559
71.2k
      NEXT;
2560
71.2k
      count++;
2561
71.2k
  }
2562
31.1k
  if (RAW == ';') {
2563
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2564
27.9k
      ctxt->input->col++;
2565
27.9k
      ctxt->input->cur++;
2566
27.9k
  }
2567
31.1k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2568
23.1k
  SKIP(2);
2569
23.1k
  GROW;
2570
74.9k
  while (RAW != ';') { /* loop blocked by count */
2571
54.4k
      if (count++ > 20) {
2572
1.83k
    count = 0;
2573
1.83k
    GROW;
2574
1.83k
      }
2575
54.4k
      if ((RAW >= '0') && (RAW <= '9'))
2576
51.7k
          val = val * 10 + (CUR - '0');
2577
2.68k
      else {
2578
2.68k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2579
2.68k
    val = 0;
2580
2.68k
    break;
2581
2.68k
      }
2582
51.7k
      if (val > 0x110000)
2583
18.5k
          val = 0x110000;
2584
2585
51.7k
      NEXT;
2586
51.7k
      count++;
2587
51.7k
  }
2588
23.1k
  if (RAW == ';') {
2589
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2590
20.4k
      ctxt->input->col++;
2591
20.4k
      ctxt->input->cur++;
2592
20.4k
  }
2593
23.1k
    } else {
2594
0
        if (RAW == '&')
2595
0
            SKIP(1);
2596
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2597
0
    }
2598
2599
    /*
2600
     * [ WFC: Legal Character ]
2601
     * Characters referred to using character references must match the
2602
     * production for Char.
2603
     */
2604
54.2k
    if (val >= 0x110000) {
2605
546
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2606
546
                "xmlParseCharRef: character reference out of bounds\n",
2607
546
          val);
2608
546
        val = 0xFFFD;
2609
53.7k
    } else if (!IS_CHAR(val)) {
2610
6.94k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2611
6.94k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2612
6.94k
                    val);
2613
6.94k
    }
2614
54.2k
    return(val);
2615
54.2k
}
2616
2617
/**
2618
 * Parse Reference declarations, variant parsing from a string rather
2619
 * than an an input flow.
2620
 *
2621
 *     [66] CharRef ::= '&#' [0-9]+ ';' |
2622
 *                      '&#x' [0-9a-fA-F]+ ';'
2623
 *
2624
 * [ WFC: Legal Character ]
2625
 * Characters referred to using character references must match the
2626
 * production for Char.
2627
 *
2628
 * @param ctxt  an XML parser context
2629
 * @param str  a pointer to an index in the string
2630
 * @returns the value parsed (as an int), 0 in case of error, str will be
2631
 *         updated to the current value of the index
2632
 */
2633
static int
2634
38.3k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2635
38.3k
    const xmlChar *ptr;
2636
38.3k
    xmlChar cur;
2637
38.3k
    int val = 0;
2638
2639
38.3k
    if ((str == NULL) || (*str == NULL)) return(0);
2640
38.3k
    ptr = *str;
2641
38.3k
    cur = *ptr;
2642
38.3k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2643
13.1k
  ptr += 3;
2644
13.1k
  cur = *ptr;
2645
44.4k
  while (cur != ';') { /* Non input consuming loop */
2646
32.6k
      if ((cur >= '0') && (cur <= '9'))
2647
23.7k
          val = val * 16 + (cur - '0');
2648
8.89k
      else if ((cur >= 'a') && (cur <= 'f'))
2649
1.24k
          val = val * 16 + (cur - 'a') + 10;
2650
7.64k
      else if ((cur >= 'A') && (cur <= 'F'))
2651
6.29k
          val = val * 16 + (cur - 'A') + 10;
2652
1.35k
      else {
2653
1.35k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2654
1.35k
    val = 0;
2655
1.35k
    break;
2656
1.35k
      }
2657
31.3k
      if (val > 0x110000)
2658
1.36k
          val = 0x110000;
2659
2660
31.3k
      ptr++;
2661
31.3k
      cur = *ptr;
2662
31.3k
  }
2663
13.1k
  if (cur == ';')
2664
11.7k
      ptr++;
2665
25.2k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2666
25.2k
  ptr += 2;
2667
25.2k
  cur = *ptr;
2668
75.6k
  while (cur != ';') { /* Non input consuming loops */
2669
52.3k
      if ((cur >= '0') && (cur <= '9'))
2670
50.3k
          val = val * 10 + (cur - '0');
2671
1.93k
      else {
2672
1.93k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2673
1.93k
    val = 0;
2674
1.93k
    break;
2675
1.93k
      }
2676
50.3k
      if (val > 0x110000)
2677
399
          val = 0x110000;
2678
2679
50.3k
      ptr++;
2680
50.3k
      cur = *ptr;
2681
50.3k
  }
2682
25.2k
  if (cur == ';')
2683
23.3k
      ptr++;
2684
25.2k
    } else {
2685
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2686
0
  return(0);
2687
0
    }
2688
38.3k
    *str = ptr;
2689
2690
    /*
2691
     * [ WFC: Legal Character ]
2692
     * Characters referred to using character references must match the
2693
     * production for Char.
2694
     */
2695
38.3k
    if (val >= 0x110000) {
2696
248
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2697
248
                "xmlParseStringCharRef: character reference out of bounds\n",
2698
248
                val);
2699
38.1k
    } else if (IS_CHAR(val)) {
2700
33.8k
        return(val);
2701
33.8k
    } else {
2702
4.29k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2703
4.29k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2704
4.29k
        val);
2705
4.29k
    }
2706
4.54k
    return(0);
2707
38.3k
}
2708
2709
/**
2710
 *     [69] PEReference ::= '%' Name ';'
2711
 *
2712
 * @deprecated Internal function, do not use.
2713
 *
2714
 * [ WFC: No Recursion ]
2715
 * A parsed entity must not contain a recursive
2716
 * reference to itself, either directly or indirectly.
2717
 *
2718
 * [ WFC: Entity Declared ]
2719
 * In a document without any DTD, a document with only an internal DTD
2720
 * subset which contains no parameter entity references, or a document
2721
 * with "standalone='yes'", ...  ... The declaration of a parameter
2722
 * entity must precede any reference to it...
2723
 *
2724
 * [ VC: Entity Declared ]
2725
 * In a document with an external subset or external parameter entities
2726
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2727
 * must precede any reference to it...
2728
 *
2729
 * [ WFC: In DTD ]
2730
 * Parameter-entity references may only appear in the DTD.
2731
 * NOTE: misleading but this is handled.
2732
 *
2733
 * A PEReference may have been detected in the current input stream
2734
 * the handling is done accordingly to
2735
 *      http://www.w3.org/TR/REC-xml#entproc
2736
 * i.e.
2737
 *   - Included in literal in entity values
2738
 *   - Included as Parameter Entity reference within DTDs
2739
 * @param ctxt  the parser context
2740
 */
2741
void
2742
0
xmlParserHandlePEReference(xmlParserCtxt *ctxt) {
2743
0
    xmlParsePERefInternal(ctxt, 0);
2744
0
}
2745
2746
/**
2747
 * @deprecated Internal function, don't use.
2748
 *
2749
 * @param ctxt  the parser context
2750
 * @param str  the input string
2751
 * @param len  the string length
2752
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2753
 * @param end  an end marker xmlChar, 0 if none
2754
 * @param end2  an end marker xmlChar, 0 if none
2755
 * @param end3  an end marker xmlChar, 0 if none
2756
 * @returns A newly allocated string with the substitution done. The caller
2757
 *      must deallocate it !
2758
 */
2759
xmlChar *
2760
xmlStringLenDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str, int len,
2761
                           int what ATTRIBUTE_UNUSED,
2762
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2763
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2764
0
        return(NULL);
2765
2766
0
    if ((str[len] != 0) ||
2767
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2768
0
        return(NULL);
2769
2770
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2771
0
}
2772
2773
/**
2774
 * @deprecated Internal function, don't use.
2775
 *
2776
 * @param ctxt  the parser context
2777
 * @param str  the input string
2778
 * @param what  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2779
 * @param end  an end marker xmlChar, 0 if none
2780
 * @param end2  an end marker xmlChar, 0 if none
2781
 * @param end3  an end marker xmlChar, 0 if none
2782
 * @returns A newly allocated string with the substitution done. The caller
2783
 *      must deallocate it !
2784
 */
2785
xmlChar *
2786
xmlStringDecodeEntities(xmlParserCtxt *ctxt, const xmlChar *str,
2787
                        int what ATTRIBUTE_UNUSED,
2788
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2789
0
    if ((ctxt == NULL) || (str == NULL))
2790
0
        return(NULL);
2791
2792
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2793
0
        return(NULL);
2794
2795
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2796
0
}
2797
2798
/************************************************************************
2799
 *                  *
2800
 *    Commodity functions, cleanup needed ?     *
2801
 *                  *
2802
 ************************************************************************/
2803
2804
/**
2805
 * Is this a sequence of blank chars that one can ignore ?
2806
 *
2807
 * @param ctxt  an XML parser context
2808
 * @param str  a xmlChar *
2809
 * @param len  the size of `str`
2810
 * @param blank_chars  we know the chars are blanks
2811
 * @returns 1 if ignorable 0 otherwise.
2812
 */
2813
2814
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2815
0
                     int blank_chars) {
2816
0
    int i;
2817
0
    xmlNodePtr lastChild;
2818
2819
    /*
2820
     * Check for xml:space value.
2821
     */
2822
0
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2823
0
        (*(ctxt->space) == -2))
2824
0
  return(0);
2825
2826
    /*
2827
     * Check that the string is made of blanks
2828
     */
2829
0
    if (blank_chars == 0) {
2830
0
  for (i = 0;i < len;i++)
2831
0
      if (!(IS_BLANK_CH(str[i]))) return(0);
2832
0
    }
2833
2834
    /*
2835
     * Look if the element is mixed content in the DTD if available
2836
     */
2837
0
    if (ctxt->node == NULL) return(0);
2838
0
    if (ctxt->myDoc != NULL) {
2839
0
        xmlElementPtr elemDecl = NULL;
2840
0
        xmlDocPtr doc = ctxt->myDoc;
2841
0
        const xmlChar *prefix = NULL;
2842
2843
0
        if (ctxt->node->ns)
2844
0
            prefix = ctxt->node->ns->prefix;
2845
0
        if (doc->intSubset != NULL)
2846
0
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2847
0
                                      prefix);
2848
0
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2849
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2850
0
                                      prefix);
2851
0
        if (elemDecl != NULL) {
2852
0
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2853
0
                return(1);
2854
0
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2855
0
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2856
0
                return(0);
2857
0
        }
2858
0
    }
2859
2860
    /*
2861
     * Otherwise, heuristic :-\
2862
     *
2863
     * When push parsing, we could be at the end of a chunk.
2864
     * This makes the look-ahead and consequently the NOBLANKS
2865
     * option unreliable.
2866
     */
2867
0
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2868
0
    if ((ctxt->node->children == NULL) &&
2869
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2870
2871
0
    lastChild = xmlGetLastChild(ctxt->node);
2872
0
    if (lastChild == NULL) {
2873
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2874
0
            (ctxt->node->content != NULL)) return(0);
2875
0
    } else if (xmlNodeIsText(lastChild))
2876
0
        return(0);
2877
0
    else if ((ctxt->node->children != NULL) &&
2878
0
             (xmlNodeIsText(ctxt->node->children)))
2879
0
        return(0);
2880
0
    return(1);
2881
0
}
2882
2883
/************************************************************************
2884
 *                  *
2885
 *    Extra stuff for namespace support     *
2886
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2887
 *                  *
2888
 ************************************************************************/
2889
2890
/**
2891
 * Parse an UTF8 encoded XML qualified name string
2892
 *
2893
 * @deprecated Don't use.
2894
 *
2895
 * @param ctxt  an XML parser context
2896
 * @param name  an XML parser context
2897
 * @param prefixOut  a xmlChar **
2898
 * @returns the local part, and prefix is updated
2899
 *   to get the Prefix if any.
2900
 */
2901
2902
xmlChar *
2903
0
xmlSplitQName(xmlParserCtxt *ctxt, const xmlChar *name, xmlChar **prefixOut) {
2904
0
    xmlChar *ret;
2905
0
    const xmlChar *localname;
2906
2907
0
    localname = xmlSplitQName4(name, prefixOut);
2908
0
    if (localname == NULL) {
2909
0
        xmlCtxtErrMemory(ctxt);
2910
0
        return(NULL);
2911
0
    }
2912
2913
0
    ret = xmlStrdup(localname);
2914
0
    if (ret == NULL) {
2915
0
        xmlCtxtErrMemory(ctxt);
2916
0
        xmlFree(*prefixOut);
2917
0
    }
2918
2919
0
    return(ret);
2920
0
}
2921
2922
/************************************************************************
2923
 *                  *
2924
 *      The parser itself       *
2925
 *  Relates to http://www.w3.org/TR/REC-xml       *
2926
 *                  *
2927
 ************************************************************************/
2928
2929
/************************************************************************
2930
 *                  *
2931
 *  Routines to parse Name, NCName and NmToken      *
2932
 *                  *
2933
 ************************************************************************/
2934
2935
/*
2936
 * The two following functions are related to the change of accepted
2937
 * characters for Name and NmToken in the Revision 5 of XML-1.0
2938
 * They correspond to the modified production [4] and the new production [4a]
2939
 * changes in that revision. Also note that the macros used for the
2940
 * productions Letter, Digit, CombiningChar and Extender are not needed
2941
 * anymore.
2942
 * We still keep compatibility with pre-revision5 parsing semantics if the
2943
 * new XML_PARSE_OLD10 option is given to the parser.
2944
 */
2945
2946
/**
 * Check whether a code point may start a Name according to
 * productions [4], [4a] and [5] of XML 1.0, Fifth Edition.
 *
 * @param c  the code point
 * @returns 1 if `c` is a NameStartChar, 0 otherwise
 */
static int
xmlIsNameStartCharNew(int c) {
    /* Non-ASCII NameStartChar ranges, sorted in ascending order. */
    static const int ranges[][2] = {
        { 0xC0, 0xD6 },
        { 0xD8, 0xF6 },
        { 0xF8, 0x2FF },
        { 0x370, 0x37D },
        { 0x37F, 0x1FFF },
        { 0x200C, 0x200D },
        { 0x2070, 0x218F },
        { 0x2C00, 0x2FEF },
        { 0x3001, 0xD7FF },
        { 0xF900, 0xFDCF },
        { 0xFDF0, 0xFFFD },
        { 0x10000, 0xEFFFF }
    };
    unsigned int i;

    /* ASCII (and negative values): letters, '_' and ':' only. */
    if (c < 0x80) {
        if ((c >= 'a') && (c <= 'z'))
            return(1);
        if ((c >= 'A') && (c <= 'Z'))
            return(1);
        return((c == '_') || (c == ':'));
    }

    for (i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++) {
        /* The table is sorted, so no later range can match either. */
        if (c < ranges[i][0])
            break;
        if (c <= ranges[i][1])
            return(1);
    }

    return(0);
}
2971
2972
/**
 * Check whether a code point may appear inside a Name according to
 * productions [4], [4a] and [5] of XML 1.0, Fifth Edition.
 *
 * @param c  the code point
 * @returns 1 if `c` is a NameChar, 0 otherwise
 */
static int
xmlIsNameCharNew(int c) {
    /*
     * Non-ASCII NameChar ranges, sorted in ascending order. Entries
     * marked "!start" are not allowed as the first character.
     */
    static const int ranges[][2] = {
        { 0xB7, 0xB7 },         /* !start */
        { 0xC0, 0xD6 },
        { 0xD8, 0xF6 },
        { 0xF8, 0x2FF },
        { 0x300, 0x36F },       /* !start */
        { 0x370, 0x37D },
        { 0x37F, 0x1FFF },
        { 0x200C, 0x200D },
        { 0x203F, 0x2040 },     /* !start */
        { 0x2070, 0x218F },
        { 0x2C00, 0x2FEF },
        { 0x3001, 0xD7FF },
        { 0xF900, 0xFDCF },
        { 0xFDF0, 0xFFFD },
        { 0x10000, 0xEFFFF }
    };
    unsigned int i;

    /* ASCII (and negative values): letters, digits and "_:-." only. */
    if (c < 0x80) {
        if ((c >= 'a') && (c <= 'z'))
            return(1);
        if ((c >= 'A') && (c <= 'Z'))
            return(1);
        if ((c >= '0') && (c <= '9'))   /* !start */
            return(1);
        if ((c == '_') || (c == ':'))
            return(1);
        return((c == '-') || (c == '.'));   /* !start */
    }

    for (i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++) {
        /* The table is sorted, so no later range can match either. */
        if (c < ranges[i][0])
            break;
        if (c <= ranges[i][1])
            return(1);
    }

    return(0);
}
3001
3002
/**
 * Check whether a code point may start a Name using the
 * pre-Fifth-Edition rule: a Letter, '_' or ':'.
 *
 * @param c  the code point
 * @returns 1 if `c` may start a Name, 0 otherwise
 */
static int
xmlIsNameStartCharOld(int c) {
    /* accelerators: reject the most common terminators right away */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if (IS_LETTER(c))
        return(1);

    return((c == '_') || (c == ':'));
}
3009
3010
/**
 * Check whether a code point may appear inside a Name using the
 * pre-Fifth-Edition rule: Letter, Digit, '.', '-', '_', ':',
 * CombiningChar or Extender.
 *
 * @param c  the code point
 * @returns 1 if `c` is a NameChar, 0 otherwise
 */
static int
xmlIsNameCharOld(int c) {
    /* accelerators: reject the most common terminators right away */
    if ((c == ' ') || (c == '>') || (c == '/'))
        return(0);

    if ((IS_LETTER(c)) || (IS_DIGIT(c)))
        return(1);
    if ((c == '.') || (c == '-') || (c == '_') || (c == ':'))
        return(1);
    if ((IS_COMBINING(c)) || (IS_EXTENDER(c)))
        return(1);

    return(0);
}
3021
3022
/**
 * Check whether a code point may start a Name, selecting the rule set.
 *
 * @param c  the code point
 * @param old10  non-zero to use the pre-Fifth-Edition rules
 * @returns 1 if `c` may start a Name, 0 otherwise
 */
static int
xmlIsNameStartChar(int c, int old10) {
    return(old10 ? xmlIsNameStartCharOld(c) : xmlIsNameStartCharNew(c));
}
3029
3030
/**
 * Check whether a code point may appear inside a Name, selecting the
 * rule set.
 *
 * @param c  the code point
 * @param old10  non-zero to use the pre-Fifth-Edition rules
 * @returns 1 if `c` is a NameChar, 0 otherwise
 */
static int
xmlIsNameChar(int c, int old10) {
    return(old10 ? xmlIsNameCharOld(c) : xmlIsNameCharNew(c));
}
3037
3038
/*
3039
 * Scan an XML Name, NCName or Nmtoken.
3040
 *
3041
 * Returns a pointer to the end of the name on success. If the
3042
 * name is invalid, returns `ptr`. If the name is longer than
3043
 * `maxSize` bytes, returns NULL.
3044
 *
3045
 * @param ptr  pointer to the start of the name
3046
 * @param maxSize  maximum size in bytes
3047
 * @param flags  XML_SCAN_* flags
3048
 * @returns a pointer to the end of the name or NULL
3049
 */
3050
const xmlChar *
3051
663k
xmlScanName(const xmlChar *ptr, size_t maxSize, int flags) {
3052
663k
    int stop = flags & XML_SCAN_NC ? ':' : 0;
3053
663k
    int old10 = flags & XML_SCAN_OLD10 ? 1 : 0;
3054
3055
5.49M
    while (1) {
3056
5.49M
        int c, len;
3057
3058
5.49M
        c = *ptr;
3059
5.49M
        if (c < 0x80) {
3060
3.54M
            if (c == stop)
3061
95.2k
                break;
3062
3.44M
            len = 1;
3063
3.44M
        } else {
3064
1.95M
            len = 4;
3065
1.95M
            c = xmlGetUTF8Char(ptr, &len);
3066
1.95M
            if (c < 0)
3067
4.25k
                break;
3068
1.95M
        }
3069
3070
5.39M
        if (flags & XML_SCAN_NMTOKEN ?
3071
4.74M
                !xmlIsNameChar(c, old10) :
3072
5.39M
                !xmlIsNameStartChar(c, old10))
3073
563k
            break;
3074
3075
4.83M
        if ((size_t) len > maxSize)
3076
45
            return(NULL);
3077
4.83M
        ptr += len;
3078
4.83M
        maxSize -= len;
3079
4.83M
        flags |= XML_SCAN_NMTOKEN;
3080
4.83M
    }
3081
3082
663k
    return(ptr);
3083
663k
}
3084
3085
static const xmlChar *
3086
99.2k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3087
99.2k
    const xmlChar *ret;
3088
99.2k
    int len = 0, l;
3089
99.2k
    int c;
3090
99.2k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3091
0
                    XML_MAX_TEXT_LENGTH :
3092
99.2k
                    XML_MAX_NAME_LENGTH;
3093
99.2k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3094
3095
    /*
3096
     * Handler for more complex cases
3097
     */
3098
99.2k
    c = xmlCurrentChar(ctxt, &l);
3099
99.2k
    if (!xmlIsNameStartChar(c, old10))
3100
68.1k
        return(NULL);
3101
31.0k
    len += l;
3102
31.0k
    NEXTL(l);
3103
31.0k
    c = xmlCurrentChar(ctxt, &l);
3104
3.89M
    while (xmlIsNameChar(c, old10)) {
3105
3.86M
        if (len <= INT_MAX - l)
3106
3.86M
            len += l;
3107
3.86M
        NEXTL(l);
3108
3.86M
        c = xmlCurrentChar(ctxt, &l);
3109
3.86M
    }
3110
31.0k
    if (len > maxLength) {
3111
97
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3112
97
        return(NULL);
3113
97
    }
3114
30.9k
    if (ctxt->input->cur - ctxt->input->base < len) {
3115
        /*
3116
         * There were a couple of bugs where PERefs lead to to a change
3117
         * of the buffer. Check the buffer size to avoid passing an invalid
3118
         * pointer to xmlDictLookup.
3119
         */
3120
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3121
0
                    "unexpected change of input buffer");
3122
0
        return (NULL);
3123
0
    }
3124
30.9k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3125
254
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3126
30.7k
    else
3127
30.7k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3128
30.9k
    if (ret == NULL)
3129
1
        xmlErrMemory(ctxt);
3130
30.9k
    return(ret);
3131
30.9k
}
3132
3133
/**
3134
 * Parse an XML name.
3135
 *
3136
 * @deprecated Internal function, don't use.
3137
 *
3138
 *     [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3139
 *                      CombiningChar | Extender
3140
 *
3141
 *     [5] Name ::= (Letter | '_' | ':') (NameChar)*
3142
 *
3143
 *     [6] Names ::= Name (#x20 Name)*
3144
 *
3145
 * @param ctxt  an XML parser context
3146
 * @returns the Name parsed or NULL
3147
 */
3148
3149
const xmlChar *
3150
534k
xmlParseName(xmlParserCtxt *ctxt) {
3151
534k
    const xmlChar *in;
3152
534k
    const xmlChar *ret;
3153
534k
    size_t count = 0;
3154
534k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3155
0
                       XML_MAX_TEXT_LENGTH :
3156
534k
                       XML_MAX_NAME_LENGTH;
3157
3158
534k
    GROW;
3159
3160
    /*
3161
     * Accelerator for simple ASCII names
3162
     */
3163
534k
    in = ctxt->input->cur;
3164
534k
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3165
102k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3166
460k
  (*in == '_') || (*in == ':')) {
3167
460k
  in++;
3168
8.61M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3169
7.49M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3170
896k
         ((*in >= 0x30) && (*in <= 0x39)) ||
3171
665k
         (*in == '_') || (*in == '-') ||
3172
520k
         (*in == ':') || (*in == '.'))
3173
8.15M
      in++;
3174
460k
  if ((*in > 0) && (*in < 0x80)) {
3175
435k
      count = in - ctxt->input->cur;
3176
435k
            if (count > maxLength) {
3177
80
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3178
80
                return(NULL);
3179
80
            }
3180
435k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3181
435k
      ctxt->input->cur = in;
3182
435k
      ctxt->input->col += count;
3183
435k
      if (ret == NULL)
3184
2
          xmlErrMemory(ctxt);
3185
435k
      return(ret);
3186
435k
  }
3187
460k
    }
3188
    /* accelerator for special cases */
3189
99.2k
    return(xmlParseNameComplex(ctxt));
3190
534k
}
3191
3192
static xmlHashedString
3193
143k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3194
143k
    xmlHashedString ret;
3195
143k
    int len = 0, l;
3196
143k
    int c;
3197
143k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3198
0
                    XML_MAX_TEXT_LENGTH :
3199
143k
                    XML_MAX_NAME_LENGTH;
3200
143k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3201
143k
    size_t startPosition = 0;
3202
3203
143k
    ret.name = NULL;
3204
143k
    ret.hashValue = 0;
3205
3206
    /*
3207
     * Handler for more complex cases
3208
     */
3209
143k
    startPosition = CUR_PTR - BASE_PTR;
3210
143k
    c = xmlCurrentChar(ctxt, &l);
3211
143k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3212
135k
  (!xmlIsNameStartChar(c, old10) || (c == ':'))) {
3213
119k
  return(ret);
3214
119k
    }
3215
3216
4.77M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3217
4.76M
     (xmlIsNameChar(c, old10) && (c != ':'))) {
3218
4.74M
        if (len <= INT_MAX - l)
3219
4.74M
      len += l;
3220
4.74M
  NEXTL(l);
3221
4.74M
  c = xmlCurrentChar(ctxt, &l);
3222
4.74M
    }
3223
23.3k
    if (len > maxLength) {
3224
67
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3225
67
        return(ret);
3226
67
    }
3227
23.3k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3228
23.3k
    if (ret.name == NULL)
3229
2
        xmlErrMemory(ctxt);
3230
23.3k
    return(ret);
3231
23.3k
}
3232
3233
/**
3234
 * Parse an XML name.
3235
 *
3236
 *     [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3237
 *                          CombiningChar | Extender
3238
 *
3239
 *     [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3240
 *
3241
 * @param ctxt  an XML parser context
3242
 * @returns the Name parsed or NULL
3243
 */
3244
3245
static xmlHashedString
3246
2.74M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3247
2.74M
    const xmlChar *in, *e;
3248
2.74M
    xmlHashedString ret;
3249
2.74M
    size_t count = 0;
3250
2.74M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3251
0
                       XML_MAX_TEXT_LENGTH :
3252
2.74M
                       XML_MAX_NAME_LENGTH;
3253
3254
2.74M
    ret.name = NULL;
3255
3256
    /*
3257
     * Accelerator for simple ASCII names
3258
     */
3259
2.74M
    in = ctxt->input->cur;
3260
2.74M
    e = ctxt->input->end;
3261
2.74M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3262
194k
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3263
2.61M
   (*in == '_')) && (in < e)) {
3264
2.61M
  in++;
3265
24.9M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3266
14.1M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3267
2.77M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3268
2.63M
          (*in == '_') || (*in == '-') ||
3269
22.3M
          (*in == '.')) && (in < e))
3270
22.3M
      in++;
3271
2.61M
  if (in >= e)
3272
719
      goto complex;
3273
2.61M
  if ((*in > 0) && (*in < 0x80)) {
3274
2.60M
      count = in - ctxt->input->cur;
3275
2.60M
            if (count > maxLength) {
3276
139
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3277
139
                return(ret);
3278
139
            }
3279
2.60M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3280
2.60M
      ctxt->input->cur = in;
3281
2.60M
      ctxt->input->col += count;
3282
2.60M
      if (ret.name == NULL) {
3283
1
          xmlErrMemory(ctxt);
3284
1
      }
3285
2.60M
      return(ret);
3286
2.60M
  }
3287
2.61M
    }
3288
143k
complex:
3289
143k
    return(xmlParseNCNameComplex(ctxt));
3290
2.74M
}
3291
3292
/**
3293
 * Parse an XML name and compares for match
3294
 * (specialized for endtag parsing)
3295
 *
3296
 * @param ctxt  an XML parser context
3297
 * @param other  the name to compare with
3298
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
3299
 * and the name for mismatch
3300
 */
3301
3302
static const xmlChar *
3303
243k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3304
243k
    register const xmlChar *cmp = other;
3305
243k
    register const xmlChar *in;
3306
243k
    const xmlChar *ret;
3307
3308
243k
    GROW;
3309
3310
243k
    in = ctxt->input->cur;
3311
1.25M
    while (*in != 0 && *in == *cmp) {
3312
1.01M
  ++in;
3313
1.01M
  ++cmp;
3314
1.01M
    }
3315
243k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3316
  /* success */
3317
234k
  ctxt->input->col += in - ctxt->input->cur;
3318
234k
  ctxt->input->cur = in;
3319
234k
  return (const xmlChar*) 1;
3320
234k
    }
3321
    /* failure (or end of input buffer), check with full function */
3322
8.77k
    ret = xmlParseName (ctxt);
3323
    /* strings coming from the dictionary direct compare possible */
3324
8.77k
    if (ret == other) {
3325
1.88k
  return (const xmlChar*) 1;
3326
1.88k
    }
3327
6.89k
    return ret;
3328
8.77k
}
3329
3330
/**
3331
 * Parse an XML name.
3332
 *
3333
 * @param ctxt  an XML parser context
3334
 * @param str  a pointer to the string pointer (IN/OUT)
3335
 * @returns the Name parsed or NULL. The `str` pointer
3336
 * is updated to the current location in the string.
3337
 */
3338
3339
static xmlChar *
3340
254k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3341
254k
    xmlChar *ret;
3342
254k
    const xmlChar *cur = *str;
3343
254k
    int flags = 0;
3344
254k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3345
0
                    XML_MAX_TEXT_LENGTH :
3346
254k
                    XML_MAX_NAME_LENGTH;
3347
3348
254k
    if (ctxt->options & XML_PARSE_OLD10)
3349
0
        flags |= XML_SCAN_OLD10;
3350
3351
254k
    cur = xmlScanName(*str, maxLength, flags);
3352
254k
    if (cur == NULL) {
3353
21
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3354
21
        return(NULL);
3355
21
    }
3356
254k
    if (cur == *str)
3357
6.43k
        return(NULL);
3358
3359
247k
    ret = xmlStrndup(*str, cur - *str);
3360
247k
    if (ret == NULL)
3361
12
        xmlErrMemory(ctxt);
3362
247k
    *str = cur;
3363
247k
    return(ret);
3364
254k
}
3365
3366
/**
3367
 * Parse an XML Nmtoken.
3368
 *
3369
 * @deprecated Internal function, don't use.
3370
 *
3371
 *     [7] Nmtoken ::= (NameChar)+
3372
 *
3373
 *     [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3374
 *
3375
 * @param ctxt  an XML parser context
3376
 * @returns the Nmtoken parsed or NULL
3377
 */
3378
3379
xmlChar *
3380
44.7k
xmlParseNmtoken(xmlParserCtxt *ctxt) {
3381
44.7k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3382
44.7k
    xmlChar *ret;
3383
44.7k
    int len = 0, l;
3384
44.7k
    int c;
3385
44.7k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3386
0
                    XML_MAX_TEXT_LENGTH :
3387
44.7k
                    XML_MAX_NAME_LENGTH;
3388
44.7k
    int old10 = (ctxt->options & XML_PARSE_OLD10) ? 1 : 0;
3389
3390
44.7k
    c = xmlCurrentChar(ctxt, &l);
3391
3392
214k
    while (xmlIsNameChar(c, old10)) {
3393
170k
  COPY_BUF(buf, len, c);
3394
170k
  NEXTL(l);
3395
170k
  c = xmlCurrentChar(ctxt, &l);
3396
170k
  if (len >= XML_MAX_NAMELEN) {
3397
      /*
3398
       * Okay someone managed to make a huge token, so he's ready to pay
3399
       * for the processing speed.
3400
       */
3401
909
      xmlChar *buffer;
3402
909
      int max = len * 2;
3403
3404
909
      buffer = xmlMalloc(max);
3405
909
      if (buffer == NULL) {
3406
4
          xmlErrMemory(ctxt);
3407
4
    return(NULL);
3408
4
      }
3409
905
      memcpy(buffer, buf, len);
3410
4.03M
      while (xmlIsNameChar(c, old10)) {
3411
4.02M
    if (len + 10 > max) {
3412
2.22k
        xmlChar *tmp;
3413
2.22k
                    int newSize;
3414
3415
2.22k
                    newSize = xmlGrowCapacity(max, 1, 1, maxLength);
3416
2.22k
                    if (newSize < 0) {
3417
82
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3418
82
                        xmlFree(buffer);
3419
82
                        return(NULL);
3420
82
                    }
3421
2.14k
        tmp = xmlRealloc(buffer, newSize);
3422
2.14k
        if (tmp == NULL) {
3423
1
      xmlErrMemory(ctxt);
3424
1
      xmlFree(buffer);
3425
1
      return(NULL);
3426
1
        }
3427
2.14k
        buffer = tmp;
3428
2.14k
                    max = newSize;
3429
2.14k
    }
3430
4.02M
    COPY_BUF(buffer, len, c);
3431
4.02M
    NEXTL(l);
3432
4.02M
    c = xmlCurrentChar(ctxt, &l);
3433
4.02M
      }
3434
822
      buffer[len] = 0;
3435
822
      return(buffer);
3436
905
  }
3437
170k
    }
3438
43.8k
    if (len == 0)
3439
3.76k
        return(NULL);
3440
40.1k
    if (len > maxLength) {
3441
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3442
0
        return(NULL);
3443
0
    }
3444
40.1k
    ret = xmlStrndup(buf, len);
3445
40.1k
    if (ret == NULL)
3446
6
        xmlErrMemory(ctxt);
3447
40.1k
    return(ret);
3448
40.1k
}
3449
3450
/**
3451
 * Validate an entity value and expand parameter entities.
3452
 *
3453
 * @param ctxt  parser context
3454
 * @param buf  string buffer
3455
 * @param str  entity value
3456
 * @param length  size of entity value
3457
 * @param depth  nesting depth
3458
 */
3459
static void
3460
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3461
74.2k
                          const xmlChar *str, int length, int depth) {
3462
74.2k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3463
74.2k
    const xmlChar *end, *chunk;
3464
74.2k
    int c, l;
3465
3466
74.2k
    if (str == NULL)
3467
45.4k
        return;
3468
3469
28.8k
    depth += 1;
3470
28.8k
    if (depth > maxDepth) {
3471
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3472
0
                       "Maximum entity nesting depth exceeded");
3473
0
  return;
3474
0
    }
3475
3476
28.8k
    end = str + length;
3477
28.8k
    chunk = str;
3478
3479
29.8M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3480
29.8M
        c = *str;
3481
3482
29.8M
        if (c >= 0x80) {
3483
25.0M
            l = xmlUTF8MultibyteLen(ctxt, str,
3484
25.0M
                    "invalid character in entity value\n");
3485
25.0M
            if (l == 0) {
3486
2.50M
                if (chunk < str)
3487
156k
                    xmlSBufAddString(buf, chunk, str - chunk);
3488
2.50M
                xmlSBufAddReplChar(buf);
3489
2.50M
                str += 1;
3490
2.50M
                chunk = str;
3491
22.5M
            } else {
3492
22.5M
                str += l;
3493
22.5M
            }
3494
25.0M
        } else if (c == '&') {
3495
27.7k
            if (str[1] == '#') {
3496
18.4k
                if (chunk < str)
3497
12.6k
                    xmlSBufAddString(buf, chunk, str - chunk);
3498
3499
18.4k
                c = xmlParseStringCharRef(ctxt, &str);
3500
18.4k
                if (c == 0)
3501
4.54k
                    return;
3502
3503
13.9k
                xmlSBufAddChar(buf, c);
3504
3505
13.9k
                chunk = str;
3506
13.9k
            } else {
3507
9.28k
                xmlChar *name;
3508
3509
                /*
3510
                 * General entity references are checked for
3511
                 * syntactic validity.
3512
                 */
3513
9.28k
                str++;
3514
9.28k
                name = xmlParseStringName(ctxt, &str);
3515
3516
9.28k
                if ((name == NULL) || (*str++ != ';')) {
3517
5.77k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3518
5.77k
                            "EntityValue: '&' forbidden except for entities "
3519
5.77k
                            "references\n");
3520
5.77k
                    xmlFree(name);
3521
5.77k
                    return;
3522
5.77k
                }
3523
3524
3.51k
                xmlFree(name);
3525
3.51k
            }
3526
4.82M
        } else if (c == '%') {
3527
52.9k
            xmlEntityPtr ent;
3528
3529
52.9k
            if (chunk < str)
3530
28.4k
                xmlSBufAddString(buf, chunk, str - chunk);
3531
3532
52.9k
            ent = xmlParseStringPEReference(ctxt, &str);
3533
52.9k
            if (ent == NULL)
3534
4.54k
                return;
3535
3536
48.3k
            if (!PARSER_EXTERNAL(ctxt)) {
3537
247
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3538
247
                return;
3539
247
            }
3540
3541
48.1k
            if (ent->content == NULL) {
3542
                /*
3543
                 * Note: external parsed entities will not be loaded,
3544
                 * it is not required for a non-validating parser to
3545
                 * complete external PEReferences coming from the
3546
                 * internal subset
3547
                 */
3548
45.5k
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3549
45.5k
                    ((ctxt->replaceEntities) ||
3550
45.5k
                     (ctxt->validate))) {
3551
45.5k
                    xmlLoadEntityContent(ctxt, ent);
3552
45.5k
                } else {
3553
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3554
0
                                  "not validating will not read content for "
3555
0
                                  "PE entity %s\n", ent->name, NULL);
3556
0
                }
3557
45.5k
            }
3558
3559
            /*
3560
             * TODO: Skip if ent->content is still NULL.
3561
             */
3562
3563
48.1k
            if (xmlParserEntityCheck(ctxt, ent->length))
3564
1
                return;
3565
3566
48.1k
            if (ent->flags & XML_ENT_EXPANDING) {
3567
26
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3568
26
                return;
3569
26
            }
3570
3571
48.0k
            ent->flags |= XML_ENT_EXPANDING;
3572
48.0k
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3573
48.0k
                                      depth);
3574
48.0k
            ent->flags &= ~XML_ENT_EXPANDING;
3575
3576
48.0k
            chunk = str;
3577
4.77M
        } else {
3578
            /* Normal ASCII char */
3579
4.77M
            if (!IS_BYTE_CHAR(c)) {
3580
90.1k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3581
90.1k
                        "invalid character in entity value\n");
3582
90.1k
                if (chunk < str)
3583
7.42k
                    xmlSBufAddString(buf, chunk, str - chunk);
3584
90.1k
                xmlSBufAddReplChar(buf);
3585
90.1k
                str += 1;
3586
90.1k
                chunk = str;
3587
4.68M
            } else {
3588
4.68M
                str += 1;
3589
4.68M
            }
3590
4.77M
        }
3591
29.8M
    }
3592
3593
13.6k
    if (chunk < str)
3594
12.6k
        xmlSBufAddString(buf, chunk, str - chunk);
3595
13.6k
}
3596
3597
/**
3598
 * Parse a value for ENTITY declarations
3599
 *
3600
 * @deprecated Internal function, don't use.
3601
 *
3602
 *     [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3603
 *                         "'" ([^%&'] | PEReference | Reference)* "'"
3604
 *
3605
 * @param ctxt  an XML parser context
3606
 * @param orig  if non-NULL store a copy of the original entity value
3607
 * @returns the EntityValue parsed with reference substituted or NULL
3608
 */
3609
xmlChar *
3610
27.7k
xmlParseEntityValue(xmlParserCtxt *ctxt, xmlChar **orig) {
3611
27.7k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3612
0
                         XML_MAX_HUGE_LENGTH :
3613
27.7k
                         XML_MAX_TEXT_LENGTH;
3614
27.7k
    xmlSBuf buf;
3615
27.7k
    const xmlChar *start;
3616
27.7k
    int quote, length;
3617
3618
27.7k
    xmlSBufInit(&buf, maxLength);
3619
3620
27.7k
    GROW;
3621
3622
27.7k
    quote = CUR;
3623
27.7k
    if ((quote != '"') && (quote != '\'')) {
3624
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3625
0
  return(NULL);
3626
0
    }
3627
27.7k
    CUR_PTR++;
3628
3629
27.7k
    length = 0;
3630
3631
    /*
3632
     * Copy raw content of the entity into a buffer
3633
     */
3634
16.3M
    while (1) {
3635
16.3M
        int c;
3636
3637
16.3M
        if (PARSER_STOPPED(ctxt))
3638
2
            goto error;
3639
3640
16.3M
        if (CUR_PTR >= ctxt->input->end) {
3641
1.53k
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3642
1.53k
            goto error;
3643
1.53k
        }
3644
3645
16.3M
        c = CUR;
3646
3647
16.3M
        if (c == 0) {
3648
90
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3649
90
                    "invalid character in entity value\n");
3650
90
            goto error;
3651
90
        }
3652
16.3M
        if (c == quote)
3653
26.1k
            break;
3654
16.3M
        NEXTL(1);
3655
16.3M
        length += 1;
3656
3657
        /*
3658
         * TODO: Check growth threshold
3659
         */
3660
16.3M
        if (ctxt->input->end - CUR_PTR < 10)
3661
24.8k
            GROW;
3662
16.3M
    }
3663
3664
26.1k
    start = CUR_PTR - length;
3665
3666
26.1k
    if (orig != NULL) {
3667
26.1k
        *orig = xmlStrndup(start, length);
3668
26.1k
        if (*orig == NULL)
3669
5
            xmlErrMemory(ctxt);
3670
26.1k
    }
3671
3672
26.1k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3673
3674
26.1k
    NEXTL(1);
3675
3676
26.1k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3677
3678
1.63k
error:
3679
1.63k
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3680
1.63k
    return(NULL);
3681
27.7k
}
3682
3683
/**
3684
 * Check an entity reference in an attribute value for validity
3685
 * without expanding it.
3686
 *
3687
 * @param ctxt  parser context
3688
 * @param pent  entity
3689
 * @param depth  nesting depth
3690
 */
3691
static void
3692
0
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3693
0
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3694
0
    const xmlChar *str;
3695
0
    unsigned long expandedSize = pent->length;
3696
0
    int c, flags;
3697
3698
0
    depth += 1;
3699
0
    if (depth > maxDepth) {
3700
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3701
0
                       "Maximum entity nesting depth exceeded");
3702
0
  return;
3703
0
    }
3704
3705
0
    if (pent->flags & XML_ENT_EXPANDING) {
3706
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3707
0
        return;
3708
0
    }
3709
3710
    /*
3711
     * If we're parsing a default attribute value in DTD content,
3712
     * the entity might reference other entities which weren't
3713
     * defined yet, so the check isn't reliable.
3714
     */
3715
0
    if (ctxt->inSubset == 0)
3716
0
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3717
0
    else
3718
0
        flags = XML_ENT_VALIDATED;
3719
3720
0
    str = pent->content;
3721
0
    if (str == NULL)
3722
0
        goto done;
3723
3724
    /*
3725
     * Note that entity values are already validated. We only check
3726
     * for illegal less-than signs and compute the expanded size
3727
     * of the entity. No special handling for multi-byte characters
3728
     * is needed.
3729
     */
3730
0
    while (!PARSER_STOPPED(ctxt)) {
3731
0
        c = *str;
3732
3733
0
  if (c != '&') {
3734
0
            if (c == 0)
3735
0
                break;
3736
3737
0
            if (c == '<')
3738
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3739
0
                        "'<' in entity '%s' is not allowed in attributes "
3740
0
                        "values\n", pent->name);
3741
3742
0
            str += 1;
3743
0
        } else if (str[1] == '#') {
3744
0
            int val;
3745
3746
0
      val = xmlParseStringCharRef(ctxt, &str);
3747
0
      if (val == 0) {
3748
0
                pent->content[0] = 0;
3749
0
                break;
3750
0
            }
3751
0
  } else {
3752
0
            xmlChar *name;
3753
0
            xmlEntityPtr ent;
3754
3755
0
      name = xmlParseStringEntityRef(ctxt, &str);
3756
0
      if (name == NULL) {
3757
0
                pent->content[0] = 0;
3758
0
                break;
3759
0
            }
3760
3761
0
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3762
0
            xmlFree(name);
3763
3764
0
            if ((ent != NULL) &&
3765
0
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
3766
0
                if ((ent->flags & flags) != flags) {
3767
0
                    pent->flags |= XML_ENT_EXPANDING;
3768
0
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
3769
0
                    pent->flags &= ~XML_ENT_EXPANDING;
3770
0
                }
3771
3772
0
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
3773
0
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
3774
0
            }
3775
0
        }
3776
0
    }
3777
3778
0
done:
3779
0
    if (ctxt->inSubset == 0)
3780
0
        pent->expandedSize = expandedSize;
3781
3782
0
    pent->flags |= flags;
3783
0
}
3784
3785
/**
3786
 * Expand general entity references in an entity or attribute value.
3787
 * Perform attribute value normalization.
3788
 *
3789
 * @param ctxt  parser context
3790
 * @param buf  string buffer
3791
 * @param str  entity or attribute value
3792
 * @param pent  entity for entity value, NULL for attribute values
3793
 * @param normalize  whether to collapse whitespace
3794
 * @param inSpace  whitespace state
3795
 * @param depth  nesting depth
3796
 * @param check  whether to check for amplification
3797
 * @returns  whether there was a normalization change
3798
 */
3799
static int
3800
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3801
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
3802
162k
                          int *inSpace, int depth, int check) {
3803
162k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3804
162k
    int c, chunkSize;
3805
162k
    int normChange = 0;
3806
3807
162k
    if (str == NULL)
3808
0
        return(0);
3809
3810
162k
    depth += 1;
3811
162k
    if (depth > maxDepth) {
3812
2
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3813
2
                       "Maximum entity nesting depth exceeded");
3814
2
  return(0);
3815
2
    }
3816
3817
162k
    if (pent != NULL) {
3818
162k
        if (pent->flags & XML_ENT_EXPANDING) {
3819
5
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3820
5
            return(0);
3821
5
        }
3822
3823
162k
        if (check) {
3824
162k
            if (xmlParserEntityCheck(ctxt, pent->length))
3825
25
                return(0);
3826
162k
        }
3827
162k
    }
3828
3829
162k
    chunkSize = 0;
3830
3831
    /*
3832
     * Note that entity values are already validated. No special
3833
     * handling for multi-byte characters is needed.
3834
     */
3835
90.3M
    while (!PARSER_STOPPED(ctxt)) {
3836
90.3M
        c = *str;
3837
3838
90.3M
  if (c != '&') {
3839
90.1M
            if (c == 0)
3840
139k
                break;
3841
3842
            /*
3843
             * If this function is called without an entity, it is used to
3844
             * expand entities in an attribute content where less-than was
3845
             * already unscaped and is allowed.
3846
             */
3847
90.0M
            if ((pent != NULL) && (c == '<')) {
3848
22.1k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
3849
22.1k
                        "'<' in entity '%s' is not allowed in attributes "
3850
22.1k
                        "values\n", pent->name);
3851
22.1k
                break;
3852
22.1k
            }
3853
3854
89.9M
            if (c <= 0x20) {
3855
1.07M
                if ((normalize) && (*inSpace)) {
3856
                    /* Skip char */
3857
29.4k
                    if (chunkSize > 0) {
3858
16.1k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3859
16.1k
                        chunkSize = 0;
3860
16.1k
                    }
3861
29.4k
                    normChange = 1;
3862
1.04M
                } else if (c < 0x20) {
3863
398k
                    if (chunkSize > 0) {
3864
82.7k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3865
82.7k
                        chunkSize = 0;
3866
82.7k
                    }
3867
3868
398k
                    xmlSBufAddCString(buf, " ", 1);
3869
650k
                } else {
3870
650k
                    chunkSize += 1;
3871
650k
                }
3872
3873
1.07M
                *inSpace = 1;
3874
88.9M
            } else {
3875
88.9M
                chunkSize += 1;
3876
88.9M
                *inSpace = 0;
3877
88.9M
            }
3878
3879
89.9M
            str += 1;
3880
89.9M
        } else if (str[1] == '#') {
3881
19.9k
            int val;
3882
3883
19.9k
            if (chunkSize > 0) {
3884
19.1k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3885
19.1k
                chunkSize = 0;
3886
19.1k
            }
3887
3888
19.9k
      val = xmlParseStringCharRef(ctxt, &str);
3889
19.9k
      if (val == 0) {
3890
3
                if (pent != NULL)
3891
3
                    pent->content[0] = 0;
3892
3
                break;
3893
3
            }
3894
3895
19.9k
            if (val == ' ') {
3896
9.28k
                if ((normalize) && (*inSpace))
3897
261
                    normChange = 1;
3898
9.01k
                else
3899
9.01k
                    xmlSBufAddCString(buf, " ", 1);
3900
9.28k
                *inSpace = 1;
3901
10.6k
            } else {
3902
10.6k
                xmlSBufAddChar(buf, val);
3903
10.6k
                *inSpace = 0;
3904
10.6k
            }
3905
192k
  } else {
3906
192k
            xmlChar *name;
3907
192k
            xmlEntityPtr ent;
3908
3909
192k
            if (chunkSize > 0) {
3910
129k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
3911
129k
                chunkSize = 0;
3912
129k
            }
3913
3914
192k
      name = xmlParseStringEntityRef(ctxt, &str);
3915
192k
            if (name == NULL) {
3916
10
                if (pent != NULL)
3917
10
                    pent->content[0] = 0;
3918
10
                break;
3919
10
            }
3920
3921
192k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
3922
192k
            xmlFree(name);
3923
3924
192k
      if ((ent != NULL) &&
3925
133k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3926
60.3k
    if (ent->content == NULL) {
3927
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
3928
0
          "predefined entity has no content\n");
3929
0
                    break;
3930
0
                }
3931
3932
60.3k
                xmlSBufAddString(buf, ent->content, ent->length);
3933
3934
60.3k
                *inSpace = 0;
3935
131k
      } else if ((ent != NULL) && (ent->content != NULL)) {
3936
73.5k
                if (pent != NULL)
3937
73.5k
                    pent->flags |= XML_ENT_EXPANDING;
3938
73.5k
    normChange |= xmlExpandEntityInAttValue(ctxt, buf,
3939
73.5k
                        ent->content, ent, normalize, inSpace, depth, check);
3940
73.5k
                if (pent != NULL)
3941
73.5k
                    pent->flags &= ~XML_ENT_EXPANDING;
3942
73.5k
      }
3943
192k
        }
3944
90.3M
    }
3945
3946
162k
    if (chunkSize > 0)
3947
39.3k
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
3948
3949
162k
    return(normChange);
3950
162k
}
3951
3952
/**
3953
 * Expand general entity references in an entity or attribute value.
3954
 * Perform attribute value normalization.
3955
 *
3956
 * @param ctxt  parser context
3957
 * @param str  entity or attribute value
3958
 * @param normalize  whether to collapse whitespace
3959
 * @returns the expanded attribtue value.
3960
 */
3961
xmlChar *
3962
xmlExpandEntitiesInAttValue(xmlParserCtxt *ctxt, const xmlChar *str,
3963
0
                            int normalize) {
3964
0
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3965
0
                         XML_MAX_HUGE_LENGTH :
3966
0
                         XML_MAX_TEXT_LENGTH;
3967
0
    xmlSBuf buf;
3968
0
    int inSpace = 1;
3969
3970
0
    xmlSBufInit(&buf, maxLength);
3971
3972
0
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
3973
0
                              ctxt->inputNr, /* check */ 0);
3974
3975
0
    if ((normalize) && (inSpace) && (buf.size > 0))
3976
0
        buf.size--;
3977
3978
0
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
3979
0
}
3980
3981
/**
3982
 * Parse a value for an attribute.
3983
 *
3984
 * NOTE: if no normalization is needed, the routine will return pointers
3985
 * directly from the data buffer.
3986
 *
3987
 * 3.3.3 Attribute-Value Normalization:
3988
 *
3989
 * Before the value of an attribute is passed to the application or
3990
 * checked for validity, the XML processor must normalize it as follows:
3991
 *
3992
 * - a character reference is processed by appending the referenced
3993
 *   character to the attribute value
3994
 * - an entity reference is processed by recursively processing the
3995
 *   replacement text of the entity
3996
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
3997
 *   appending \#x20 to the normalized value, except that only a single
3998
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
3999
 *   parsed entity or the literal entity value of an internal parsed entity
4000
 * - other characters are processed by appending them to the normalized value
4001
 *
4002
 * If the declared value is not CDATA, then the XML processor must further
4003
 * process the normalized attribute value by discarding any leading and
4004
 * trailing space (\#x20) characters, and by replacing sequences of space
4005
 * (\#x20) characters by a single space (\#x20) character.
4006
 * All attributes for which no declaration has been read should be treated
4007
 * by a non-validating parser as if declared CDATA.
4008
 *
4009
 * @param ctxt  an XML parser context
4010
 * @param attlen  attribute len result
4011
 * @param outFlags  resulting XML_ATTVAL_* flags
4012
 * @param special  value from attsSpecial
4013
 * @param isNamespace  whether this is a namespace declaration
4014
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4015
 *     caller if it was copied, this can be detected by val[*len] == 0.
4016
 */
4017
static xmlChar *
4018
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *outFlags,
4019
960k
                         int special, int isNamespace) {
4020
960k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4021
0
                         XML_MAX_HUGE_LENGTH :
4022
960k
                         XML_MAX_TEXT_LENGTH;
4023
960k
    xmlSBuf buf;
4024
960k
    xmlChar *ret;
4025
960k
    int c, l, quote, entFlags, chunkSize;
4026
960k
    int inSpace = 1;
4027
960k
    int replaceEntities;
4028
960k
    int normalize = (special & XML_SPECIAL_TYPE_MASK) > XML_ATTRIBUTE_CDATA;
4029
960k
    int attvalFlags = 0;
4030
4031
    /* Always expand namespace URIs */
4032
960k
    replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4033
4034
960k
    xmlSBufInit(&buf, maxLength);
4035
4036
960k
    GROW;
4037
4038
960k
    quote = CUR;
4039
960k
    if ((quote != '"') && (quote != '\'')) {
4040
2.78k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4041
2.78k
  return(NULL);
4042
2.78k
    }
4043
958k
    NEXTL(1);
4044
4045
958k
    if (ctxt->inSubset == 0)
4046
932k
        entFlags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4047
25.9k
    else
4048
25.9k
        entFlags = XML_ENT_VALIDATED;
4049
4050
958k
    inSpace = 1;
4051
958k
    chunkSize = 0;
4052
4053
42.4M
    while (1) {
4054
42.4M
        if (PARSER_STOPPED(ctxt))
4055
75
            goto error;
4056
4057
42.4M
        if (CUR_PTR >= ctxt->input->end) {
4058
8.18k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4059
8.18k
                           "AttValue: ' expected\n");
4060
8.18k
            goto error;
4061
8.18k
        }
4062
4063
        /*
4064
         * TODO: Check growth threshold
4065
         */
4066
42.4M
        if (ctxt->input->end - CUR_PTR < 10)
4067
85.9k
            GROW;
4068
4069
42.4M
        c = CUR;
4070
4071
42.4M
        if (c >= 0x80) {
4072
8.47M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4073
8.47M
                    "invalid character in attribute value\n");
4074
8.47M
            if (l == 0) {
4075
565k
                if (chunkSize > 0) {
4076
65.4k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4077
65.4k
                    chunkSize = 0;
4078
65.4k
                }
4079
565k
                xmlSBufAddReplChar(&buf);
4080
565k
                NEXTL(1);
4081
7.90M
            } else {
4082
7.90M
                chunkSize += l;
4083
7.90M
                NEXTL(l);
4084
7.90M
            }
4085
4086
8.47M
            inSpace = 0;
4087
33.9M
        } else if (c != '&') {
4088
33.7M
            if (c > 0x20) {
4089
28.9M
                if (c == quote)
4090
949k
                    break;
4091
4092
27.9M
                if (c == '<')
4093
59.2k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4094
4095
27.9M
                chunkSize += 1;
4096
27.9M
                inSpace = 0;
4097
27.9M
            } else if (!IS_BYTE_CHAR(c)) {
4098
1.94M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4099
1.94M
                        "invalid character in attribute value\n");
4100
1.94M
                if (chunkSize > 0) {
4101
33.4k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4102
33.4k
                    chunkSize = 0;
4103
33.4k
                }
4104
1.94M
                xmlSBufAddReplChar(&buf);
4105
1.94M
                inSpace = 0;
4106
2.85M
            } else {
4107
                /* Whitespace */
4108
2.85M
                if ((normalize) && (inSpace)) {
4109
                    /* Skip char */
4110
3.45k
                    if (chunkSize > 0) {
4111
1.48k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4112
1.48k
                        chunkSize = 0;
4113
1.48k
                    }
4114
3.45k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4115
2.85M
                } else if (c < 0x20) {
4116
                    /* Convert to space */
4117
2.33M
                    if (chunkSize > 0) {
4118
378k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4119
378k
                        chunkSize = 0;
4120
378k
                    }
4121
4122
2.33M
                    xmlSBufAddCString(&buf, " ", 1);
4123
2.33M
                } else {
4124
515k
                    chunkSize += 1;
4125
515k
                }
4126
4127
2.85M
                inSpace = 1;
4128
4129
2.85M
                if ((c == 0xD) && (NXT(1) == 0xA))
4130
4.84k
                    CUR_PTR++;
4131
2.85M
            }
4132
4133
32.7M
            NEXTL(1);
4134
32.7M
        } else if (NXT(1) == '#') {
4135
31.6k
            int val;
4136
4137
31.6k
            if (chunkSize > 0) {
4138
18.6k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4139
18.6k
                chunkSize = 0;
4140
18.6k
            }
4141
4142
31.6k
            val = xmlParseCharRef(ctxt);
4143
31.6k
            if (val == 0)
4144
782
                goto error;
4145
4146
30.8k
            if ((val == '&') && (!replaceEntities)) {
4147
                /*
4148
                 * The reparsing will be done in xmlNodeParseContent()
4149
                 * called from SAX2.c
4150
                 */
4151
0
                xmlSBufAddCString(&buf, "&#38;", 5);
4152
0
                inSpace = 0;
4153
30.8k
            } else if (val == ' ') {
4154
1.62k
                if ((normalize) && (inSpace))
4155
179
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4156
1.44k
                else
4157
1.44k
                    xmlSBufAddCString(&buf, " ", 1);
4158
1.62k
                inSpace = 1;
4159
29.2k
            } else {
4160
29.2k
                xmlSBufAddChar(&buf, val);
4161
29.2k
                inSpace = 0;
4162
29.2k
            }
4163
153k
        } else {
4164
153k
            const xmlChar *name;
4165
153k
            xmlEntityPtr ent;
4166
4167
153k
            if (chunkSize > 0) {
4168
55.2k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4169
55.2k
                chunkSize = 0;
4170
55.2k
            }
4171
4172
153k
            name = xmlParseEntityRefInternal(ctxt);
4173
153k
            if (name == NULL) {
4174
                /*
4175
                 * Probably a literal '&' which wasn't escaped.
4176
                 * TODO: Handle gracefully in recovery mode.
4177
                 */
4178
48.9k
                continue;
4179
48.9k
            }
4180
4181
104k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4182
104k
            if (ent == NULL)
4183
7.81k
                continue;
4184
4185
96.8k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4186
8.30k
                if ((ent->content[0] == '&') && (!replaceEntities))
4187
0
                    xmlSBufAddCString(&buf, "&#38;", 5);
4188
8.30k
                else
4189
8.30k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4190
8.30k
                inSpace = 0;
4191
88.5k
            } else if (replaceEntities) {
4192
88.5k
                if (xmlExpandEntityInAttValue(ctxt, &buf,
4193
88.5k
                        ent->content, ent, normalize, &inSpace, ctxt->inputNr,
4194
88.5k
                        /* check */ 1) > 0)
4195
8.63k
                    attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4196
88.5k
            } else {
4197
0
                if ((ent->flags & entFlags) != entFlags)
4198
0
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4199
4200
0
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4201
0
                    ent->content[0] = 0;
4202
0
                    goto error;
4203
0
                }
4204
4205
                /*
4206
                 * Just output the reference
4207
                 */
4208
0
                xmlSBufAddCString(&buf, "&", 1);
4209
0
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4210
0
                xmlSBufAddCString(&buf, ";", 1);
4211
4212
0
                inSpace = 0;
4213
0
            }
4214
96.8k
  }
4215
42.4M
    }
4216
4217
949k
    if ((buf.mem == NULL) && (outFlags != NULL)) {
4218
818k
        ret = (xmlChar *) CUR_PTR - chunkSize;
4219
4220
818k
        if (attlen != NULL)
4221
818k
            *attlen = chunkSize;
4222
818k
        if ((normalize) && (inSpace) && (chunkSize > 0)) {
4223
196
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4224
196
            *attlen -= 1;
4225
196
        }
4226
4227
        /* Report potential error */
4228
818k
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4229
818k
    } else {
4230
130k
        if (chunkSize > 0)
4231
89.2k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4232
4233
130k
        if ((normalize) && (inSpace) && (buf.size > 0)) {
4234
342
            attvalFlags |= XML_ATTVAL_NORM_CHANGE;
4235
342
            buf.size--;
4236
342
        }
4237
4238
130k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4239
130k
        attvalFlags |= XML_ATTVAL_ALLOC;
4240
4241
130k
        if (ret != NULL) {
4242
130k
            if (attlen != NULL)
4243
111k
                *attlen = buf.size;
4244
130k
        }
4245
130k
    }
4246
4247
949k
    if (outFlags != NULL)
4248
929k
        *outFlags = attvalFlags;
4249
4250
949k
    NEXTL(1);
4251
4252
949k
    return(ret);
4253
4254
9.03k
error:
4255
9.03k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4256
9.03k
    return(NULL);
4257
958k
}
4258
4259
/**
4260
 * Parse a value for an attribute
4261
 * Note: the parser won't do substitution of entities here, this
4262
 * will be handled later in #xmlStringGetNodeList
4263
 *
4264
 * @deprecated Internal function, don't use.
4265
 *
4266
 *     [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4267
 *                       "'" ([^<&'] | Reference)* "'"
4268
 *
4269
 * 3.3.3 Attribute-Value Normalization:
4270
 *
4271
 * Before the value of an attribute is passed to the application or
4272
 * checked for validity, the XML processor must normalize it as follows:
4273
 *
4274
 * - a character reference is processed by appending the referenced
4275
 *   character to the attribute value
4276
 * - an entity reference is processed by recursively processing the
4277
 *   replacement text of the entity
4278
 * - a whitespace character (\#x20, \#xD, \#xA, \#x9) is processed by
4279
 *   appending \#x20 to the normalized value, except that only a single
4280
 *   \#x20 is appended for a "#xD#xA" sequence that is part of an external
4281
 *   parsed entity or the literal entity value of an internal parsed entity
4282
 * - other characters are processed by appending them to the normalized value
4283
 *
4284
 * If the declared value is not CDATA, then the XML processor must further
4285
 * process the normalized attribute value by discarding any leading and
4286
 * trailing space (\#x20) characters, and by replacing sequences of space
4287
 * (\#x20) characters by a single space (\#x20) character.
4288
 * All attributes for which no declaration has been read should be treated
4289
 * by a non-validating parser as if declared CDATA.
4290
 *
4291
 * @param ctxt  an XML parser context
4292
 * @returns the AttValue parsed or NULL. The value has to be freed by the
4293
 * caller.
4294
 */
4295
xmlChar *
4296
26.4k
xmlParseAttValue(xmlParserCtxt *ctxt) {
4297
26.4k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4298
26.4k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4299
26.4k
}
4300
4301
/**
4302
 * Parse an XML system literal
4303
 *
4304
 * @deprecated Internal function, don't use.
4305
 *
4306
 *     [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4307
 *
4308
 * @param ctxt  an XML parser context
4309
 * @returns the SystemLiteral parsed or NULL
4310
 */
4311
4312
xmlChar *
4313
14.7k
xmlParseSystemLiteral(xmlParserCtxt *ctxt) {
4314
14.7k
    xmlChar *buf = NULL;
4315
14.7k
    int len = 0;
4316
14.7k
    int size = XML_PARSER_BUFFER_SIZE;
4317
14.7k
    int cur, l;
4318
14.7k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4319
0
                    XML_MAX_TEXT_LENGTH :
4320
14.7k
                    XML_MAX_NAME_LENGTH;
4321
14.7k
    xmlChar stop;
4322
4323
14.7k
    if (RAW == '"') {
4324
11.3k
        NEXT;
4325
11.3k
  stop = '"';
4326
11.3k
    } else if (RAW == '\'') {
4327
1.87k
        NEXT;
4328
1.87k
  stop = '\'';
4329
1.87k
    } else {
4330
1.54k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4331
1.54k
  return(NULL);
4332
1.54k
    }
4333
4334
13.2k
    buf = xmlMalloc(size);
4335
13.2k
    if (buf == NULL) {
4336
1
        xmlErrMemory(ctxt);
4337
1
  return(NULL);
4338
1
    }
4339
13.2k
    cur = xmlCurrentCharRecover(ctxt, &l);
4340
1.09M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4341
1.08M
  if (len + 5 >= size) {
4342
4.53k
      xmlChar *tmp;
4343
4.53k
            int newSize;
4344
4345
4.53k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4346
4.53k
            if (newSize < 0) {
4347
1
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4348
1
                xmlFree(buf);
4349
1
                return(NULL);
4350
1
            }
4351
4.53k
      tmp = xmlRealloc(buf, newSize);
4352
4.53k
      if (tmp == NULL) {
4353
3
          xmlFree(buf);
4354
3
    xmlErrMemory(ctxt);
4355
3
    return(NULL);
4356
3
      }
4357
4.52k
      buf = tmp;
4358
4.52k
            size = newSize;
4359
4.52k
  }
4360
1.08M
  COPY_BUF(buf, len, cur);
4361
1.08M
  NEXTL(l);
4362
1.08M
  cur = xmlCurrentCharRecover(ctxt, &l);
4363
1.08M
    }
4364
13.1k
    buf[len] = 0;
4365
13.1k
    if (!IS_CHAR(cur)) {
4366
2.22k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4367
10.9k
    } else {
4368
10.9k
  NEXT;
4369
10.9k
    }
4370
13.1k
    return(buf);
4371
13.2k
}
4372
4373
/**
4374
 * Parse an XML public literal
4375
 *
4376
 * @deprecated Internal function, don't use.
4377
 *
4378
 *     [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4379
 *
4380
 * @param ctxt  an XML parser context
4381
 * @returns the PubidLiteral parsed or NULL.
4382
 */
4383
4384
xmlChar *
4385
8.35k
xmlParsePubidLiteral(xmlParserCtxt *ctxt) {
4386
8.35k
    xmlChar *buf = NULL;
4387
8.35k
    int len = 0;
4388
8.35k
    int size = XML_PARSER_BUFFER_SIZE;
4389
8.35k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4390
0
                    XML_MAX_TEXT_LENGTH :
4391
8.35k
                    XML_MAX_NAME_LENGTH;
4392
8.35k
    xmlChar cur;
4393
8.35k
    xmlChar stop;
4394
4395
8.35k
    if (RAW == '"') {
4396
1.78k
        NEXT;
4397
1.78k
  stop = '"';
4398
6.56k
    } else if (RAW == '\'') {
4399
6.41k
        NEXT;
4400
6.41k
  stop = '\'';
4401
6.41k
    } else {
4402
151
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4403
151
  return(NULL);
4404
151
    }
4405
8.19k
    buf = xmlMalloc(size);
4406
8.19k
    if (buf == NULL) {
4407
1
  xmlErrMemory(ctxt);
4408
1
  return(NULL);
4409
1
    }
4410
8.19k
    cur = CUR;
4411
210k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4412
202k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4413
202k
  if (len + 1 >= size) {
4414
357
      xmlChar *tmp;
4415
357
            int newSize;
4416
4417
357
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4418
357
            if (newSize < 0) {
4419
1
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4420
1
                xmlFree(buf);
4421
1
                return(NULL);
4422
1
            }
4423
356
      tmp = xmlRealloc(buf, newSize);
4424
356
      if (tmp == NULL) {
4425
1
    xmlErrMemory(ctxt);
4426
1
    xmlFree(buf);
4427
1
    return(NULL);
4428
1
      }
4429
355
      buf = tmp;
4430
355
            size = newSize;
4431
355
  }
4432
202k
  buf[len++] = cur;
4433
202k
  NEXT;
4434
202k
  cur = CUR;
4435
202k
    }
4436
8.19k
    buf[len] = 0;
4437
8.19k
    if (cur != stop) {
4438
5.07k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4439
5.07k
    } else {
4440
3.12k
  NEXTL(1);
4441
3.12k
    }
4442
8.19k
    return(buf);
4443
8.19k
}
4444
4445
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4446
4447
/*
4448
 * used for the test in the inner loop of the char data testing
4449
 */
4450
static const unsigned char test_char_data[256] = {
4451
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4452
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4453
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4454
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4455
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4456
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4457
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4458
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4459
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4460
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4461
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4462
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4463
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4464
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4465
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4466
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4467
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4468
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4469
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4470
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4471
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4472
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4473
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4474
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4475
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4476
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4477
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4478
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4479
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4480
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4481
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4482
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4483
};
4484
4485
static void
4486
xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size,
4487
1.24M
              int isBlank) {
4488
1.24M
    int checkBlanks;
4489
4490
1.24M
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
4491
277k
        return;
4492
4493
968k
    checkBlanks = (!ctxt->keepBlanks) ||
4494
968k
                  (ctxt->sax->ignorableWhitespace != ctxt->sax->characters);
4495
4496
    /*
4497
     * Calling areBlanks with only parts of a text node
4498
     * is fundamentally broken, making the NOBLANKS option
4499
     * essentially unusable.
4500
     */
4501
968k
    if ((checkBlanks) &&
4502
0
        (areBlanks(ctxt, buf, size, isBlank))) {
4503
0
        if ((ctxt->sax->ignorableWhitespace != NULL) &&
4504
0
            (ctxt->keepBlanks))
4505
0
            ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size);
4506
968k
    } else {
4507
968k
        if (ctxt->sax->characters != NULL)
4508
968k
            ctxt->sax->characters(ctxt->userData, buf, size);
4509
4510
        /*
4511
         * The old code used to update this value for "complex" data
4512
         * even if checkBlanks was false. This was probably a bug.
4513
         */
4514
968k
        if ((checkBlanks) && (*ctxt->space == -1))
4515
0
            *ctxt->space = -2;
4516
968k
    }
4517
968k
}
4518
4519
/**
 * Parse character data. Always makes progress if the first char isn't
 * '<' or '&'.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 *     [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * This is the fast path: it scans the input buffer in place and passes
 * slices of it directly to xmlCharacters(). When the byte at the current
 * position is outside the range accepted by the loop condition at the
 * bottom (0x20..0x7F, tab, line feed), the remaining work is handed to
 * xmlParseCharDataComplex().
 *
 * @param ctxt  an XML parser context
 * @param partial  buffer may contain partial UTF-8 sequences; when set,
 *                 the "]]>" lookahead stops and flushes instead of
 *                 reading bytes that are not available yet
 */
static void
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
    const xmlChar *in;
    /* position recorded at entry and after each delivered chunk */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;
    /* set when partial input ends inside a possible "]]>" */
    int terminate = 0;

    GROW;
    /*
     * Accelerated common case where the input doesn't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
get_more_space:
        /* skip a run of spaces and line feeds, tracking line/column */
        while (*in == 0x20) { in++; ctxt->input->col++; }
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more_space;
        }
        if (*in == '<') {
            /*
             * Only spaces and line feeds were scanned since input->cur:
             * deliver them with isBlank set, at most XML_MAX_ITEMS bytes
             * per call, and stop at the markup.
             */
            while (in > ctxt->input->cur) {
                const xmlChar *tmp = ctxt->input->cur;
                size_t nbchar = in - tmp;

                if (nbchar > XML_MAX_ITEMS)
                    nbchar = XML_MAX_ITEMS;
                ctxt->input->cur += nbchar;

                xmlCharacters(ctxt, tmp, nbchar, 1);
            }
            return;
        }

get_more:
        /*
         * Consume every byte flagged in test_char_data. The column is
         * accumulated in a local and stored once after the scan.
         */
        ccol = ctxt->input->col;
        while (test_char_data[*in]) {
            in++;
            ccol++;
        }
        ctxt->input->col = ccol;
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more;
        }
        if (*in == ']') {
            /* bytes available from the ']' to the end of the buffer */
            size_t avail = ctxt->input->end - in;

            /*
             * Look ahead for "]]>", which is an error in content. With
             * partial input, stop before reading past the available
             * bytes: flush what was scanned and return.
             */
            if (partial && avail < 2) {
                terminate = 1;
                goto invoke_callback;
            }
            if (in[1] == ']') {
                if (partial && avail < 3) {
                    terminate = 1;
                    goto invoke_callback;
                }
                if (in[2] == '>')
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
            }

            /* the ']' itself is ordinary data; keep scanning */
            in++;
            ctxt->input->col++;
            goto get_more;
        }

invoke_callback:
        /*
         * Deliver everything between input->cur and the scan position,
         * at most XML_MAX_ITEMS bytes per call, and record the position
         * after each delivered chunk.
         */
        while (in > ctxt->input->cur) {
            const xmlChar *tmp = ctxt->input->cur;
            size_t nbchar = in - tmp;

            if (nbchar > XML_MAX_ITEMS)
                nbchar = XML_MAX_ITEMS;
            ctxt->input->cur += nbchar;

            xmlCharacters(ctxt, tmp, nbchar, 0);

            line = ctxt->input->line;
            col = ctxt->input->col;
        }
        ctxt->input->cur = in;
        if (*in == 0xD) {
            in++;
            if (*in == 0xA) {
                /*
                 * CR LF: move input->cur onto the LF so the CR is left
                 * out of the data; the LF goes out with the next chunk.
                 */
                ctxt->input->cur = in;
                in++;
                ctxt->input->line++; ctxt->input->col = 1;
                continue; /* while */
            }
            /*
             * Lone CR: step back onto it. It fails the loop condition
             * below, so it ends up in xmlParseCharDataComplex().
             */
            in--;
        }
        if (*in == '<') {
            return;
        }
        if (*in == '&') {
            return;
        }
        if (terminate) {
            return;
        }
        SHRINK;
        GROW;
        /* the buffer may have been adjusted: restart from input->cur */
        in = ctxt->input->cur;
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
             (*in == 0x09) || (*in == 0x0a));
    /*
     * Restore the position recorded at entry or after the last delivered
     * chunk before handing over to the slow path.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, partial);
}
4648
4649
/**
4650
 * Always makes progress if the first char isn't '<' or '&'.
4651
 *
4652
 * parse a CharData section.this is the fallback function
4653
 * of #xmlParseCharData when the parsing requires handling
4654
 * of non-ASCII characters.
4655
 *
4656
 * @param ctxt  an XML parser context
4657
 * @param partial  whether the input can end with truncated UTF-8
4658
 */
4659
static void
4660
409k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4661
409k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4662
409k
    int nbchar = 0;
4663
409k
    int cur, l;
4664
4665
409k
    cur = xmlCurrentCharRecover(ctxt, &l);
4666
9.45M
    while ((cur != '<') && /* checked */
4667
9.36M
           (cur != '&') &&
4668
9.35M
     (IS_CHAR(cur))) {
4669
9.04M
        if (cur == ']') {
4670
33.3k
            size_t avail = ctxt->input->end - ctxt->input->cur;
4671
4672
33.3k
            if (partial && avail < 2)
4673
0
                break;
4674
33.3k
            if (NXT(1) == ']') {
4675
24.2k
                if (partial && avail < 3)
4676
0
                    break;
4677
24.2k
                if (NXT(2) == '>')
4678
549
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4679
24.2k
            }
4680
33.3k
        }
4681
4682
9.04M
  COPY_BUF(buf, nbchar, cur);
4683
  /* move current position before possible calling of ctxt->sax->characters */
4684
9.04M
  NEXTL(l);
4685
9.04M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4686
38.9k
      buf[nbchar] = 0;
4687
4688
38.9k
            xmlCharacters(ctxt, buf, nbchar, 0);
4689
38.9k
      nbchar = 0;
4690
38.9k
            SHRINK;
4691
38.9k
  }
4692
9.04M
  cur = xmlCurrentCharRecover(ctxt, &l);
4693
9.04M
    }
4694
409k
    if (nbchar != 0) {
4695
120k
        buf[nbchar] = 0;
4696
4697
120k
        xmlCharacters(ctxt, buf, nbchar, 0);
4698
120k
    }
4699
    /*
4700
     * cur == 0 can mean
4701
     *
4702
     * - End of buffer.
4703
     * - An actual 0 character.
4704
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4705
     */
4706
409k
    if (ctxt->input->cur < ctxt->input->end) {
4707
404k
        if ((cur == 0) && (CUR != 0)) {
4708
831
            if (partial == 0) {
4709
831
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4710
831
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4711
831
                NEXTL(1);
4712
831
            }
4713
403k
        } else if ((cur != '<') && (cur != '&') && (cur != ']')) {
4714
            /* Generate the error and skip the offending character */
4715
300k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4716
300k
                              "PCDATA invalid Char value %d\n", cur);
4717
300k
            NEXTL(l);
4718
300k
        }
4719
404k
    }
4720
409k
}
4721
4722
/**
4723
 * @deprecated Internal function, don't use.
4724
 * @param ctxt  an XML parser context
4725
 * @param cdata  unused
4726
 */
4727
void
4728
0
xmlParseCharData(xmlParserCtxt *ctxt, ATTRIBUTE_UNUSED int cdata) {
4729
0
    xmlParseCharDataInternal(ctxt, 0);
4730
0
}
4731
4732
/**
4733
 * Parse an External ID or a Public ID
4734
 *
4735
 * @deprecated Internal function, don't use.
4736
 *
4737
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4738
 * `'PUBLIC' S PubidLiteral S SystemLiteral`
4739
 *
4740
 *     [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4741
 *                       | 'PUBLIC' S PubidLiteral S SystemLiteral
4742
 *
4743
 *     [83] PublicID ::= 'PUBLIC' S PubidLiteral
4744
 *
4745
 * @param ctxt  an XML parser context
4746
 * @param publicId  a xmlChar** receiving PubidLiteral
4747
 * @param strict  indicate whether we should restrict parsing to only
4748
 *          production [75], see NOTE below
4749
 * @returns the function returns SystemLiteral and in the second
4750
 *                case publicID receives PubidLiteral, is strict is off
4751
 *                it is possible to return NULL and have publicID set.
4752
 */
4753
4754
xmlChar *
4755
26.2k
xmlParseExternalID(xmlParserCtxt *ctxt, xmlChar **publicId, int strict) {
4756
26.2k
    xmlChar *URI = NULL;
4757
4758
26.2k
    *publicId = NULL;
4759
26.2k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4760
7.55k
        SKIP(6);
4761
7.55k
  if (SKIP_BLANKS == 0) {
4762
568
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4763
568
                     "Space required after 'SYSTEM'\n");
4764
568
  }
4765
7.55k
  URI = xmlParseSystemLiteral(ctxt);
4766
7.55k
  if (URI == NULL) {
4767
284
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4768
284
        }
4769
18.7k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4770
8.35k
        SKIP(6);
4771
8.35k
  if (SKIP_BLANKS == 0) {
4772
5.29k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4773
5.29k
        "Space required after 'PUBLIC'\n");
4774
5.29k
  }
4775
8.35k
  *publicId = xmlParsePubidLiteral(ctxt);
4776
8.35k
  if (*publicId == NULL) {
4777
154
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4778
154
  }
4779
8.35k
  if (strict) {
4780
      /*
4781
       * We don't handle [83] so "S SystemLiteral" is required.
4782
       */
4783
6.20k
      if (SKIP_BLANKS == 0) {
4784
5.35k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4785
5.35k
      "Space required after the Public Identifier\n");
4786
5.35k
      }
4787
6.20k
  } else {
4788
      /*
4789
       * We handle [83] so we return immediately, if
4790
       * "S SystemLiteral" is not detected. We skip blanks if no
4791
             * system literal was found, but this is harmless since we must
4792
             * be at the end of a NotationDecl.
4793
       */
4794
2.14k
      if (SKIP_BLANKS == 0) return(NULL);
4795
1.50k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4796
1.50k
  }
4797
7.19k
  URI = xmlParseSystemLiteral(ctxt);
4798
7.19k
  if (URI == NULL) {
4799
1.26k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4800
1.26k
        }
4801
7.19k
    }
4802
25.0k
    return(URI);
4803
26.2k
}
4804
4805
/**
4806
 * Skip an XML (SGML) comment <!-- .... -->
4807
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4808
 *  must not occur within comments. "
4809
 * This is the slow routine in case the accelerator for ascii didn't work
4810
 *
4811
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4812
 * @param ctxt  an XML parser context
4813
 * @param buf  the already parsed part of the buffer
4814
 * @param len  number of bytes in the buffer
4815
 * @param size  allocated size of the buffer
4816
 */
4817
static void
4818
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4819
26.9k
                       size_t len, size_t size) {
4820
26.9k
    int q, ql;
4821
26.9k
    int r, rl;
4822
26.9k
    int cur, l;
4823
26.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4824
0
                    XML_MAX_HUGE_LENGTH :
4825
26.9k
                    XML_MAX_TEXT_LENGTH;
4826
4827
26.9k
    if (buf == NULL) {
4828
3.50k
        len = 0;
4829
3.50k
  size = XML_PARSER_BUFFER_SIZE;
4830
3.50k
  buf = xmlMalloc(size);
4831
3.50k
  if (buf == NULL) {
4832
2
      xmlErrMemory(ctxt);
4833
2
      return;
4834
2
  }
4835
3.50k
    }
4836
26.9k
    q = xmlCurrentCharRecover(ctxt, &ql);
4837
26.9k
    if (q == 0)
4838
1.96k
        goto not_terminated;
4839
25.0k
    if (!IS_CHAR(q)) {
4840
1.85k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4841
1.85k
                          "xmlParseComment: invalid xmlChar value %d\n",
4842
1.85k
                    q);
4843
1.85k
  xmlFree (buf);
4844
1.85k
  return;
4845
1.85k
    }
4846
23.1k
    NEXTL(ql);
4847
23.1k
    r = xmlCurrentCharRecover(ctxt, &rl);
4848
23.1k
    if (r == 0)
4849
913
        goto not_terminated;
4850
22.2k
    if (!IS_CHAR(r)) {
4851
478
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4852
478
                          "xmlParseComment: invalid xmlChar value %d\n",
4853
478
                    r);
4854
478
  xmlFree (buf);
4855
478
  return;
4856
478
    }
4857
21.7k
    NEXTL(rl);
4858
21.7k
    cur = xmlCurrentCharRecover(ctxt, &l);
4859
21.7k
    if (cur == 0)
4860
455
        goto not_terminated;
4861
12.2M
    while (IS_CHAR(cur) && /* checked */
4862
12.2M
           ((cur != '>') ||
4863
12.2M
      (r != '-') || (q != '-'))) {
4864
12.2M
  if ((r == '-') && (q == '-')) {
4865
15.5k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4866
15.5k
  }
4867
12.2M
  if (len + 5 >= size) {
4868
19.7k
      xmlChar *tmp;
4869
19.7k
            int newSize;
4870
4871
19.7k
      newSize = xmlGrowCapacity(size, 1, 1, maxLength);
4872
19.7k
            if (newSize < 0) {
4873
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4874
0
                             "Comment too big found", NULL);
4875
0
                xmlFree (buf);
4876
0
                return;
4877
0
            }
4878
19.7k
      tmp = xmlRealloc(buf, newSize);
4879
19.7k
      if (tmp == NULL) {
4880
2
    xmlErrMemory(ctxt);
4881
2
    xmlFree(buf);
4882
2
    return;
4883
2
      }
4884
19.7k
      buf = tmp;
4885
19.7k
            size = newSize;
4886
19.7k
  }
4887
12.2M
  COPY_BUF(buf, len, q);
4888
4889
12.2M
  q = r;
4890
12.2M
  ql = rl;
4891
12.2M
  r = cur;
4892
12.2M
  rl = l;
4893
4894
12.2M
  NEXTL(l);
4895
12.2M
  cur = xmlCurrentCharRecover(ctxt, &l);
4896
4897
12.2M
    }
4898
21.2k
    buf[len] = 0;
4899
21.2k
    if (cur == 0) {
4900
6.26k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4901
6.26k
                       "Comment not terminated \n<!--%.50s\n", buf);
4902
15.0k
    } else if (!IS_CHAR(cur)) {
4903
2.16k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4904
2.16k
                          "xmlParseComment: invalid xmlChar value %d\n",
4905
2.16k
                    cur);
4906
12.8k
    } else {
4907
12.8k
        NEXT;
4908
12.8k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4909
12.8k
      (!ctxt->disableSAX))
4910
3.53k
      ctxt->sax->comment(ctxt->userData, buf);
4911
12.8k
    }
4912
21.2k
    xmlFree(buf);
4913
21.2k
    return;
4914
3.32k
not_terminated:
4915
3.32k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4916
3.32k
       "Comment not terminated\n", NULL);
4917
3.32k
    xmlFree(buf);
4918
3.32k
}
4919
4920
/**
 * Parse an XML (SGML) comment. Always consumes '<!'.
 *
 * @deprecated Internal function, don't use.
 *
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
 *  must not occur within comments. "
 *
 *     [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 *
 * This is the fast path: it scans the input buffer byte by byte,
 * copies the scanned data into a heap buffer, and reports the comment
 * through the SAX `comment` callback when "-->" is found. When a byte
 * outside the range accepted by the loop condition at the bottom shows
 * up, the buffer collected so far is handed to xmlParseCommentComplex().
 *
 * @param ctxt  an XML parser context
 */
void
xmlParseComment(xmlParserCtxt *ctxt) {
    xmlChar *buf = NULL;
    size_t size = XML_PARSER_BUFFER_SIZE;
    size_t len = 0;
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
                       XML_MAX_HUGE_LENGTH :
                       XML_MAX_TEXT_LENGTH;
    const xmlChar *in;
    size_t nbchar = 0;
    int ccol;

    /*
     * Check that there is a comment right here.
     */
    if ((RAW != '<') || (NXT(1) != '!'))
        return;
    SKIP(2);
    /* "<!" is consumed even if "--" doesn't follow */
    if ((RAW != '-') || (NXT(1) != '-'))
        return;
    SKIP(2);
    GROW;

    /*
     * Accelerated common case where the input doesn't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
        }
get_more:
        /*
         * Scan over bytes in 0x20..0x7F other than '-', plus tab. The
         * column is accumulated in a local and stored once afterwards.
         */
        ccol = ctxt->input->col;
        while (((*in > '-') && (*in <= 0x7F)) ||
               ((*in >= 0x20) && (*in < '-')) ||
               (*in == 0x09)) {
            in++;
            ccol++;
        }
        ctxt->input->col = ccol;
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more;
        }
        nbchar = in - ctxt->input->cur;
        /*
         * save current set of data
         */
        if (nbchar > 0) {
            /* enforce the length limit before touching the buffer */
            if (nbchar > maxLength - len) {
                xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                                  "Comment too big found", NULL);
                xmlFree(buf);
                return;
            }
            if (buf == NULL) {
                /*
                 * First chunk. If "--" follows right away, allocate
                 * exactly the data plus its terminator; otherwise leave
                 * XML_PARSER_BUFFER_SIZE bytes of headroom.
                 */
                if ((*in == '-') && (in[1] == '-'))
                    size = nbchar + 1;
                else
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
                buf = xmlMalloc(size);
                if (buf == NULL) {
                    xmlErrMemory(ctxt);
                    return;
                }
                len = 0;
            } else if (len + nbchar + 1 >= size) {
                xmlChar *new_buf;
                size += len + nbchar + XML_PARSER_BUFFER_SIZE;
                new_buf = xmlRealloc(buf, size);
                if (new_buf == NULL) {
                    xmlErrMemory(ctxt);
                    xmlFree(buf);
                    return;
                }
                buf = new_buf;
            }
            memcpy(&buf[len], ctxt->input->cur, nbchar);
            len += nbchar;
            buf[len] = 0;
        }
        ctxt->input->cur = in;
        /*
         * NOTE(review): this branch looks unreachable. The scan above
         * jumps back to get_more whenever it stops on a line feed, and
         * the coverage report shows zero hits here. Confirm before
         * removing.
         */
        if (*in == 0xA) {
            in++;
            ctxt->input->line++; ctxt->input->col = 1;
        }
        if (*in == 0xD) {
            in++;
            if (*in == 0xA) {
                /*
                 * CR LF: move input->cur onto the LF so the CR is left
                 * out of the data; the LF is copied with the next chunk.
                 */
                ctxt->input->cur = in;
                in++;
                ctxt->input->line++; ctxt->input->col = 1;
                goto get_more;
            }
            /* lone CR: step back, it fails the loop condition below */
            in--;
        }
        SHRINK;
        GROW;
        /* the buffer may have been adjusted: restart from input->cur */
        in = ctxt->input->cur;
        if (*in == '-') {
            if (in[1] == '-') {
                if (in[2] == '>') {
                    /* end of comment: consume "-->" and report it */
                    SKIP(3);
                    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
                        (!ctxt->disableSAX)) {
                        if (buf != NULL)
                            ctxt->sax->comment(ctxt->userData, buf);
                        else
                            ctxt->sax->comment(ctxt->userData, BAD_CAST "");
                    }
                    if (buf != NULL)
                        xmlFree(buf);
                    return;
                }
                /* "--" not followed by '>': report it and keep parsing */
                if (buf != NULL) {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
                                      "Double hyphen within comment: "
                                      "<!--%.50s\n",
                                      buf);
                } else
                    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
                                      "Double hyphen within comment\n", NULL);
                in++;
                ctxt->input->col++;
            }
            /*
             * Step over the hyphen without moving input->cur, so it is
             * copied as part of the next chunk.
             */
            in++;
            ctxt->input->col++;
            goto get_more;
        }
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
    /* hand the data collected so far to the slow path */
    xmlParseCommentComplex(ctxt, buf, len, size);
}
5070
5071
5072
/**
5073
 * Parse the name of a PI
5074
 *
5075
 * @deprecated Internal function, don't use.
5076
 *
5077
 *     [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5078
 *
5079
 * @param ctxt  an XML parser context
5080
 * @returns the PITarget name or NULL
5081
 */
5082
5083
const xmlChar *
5084
28.8k
xmlParsePITarget(xmlParserCtxt *ctxt) {
5085
28.8k
    const xmlChar *name;
5086
5087
28.8k
    name = xmlParseName(ctxt);
5088
28.8k
    if ((name != NULL) &&
5089
27.5k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5090
12.1k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5091
8.76k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5092
3.35k
  int i;
5093
3.35k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5094
2.37k
      (name[2] == 'l') && (name[3] == 0)) {
5095
1.08k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5096
1.08k
     "XML declaration allowed only at the start of the document\n");
5097
1.08k
      return(name);
5098
2.27k
  } else if (name[3] == 0) {
5099
1.05k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5100
1.05k
      return(name);
5101
1.05k
  }
5102
3.50k
  for (i = 0;;i++) {
5103
3.50k
      if (xmlW3CPIs[i] == NULL) break;
5104
2.43k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5105
149
          return(name);
5106
2.43k
  }
5107
1.06k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5108
1.06k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5109
1.06k
          NULL, NULL);
5110
1.06k
    }
5111
26.5k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5112
2.81k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5113
2.81k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5114
2.81k
    }
5115
26.5k
    return(name);
5116
28.8k
}
5117
5118
#ifdef LIBXML_CATALOG_ENABLED
/**
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The PI value must consist of the `catalog` pseudo-attribute, an equals
 * sign and a quoted URL, with optional blanks around each part and
 * nothing else. Any other content produces an XML_WAR_CATALOG_PI
 * warning and leaves the context untouched. A failure to copy the URL
 * is reported as an out-of-memory error.
 *
 * @param ctxt  an XML parser context
 * @param catalog  the PI value string
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL;
    const xmlChar *tmp, *base, *end;
    xmlChar marker;

    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /* a missing '=' is a syntax error like any other */
    if (*tmp != '=')
        goto error;
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;
    end = tmp;
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != 0)
        goto error;

    /*
     * The syntax is fully validated at this point, so the copy is only
     * made for well-formed PIs and the error path has nothing to free.
     */
    URL = xmlStrndup(base, end - base);
    if (URL == NULL) {
        xmlErrMemory(ctxt);
        return;
    }

    /*
     * Unfortunately, the catalog API doesn't report OOM errors.
     * xmlGetLastError isn't very helpful since we don't know
     * where the last error came from. We'd have to reset it
     * before this call and restore it afterwards.
     */
    ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
    xmlFree(URL);
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
}
#endif
5184
5185
/**
5186
 * Parse an XML Processing Instruction.
5187
 *
5188
 * @deprecated Internal function, don't use.
5189
 *
5190
 *     [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5191
 *
5192
 * The processing is transferred to SAX once parsed.
5193
 *
5194
 * @param ctxt  an XML parser context
5195
 */
5196
5197
void
5198
28.8k
xmlParsePI(xmlParserCtxt *ctxt) {
5199
28.8k
    xmlChar *buf = NULL;
5200
28.8k
    size_t len = 0;
5201
28.8k
    size_t size = XML_PARSER_BUFFER_SIZE;
5202
28.8k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5203
0
                       XML_MAX_HUGE_LENGTH :
5204
28.8k
                       XML_MAX_TEXT_LENGTH;
5205
28.8k
    int cur, l;
5206
28.8k
    const xmlChar *target;
5207
5208
28.8k
    if ((RAW == '<') && (NXT(1) == '?')) {
5209
  /*
5210
   * this is a Processing Instruction.
5211
   */
5212
28.8k
  SKIP(2);
5213
5214
  /*
5215
   * Parse the target name and check for special support like
5216
   * namespace.
5217
   */
5218
28.8k
        target = xmlParsePITarget(ctxt);
5219
28.8k
  if (target != NULL) {
5220
27.5k
      if ((RAW == '?') && (NXT(1) == '>')) {
5221
6.37k
    SKIP(2);
5222
5223
    /*
5224
     * SAX: PI detected.
5225
     */
5226
6.37k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5227
5.48k
        (ctxt->sax->processingInstruction != NULL))
5228
5.48k
        ctxt->sax->processingInstruction(ctxt->userData,
5229
5.48k
                                         target, NULL);
5230
6.37k
    return;
5231
6.37k
      }
5232
21.2k
      buf = xmlMalloc(size);
5233
21.2k
      if (buf == NULL) {
5234
2
    xmlErrMemory(ctxt);
5235
2
    return;
5236
2
      }
5237
21.2k
      if (SKIP_BLANKS == 0) {
5238
11.1k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5239
11.1k
        "ParsePI: PI %s space expected\n", target);
5240
11.1k
      }
5241
21.2k
      cur = xmlCurrentCharRecover(ctxt, &l);
5242
4.45M
      while (IS_CHAR(cur) && /* checked */
5243
4.44M
       ((cur != '?') || (NXT(1) != '>'))) {
5244
4.43M
    if (len + 5 >= size) {
5245
27.9k
        xmlChar *tmp;
5246
27.9k
                    int newSize;
5247
5248
27.9k
                    newSize = xmlGrowCapacity(size, 1, 1, maxLength);
5249
27.9k
                    if (newSize < 0) {
5250
0
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5251
0
                                          "PI %s too big found", target);
5252
0
                        xmlFree(buf);
5253
0
                        return;
5254
0
                    }
5255
27.9k
        tmp = xmlRealloc(buf, newSize);
5256
27.9k
        if (tmp == NULL) {
5257
13
      xmlErrMemory(ctxt);
5258
13
      xmlFree(buf);
5259
13
      return;
5260
13
        }
5261
27.9k
        buf = tmp;
5262
27.9k
                    size = newSize;
5263
27.9k
    }
5264
4.43M
    COPY_BUF(buf, len, cur);
5265
4.43M
    NEXTL(l);
5266
4.43M
    cur = xmlCurrentCharRecover(ctxt, &l);
5267
4.43M
      }
5268
21.2k
      buf[len] = 0;
5269
21.2k
      if (cur != '?') {
5270
10.5k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5271
10.5k
          "ParsePI: PI %s never end ...\n", target);
5272
10.6k
      } else {
5273
10.6k
    SKIP(2);
5274
5275
10.6k
#ifdef LIBXML_CATALOG_ENABLED
5276
10.6k
    if ((ctxt->inSubset == 0) &&
5277
7.80k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5278
164
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5279
5280
164
        if ((ctxt->options & XML_PARSE_CATALOG_PI) &&
5281
0
                        ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5282
0
       (allow == XML_CATA_ALLOW_ALL)))
5283
0
      xmlParseCatalogPI(ctxt, buf);
5284
164
    }
5285
10.6k
#endif
5286
5287
    /*
5288
     * SAX: PI detected.
5289
     */
5290
10.6k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5291
6.26k
        (ctxt->sax->processingInstruction != NULL))
5292
6.26k
        ctxt->sax->processingInstruction(ctxt->userData,
5293
6.26k
                                         target, buf);
5294
10.6k
      }
5295
21.2k
      xmlFree(buf);
5296
21.2k
  } else {
5297
1.23k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5298
1.23k
  }
5299
28.8k
    }
5300
28.8k
}
5301
5302
/**
5303
 * Parse a notation declaration. Always consumes '<!'.
5304
 *
5305
 * @deprecated Internal function, don't use.
5306
 *
5307
 *     [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID)
5308
 *                           S? '>'
5309
 *
5310
 * Hence there is actually 3 choices:
5311
 *
5312
 *     'PUBLIC' S PubidLiteral
5313
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5314
 *     'SYSTEM' S SystemLiteral
5315
 *
5316
 * See the NOTE on #xmlParseExternalID.
5317
 *
5318
 * @param ctxt  an XML parser context
5319
 */
5320
5321
void
5322
3.88k
xmlParseNotationDecl(xmlParserCtxt *ctxt) {
5323
3.88k
    const xmlChar *name;
5324
3.88k
    xmlChar *Pubid;
5325
3.88k
    xmlChar *Systemid;
5326
5327
3.88k
    if ((CUR != '<') || (NXT(1) != '!'))
5328
0
        return;
5329
3.88k
    SKIP(2);
5330
5331
3.88k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5332
3.84k
#ifdef LIBXML_VALID_ENABLED
5333
3.84k
  int oldInputNr = ctxt->inputNr;
5334
3.84k
#endif
5335
5336
3.84k
  SKIP(8);
5337
3.84k
  if (SKIP_BLANKS_PE == 0) {
5338
247
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5339
247
         "Space required after '<!NOTATION'\n");
5340
247
      return;
5341
247
  }
5342
5343
3.59k
        name = xmlParseName(ctxt);
5344
3.59k
  if (name == NULL) {
5345
226
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5346
226
      return;
5347
226
  }
5348
3.37k
  if (xmlStrchr(name, ':') != NULL) {
5349
223
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5350
223
         "colons are forbidden from notation names '%s'\n",
5351
223
         name, NULL, NULL);
5352
223
  }
5353
3.37k
  if (SKIP_BLANKS_PE == 0) {
5354
121
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5355
121
         "Space required after the NOTATION name'\n");
5356
121
      return;
5357
121
  }
5358
5359
  /*
5360
   * Parse the IDs.
5361
   */
5362
3.25k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5363
3.25k
  SKIP_BLANKS_PE;
5364
5365
3.25k
  if (RAW == '>') {
5366
1.81k
#ifdef LIBXML_VALID_ENABLED
5367
1.81k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5368
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5369
0
                           "Notation declaration doesn't start and stop"
5370
0
                                 " in the same entity\n",
5371
0
                                 NULL, NULL);
5372
0
      }
5373
1.81k
#endif
5374
1.81k
      NEXT;
5375
1.81k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5376
824
    (ctxt->sax->notationDecl != NULL))
5377
824
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5378
1.81k
  } else {
5379
1.44k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5380
1.44k
  }
5381
3.25k
  if (Systemid != NULL) xmlFree(Systemid);
5382
3.25k
  if (Pubid != NULL) xmlFree(Pubid);
5383
3.25k
    }
5384
3.88k
}
5385
5386
/**
5387
 * Parse an entity declaration. Always consumes '<!'.
5388
 *
5389
 * @deprecated Internal function, don't use.
5390
 *
5391
 *     [70] EntityDecl ::= GEDecl | PEDecl
5392
 *
5393
 *     [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5394
 *
5395
 *     [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5396
 *
5397
 *     [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5398
 *
5399
 *     [74] PEDef ::= EntityValue | ExternalID
5400
 *
5401
 *     [76] NDataDecl ::= S 'NDATA' S Name
5402
 *
5403
 * [ VC: Notation Declared ]
5404
 * The Name must match the declared name of a notation.
5405
 *
5406
 * @param ctxt  an XML parser context
5407
 */
5408
5409
void
5410
44.9k
xmlParseEntityDecl(xmlParserCtxt *ctxt) {
5411
44.9k
    const xmlChar *name = NULL;
5412
44.9k
    xmlChar *value = NULL;
5413
44.9k
    xmlChar *URI = NULL, *literal = NULL;
5414
44.9k
    const xmlChar *ndata = NULL;
5415
44.9k
    int isParameter = 0;
5416
44.9k
    xmlChar *orig = NULL;
5417
5418
44.9k
    if ((CUR != '<') || (NXT(1) != '!'))
5419
0
        return;
5420
44.9k
    SKIP(2);
5421
5422
    /* GROW; done in the caller */
5423
44.9k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5424
44.9k
#ifdef LIBXML_VALID_ENABLED
5425
44.9k
  int oldInputNr = ctxt->inputNr;
5426
44.9k
#endif
5427
5428
44.9k
  SKIP(6);
5429
44.9k
  if (SKIP_BLANKS_PE == 0) {
5430
7.55k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5431
7.55k
         "Space required after '<!ENTITY'\n");
5432
7.55k
  }
5433
5434
44.9k
  if (RAW == '%') {
5435
23.6k
      NEXT;
5436
23.6k
      if (SKIP_BLANKS_PE == 0) {
5437
919
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5438
919
             "Space required after '%%'\n");
5439
919
      }
5440
23.6k
      isParameter = 1;
5441
23.6k
  }
5442
5443
44.9k
        name = xmlParseName(ctxt);
5444
44.9k
  if (name == NULL) {
5445
2.29k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5446
2.29k
                     "xmlParseEntityDecl: no name\n");
5447
2.29k
            return;
5448
2.29k
  }
5449
42.6k
  if (xmlStrchr(name, ':') != NULL) {
5450
5.44k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5451
5.44k
         "colons are forbidden from entities names '%s'\n",
5452
5.44k
         name, NULL, NULL);
5453
5.44k
  }
5454
42.6k
  if (SKIP_BLANKS_PE == 0) {
5455
3.53k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5456
3.53k
         "Space required after the entity name\n");
5457
3.53k
  }
5458
5459
  /*
5460
   * handle the various case of definitions...
5461
   */
5462
42.6k
  if (isParameter) {
5463
23.2k
      if ((RAW == '"') || (RAW == '\'')) {
5464
15.5k
          value = xmlParseEntityValue(ctxt, &orig);
5465
15.5k
    if (value) {
5466
14.7k
        if ((ctxt->sax != NULL) &&
5467
14.7k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5468
1.16k
      ctxt->sax->entityDecl(ctxt->userData, name,
5469
1.16k
                        XML_INTERNAL_PARAMETER_ENTITY,
5470
1.16k
            NULL, NULL, value);
5471
14.7k
    }
5472
15.5k
      } else {
5473
7.76k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5474
7.76k
    if ((URI == NULL) && (literal == NULL)) {
5475
1.18k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5476
1.18k
    }
5477
7.76k
    if (URI) {
5478
6.03k
                    if (xmlStrchr(URI, '#')) {
5479
640
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5480
5.39k
                    } else {
5481
5.39k
                        if ((ctxt->sax != NULL) &&
5482
5.39k
                            (!ctxt->disableSAX) &&
5483
4.59k
                            (ctxt->sax->entityDecl != NULL))
5484
4.59k
                            ctxt->sax->entityDecl(ctxt->userData, name,
5485
4.59k
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5486
4.59k
                                        literal, URI, NULL);
5487
5.39k
                    }
5488
6.03k
    }
5489
7.76k
      }
5490
23.2k
  } else {
5491
19.3k
      if ((RAW == '"') || (RAW == '\'')) {
5492
12.2k
          value = xmlParseEntityValue(ctxt, &orig);
5493
12.2k
    if ((ctxt->sax != NULL) &&
5494
12.2k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5495
1.58k
        ctxt->sax->entityDecl(ctxt->userData, name,
5496
1.58k
        XML_INTERNAL_GENERAL_ENTITY,
5497
1.58k
        NULL, NULL, value);
5498
    /*
5499
     * For expat compatibility in SAX mode.
5500
     */
5501
12.2k
    if ((ctxt->myDoc == NULL) ||
5502
11.6k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5503
6.06k
        if (ctxt->myDoc == NULL) {
5504
658
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5505
658
      if (ctxt->myDoc == NULL) {
5506
1
          xmlErrMemory(ctxt);
5507
1
          goto done;
5508
1
      }
5509
657
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5510
657
        }
5511
6.05k
        if (ctxt->myDoc->intSubset == NULL) {
5512
657
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5513
657
              BAD_CAST "fake", NULL, NULL);
5514
657
                        if (ctxt->myDoc->intSubset == NULL) {
5515
2
                            xmlErrMemory(ctxt);
5516
2
                            goto done;
5517
2
                        }
5518
657
                    }
5519
5520
6.05k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5521
6.05k
                    NULL, NULL, value);
5522
6.05k
    }
5523
12.2k
      } else {
5524
7.07k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5525
7.07k
    if ((URI == NULL) && (literal == NULL)) {
5526
1.25k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5527
1.25k
    }
5528
7.07k
    if (URI) {
5529
5.27k
                    if (xmlStrchr(URI, '#')) {
5530
979
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5531
979
                    }
5532
5.27k
    }
5533
7.07k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5534
1.68k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5535
1.68k
           "Space required before 'NDATA'\n");
5536
1.68k
    }
5537
7.07k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5538
6
        SKIP(5);
5539
6
        if (SKIP_BLANKS_PE == 0) {
5540
3
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5541
3
               "Space required after 'NDATA'\n");
5542
3
        }
5543
6
        ndata = xmlParseName(ctxt);
5544
6
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5545
0
            (ctxt->sax->unparsedEntityDecl != NULL))
5546
0
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5547
0
            literal, URI, ndata);
5548
7.06k
    } else {
5549
7.06k
        if ((ctxt->sax != NULL) &&
5550
7.06k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5551
1.15k
      ctxt->sax->entityDecl(ctxt->userData, name,
5552
1.15k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5553
1.15k
            literal, URI, NULL);
5554
        /*
5555
         * For expat compatibility in SAX mode.
5556
         * assuming the entity replacement was asked for
5557
         */
5558
7.06k
        if ((ctxt->replaceEntities != 0) &&
5559
7.06k
      ((ctxt->myDoc == NULL) ||
5560
5.99k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5561
4.44k
      if (ctxt->myDoc == NULL) {
5562
1.06k
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5563
1.06k
          if (ctxt->myDoc == NULL) {
5564
1
              xmlErrMemory(ctxt);
5565
1
        goto done;
5566
1
          }
5567
1.06k
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5568
1.06k
      }
5569
5570
4.43k
      if (ctxt->myDoc->intSubset == NULL) {
5571
1.06k
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5572
1.06k
            BAD_CAST "fake", NULL, NULL);
5573
1.06k
                            if (ctxt->myDoc->intSubset == NULL) {
5574
1
                                xmlErrMemory(ctxt);
5575
1
                                goto done;
5576
1
                            }
5577
1.06k
                        }
5578
4.43k
      xmlSAX2EntityDecl(ctxt, name,
5579
4.43k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5580
4.43k
                  literal, URI, NULL);
5581
4.43k
        }
5582
7.06k
    }
5583
7.07k
      }
5584
19.3k
  }
5585
42.6k
  SKIP_BLANKS_PE;
5586
42.6k
  if (RAW != '>') {
5587
6.28k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5588
6.28k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5589
36.3k
  } else {
5590
36.3k
#ifdef LIBXML_VALID_ENABLED
5591
36.3k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
5592
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5593
0
                           "Entity declaration doesn't start and stop in"
5594
0
                                 " the same entity\n",
5595
0
                                 NULL, NULL);
5596
0
      }
5597
36.3k
#endif
5598
36.3k
      NEXT;
5599
36.3k
  }
5600
42.6k
  if (orig != NULL) {
5601
      /*
5602
       * Ugly mechanism to save the raw entity value.
5603
       */
5604
26.1k
      xmlEntityPtr cur = NULL;
5605
5606
26.1k
      if (isParameter) {
5607
14.7k
          if ((ctxt->sax != NULL) &&
5608
14.7k
        (ctxt->sax->getParameterEntity != NULL))
5609
14.7k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5610
14.7k
      } else {
5611
11.4k
          if ((ctxt->sax != NULL) &&
5612
11.4k
        (ctxt->sax->getEntity != NULL))
5613
11.4k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5614
11.4k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5615
2.85k
        cur = xmlSAX2GetEntity(ctxt, name);
5616
2.85k
    }
5617
11.4k
      }
5618
26.1k
            if ((cur != NULL) && (cur->orig == NULL)) {
5619
3.03k
    cur->orig = orig;
5620
3.03k
                orig = NULL;
5621
3.03k
      }
5622
26.1k
  }
5623
5624
42.6k
done:
5625
42.6k
  if (value != NULL) xmlFree(value);
5626
42.6k
  if (URI != NULL) xmlFree(URI);
5627
42.6k
  if (literal != NULL) xmlFree(literal);
5628
42.6k
        if (orig != NULL) xmlFree(orig);
5629
42.6k
    }
5630
44.9k
}
5631
5632
/**
5633
 * Parse an attribute default declaration
5634
 *
5635
 * @deprecated Internal function, don't use.
5636
 *
5637
 *     [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5638
 *
5639
 * [ VC: Required Attribute ]
5640
 * if the default declaration is the keyword \#REQUIRED, then the
5641
 * attribute must be specified for all elements of the type in the
5642
 * attribute-list declaration.
5643
 *
5644
 * [ VC: Attribute Default Legal ]
5645
 * The declared default value must meet the lexical constraints of
5646
 * the declared attribute type c.f. #xmlValidateAttributeDecl
5647
 *
5648
 * [ VC: Fixed Attribute Default ]
5649
 * if an attribute has a default value declared with the \#FIXED
5650
 * keyword, instances of that attribute must match the default value.
5651
 *
5652
 * [ WFC: No < in Attribute Values ]
5653
 * handled in #xmlParseAttValue
5654
 *
5655
 * @param ctxt  an XML parser context
5656
 * @param value  Receive a possible fixed default value for the attribute
5657
 * @returns XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5658
 *          or XML_ATTRIBUTE_FIXED.
5659
 */
5660
5661
int
5662
30.6k
xmlParseDefaultDecl(xmlParserCtxt *ctxt, xmlChar **value) {
5663
30.6k
    int val;
5664
30.6k
    xmlChar *ret;
5665
5666
30.6k
    *value = NULL;
5667
30.6k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5668
363
  SKIP(9);
5669
363
  return(XML_ATTRIBUTE_REQUIRED);
5670
363
    }
5671
30.2k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5672
3.85k
  SKIP(8);
5673
3.85k
  return(XML_ATTRIBUTE_IMPLIED);
5674
3.85k
    }
5675
26.4k
    val = XML_ATTRIBUTE_NONE;
5676
26.4k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5677
2.72k
  SKIP(6);
5678
2.72k
  val = XML_ATTRIBUTE_FIXED;
5679
2.72k
  if (SKIP_BLANKS_PE == 0) {
5680
537
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5681
537
         "Space required after '#FIXED'\n");
5682
537
  }
5683
2.72k
    }
5684
26.4k
    ret = xmlParseAttValue(ctxt);
5685
26.4k
    if (ret == NULL) {
5686
7.32k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5687
7.32k
           "Attribute default value declaration error\n");
5688
7.32k
    } else
5689
19.0k
        *value = ret;
5690
26.4k
    return(val);
5691
30.2k
}
5692
5693
/**
5694
 * Parse a Notation attribute type.
5695
 *
5696
 * @deprecated Internal function, don't use.
5697
 *
5698
 * Note: the leading 'NOTATION' S part has already been parsed...
5699
 *
5700
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5701
 *
5702
 * [ VC: Notation Attributes ]
5703
 * Values of this type must match one of the notation names included
5704
 * in the declaration; all notation names in the declaration must be declared.
5705
 *
5706
 * @param ctxt  an XML parser context
5707
 * @returns the notation attribute tree built while parsing
5708
 */
5709
5710
xmlEnumeration *
5711
364
xmlParseNotationType(xmlParserCtxt *ctxt) {
5712
364
    const xmlChar *name;
5713
364
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5714
5715
364
    if (RAW != '(') {
5716
67
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5717
67
  return(NULL);
5718
67
    }
5719
860
    do {
5720
860
        NEXT;
5721
860
  SKIP_BLANKS_PE;
5722
860
        name = xmlParseName(ctxt);
5723
860
  if (name == NULL) {
5724
19
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5725
19
         "Name expected in NOTATION declaration\n");
5726
19
            xmlFreeEnumeration(ret);
5727
19
      return(NULL);
5728
19
  }
5729
841
        tmp = NULL;
5730
841
#ifdef LIBXML_VALID_ENABLED
5731
841
        if (ctxt->validate) {
5732
0
            tmp = ret;
5733
0
            while (tmp != NULL) {
5734
0
                if (xmlStrEqual(name, tmp->name)) {
5735
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5736
0
              "standalone: attribute notation value token %s duplicated\n",
5737
0
                                     name, NULL);
5738
0
                    if (!xmlDictOwns(ctxt->dict, name))
5739
0
                        xmlFree((xmlChar *) name);
5740
0
                    break;
5741
0
                }
5742
0
                tmp = tmp->next;
5743
0
            }
5744
0
        }
5745
841
#endif /* LIBXML_VALID_ENABLED */
5746
841
  if (tmp == NULL) {
5747
841
      cur = xmlCreateEnumeration(name);
5748
841
      if (cur == NULL) {
5749
5
                xmlErrMemory(ctxt);
5750
5
                xmlFreeEnumeration(ret);
5751
5
                return(NULL);
5752
5
            }
5753
836
      if (last == NULL) ret = last = cur;
5754
559
      else {
5755
559
    last->next = cur;
5756
559
    last = cur;
5757
559
      }
5758
836
  }
5759
836
  SKIP_BLANKS_PE;
5760
836
    } while (RAW == '|');
5761
273
    if (RAW != ')') {
5762
185
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5763
185
        xmlFreeEnumeration(ret);
5764
185
  return(NULL);
5765
185
    }
5766
88
    NEXT;
5767
88
    return(ret);
5768
273
}
5769
5770
/**
5771
 * Parse an Enumeration attribute type.
5772
 *
5773
 * @deprecated Internal function, don't use.
5774
 *
5775
 *     [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5776
 *
5777
 * [ VC: Enumeration ]
5778
 * Values of this type must match one of the Nmtoken tokens in
5779
 * the declaration
5780
 *
5781
 * @param ctxt  an XML parser context
5782
 * @returns the enumeration attribute tree built while parsing
5783
 */
5784
5785
xmlEnumeration *
5786
11.7k
xmlParseEnumerationType(xmlParserCtxt *ctxt) {
5787
11.7k
    xmlChar *name;
5788
11.7k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5789
5790
11.7k
    if (RAW != '(') {
5791
835
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5792
835
  return(NULL);
5793
835
    }
5794
12.4k
    do {
5795
12.4k
        NEXT;
5796
12.4k
  SKIP_BLANKS_PE;
5797
12.4k
        name = xmlParseNmtoken(ctxt);
5798
12.4k
  if (name == NULL) {
5799
232
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5800
232
      return(ret);
5801
232
  }
5802
12.2k
        tmp = NULL;
5803
12.2k
#ifdef LIBXML_VALID_ENABLED
5804
12.2k
        if (ctxt->validate) {
5805
0
            tmp = ret;
5806
0
            while (tmp != NULL) {
5807
0
                if (xmlStrEqual(name, tmp->name)) {
5808
0
                    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5809
0
              "standalone: attribute enumeration value token %s duplicated\n",
5810
0
                                     name, NULL);
5811
0
                    if (!xmlDictOwns(ctxt->dict, name))
5812
0
                        xmlFree(name);
5813
0
                    break;
5814
0
                }
5815
0
                tmp = tmp->next;
5816
0
            }
5817
0
        }
5818
12.2k
#endif /* LIBXML_VALID_ENABLED */
5819
12.2k
  if (tmp == NULL) {
5820
12.2k
      cur = xmlCreateEnumeration(name);
5821
12.2k
      if (!xmlDictOwns(ctxt->dict, name))
5822
12.2k
    xmlFree(name);
5823
12.2k
      if (cur == NULL) {
5824
5
                xmlErrMemory(ctxt);
5825
5
                xmlFreeEnumeration(ret);
5826
5
                return(NULL);
5827
5
            }
5828
12.2k
      if (last == NULL) ret = last = cur;
5829
1.54k
      else {
5830
1.54k
    last->next = cur;
5831
1.54k
    last = cur;
5832
1.54k
      }
5833
12.2k
  }
5834
12.2k
  SKIP_BLANKS_PE;
5835
12.2k
    } while (RAW == '|');
5836
10.6k
    if (RAW != ')') {
5837
439
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5838
439
  return(ret);
5839
439
    }
5840
10.2k
    NEXT;
5841
10.2k
    return(ret);
5842
10.6k
}
5843
5844
/**
5845
 * Parse an Enumerated attribute type.
5846
 *
5847
 * @deprecated Internal function, don't use.
5848
 *
5849
 *     [57] EnumeratedType ::= NotationType | Enumeration
5850
 *
5851
 *     [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5852
 *
5853
 * @param ctxt  an XML parser context
5854
 * @param tree  the enumeration tree built while parsing
5855
 * @returns XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION, or 0 on error
5856
 */
5857
5858
int
5859
12.1k
xmlParseEnumeratedType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5860
12.1k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5861
399
  SKIP(8);
5862
399
  if (SKIP_BLANKS_PE == 0) {
5863
35
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5864
35
         "Space required after 'NOTATION'\n");
5865
35
      return(0);
5866
35
  }
5867
364
  *tree = xmlParseNotationType(ctxt);
5868
364
  if (*tree == NULL) return(0);
5869
88
  return(XML_ATTRIBUTE_NOTATION);
5870
364
    }
5871
11.7k
    *tree = xmlParseEnumerationType(ctxt);
5872
11.7k
    if (*tree == NULL) return(0);
5873
10.6k
    return(XML_ATTRIBUTE_ENUMERATION);
5874
11.7k
}
5875
5876
/**
5877
 * Parse the attribute type (AttType) of an attribute definition
5878
 *
5879
 * @deprecated Internal function, don't use.
5880
 *
5881
 *     [54] AttType ::= StringType | TokenizedType | EnumeratedType
5882
 *
5883
 *     [55] StringType ::= 'CDATA'
5884
 *
5885
 *     [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5886
 *                            'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5887
 *
5888
 * Validity constraints for attribute values syntax are checked in
5889
 * #xmlValidateAttributeValue
5890
 *
5891
 * [ VC: ID ]
5892
 * Values of type ID must match the Name production. A name must not
5893
 * appear more than once in an XML document as a value of this type;
5894
 * i.e., ID values must uniquely identify the elements which bear them.
5895
 *
5896
 * [ VC: One ID per Element Type ]
5897
 * No element type may have more than one ID attribute specified.
5898
 *
5899
 * [ VC: ID Attribute Default ]
5900
 * An ID attribute must have a declared default of \#IMPLIED or \#REQUIRED.
5901
 *
5902
 * [ VC: IDREF ]
5903
 * Values of type IDREF must match the Name production, and values
5904
 * of type IDREFS must match Names; each IDREF Name must match the value
5905
 * of an ID attribute on some element in the XML document; i.e. IDREF
5906
 * values must match the value of some ID attribute.
5907
 *
5908
 * [ VC: Entity Name ]
5909
 * Values of type ENTITY must match the Name production, values
5910
 * of type ENTITIES must match Names; each Entity Name must match the
5911
 * name of an unparsed entity declared in the DTD.
5912
 *
5913
 * [ VC: Name Token ]
5914
 * Values of type NMTOKEN must match the Nmtoken production; values
5915
 * of type NMTOKENS must match Nmtokens.
5916
 *
5917
 * @param ctxt  an XML parser context
5918
 * @param tree  the enumeration tree built while parsing
5919
 * @returns the attribute type
5920
 */
5921
int
5922
34.3k
xmlParseAttributeType(xmlParserCtxt *ctxt, xmlEnumeration **tree) {
5923
34.3k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5924
9.09k
  SKIP(5);
5925
9.09k
  return(XML_ATTRIBUTE_CDATA);
5926
25.2k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5927
424
  SKIP(6);
5928
424
  return(XML_ATTRIBUTE_IDREFS);
5929
24.8k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5930
148
  SKIP(5);
5931
148
  return(XML_ATTRIBUTE_IDREF);
5932
24.6k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5933
6.46k
        SKIP(2);
5934
6.46k
  return(XML_ATTRIBUTE_ID);
5935
18.2k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5936
3.79k
  SKIP(6);
5937
3.79k
  return(XML_ATTRIBUTE_ENTITY);
5938
14.4k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5939
799
  SKIP(8);
5940
799
  return(XML_ATTRIBUTE_ENTITIES);
5941
13.6k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5942
628
  SKIP(8);
5943
628
  return(XML_ATTRIBUTE_NMTOKENS);
5944
12.9k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5945
836
  SKIP(7);
5946
836
  return(XML_ATTRIBUTE_NMTOKEN);
5947
836
     }
5948
12.1k
     return(xmlParseEnumeratedType(ctxt, tree));
5949
34.3k
}
5950
5951
/**
5952
 * Parse an attribute list declaration for an element. Always consumes '<!'.
5953
 *
5954
 * @deprecated Internal function, don't use.
5955
 *
5956
 *     [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5957
 *
5958
 *     [53] AttDef ::= S Name S AttType S DefaultDecl
5959
 * @param ctxt  an XML parser context
5960
 */
5961
void
5962
25.9k
xmlParseAttributeListDecl(xmlParserCtxt *ctxt) {
5963
25.9k
    const xmlChar *elemName;
5964
25.9k
    const xmlChar *attrName;
5965
25.9k
    xmlEnumerationPtr tree;
5966
5967
25.9k
    if ((CUR != '<') || (NXT(1) != '!'))
5968
0
        return;
5969
25.9k
    SKIP(2);
5970
5971
25.9k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5972
25.8k
#ifdef LIBXML_VALID_ENABLED
5973
25.8k
  int oldInputNr = ctxt->inputNr;
5974
25.8k
#endif
5975
5976
25.8k
  SKIP(7);
5977
25.8k
  if (SKIP_BLANKS_PE == 0) {
5978
8.75k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5979
8.75k
                     "Space required after '<!ATTLIST'\n");
5980
8.75k
  }
5981
25.8k
        elemName = xmlParseName(ctxt);
5982
25.8k
  if (elemName == NULL) {
5983
1.63k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5984
1.63k
         "ATTLIST: no name for Element\n");
5985
1.63k
      return;
5986
1.63k
  }
5987
24.2k
  SKIP_BLANKS_PE;
5988
24.2k
  GROW;
5989
47.0k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
5990
42.4k
      int type;
5991
42.4k
      int def;
5992
42.4k
      xmlChar *defaultValue = NULL;
5993
5994
42.4k
      GROW;
5995
42.4k
            tree = NULL;
5996
42.4k
      attrName = xmlParseName(ctxt);
5997
42.4k
      if (attrName == NULL) {
5998
6.60k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5999
6.60k
             "ATTLIST: no name for Attribute\n");
6000
6.60k
    break;
6001
6.60k
      }
6002
35.8k
      GROW;
6003
35.8k
      if (SKIP_BLANKS_PE == 0) {
6004
1.48k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6005
1.48k
            "Space required after the attribute name\n");
6006
1.48k
    break;
6007
1.48k
      }
6008
6009
34.3k
      type = xmlParseAttributeType(ctxt, &tree);
6010
34.3k
      if (type <= 0) {
6011
1.37k
          break;
6012
1.37k
      }
6013
6014
32.9k
      GROW;
6015
32.9k
      if (SKIP_BLANKS_PE == 0) {
6016
2.32k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6017
2.32k
             "Space required after the attribute type\n");
6018
2.32k
          if (tree != NULL)
6019
562
        xmlFreeEnumeration(tree);
6020
2.32k
    break;
6021
2.32k
      }
6022
6023
30.6k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6024
30.6k
      if (def <= 0) {
6025
0
                if (defaultValue != NULL)
6026
0
        xmlFree(defaultValue);
6027
0
          if (tree != NULL)
6028
0
        xmlFreeEnumeration(tree);
6029
0
          break;
6030
0
      }
6031
30.6k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6032
16.3k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6033
6034
30.6k
      GROW;
6035
30.6k
            if (RAW != '>') {
6036
27.7k
    if (SKIP_BLANKS_PE == 0) {
6037
7.79k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6038
7.79k
      "Space required after the attribute default value\n");
6039
7.79k
        if (defaultValue != NULL)
6040
424
      xmlFree(defaultValue);
6041
7.79k
        if (tree != NULL)
6042
1.81k
      xmlFreeEnumeration(tree);
6043
7.79k
        break;
6044
7.79k
    }
6045
27.7k
      }
6046
22.8k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6047
11.5k
    (ctxt->sax->attributeDecl != NULL))
6048
11.5k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6049
11.5k
                          type, def, defaultValue, tree);
6050
11.3k
      else if (tree != NULL)
6051
3.37k
    xmlFreeEnumeration(tree);
6052
6053
22.8k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6054
18.6k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6055
18.6k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6056
18.6k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6057
18.6k
      }
6058
22.8k
      if (ctxt->sax2) {
6059
22.8k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6060
22.8k
      }
6061
22.8k
      if (defaultValue != NULL)
6062
18.6k
          xmlFree(defaultValue);
6063
22.8k
      GROW;
6064
22.8k
  }
6065
24.2k
  if (RAW == '>') {
6066
5.27k
#ifdef LIBXML_VALID_ENABLED
6067
5.27k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6068
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6069
0
                                 "Attribute list declaration doesn't start and"
6070
0
                                 " stop in the same entity\n",
6071
0
                                 NULL, NULL);
6072
0
      }
6073
5.27k
#endif
6074
5.27k
      NEXT;
6075
5.27k
  }
6076
24.2k
    }
6077
25.9k
}
6078
6079
/**
6080
 * Handle PEs and check that we don't pop the entity that started
6081
 * a balanced group.
6082
 *
6083
 * @param ctxt  parser context
6084
 * @param openInputNr  input nr of the entity with opening '('
6085
 */
6086
static void
6087
105k
xmlSkipBlankCharsPEBalanced(xmlParserCtxt *ctxt, int openInputNr) {
6088
105k
    SKIP_BLANKS;
6089
105k
    GROW;
6090
6091
105k
    (void) openInputNr;
6092
6093
105k
    if (!PARSER_EXTERNAL(ctxt) && !PARSER_IN_PE(ctxt))
6094
79.1k
        return;
6095
6096
31.7k
    while (!PARSER_STOPPED(ctxt)) {
6097
31.3k
        if (ctxt->input->cur >= ctxt->input->end) {
6098
1.91k
#ifdef LIBXML_VALID_ENABLED
6099
1.91k
            if ((ctxt->validate) && (ctxt->inputNr <= openInputNr)) {
6100
0
                xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6101
0
                                 "Element content declaration doesn't start "
6102
0
                                 "and stop in the same entity\n",
6103
0
                                 NULL, NULL);
6104
0
            }
6105
1.91k
#endif
6106
1.91k
            if (PARSER_IN_PE(ctxt))
6107
1.90k
                xmlPopPE(ctxt);
6108
17
            else
6109
17
                break;
6110
29.4k
        } else if (RAW == '%') {
6111
3.44k
            xmlParsePERefInternal(ctxt, 0);
6112
26.0k
        } else {
6113
26.0k
            break;
6114
26.0k
        }
6115
6116
5.34k
        SKIP_BLANKS;
6117
5.34k
        GROW;
6118
5.34k
    }
6119
26.3k
}
6120
6121
/**
6122
 * Parse the declaration for a Mixed Element content
6123
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6124
 *
6125
 * @deprecated Internal function, don't use.
6126
 *
6127
 *     [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6128
 *                    '(' S? '#PCDATA' S? ')'
6129
 *
6130
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6131
 *
6132
 * [ VC: No Duplicate Types ]
6133
 * The same name must not appear more than once in a single
6134
 * mixed-content declaration.
6135
 *
6136
 * @param ctxt  an XML parser context
6137
 * @param openInputNr  the input used for the current entity, needed for
6138
 * boundary checks
6139
 * @returns the list of the xmlElementContent describing the element choices
6140
 */
6141
xmlElementContent *
6142
2.80k
xmlParseElementMixedContentDecl(xmlParserCtxt *ctxt, int openInputNr) {
6143
2.80k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6144
2.80k
    const xmlChar *elem = NULL;
6145
6146
2.80k
    GROW;
6147
2.80k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6148
2.80k
  SKIP(7);
6149
2.80k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6150
2.80k
  if (RAW == ')') {
6151
1.86k
#ifdef LIBXML_VALID_ENABLED
6152
1.86k
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6153
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6154
0
                                 "Element content declaration doesn't start "
6155
0
                                 "and stop in the same entity\n",
6156
0
                                 NULL, NULL);
6157
0
      }
6158
1.86k
#endif
6159
1.86k
      NEXT;
6160
1.86k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6161
1.86k
      if (ret == NULL)
6162
1
                goto mem_error;
6163
1.85k
      if (RAW == '*') {
6164
248
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6165
248
    NEXT;
6166
248
      }
6167
1.85k
      return(ret);
6168
1.86k
  }
6169
943
  if ((RAW == '(') || (RAW == '|')) {
6170
685
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6171
685
      if (ret == NULL)
6172
1
                goto mem_error;
6173
685
  }
6174
3.66k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6175
2.87k
      NEXT;
6176
2.87k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6177
2.87k
            if (n == NULL)
6178
0
                goto mem_error;
6179
2.87k
      if (elem == NULL) {
6180
682
    n->c1 = cur;
6181
682
    if (cur != NULL)
6182
682
        cur->parent = n;
6183
682
    ret = cur = n;
6184
2.19k
      } else {
6185
2.19k
          cur->c2 = n;
6186
2.19k
    n->parent = cur;
6187
2.19k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6188
2.19k
                if (n->c1 == NULL)
6189
1
                    goto mem_error;
6190
2.19k
    n->c1->parent = n;
6191
2.19k
    cur = n;
6192
2.19k
      }
6193
2.87k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6194
2.87k
      elem = xmlParseName(ctxt);
6195
2.87k
      if (elem == NULL) {
6196
154
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6197
154
      "xmlParseElementMixedContentDecl : Name expected\n");
6198
154
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6199
154
    return(NULL);
6200
154
      }
6201
2.72k
            xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6202
2.72k
  }
6203
787
  if ((RAW == ')') && (NXT(1) == '*')) {
6204
351
      if (elem != NULL) {
6205
351
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6206
351
                                   XML_ELEMENT_CONTENT_ELEMENT);
6207
351
    if (cur->c2 == NULL)
6208
1
                    goto mem_error;
6209
350
    cur->c2->parent = cur;
6210
350
            }
6211
350
            if (ret != NULL)
6212
350
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6213
350
#ifdef LIBXML_VALID_ENABLED
6214
350
      if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6215
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6216
0
                                 "Element content declaration doesn't start "
6217
0
                                 "and stop in the same entity\n",
6218
0
                                 NULL, NULL);
6219
0
      }
6220
350
#endif
6221
350
      SKIP(2);
6222
436
  } else {
6223
436
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6224
436
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6225
436
      return(NULL);
6226
436
  }
6227
6228
787
    } else {
6229
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6230
0
    }
6231
350
    return(ret);
6232
6233
4
mem_error:
6234
4
    xmlErrMemory(ctxt);
6235
4
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6236
4
    return(NULL);
6237
2.80k
}
6238
6239
/**
6240
 * Parse the declaration for a Mixed Element content
6241
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6242
 *
6243
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6244
 *
6245
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6246
 *
6247
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6248
 *
6249
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6250
 *
6251
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6252
 * TODO Parameter-entity replacement text must be properly nested
6253
 *  with parenthesized groups. That is to say, if either of the
6254
 *  opening or closing parentheses in a choice, seq, or Mixed
6255
 *  construct is contained in the replacement text for a parameter
6256
 *  entity, both must be contained in the same replacement text. For
6257
 *  interoperability, if a parameter-entity reference appears in a
6258
 *  choice, seq, or Mixed construct, its replacement text should not
6259
 *  be empty, and neither the first nor last non-blank character of
6260
 *  the replacement text should be a connector (| or ,).
6261
 *
6262
 * @param ctxt  an XML parser context
6263
 * @param openInputNr  the input used for the current entity, needed for
6264
 * boundary checks
6265
 * @param depth  the level of recursion
6266
 * @returns the tree of xmlElementContent describing the element
6267
 *          hierarchy.
6268
 */
6269
static xmlElementContentPtr
6270
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int openInputNr,
6271
27.7k
                                       int depth) {
6272
27.7k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6273
27.7k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6274
27.7k
    const xmlChar *elem;
6275
27.7k
    xmlChar type = 0;
6276
6277
27.7k
    if (depth > maxDepth) {
6278
1
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6279
1
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6280
1
                "use XML_PARSE_HUGE\n", depth);
6281
1
  return(NULL);
6282
1
    }
6283
27.7k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6284
27.7k
    if (RAW == '(') {
6285
5.60k
        int newInputNr = ctxt->inputNr;
6286
6287
        /* Recurse on first child */
6288
5.60k
  NEXT;
6289
5.60k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6290
5.60k
                                                           depth + 1);
6291
5.60k
        if (cur == NULL)
6292
3.93k
            return(NULL);
6293
22.1k
    } else {
6294
22.1k
  elem = xmlParseName(ctxt);
6295
22.1k
  if (elem == NULL) {
6296
326
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6297
326
      return(NULL);
6298
326
  }
6299
21.8k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6300
21.8k
  if (cur == NULL) {
6301
3
      xmlErrMemory(ctxt);
6302
3
      return(NULL);
6303
3
  }
6304
21.8k
  GROW;
6305
21.8k
  if (RAW == '?') {
6306
1.22k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6307
1.22k
      NEXT;
6308
20.6k
  } else if (RAW == '*') {
6309
1.84k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6310
1.84k
      NEXT;
6311
18.7k
  } else if (RAW == '+') {
6312
1.47k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6313
1.47k
      NEXT;
6314
17.2k
  } else {
6315
17.2k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6316
17.2k
  }
6317
21.8k
  GROW;
6318
21.8k
    }
6319
37.0k
    while (!PARSER_STOPPED(ctxt)) {
6320
35.7k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6321
35.7k
        if (RAW == ')')
6322
8.79k
            break;
6323
        /*
6324
   * Each loop we parse one separator and one element.
6325
   */
6326
26.9k
        if (RAW == ',') {
6327
1.89k
      if (type == 0) type = CUR;
6328
6329
      /*
6330
       * Detect "Name | Name , Name" error
6331
       */
6332
583
      else if (type != CUR) {
6333
2
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6334
2
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6335
2
                      type);
6336
2
    if ((last != NULL) && (last != ret))
6337
2
        xmlFreeDocElementContent(ctxt->myDoc, last);
6338
2
    if (ret != NULL)
6339
2
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6340
2
    return(NULL);
6341
2
      }
6342
1.89k
      NEXT;
6343
6344
1.89k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6345
1.89k
      if (op == NULL) {
6346
2
                xmlErrMemory(ctxt);
6347
2
    if ((last != NULL) && (last != ret))
6348
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6349
2
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6350
2
    return(NULL);
6351
2
      }
6352
1.89k
      if (last == NULL) {
6353
1.31k
    op->c1 = ret;
6354
1.31k
    if (ret != NULL)
6355
1.31k
        ret->parent = op;
6356
1.31k
    ret = cur = op;
6357
1.31k
      } else {
6358
580
          cur->c2 = op;
6359
580
    if (op != NULL)
6360
580
        op->parent = cur;
6361
580
    op->c1 = last;
6362
580
    if (last != NULL)
6363
580
        last->parent = op;
6364
580
    cur =op;
6365
580
    last = NULL;
6366
580
      }
6367
25.0k
  } else if (RAW == '|') {
6368
23.8k
      if (type == 0) type = CUR;
6369
6370
      /*
6371
       * Detect "Name , Name | Name" error
6372
       */
6373
7.16k
      else if (type != CUR) {
6374
2
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6375
2
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6376
2
          type);
6377
2
    if ((last != NULL) && (last != ret))
6378
2
        xmlFreeDocElementContent(ctxt->myDoc, last);
6379
2
    if (ret != NULL)
6380
2
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6381
2
    return(NULL);
6382
2
      }
6383
23.8k
      NEXT;
6384
6385
23.8k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6386
23.8k
      if (op == NULL) {
6387
2
                xmlErrMemory(ctxt);
6388
2
    if ((last != NULL) && (last != ret))
6389
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
2
    if (ret != NULL)
6391
2
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
2
    return(NULL);
6393
2
      }
6394
23.7k
      if (last == NULL) {
6395
16.6k
    op->c1 = ret;
6396
16.6k
    if (ret != NULL)
6397
16.6k
        ret->parent = op;
6398
16.6k
    ret = cur = op;
6399
16.6k
      } else {
6400
7.16k
          cur->c2 = op;
6401
7.16k
    if (op != NULL)
6402
7.16k
        op->parent = cur;
6403
7.16k
    op->c1 = last;
6404
7.16k
    if (last != NULL)
6405
7.16k
        last->parent = op;
6406
7.16k
    cur =op;
6407
7.16k
    last = NULL;
6408
7.16k
      }
6409
23.7k
  } else {
6410
1.21k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6411
1.21k
      if ((last != NULL) && (last != ret))
6412
591
          xmlFreeDocElementContent(ctxt->myDoc, last);
6413
1.21k
      if (ret != NULL)
6414
1.21k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6415
1.21k
      return(NULL);
6416
1.21k
  }
6417
25.6k
        xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6418
25.6k
        if (RAW == '(') {
6419
16.9k
            int newInputNr = ctxt->inputNr;
6420
6421
      /* Recurse on second child */
6422
16.9k
      NEXT;
6423
16.9k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, newInputNr,
6424
16.9k
                                                          depth + 1);
6425
16.9k
            if (last == NULL) {
6426
11.8k
    if (ret != NULL)
6427
11.8k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6428
11.8k
    return(NULL);
6429
11.8k
            }
6430
16.9k
  } else {
6431
8.72k
      elem = xmlParseName(ctxt);
6432
8.72k
      if (elem == NULL) {
6433
266
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6434
266
    if (ret != NULL)
6435
266
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6436
266
    return(NULL);
6437
266
      }
6438
8.46k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6439
8.46k
      if (last == NULL) {
6440
1
                xmlErrMemory(ctxt);
6441
1
    if (ret != NULL)
6442
1
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6443
1
    return(NULL);
6444
1
      }
6445
8.46k
      if (RAW == '?') {
6446
1.20k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6447
1.20k
    NEXT;
6448
7.26k
      } else if (RAW == '*') {
6449
620
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6450
620
    NEXT;
6451
6.64k
      } else if (RAW == '+') {
6452
354
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6453
354
    NEXT;
6454
6.28k
      } else {
6455
6.28k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6456
6.28k
      }
6457
8.46k
  }
6458
25.6k
    }
6459
10.1k
    if ((cur != NULL) && (last != NULL)) {
6460
5.24k
        cur->c2 = last;
6461
5.24k
  if (last != NULL)
6462
5.24k
      last->parent = cur;
6463
5.24k
    }
6464
10.1k
#ifdef LIBXML_VALID_ENABLED
6465
10.1k
    if ((ctxt->validate) && (ctxt->inputNr > openInputNr)) {
6466
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6467
0
                         "Element content declaration doesn't start "
6468
0
                         "and stop in the same entity\n",
6469
0
                         NULL, NULL);
6470
0
    }
6471
10.1k
#endif
6472
10.1k
    NEXT;
6473
10.1k
    if (RAW == '?') {
6474
1.20k
  if (ret != NULL) {
6475
1.20k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6476
584
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6477
725
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6478
475
      else
6479
475
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6480
1.20k
  }
6481
1.20k
  NEXT;
6482
8.97k
    } else if (RAW == '*') {
6483
3.02k
  if (ret != NULL) {
6484
3.02k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6485
3.02k
      cur = ret;
6486
      /*
6487
       * Some normalization:
6488
       * (a | b* | c?)* == (a | b | c)*
6489
       */
6490
6.67k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6491
3.64k
    if ((cur->c1 != NULL) &&
6492
3.64k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6493
3.51k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6494
485
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6495
3.64k
    if ((cur->c2 != NULL) &&
6496
3.64k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6497
3.16k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6498
891
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6499
3.64k
    cur = cur->c2;
6500
3.64k
      }
6501
3.02k
  }
6502
3.02k
  NEXT;
6503
5.95k
    } else if (RAW == '+') {
6504
1.32k
  if (ret != NULL) {
6505
1.32k
      int found = 0;
6506
6507
1.32k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6508
975
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6509
438
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6510
891
      else
6511
891
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6512
      /*
6513
       * Some normalization:
6514
       * (a | b*)+ == (a | b)*
6515
       * (a | b?)+ == (a | b)*
6516
       */
6517
6.69k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6518
5.36k
    if ((cur->c1 != NULL) &&
6519
5.36k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6520
5.07k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6521
610
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6522
610
        found = 1;
6523
610
    }
6524
5.36k
    if ((cur->c2 != NULL) &&
6525
5.36k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6526
4.94k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6527
531
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6528
531
        found = 1;
6529
531
    }
6530
5.36k
    cur = cur->c2;
6531
5.36k
      }
6532
1.32k
      if (found)
6533
660
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6534
1.32k
  }
6535
1.32k
  NEXT;
6536
1.32k
    }
6537
10.1k
    return(ret);
6538
23.5k
}
6539
6540
/**
6541
 * Parse the declaration for a Mixed Element content
6542
 * The leading '(' and spaces have been skipped in #xmlParseElementContentDecl
6543
 *
6544
 * @deprecated Internal function, don't use.
6545
 *
6546
 *     [47] children ::= (choice | seq) ('?' | '*' | '+')?
6547
 *
6548
 *     [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6549
 *
6550
 *     [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6551
 *
6552
 *     [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6553
 *
6554
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6555
 * TODO Parameter-entity replacement text must be properly nested
6556
 *  with parenthesized groups. That is to say, if either of the
6557
 *  opening or closing parentheses in a choice, seq, or Mixed
6558
 *  construct is contained in the replacement text for a parameter
6559
 *  entity, both must be contained in the same replacement text. For
6560
 *  interoperability, if a parameter-entity reference appears in a
6561
 *  choice, seq, or Mixed construct, its replacement text should not
6562
 *  be empty, and neither the first nor last non-blank character of
6563
 *  the replacement text should be a connector (| or ,).
6564
 *
6565
 * @param ctxt  an XML parser context
6566
 * @param inputchk  the input used for the current entity, needed for boundary checks
6567
 * @returns the tree of xmlElementContent describing the element
6568
 *          hierarchy.
6569
 */
6570
xmlElementContent *
6571
0
xmlParseElementChildrenContentDecl(xmlParserCtxt *ctxt, int inputchk) {
6572
    /* stub left for API/ABI compat */
6573
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6574
0
}
6575
6576
/**
6577
 * Parse the declaration for an Element content either Mixed or Children,
6578
 * the cases EMPTY and ANY are handled directly in #xmlParseElementDecl
6579
 *
6580
 * @deprecated Internal function, don't use.
6581
 *
6582
 *     [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6583
 *
6584
 * @param ctxt  an XML parser context
6585
 * @param name  the name of the element being defined.
6586
 * @param result  the Element Content pointer will be stored here if any
6587
 * @returns an xmlElementTypeVal value or -1 on error
6588
 */
6589
6590
int
6591
xmlParseElementContentDecl(xmlParserCtxt *ctxt, const xmlChar *name,
6592
7.99k
                           xmlElementContent **result) {
6593
6594
7.99k
    xmlElementContentPtr tree = NULL;
6595
7.99k
    int openInputNr = ctxt->inputNr;
6596
7.99k
    int res;
6597
6598
7.99k
    *result = NULL;
6599
6600
7.99k
    if (RAW != '(') {
6601
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6602
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6603
0
  return(-1);
6604
0
    }
6605
7.99k
    NEXT;
6606
7.99k
    xmlSkipBlankCharsPEBalanced(ctxt, openInputNr);
6607
7.99k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6608
2.80k
        tree = xmlParseElementMixedContentDecl(ctxt, openInputNr);
6609
2.80k
  res = XML_ELEMENT_TYPE_MIXED;
6610
5.19k
    } else {
6611
5.19k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, openInputNr, 1);
6612
5.19k
  res = XML_ELEMENT_TYPE_ELEMENT;
6613
5.19k
    }
6614
7.99k
    if (tree == NULL)
6615
2.41k
        return(-1);
6616
5.57k
    SKIP_BLANKS_PE;
6617
5.57k
    *result = tree;
6618
5.57k
    return(res);
6619
7.99k
}
6620
6621
/**
6622
 * Parse an element declaration. Always consumes '<!'.
6623
 *
6624
 * @deprecated Internal function, don't use.
6625
 *
6626
 *     [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6627
 *
6628
 * [ VC: Unique Element Type Declaration ]
6629
 * No element type may be declared more than once
6630
 *
6631
 * @param ctxt  an XML parser context
6632
 * @returns the type of the element, or -1 in case of error
6633
 */
6634
int
6635
10.8k
xmlParseElementDecl(xmlParserCtxt *ctxt) {
6636
10.8k
    const xmlChar *name;
6637
10.8k
    int ret = -1;
6638
10.8k
    xmlElementContentPtr content  = NULL;
6639
6640
10.8k
    if ((CUR != '<') || (NXT(1) != '!'))
6641
0
        return(ret);
6642
10.8k
    SKIP(2);
6643
6644
    /* GROW; done in the caller */
6645
10.8k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6646
10.8k
#ifdef LIBXML_VALID_ENABLED
6647
10.8k
  int oldInputNr = ctxt->inputNr;
6648
10.8k
#endif
6649
6650
10.8k
  SKIP(7);
6651
10.8k
  if (SKIP_BLANKS_PE == 0) {
6652
565
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6653
565
               "Space required after 'ELEMENT'\n");
6654
565
      return(-1);
6655
565
  }
6656
10.3k
        name = xmlParseName(ctxt);
6657
10.3k
  if (name == NULL) {
6658
278
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6659
278
         "xmlParseElementDecl: no name for Element\n");
6660
278
      return(-1);
6661
278
  }
6662
10.0k
  if (SKIP_BLANKS_PE == 0) {
6663
2.01k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6664
2.01k
         "Space required after the element name\n");
6665
2.01k
  }
6666
10.0k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6667
1.34k
      SKIP(5);
6668
      /*
6669
       * Element must always be empty.
6670
       */
6671
1.34k
      ret = XML_ELEMENT_TYPE_EMPTY;
6672
8.68k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6673
115
             (NXT(2) == 'Y')) {
6674
113
      SKIP(3);
6675
      /*
6676
       * Element is a generic container.
6677
       */
6678
113
      ret = XML_ELEMENT_TYPE_ANY;
6679
8.57k
  } else if (RAW == '(') {
6680
7.99k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6681
7.99k
            if (ret <= 0)
6682
2.41k
                return(-1);
6683
7.99k
  } else {
6684
      /*
6685
       * [ WFC: PEs in Internal Subset ] error handling.
6686
       */
6687
582
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6688
582
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6689
582
      return(-1);
6690
582
  }
6691
6692
7.03k
  SKIP_BLANKS_PE;
6693
6694
7.03k
  if (RAW != '>') {
6695
1.80k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6696
1.80k
      if (content != NULL) {
6697
1.62k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6698
1.62k
      }
6699
5.23k
  } else {
6700
5.23k
#ifdef LIBXML_VALID_ENABLED
6701
5.23k
      if ((ctxt->validate) && (ctxt->inputNr > oldInputNr)) {
6702
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6703
0
                                 "Element declaration doesn't start and stop in"
6704
0
                                 " the same entity\n",
6705
0
                                 NULL, NULL);
6706
0
      }
6707
5.23k
#endif
6708
6709
5.23k
      NEXT;
6710
5.23k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6711
2.81k
    (ctxt->sax->elementDecl != NULL)) {
6712
2.81k
    if (content != NULL)
6713
2.18k
        content->parent = NULL;
6714
2.81k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6715
2.81k
                           content);
6716
2.81k
    if ((content != NULL) && (content->parent == NULL)) {
6717
        /*
6718
         * this is a trick: if xmlAddElementDecl is called,
6719
         * instead of copying the full tree it is plugged directly
6720
         * if called from the parser. Avoid duplicating the
6721
         * interfaces or change the API/ABI
6722
         */
6723
659
        xmlFreeDocElementContent(ctxt->myDoc, content);
6724
659
    }
6725
2.81k
      } else if (content != NULL) {
6726
1.77k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6727
1.77k
      }
6728
5.23k
  }
6729
7.03k
    }
6730
7.05k
    return(ret);
6731
10.8k
}
6732
6733
/**
6734
 * Parse a conditional section. Always consumes '<!['.
6735
 *
6736
 *     [61] conditionalSect ::= includeSect | ignoreSect
6737
 *     [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6738
 *     [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6739
 *     [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>'
6740
 *                                 Ignore)*
6741
 *     [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6742
 * @param ctxt  an XML parser context
6743
 */
6744
6745
static void
6746
2.01k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6747
2.01k
    size_t depth = 0;
6748
2.01k
    int isFreshPE = 0;
6749
2.01k
    int oldInputNr = ctxt->inputNr;
6750
2.01k
    int declInputNr = ctxt->inputNr;
6751
6752
4.29k
    while (!PARSER_STOPPED(ctxt)) {
6753
4.28k
        if (ctxt->input->cur >= ctxt->input->end) {
6754
329
            if (ctxt->inputNr <= oldInputNr) {
6755
288
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6756
288
                return;
6757
288
            }
6758
6759
41
            xmlPopPE(ctxt);
6760
41
            declInputNr = ctxt->inputNr;
6761
3.95k
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6762
2.25k
            SKIP(3);
6763
2.25k
            SKIP_BLANKS_PE;
6764
6765
2.25k
            isFreshPE = 0;
6766
6767
2.25k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6768
852
                SKIP(7);
6769
852
                SKIP_BLANKS_PE;
6770
852
                if (RAW != '[') {
6771
217
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6772
217
                    return;
6773
217
                }
6774
635
#ifdef LIBXML_VALID_ENABLED
6775
635
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6776
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6777
0
                                     "All markup of the conditional section is"
6778
0
                                     " not in the same entity\n",
6779
0
                                     NULL, NULL);
6780
0
                }
6781
635
#endif
6782
635
                NEXT;
6783
6784
635
                depth++;
6785
1.40k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6786
1.14k
                size_t ignoreDepth = 0;
6787
6788
1.14k
                SKIP(6);
6789
1.14k
                SKIP_BLANKS_PE;
6790
1.14k
                if (RAW != '[') {
6791
295
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6792
295
                    return;
6793
295
                }
6794
849
#ifdef LIBXML_VALID_ENABLED
6795
849
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6796
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6797
0
                                     "All markup of the conditional section is"
6798
0
                                     " not in the same entity\n",
6799
0
                                     NULL, NULL);
6800
0
                }
6801
849
#endif
6802
849
                NEXT;
6803
6804
135k
                while (PARSER_STOPPED(ctxt) == 0) {
6805
135k
                    if (RAW == 0) {
6806
546
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6807
546
                        return;
6808
546
                    }
6809
135k
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6810
1.05k
                        SKIP(3);
6811
1.05k
                        ignoreDepth++;
6812
                        /* Check for integer overflow */
6813
1.05k
                        if (ignoreDepth == 0) {
6814
0
                            xmlErrMemory(ctxt);
6815
0
                            return;
6816
0
                        }
6817
134k
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6818
1.58k
                               (NXT(2) == '>')) {
6819
402
                        SKIP(3);
6820
402
                        if (ignoreDepth == 0)
6821
301
                            break;
6822
101
                        ignoreDepth--;
6823
133k
                    } else {
6824
133k
                        NEXT;
6825
133k
                    }
6826
135k
                }
6827
6828
303
#ifdef LIBXML_VALID_ENABLED
6829
303
                if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6830
0
        xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6831
0
                                     "All markup of the conditional section is"
6832
0
                                     " not in the same entity\n",
6833
0
                                     NULL, NULL);
6834
0
                }
6835
303
#endif
6836
303
            } else {
6837
260
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6838
260
                return;
6839
260
            }
6840
2.25k
        } else if ((depth > 0) &&
6841
1.70k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6842
78
            if (isFreshPE) {
6843
0
                xmlFatalErrMsg(ctxt, XML_ERR_CONDSEC_INVALID,
6844
0
                               "Parameter entity must match "
6845
0
                               "extSubsetDecl\n");
6846
0
                return;
6847
0
            }
6848
6849
78
            depth--;
6850
78
#ifdef LIBXML_VALID_ENABLED
6851
78
            if ((ctxt->validate) && (ctxt->inputNr > declInputNr)) {
6852
0
    xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6853
0
                                 "All markup of the conditional section is not"
6854
0
                                 " in the same entity\n",
6855
0
                                 NULL, NULL);
6856
0
            }
6857
78
#endif
6858
78
            SKIP(3);
6859
1.62k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6860
1.21k
            isFreshPE = 0;
6861
1.21k
            xmlParseMarkupDecl(ctxt);
6862
1.21k
        } else if (RAW == '%') {
6863
388
            xmlParsePERefInternal(ctxt, 1);
6864
388
            if (ctxt->inputNr > declInputNr) {
6865
47
                isFreshPE = 1;
6866
47
                declInputNr = ctxt->inputNr;
6867
47
            }
6868
388
        } else {
6869
22
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6870
22
            return;
6871
22
        }
6872
6873
2.65k
        if (depth == 0)
6874
381
            break;
6875
6876
2.27k
        SKIP_BLANKS;
6877
2.27k
        SHRINK;
6878
2.27k
        GROW;
6879
2.27k
    }
6880
2.01k
}
6881
6882
/**
6883
 * Parse markup declarations. Always consumes '<!' or '<?'.
6884
 *
6885
 * @deprecated Internal function, don't use.
6886
 *
6887
 *     [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6888
 *                         NotationDecl | PI | Comment
6889
 *
6890
 * [ VC: Proper Declaration/PE Nesting ]
6891
 * Parameter-entity replacement text must be properly nested with
6892
 * markup declarations. That is to say, if either the first character
6893
 * or the last character of a markup declaration (markupdecl above) is
6894
 * contained in the replacement text for a parameter-entity reference,
6895
 * both must be contained in the same replacement text.
6896
 *
6897
 * [ WFC: PEs in Internal Subset ]
6898
 * In the internal DTD subset, parameter-entity references can occur
6899
 * only where markup declarations can occur, not within markup declarations.
6900
 * (This does not apply to references that occur in external parameter
6901
 * entities or to the external subset.)
6902
 *
6903
 * @param ctxt  an XML parser context
6904
 */
6905
void
6906
108k
xmlParseMarkupDecl(xmlParserCtxt *ctxt) {
6907
108k
    GROW;
6908
108k
    if (CUR == '<') {
6909
108k
        if (NXT(1) == '!') {
6910
95.3k
      switch (NXT(2)) {
6911
55.8k
          case 'E':
6912
55.8k
        if (NXT(3) == 'L')
6913
10.8k
      xmlParseElementDecl(ctxt);
6914
44.9k
        else if (NXT(3) == 'N')
6915
44.9k
      xmlParseEntityDecl(ctxt);
6916
20
                    else
6917
20
                        SKIP(2);
6918
55.8k
        break;
6919
25.9k
          case 'A':
6920
25.9k
        xmlParseAttributeListDecl(ctxt);
6921
25.9k
        break;
6922
3.88k
          case 'N':
6923
3.88k
        xmlParseNotationDecl(ctxt);
6924
3.88k
        break;
6925
6.37k
          case '-':
6926
6.37k
        xmlParseComment(ctxt);
6927
6.37k
        break;
6928
3.28k
    default:
6929
3.28k
                    xmlFatalErr(ctxt,
6930
3.28k
                                ctxt->inSubset == 2 ?
6931
0
                                    XML_ERR_EXT_SUBSET_NOT_FINISHED :
6932
3.28k
                                    XML_ERR_INT_SUBSET_NOT_FINISHED,
6933
3.28k
                                NULL);
6934
3.28k
                    SKIP(2);
6935
3.28k
        break;
6936
95.3k
      }
6937
95.3k
  } else if (NXT(1) == '?') {
6938
12.6k
      xmlParsePI(ctxt);
6939
12.6k
  }
6940
108k
    }
6941
108k
}
6942
6943
/**
6944
 * Parse an XML declaration header for external entities
6945
 *
6946
 * @deprecated Internal function, don't use.
6947
 *
6948
 *     [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6949
 * @param ctxt  an XML parser context
6950
 */
6951
6952
void
6953
57.4k
xmlParseTextDecl(xmlParserCtxt *ctxt) {
6954
57.4k
    xmlChar *version;
6955
6956
    /*
6957
     * We know that '<?xml' is here.
6958
     */
6959
57.4k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6960
57.4k
  SKIP(5);
6961
57.4k
    } else {
6962
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6963
0
  return;
6964
0
    }
6965
6966
57.4k
    if (SKIP_BLANKS == 0) {
6967
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6968
0
           "Space needed after '<?xml'\n");
6969
0
    }
6970
6971
    /*
6972
     * We may have the VersionInfo here.
6973
     */
6974
57.4k
    version = xmlParseVersionInfo(ctxt);
6975
57.4k
    if (version == NULL) {
6976
23.8k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6977
23.8k
        if (version == NULL) {
6978
5
            xmlErrMemory(ctxt);
6979
5
            return;
6980
5
        }
6981
33.6k
    } else {
6982
33.6k
  if (SKIP_BLANKS == 0) {
6983
695
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6984
695
               "Space needed here\n");
6985
695
  }
6986
33.6k
    }
6987
57.4k
    ctxt->input->version = version;
6988
6989
    /*
6990
     * We must have the encoding declaration
6991
     */
6992
57.4k
    xmlParseEncodingDecl(ctxt);
6993
6994
57.4k
    SKIP_BLANKS;
6995
57.4k
    if ((RAW == '?') && (NXT(1) == '>')) {
6996
1.29k
        SKIP(2);
6997
56.1k
    } else if (RAW == '>') {
6998
        /* Deprecated old WD ... */
6999
280
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7000
280
  NEXT;
7001
55.8k
    } else {
7002
55.8k
        int c;
7003
7004
55.8k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7005
778M
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7006
778M
            NEXT;
7007
778M
            if (c == '>')
7008
30.4k
                break;
7009
778M
        }
7010
55.8k
    }
7011
57.4k
}
7012
7013
/**
7014
 * Parse Markup declarations from an external subset
7015
 *
7016
 * @deprecated Internal function, don't use.
7017
 *
7018
 *     [30] extSubset ::= textDecl? extSubsetDecl
7019
 *
7020
 *     [31] extSubsetDecl ::= (markupdecl | conditionalSect |
7021
 *                             PEReference | S) *
7022
 * @param ctxt  an XML parser context
7023
 * @param publicId  the public identifier
7024
 * @param systemId  the system identifier (URL)
7025
 */
7026
void
7027
xmlParseExternalSubset(xmlParserCtxt *ctxt, const xmlChar *publicId,
7028
0
                       const xmlChar *systemId) {
7029
0
    int oldInputNr;
7030
7031
0
    xmlCtxtInitializeLate(ctxt);
7032
7033
0
    xmlDetectEncoding(ctxt);
7034
7035
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7036
0
  xmlParseTextDecl(ctxt);
7037
0
    }
7038
0
    if (ctxt->myDoc == NULL) {
7039
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7040
0
  if (ctxt->myDoc == NULL) {
7041
0
      xmlErrMemory(ctxt);
7042
0
      return;
7043
0
  }
7044
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7045
0
    }
7046
0
    if ((ctxt->myDoc->intSubset == NULL) &&
7047
0
        (xmlCreateIntSubset(ctxt->myDoc, NULL, publicId, systemId) == NULL)) {
7048
0
        xmlErrMemory(ctxt);
7049
0
    }
7050
7051
0
    ctxt->inSubset = 2;
7052
0
    oldInputNr = ctxt->inputNr;
7053
7054
0
    SKIP_BLANKS;
7055
0
    while (!PARSER_STOPPED(ctxt)) {
7056
0
        if (ctxt->input->cur >= ctxt->input->end) {
7057
0
            if (ctxt->inputNr <= oldInputNr) {
7058
0
                xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
7059
0
                break;
7060
0
            }
7061
7062
0
            xmlPopPE(ctxt);
7063
0
        } else if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7064
0
            xmlParseConditionalSections(ctxt);
7065
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7066
0
            xmlParseMarkupDecl(ctxt);
7067
0
        } else if (RAW == '%') {
7068
0
            xmlParsePERefInternal(ctxt, 1);
7069
0
        } else {
7070
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7071
7072
0
            while (ctxt->inputNr > oldInputNr)
7073
0
                xmlPopPE(ctxt);
7074
0
            break;
7075
0
        }
7076
0
        SKIP_BLANKS;
7077
0
        SHRINK;
7078
0
        GROW;
7079
0
    }
7080
0
}
7081
7082
/**
7083
 * Parse and handle entity references in content, depending on the SAX
7084
 * interface, this may end-up in a call to character() if this is a
7085
 * CharRef, a predefined entity, if there is no reference() callback.
7086
 * or if the parser was asked to switch to that mode.
7087
 *
7088
 * @deprecated Internal function, don't use.
7089
 *
7090
 * Always consumes '&'.
7091
 *
7092
 *     [67] Reference ::= EntityRef | CharRef
7093
 * @param ctxt  an XML parser context
7094
 */
7095
void
7096
47.1k
xmlParseReference(xmlParserCtxt *ctxt) {
7097
47.1k
    xmlEntityPtr ent = NULL;
7098
47.1k
    const xmlChar *name;
7099
47.1k
    xmlChar *val;
7100
7101
47.1k
    if (RAW != '&')
7102
0
        return;
7103
7104
    /*
7105
     * Simple case of a CharRef
7106
     */
7107
47.1k
    if (NXT(1) == '#') {
7108
22.6k
  int i = 0;
7109
22.6k
  xmlChar out[16];
7110
22.6k
  int value = xmlParseCharRef(ctxt);
7111
7112
22.6k
  if (value == 0)
7113
5.80k
      return;
7114
7115
        /*
7116
         * Just encode the value in UTF-8
7117
         */
7118
16.8k
        COPY_BUF(out, i, value);
7119
16.8k
        out[i] = 0;
7120
16.8k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7121
16.8k
            (!ctxt->disableSAX))
7122
14.3k
            ctxt->sax->characters(ctxt->userData, out, i);
7123
16.8k
  return;
7124
22.6k
    }
7125
7126
    /*
7127
     * We are seeing an entity reference
7128
     */
7129
24.5k
    name = xmlParseEntityRefInternal(ctxt);
7130
24.5k
    if (name == NULL)
7131
10.0k
        return;
7132
14.5k
    ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7133
14.5k
    if (ent == NULL) {
7134
        /*
7135
         * Create a reference for undeclared entities.
7136
         */
7137
3.00k
        if ((ctxt->replaceEntities == 0) &&
7138
0
            (ctxt->sax != NULL) &&
7139
0
            (ctxt->disableSAX == 0) &&
7140
0
            (ctxt->sax->reference != NULL)) {
7141
0
            ctxt->sax->reference(ctxt->userData, name);
7142
0
        }
7143
3.00k
        return;
7144
3.00k
    }
7145
11.5k
    if (!ctxt->wellFormed)
7146
5.59k
  return;
7147
7148
    /* special case of predefined entities */
7149
5.93k
    if ((ent->name == NULL) ||
7150
5.93k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7151
261
  val = ent->content;
7152
261
  if (val == NULL) return;
7153
  /*
7154
   * inline the entity.
7155
   */
7156
261
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7157
261
      (!ctxt->disableSAX))
7158
261
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7159
261
  return;
7160
261
    }
7161
7162
    /*
7163
     * Some users try to parse entities on their own and used to set
7164
     * the renamed "checked" member. Fix the flags to cover this
7165
     * case.
7166
     */
7167
5.67k
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7168
0
        ent->flags |= XML_ENT_PARSED;
7169
7170
    /*
7171
     * The first reference to the entity trigger a parsing phase
7172
     * where the ent->children is filled with the result from
7173
     * the parsing.
7174
     * Note: external parsed entities will not be loaded, it is not
7175
     * required for a non-validating parser, unless the parsing option
7176
     * of validating, or substituting entities were given. Doing so is
7177
     * far more secure as the parser will only process data coming from
7178
     * the document entity by default.
7179
     *
7180
     * FIXME: This doesn't work correctly since entities can be
7181
     * expanded with different namespace declarations in scope.
7182
     * For example:
7183
     *
7184
     * <!DOCTYPE doc [
7185
     *   <!ENTITY ent "<ns:elem/>">
7186
     * ]>
7187
     * <doc>
7188
     *   <decl1 xmlns:ns="urn:ns1">
7189
     *     &ent;
7190
     *   </decl1>
7191
     *   <decl2 xmlns:ns="urn:ns2">
7192
     *     &ent;
7193
     *   </decl2>
7194
     * </doc>
7195
     *
7196
     * Proposed fix:
7197
     *
7198
     * - Ignore current namespace declarations when parsing the
7199
     *   entity. If a prefix can't be resolved, don't report an error
7200
     *   but mark it as unresolved.
7201
     * - Try to resolve these prefixes when expanding the entity.
7202
     *   This will require a specialized version of xmlStaticCopyNode
7203
     *   which can also make use of the namespace hash table to avoid
7204
     *   quadratic behavior.
7205
     *
7206
     * Alternatively, we could simply reparse the entity on each
7207
     * expansion like we already do with custom SAX callbacks.
7208
     * External entity content should be cached in this case.
7209
     */
7210
5.67k
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7211
999
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7212
999
         ((ctxt->replaceEntities) ||
7213
5.67k
          (ctxt->validate)))) {
7214
5.67k
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7215
1.69k
            xmlCtxtParseEntity(ctxt, ent);
7216
3.97k
        } else if (ent->children == NULL) {
7217
            /*
7218
             * Probably running in SAX mode and the callbacks don't
7219
             * build the entity content. Parse the entity again.
7220
             *
7221
             * This will also be triggered in normal tree builder mode
7222
             * if an entity happens to be empty, causing unnecessary
7223
             * reloads. It's hard to come up with a reliable check in
7224
             * which mode we're running.
7225
             */
7226
535
            xmlCtxtParseEntity(ctxt, ent);
7227
535
        }
7228
5.67k
    }
7229
7230
    /*
7231
     * We also check for amplification if entities aren't substituted.
7232
     * They might be expanded later.
7233
     */
7234
5.67k
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7235
73
        return;
7236
7237
5.59k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7238
816
        return;
7239
7240
4.78k
    if (ctxt->replaceEntities == 0) {
7241
  /*
7242
   * Create a reference
7243
   */
7244
0
        if (ctxt->sax->reference != NULL)
7245
0
      ctxt->sax->reference(ctxt->userData, ent->name);
7246
4.78k
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7247
4.20k
        xmlNodePtr copy, cur;
7248
7249
        /*
7250
         * Seems we are generating the DOM content, copy the tree
7251
   */
7252
4.20k
        cur = ent->children;
7253
7254
        /*
7255
         * Handle first text node with SAX to coalesce text efficiently
7256
         */
7257
4.20k
        if ((cur->type == XML_TEXT_NODE) ||
7258
3.06k
            (cur->type == XML_CDATA_SECTION_NODE)) {
7259
3.06k
            int len = xmlStrlen(cur->content);
7260
7261
3.06k
            if ((cur->type == XML_TEXT_NODE) ||
7262
2.99k
                (ctxt->options & XML_PARSE_NOCDATA)) {
7263
2.99k
                if (ctxt->sax->characters != NULL)
7264
2.99k
                    ctxt->sax->characters(ctxt, cur->content, len);
7265
2.99k
            } else {
7266
74
                if (ctxt->sax->cdataBlock != NULL)
7267
74
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7268
74
            }
7269
7270
3.06k
            cur = cur->next;
7271
3.06k
        }
7272
7273
19.8k
        while (cur != NULL) {
7274
18.2k
            xmlNodePtr last;
7275
7276
            /*
7277
             * Handle last text node with SAX to coalesce text efficiently
7278
             */
7279
18.2k
            if ((cur->next == NULL) &&
7280
2.82k
                ((cur->type == XML_TEXT_NODE) ||
7281
2.51k
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7282
2.51k
                int len = xmlStrlen(cur->content);
7283
7284
2.51k
                if ((cur->type == XML_TEXT_NODE) ||
7285
2.49k
                    (ctxt->options & XML_PARSE_NOCDATA)) {
7286
2.49k
                    if (ctxt->sax->characters != NULL)
7287
2.49k
                        ctxt->sax->characters(ctxt, cur->content, len);
7288
2.49k
                } else {
7289
19
                    if (ctxt->sax->cdataBlock != NULL)
7290
19
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7291
19
                }
7292
7293
2.51k
                break;
7294
2.51k
            }
7295
7296
            /*
7297
             * Reset coalesce buffer stats only for non-text nodes.
7298
             */
7299
15.6k
            ctxt->nodemem = 0;
7300
15.6k
            ctxt->nodelen = 0;
7301
7302
15.6k
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7303
7304
15.6k
            if (copy == NULL) {
7305
42
                xmlErrMemory(ctxt);
7306
42
                break;
7307
42
            }
7308
7309
15.6k
            if (ctxt->parseMode == XML_PARSE_READER) {
7310
                /* Needed for reader */
7311
0
                copy->extra = cur->extra;
7312
                /* Maybe needed for reader */
7313
0
                copy->_private = cur->_private;
7314
0
            }
7315
7316
15.6k
            copy->parent = ctxt->node;
7317
15.6k
            last = ctxt->node->last;
7318
15.6k
            if (last == NULL) {
7319
189
                ctxt->node->children = copy;
7320
15.4k
            } else {
7321
15.4k
                last->next = copy;
7322
15.4k
                copy->prev = last;
7323
15.4k
            }
7324
15.6k
            ctxt->node->last = copy;
7325
7326
15.6k
            cur = cur->next;
7327
15.6k
        }
7328
4.20k
    }
7329
4.78k
}
7330
7331
static void
7332
129k
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7333
    /*
7334
     * [ WFC: Entity Declared ]
7335
     * In a document without any DTD, a document with only an
7336
     * internal DTD subset which contains no parameter entity
7337
     * references, or a document with "standalone='yes'", the
7338
     * Name given in the entity reference must match that in an
7339
     * entity declaration, except that well-formed documents
7340
     * need not declare any of the following entities: amp, lt,
7341
     * gt, apos, quot.
7342
     * The declaration of a parameter entity must precede any
7343
     * reference to it.
7344
     * Similarly, the declaration of a general entity must
7345
     * precede any reference to it which appears in a default
7346
     * value in an attribute-list declaration. Note that if
7347
     * entities are declared in the external subset or in
7348
     * external parameter entities, a non-validating processor
7349
     * is not obligated to read and process their declarations;
7350
     * for such documents, the rule that an entity must be
7351
     * declared is a well-formedness constraint only if
7352
     * standalone='yes'.
7353
     */
7354
129k
    if ((ctxt->standalone == 1) ||
7355
128k
        ((ctxt->hasExternalSubset == 0) &&
7356
128k
         (ctxt->hasPErefs == 0))) {
7357
11.8k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7358
11.8k
                          "Entity '%s' not defined\n", name);
7359
11.8k
#ifdef LIBXML_VALID_ENABLED
7360
117k
    } else if (ctxt->validate) {
7361
        /*
7362
         * [ VC: Entity Declared ]
7363
         * In a document with an external subset or external
7364
         * parameter entities with "standalone='no'", ...
7365
         * ... The declaration of a parameter entity must
7366
         * precede any reference to it...
7367
         */
7368
0
        xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7369
0
                         "Entity '%s' not defined\n", name, NULL);
7370
0
#endif
7371
117k
    } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7372
117k
               ((ctxt->replaceEntities) &&
7373
117k
                ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7374
        /*
7375
         * Also raise a non-fatal error
7376
         *
7377
         * - if the external subset is loaded and all entity declarations
7378
         *   should be available, or
7379
         * - entity substition was requested without restricting
7380
         *   external entity access.
7381
         */
7382
117k
        xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7383
117k
                     "Entity '%s' not defined\n", name);
7384
117k
    } else {
7385
0
        xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7386
0
                      "Entity '%s' not defined\n", name, NULL);
7387
0
    }
7388
7389
129k
    ctxt->valid = 0;
7390
129k
}
7391
7392
static xmlEntityPtr
7393
311k
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7394
311k
    xmlEntityPtr ent = NULL;
7395
7396
    /*
7397
     * Predefined entities override any extra definition
7398
     */
7399
311k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7400
311k
        ent = xmlGetPredefinedEntity(name);
7401
311k
        if (ent != NULL)
7402
69.1k
            return(ent);
7403
311k
    }
7404
7405
    /*
7406
     * Ask first SAX for entity resolution, otherwise try the
7407
     * entities which may have stored in the parser context.
7408
     */
7409
242k
    if (ctxt->sax != NULL) {
7410
242k
  if (ctxt->sax->getEntity != NULL)
7411
242k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7412
242k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7413
6.69k
      (ctxt->options & XML_PARSE_OLDSAX))
7414
0
      ent = xmlGetPredefinedEntity(name);
7415
242k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7416
6.69k
      (ctxt->userData==ctxt)) {
7417
6.69k
      ent = xmlSAX2GetEntity(ctxt, name);
7418
6.69k
  }
7419
242k
    }
7420
7421
242k
    if (ent == NULL) {
7422
68.3k
        xmlHandleUndeclaredEntity(ctxt, name);
7423
68.3k
    }
7424
7425
    /*
7426
     * [ WFC: Parsed Entity ]
7427
     * An entity reference must not contain the name of an
7428
     * unparsed entity
7429
     */
7430
173k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7431
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7432
0
     "Entity reference to unparsed entity %s\n", name);
7433
0
        ent = NULL;
7434
0
    }
7435
7436
    /*
7437
     * [ WFC: No External Entity References ]
7438
     * Attribute values cannot contain direct or indirect
7439
     * entity references to external entities.
7440
     */
7441
173k
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7442
4.65k
        if (inAttr) {
7443
715
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7444
715
                 "Attribute references external entity '%s'\n", name);
7445
715
            ent = NULL;
7446
715
        }
7447
4.65k
    }
7448
7449
242k
    return(ent);
7450
311k
}
7451
7452
/**
7453
 * Parse an entity reference. Always consumes '&'.
7454
 *
7455
 *     [68] EntityRef ::= '&' Name ';'
7456
 *
7457
 * @param ctxt  an XML parser context
7458
 * @returns the name, or NULL in case of error.
7459
 */
7460
static const xmlChar *
7461
178k
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7462
178k
    const xmlChar *name;
7463
7464
178k
    GROW;
7465
7466
178k
    if (RAW != '&')
7467
0
        return(NULL);
7468
178k
    NEXT;
7469
178k
    name = xmlParseName(ctxt);
7470
178k
    if (name == NULL) {
7471
43.8k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7472
43.8k
           "xmlParseEntityRef: no name\n");
7473
43.8k
        return(NULL);
7474
43.8k
    }
7475
134k
    if (RAW != ';') {
7476
15.1k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7477
15.1k
  return(NULL);
7478
15.1k
    }
7479
119k
    NEXT;
7480
7481
119k
    return(name);
7482
134k
}
7483
7484
/**
7485
 * @deprecated Internal function, don't use.
7486
 *
7487
 * @param ctxt  an XML parser context
7488
 * @returns the xmlEntity if found, or NULL otherwise.
7489
 */
7490
xmlEntity *
7491
0
xmlParseEntityRef(xmlParserCtxt *ctxt) {
7492
0
    const xmlChar *name;
7493
7494
0
    if (ctxt == NULL)
7495
0
        return(NULL);
7496
7497
0
    name = xmlParseEntityRefInternal(ctxt);
7498
0
    if (name == NULL)
7499
0
        return(NULL);
7500
7501
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7502
0
}
7503
7504
/**
7505
 * Parse ENTITY references declarations, but this version parses it from
7506
 * a string value.
7507
 *
7508
 *     [68] EntityRef ::= '&' Name ';'
7509
 *
7510
 * [ WFC: Entity Declared ]
7511
 * In a document without any DTD, a document with only an internal DTD
7512
 * subset which contains no parameter entity references, or a document
7513
 * with "standalone='yes'", the Name given in the entity reference
7514
 * must match that in an entity declaration, except that well-formed
7515
 * documents need not declare any of the following entities: amp, lt,
7516
 * gt, apos, quot.  The declaration of a parameter entity must precede
7517
 * any reference to it.  Similarly, the declaration of a general entity
7518
 * must precede any reference to it which appears in a default value in an
7519
 * attribute-list declaration. Note that if entities are declared in the
7520
 * external subset or in external parameter entities, a non-validating
7521
 * processor is not obligated to read and process their declarations;
7522
 * for such documents, the rule that an entity must be declared is a
7523
 * well-formedness constraint only if standalone='yes'.
7524
 *
7525
 * [ WFC: Parsed Entity ]
7526
 * An entity reference must not contain the name of an unparsed entity
7527
 *
7528
 * @param ctxt  an XML parser context
7529
 * @param str  a pointer to an index in the string
7530
 * @returns the xmlEntity if found, or NULL otherwise. The str pointer
7531
 * is updated to the current location in the string.
7532
 */
7533
static xmlChar *
7534
192k
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7535
192k
    xmlChar *name;
7536
192k
    const xmlChar *ptr;
7537
192k
    xmlChar cur;
7538
7539
192k
    if ((str == NULL) || (*str == NULL))
7540
0
        return(NULL);
7541
192k
    ptr = *str;
7542
192k
    cur = *ptr;
7543
192k
    if (cur != '&')
7544
0
  return(NULL);
7545
7546
192k
    ptr++;
7547
192k
    name = xmlParseStringName(ctxt, &ptr);
7548
192k
    if (name == NULL) {
7549
7
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7550
7
           "xmlParseStringEntityRef: no name\n");
7551
7
  *str = ptr;
7552
7
  return(NULL);
7553
7
    }
7554
192k
    if (*ptr != ';') {
7555
3
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7556
3
        xmlFree(name);
7557
3
  *str = ptr;
7558
3
  return(NULL);
7559
3
    }
7560
192k
    ptr++;
7561
7562
192k
    *str = ptr;
7563
192k
    return(name);
7564
192k
}
7565
7566
/**
7567
 * Parse a parameter entity reference. Always consumes '%'.
7568
 *
7569
 * The entity content is handled directly by pushing it's content as
7570
 * a new input stream.
7571
 *
7572
 *     [69] PEReference ::= '%' Name ';'
7573
 *
7574
 * [ WFC: No Recursion ]
7575
 * A parsed entity must not contain a recursive
7576
 * reference to itself, either directly or indirectly.
7577
 *
7578
 * [ WFC: Entity Declared ]
7579
 * In a document without any DTD, a document with only an internal DTD
7580
 * subset which contains no parameter entity references, or a document
7581
 * with "standalone='yes'", ...  ... The declaration of a parameter
7582
 * entity must precede any reference to it...
7583
 *
7584
 * [ VC: Entity Declared ]
7585
 * In a document with an external subset or external parameter entities
7586
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7587
 * must precede any reference to it...
7588
 *
7589
 * [ WFC: In DTD ]
7590
 * Parameter-entity references may only appear in the DTD.
7591
 * NOTE: misleading but this is handled.
7592
 *
7593
 * @param ctxt  an XML parser context
7594
 * @param markupDecl  whether the PERef starts a markup declaration
7595
 */
7596
static void
7597
148k
xmlParsePERefInternal(xmlParserCtxt *ctxt, int markupDecl) {
7598
148k
    const xmlChar *name;
7599
148k
    xmlEntityPtr entity = NULL;
7600
148k
    xmlParserInputPtr input;
7601
7602
148k
    if (RAW != '%')
7603
0
        return;
7604
148k
    NEXT;
7605
148k
    name = xmlParseName(ctxt);
7606
148k
    if (name == NULL) {
7607
8.73k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7608
8.73k
  return;
7609
8.73k
    }
7610
140k
    if (RAW != ';') {
7611
12.5k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7612
12.5k
        return;
7613
12.5k
    }
7614
7615
127k
    NEXT;
7616
7617
    /* Must be set before xmlHandleUndeclaredEntity */
7618
127k
    ctxt->hasPErefs = 1;
7619
7620
    /*
7621
     * Request the entity from SAX
7622
     */
7623
127k
    if ((ctxt->sax != NULL) &&
7624
127k
  (ctxt->sax->getParameterEntity != NULL))
7625
127k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7626
7627
127k
    if (entity == NULL) {
7628
59.9k
        xmlHandleUndeclaredEntity(ctxt, name);
7629
67.5k
    } else {
7630
  /*
7631
   * Internal checking in case the entity quest barfed
7632
   */
7633
67.5k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7634
61.7k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7635
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7636
0
      "Internal: %%%s; is not a parameter entity\n",
7637
0
        name, NULL);
7638
67.5k
  } else {
7639
67.5k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7640
61.7k
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7641
61.7k
     (((ctxt->loadsubset & ~XML_SKIP_IDS) == 0) &&
7642
61.7k
      (ctxt->replaceEntities == 0) &&
7643
0
      (ctxt->validate == 0))))
7644
0
    return;
7645
7646
67.5k
            if (entity->flags & XML_ENT_EXPANDING) {
7647
16
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7648
16
                return;
7649
16
            }
7650
7651
67.5k
      input = xmlNewEntityInputStream(ctxt, entity);
7652
67.5k
      if (xmlCtxtPushInput(ctxt, input) < 0) {
7653
3.15k
                xmlFreeInputStream(input);
7654
3.15k
    return;
7655
3.15k
            }
7656
7657
64.4k
            entity->flags |= XML_ENT_EXPANDING;
7658
7659
64.4k
            if (markupDecl)
7660
60.7k
                input->flags |= XML_INPUT_MARKUP_DECL;
7661
7662
64.4k
            GROW;
7663
7664
64.4k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7665
58.5k
                xmlDetectEncoding(ctxt);
7666
7667
58.5k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7668
22.9k
                    (IS_BLANK_CH(NXT(5)))) {
7669
21.9k
                    xmlParseTextDecl(ctxt);
7670
21.9k
                }
7671
58.5k
            }
7672
64.4k
  }
7673
67.5k
    }
7674
127k
}
7675
7676
/**
7677
 * Parse a parameter entity reference.
7678
 *
7679
 * @deprecated Internal function, don't use.
7680
 *
7681
 * @param ctxt  an XML parser context
7682
 */
7683
void
7684
0
xmlParsePEReference(xmlParserCtxt *ctxt) {
7685
0
    xmlParsePERefInternal(ctxt, 0);
7686
0
}
7687
7688
/**
7689
 * Load the content of an entity.
7690
 *
7691
 * @param ctxt  an XML parser context
7692
 * @param entity  an unloaded system entity
7693
 * @returns 0 in case of success and -1 in case of failure
7694
 */
7695
static int
7696
45.5k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7697
45.5k
    xmlParserInputPtr oldinput, input = NULL;
7698
45.5k
    xmlParserInputPtr *oldinputTab;
7699
45.5k
    xmlChar *oldencoding;
7700
45.5k
    xmlChar *content = NULL;
7701
45.5k
    xmlResourceType rtype;
7702
45.5k
    size_t length, i;
7703
45.5k
    int oldinputNr, oldinputMax;
7704
45.5k
    int ret = -1;
7705
45.5k
    int res;
7706
7707
45.5k
    if ((ctxt == NULL) || (entity == NULL) ||
7708
45.5k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7709
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7710
45.5k
  (entity->content != NULL)) {
7711
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7712
0
              "xmlLoadEntityContent parameter error");
7713
0
        return(-1);
7714
0
    }
7715
7716
45.5k
    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7717
45.5k
        rtype = XML_RESOURCE_PARAMETER_ENTITY;
7718
0
    else
7719
0
        rtype = XML_RESOURCE_GENERAL_ENTITY;
7720
7721
45.5k
    input = xmlLoadResource(ctxt, (char *) entity->URI,
7722
45.5k
                            (char *) entity->ExternalID, rtype);
7723
45.5k
    if (input == NULL)
7724
1.94k
        return(-1);
7725
7726
43.6k
    oldinput = ctxt->input;
7727
43.6k
    oldinputNr = ctxt->inputNr;
7728
43.6k
    oldinputMax = ctxt->inputMax;
7729
43.6k
    oldinputTab = ctxt->inputTab;
7730
43.6k
    oldencoding = ctxt->encoding;
7731
7732
43.6k
    ctxt->input = NULL;
7733
43.6k
    ctxt->inputNr = 0;
7734
43.6k
    ctxt->inputMax = 1;
7735
43.6k
    ctxt->encoding = NULL;
7736
43.6k
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7737
43.6k
    if (ctxt->inputTab == NULL) {
7738
4
        xmlErrMemory(ctxt);
7739
4
        xmlFreeInputStream(input);
7740
4
        goto error;
7741
4
    }
7742
7743
43.6k
    xmlBufResetInput(input->buf->buffer, input);
7744
7745
43.6k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
7746
4
        xmlFreeInputStream(input);
7747
4
        goto error;
7748
4
    }
7749
7750
43.6k
    xmlDetectEncoding(ctxt);
7751
7752
    /*
7753
     * Parse a possible text declaration first
7754
     */
7755
43.6k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7756
35.4k
  xmlParseTextDecl(ctxt);
7757
        /*
7758
         * An XML-1.0 document can't reference an entity not XML-1.0
7759
         */
7760
35.4k
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7761
32.1k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7762
1.63k
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7763
1.63k
                           "Version mismatch between document and entity\n");
7764
1.63k
        }
7765
35.4k
    }
7766
7767
43.6k
    length = input->cur - input->base;
7768
43.6k
    xmlBufShrink(input->buf->buffer, length);
7769
43.6k
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7770
7771
110k
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7772
67.1k
        ;
7773
7774
43.6k
    xmlBufResetInput(input->buf->buffer, input);
7775
7776
43.6k
    if (res < 0) {
7777
1.24k
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7778
1.24k
        goto error;
7779
1.24k
    }
7780
7781
42.3k
    length = xmlBufUse(input->buf->buffer);
7782
42.3k
    if (length > INT_MAX) {
7783
0
        xmlErrMemory(ctxt);
7784
0
        goto error;
7785
0
    }
7786
7787
42.3k
    content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
7788
42.3k
    if (content == NULL) {
7789
6
        xmlErrMemory(ctxt);
7790
6
        goto error;
7791
6
    }
7792
7793
14.6M
    for (i = 0; i < length; ) {
7794
14.6M
        int clen = length - i;
7795
14.6M
        int c = xmlGetUTF8Char(content + i, &clen);
7796
7797
14.6M
        if ((c < 0) || (!IS_CHAR(c))) {
7798
42.2k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7799
42.2k
                              "xmlLoadEntityContent: invalid char value %d\n",
7800
42.2k
                              content[i]);
7801
42.2k
            goto error;
7802
42.2k
        }
7803
14.6M
        i += clen;
7804
14.6M
    }
7805
7806
133
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7807
133
    entity->content = content;
7808
133
    entity->length = length;
7809
133
    content = NULL;
7810
133
    ret = 0;
7811
7812
43.6k
error:
7813
87.2k
    while (ctxt->inputNr > 0)
7814
43.6k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
7815
43.6k
    xmlFree(ctxt->inputTab);
7816
43.6k
    xmlFree(ctxt->encoding);
7817
7818
43.6k
    ctxt->input = oldinput;
7819
43.6k
    ctxt->inputNr = oldinputNr;
7820
43.6k
    ctxt->inputMax = oldinputMax;
7821
43.6k
    ctxt->inputTab = oldinputTab;
7822
43.6k
    ctxt->encoding = oldencoding;
7823
7824
43.6k
    xmlFree(content);
7825
7826
43.6k
    return(ret);
7827
133
}
7828
7829
/**
7830
 * Parse PEReference declarations
7831
 *
7832
 *     [69] PEReference ::= '%' Name ';'
7833
 *
7834
 * [ WFC: No Recursion ]
7835
 * A parsed entity must not contain a recursive
7836
 * reference to itself, either directly or indirectly.
7837
 *
7838
 * [ WFC: Entity Declared ]
7839
 * In a document without any DTD, a document with only an internal DTD
7840
 * subset which contains no parameter entity references, or a document
7841
 * with "standalone='yes'", ...  ... The declaration of a parameter
7842
 * entity must precede any reference to it...
7843
 *
7844
 * [ VC: Entity Declared ]
7845
 * In a document with an external subset or external parameter entities
7846
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7847
 * must precede any reference to it...
7848
 *
7849
 * [ WFC: In DTD ]
7850
 * Parameter-entity references may only appear in the DTD.
7851
 * NOTE: misleading but this is handled.
7852
 *
7853
 * @param ctxt  an XML parser context
7854
 * @param str  a pointer to an index in the string
7855
 * @returns the string of the entity content.
7856
 *         str is updated to the current value of the index
7857
 */
7858
static xmlEntityPtr
7859
52.9k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7860
52.9k
    const xmlChar *ptr;
7861
52.9k
    xmlChar cur;
7862
52.9k
    xmlChar *name;
7863
52.9k
    xmlEntityPtr entity = NULL;
7864
7865
52.9k
    if ((str == NULL) || (*str == NULL)) return(NULL);
7866
52.9k
    ptr = *str;
7867
52.9k
    cur = *ptr;
7868
52.9k
    if (cur != '%')
7869
0
        return(NULL);
7870
52.9k
    ptr++;
7871
52.9k
    name = xmlParseStringName(ctxt, &ptr);
7872
52.9k
    if (name == NULL) {
7873
2.14k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7874
2.14k
           "xmlParseStringPEReference: no name\n");
7875
2.14k
  *str = ptr;
7876
2.14k
  return(NULL);
7877
2.14k
    }
7878
50.7k
    cur = *ptr;
7879
50.7k
    if (cur != ';') {
7880
1.67k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7881
1.67k
  xmlFree(name);
7882
1.67k
  *str = ptr;
7883
1.67k
  return(NULL);
7884
1.67k
    }
7885
49.0k
    ptr++;
7886
7887
    /* Must be set before xmlHandleUndeclaredEntity */
7888
49.0k
    ctxt->hasPErefs = 1;
7889
7890
    /*
7891
     * Request the entity from SAX
7892
     */
7893
49.0k
    if ((ctxt->sax != NULL) &&
7894
49.0k
  (ctxt->sax->getParameterEntity != NULL))
7895
49.0k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7896
7897
49.0k
    if (entity == NULL) {
7898
716
        xmlHandleUndeclaredEntity(ctxt, name);
7899
48.3k
    } else {
7900
  /*
7901
   * Internal checking in case the entity quest barfed
7902
   */
7903
48.3k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7904
47.8k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7905
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7906
0
        "%%%s; is not a parameter entity\n",
7907
0
        name, NULL);
7908
0
  }
7909
48.3k
    }
7910
7911
49.0k
    xmlFree(name);
7912
49.0k
    *str = ptr;
7913
49.0k
    return(entity);
7914
50.7k
}
7915
7916
/**
7917
 * Parse a DOCTYPE declaration
7918
 *
7919
 * @deprecated Internal function, don't use.
7920
 *
7921
 *     [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7922
 *                          ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7923
 *
7924
 * [ VC: Root Element Type ]
7925
 * The Name in the document type declaration must match the element
7926
 * type of the root element.
7927
 *
7928
 * @param ctxt  an XML parser context
7929
 */
7930
7931
void
7932
8.16k
xmlParseDocTypeDecl(xmlParserCtxt *ctxt) {
7933
8.16k
    const xmlChar *name = NULL;
7934
8.16k
    xmlChar *publicId = NULL;
7935
8.16k
    xmlChar *URI = NULL;
7936
7937
    /*
7938
     * We know that '<!DOCTYPE' has been detected.
7939
     */
7940
8.16k
    SKIP(9);
7941
7942
8.16k
    if (SKIP_BLANKS == 0) {
7943
3.04k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7944
3.04k
                       "Space required after 'DOCTYPE'\n");
7945
3.04k
    }
7946
7947
    /*
7948
     * Parse the DOCTYPE name.
7949
     */
7950
8.16k
    name = xmlParseName(ctxt);
7951
8.16k
    if (name == NULL) {
7952
1.73k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7953
1.73k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
7954
1.73k
    }
7955
8.16k
    ctxt->intSubName = name;
7956
7957
8.16k
    SKIP_BLANKS;
7958
7959
    /*
7960
     * Check for public and system identifier (URI)
7961
     */
7962
8.16k
    URI = xmlParseExternalID(ctxt, &publicId, 1);
7963
7964
8.16k
    if ((URI != NULL) || (publicId != NULL)) {
7965
105
        ctxt->hasExternalSubset = 1;
7966
105
    }
7967
8.16k
    ctxt->extSubURI = URI;
7968
8.16k
    ctxt->extSubSystem = publicId;
7969
7970
8.16k
    SKIP_BLANKS;
7971
7972
    /*
7973
     * Create and update the internal subset.
7974
     */
7975
8.16k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7976
8.16k
  (!ctxt->disableSAX))
7977
4.92k
  ctxt->sax->internalSubset(ctxt->userData, name, publicId, URI);
7978
7979
8.16k
    if ((RAW != '[') && (RAW != '>')) {
7980
122
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
7981
122
    }
7982
8.16k
}
7983
7984
/**
7985
 * Parse the internal subset declaration
7986
 *
7987
 *     [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7988
 * @param ctxt  an XML parser context
7989
 */
7990
7991
static void
7992
7.98k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7993
    /*
7994
     * Is there any DTD definition ?
7995
     */
7996
7.98k
    if (RAW == '[') {
7997
7.98k
        int oldInputNr = ctxt->inputNr;
7998
7999
7.98k
        NEXT;
8000
  /*
8001
   * Parse the succession of Markup declarations and
8002
   * PEReferences.
8003
   * Subsequence (markupdecl | PEReference | S)*
8004
   */
8005
7.98k
  SKIP_BLANKS;
8006
316k
        while (1) {
8007
316k
            if (PARSER_STOPPED(ctxt)) {
8008
704
                return;
8009
315k
            } else if (ctxt->input->cur >= ctxt->input->end) {
8010
61.0k
                if (ctxt->inputNr <= oldInputNr) {
8011
1.40k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8012
1.40k
                    return;
8013
1.40k
                }
8014
59.6k
                xmlPopPE(ctxt);
8015
254k
            } else if ((RAW == ']') && (ctxt->inputNr <= oldInputNr)) {
8016
1.93k
                NEXT;
8017
1.93k
                SKIP_BLANKS;
8018
1.93k
                break;
8019
252k
            } else if ((PARSER_EXTERNAL(ctxt)) &&
8020
192k
                       (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8021
                /*
8022
                 * Conditional sections are allowed in external entities
8023
                 * included by PE References in the internal subset.
8024
                 */
8025
2.01k
                xmlParseConditionalSections(ctxt);
8026
250k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8027
106k
                xmlParseMarkupDecl(ctxt);
8028
144k
            } else if (RAW == '%') {
8029
140k
                xmlParsePERefInternal(ctxt, 1);
8030
140k
            } else {
8031
3.94k
                xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8032
8033
4.40k
                while (ctxt->inputNr > oldInputNr)
8034
463
                    xmlPopPE(ctxt);
8035
3.94k
                return;
8036
3.94k
            }
8037
308k
            SKIP_BLANKS;
8038
308k
            SHRINK;
8039
308k
            GROW;
8040
308k
        }
8041
7.98k
    }
8042
8043
    /*
8044
     * We should be at the end of the DOCTYPE declaration.
8045
     */
8046
1.93k
    if (RAW != '>') {
8047
29
        xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8048
29
        return;
8049
29
    }
8050
1.90k
    NEXT;
8051
1.90k
}
8052
8053
#ifdef LIBXML_SAX1_ENABLED
8054
/**
8055
 * Parse an attribute
8056
 *
8057
 * @deprecated Internal function, don't use.
8058
 *
8059
 *     [41] Attribute ::= Name Eq AttValue
8060
 *
8061
 * [ WFC: No External Entity References ]
8062
 * Attribute values cannot contain direct or indirect entity references
8063
 * to external entities.
8064
 *
8065
 * [ WFC: No < in Attribute Values ]
8066
 * The replacement text of any entity referred to directly or indirectly in
8067
 * an attribute value (other than "&lt;") must not contain a <.
8068
 *
8069
 * [ VC: Attribute Value Type ]
8070
 * The attribute must have been declared; the value must be of the type
8071
 * declared for it.
8072
 *
8073
 *     [25] Eq ::= S? '=' S?
8074
 *
8075
 * With namespace:
8076
 *
8077
 *     [NS 11] Attribute ::= QName Eq AttValue
8078
 *
8079
 * Also the case QName == xmlns:??? is handled independently as a namespace
8080
 * definition.
8081
 *
8082
 * @param ctxt  an XML parser context
8083
 * @param value  a xmlChar ** used to store the value of the attribute
8084
 * @returns the attribute name, and the value in *value.
8085
 */
8086
8087
const xmlChar *
8088
0
xmlParseAttribute(xmlParserCtxt *ctxt, xmlChar **value) {
8089
0
    const xmlChar *name;
8090
0
    xmlChar *val;
8091
8092
0
    *value = NULL;
8093
0
    GROW;
8094
0
    name = xmlParseName(ctxt);
8095
0
    if (name == NULL) {
8096
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8097
0
                 "error parsing attribute name\n");
8098
0
        return(NULL);
8099
0
    }
8100
8101
    /*
8102
     * read the value
8103
     */
8104
0
    SKIP_BLANKS;
8105
0
    if (RAW == '=') {
8106
0
        NEXT;
8107
0
  SKIP_BLANKS;
8108
0
  val = xmlParseAttValue(ctxt);
8109
0
    } else {
8110
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8111
0
         "Specification mandates value for attribute %s\n", name);
8112
0
  return(name);
8113
0
    }
8114
8115
    /*
8116
     * Check that xml:lang conforms to the specification
8117
     * No more registered as an error, just generate a warning now
8118
     * since this was deprecated in XML second edition
8119
     */
8120
0
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8121
0
  if (!xmlCheckLanguageID(val)) {
8122
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8123
0
              "Malformed value for xml:lang : %s\n",
8124
0
        val, NULL);
8125
0
  }
8126
0
    }
8127
8128
    /*
8129
     * Check that xml:space conforms to the specification
8130
     */
8131
0
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8132
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8133
0
      *(ctxt->space) = 0;
8134
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8135
0
      *(ctxt->space) = 1;
8136
0
  else {
8137
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8138
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8139
0
                                 val, NULL);
8140
0
  }
8141
0
    }
8142
8143
0
    *value = val;
8144
0
    return(name);
8145
0
}
8146
8147
/**
8148
 * Parse a start tag. Always consumes '<'.
8149
 *
8150
 * @deprecated Internal function, don't use.
8151
 *
8152
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8153
 *
8154
 * [ WFC: Unique Att Spec ]
8155
 * No attribute name may appear more than once in the same start-tag or
8156
 * empty-element tag.
8157
 *
8158
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8159
 *
8160
 * [ WFC: Unique Att Spec ]
8161
 * No attribute name may appear more than once in the same start-tag or
8162
 * empty-element tag.
8163
 *
8164
 * With namespace:
8165
 *
8166
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8167
 *
8168
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8169
 *
8170
 * @param ctxt  an XML parser context
8171
 * @returns the element name parsed
8172
 */
8173
8174
const xmlChar *
8175
0
xmlParseStartTag(xmlParserCtxt *ctxt) {
8176
0
    const xmlChar *name;
8177
0
    const xmlChar *attname;
8178
0
    xmlChar *attvalue;
8179
0
    const xmlChar **atts = ctxt->atts;
8180
0
    int nbatts = 0;
8181
0
    int maxatts = ctxt->maxatts;
8182
0
    int i;
8183
8184
0
    if (RAW != '<') return(NULL);
8185
0
    NEXT1;
8186
8187
0
    name = xmlParseName(ctxt);
8188
0
    if (name == NULL) {
8189
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8190
0
       "xmlParseStartTag: invalid element name\n");
8191
0
        return(NULL);
8192
0
    }
8193
8194
    /*
8195
     * Now parse the attributes, it ends up with the ending
8196
     *
8197
     * (S Attribute)* S?
8198
     */
8199
0
    SKIP_BLANKS;
8200
0
    GROW;
8201
8202
0
    while (((RAW != '>') &&
8203
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8204
0
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8205
0
  attname = xmlParseAttribute(ctxt, &attvalue);
8206
0
        if (attname == NULL)
8207
0
      break;
8208
0
        if (attvalue != NULL) {
8209
      /*
8210
       * [ WFC: Unique Att Spec ]
8211
       * No attribute name may appear more than once in the same
8212
       * start-tag or empty-element tag.
8213
       */
8214
0
      for (i = 0; i < nbatts;i += 2) {
8215
0
          if (xmlStrEqual(atts[i], attname)) {
8216
0
        xmlErrAttributeDup(ctxt, NULL, attname);
8217
0
        goto failed;
8218
0
    }
8219
0
      }
8220
      /*
8221
       * Add the pair to atts
8222
       */
8223
0
      if (nbatts + 4 > maxatts) {
8224
0
          const xmlChar **n;
8225
0
                int newSize;
8226
8227
0
                newSize = xmlGrowCapacity(maxatts, sizeof(n[0]) * 2,
8228
0
                                          11, XML_MAX_ATTRS);
8229
0
                if (newSize < 0) {
8230
0
        xmlErrMemory(ctxt);
8231
0
        goto failed;
8232
0
    }
8233
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
8234
0
                if (newSize < 2)
8235
0
                    newSize = 2;
8236
0
#endif
8237
0
          n = xmlRealloc(atts, newSize * sizeof(n[0]) * 2);
8238
0
    if (n == NULL) {
8239
0
        xmlErrMemory(ctxt);
8240
0
        goto failed;
8241
0
    }
8242
0
    atts = n;
8243
0
                maxatts = newSize * 2;
8244
0
    ctxt->atts = atts;
8245
0
    ctxt->maxatts = maxatts;
8246
0
      }
8247
8248
0
      atts[nbatts++] = attname;
8249
0
      atts[nbatts++] = attvalue;
8250
0
      atts[nbatts] = NULL;
8251
0
      atts[nbatts + 1] = NULL;
8252
8253
0
            attvalue = NULL;
8254
0
  }
8255
8256
0
failed:
8257
8258
0
        if (attvalue != NULL)
8259
0
            xmlFree(attvalue);
8260
8261
0
  GROW
8262
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8263
0
      break;
8264
0
  if (SKIP_BLANKS == 0) {
8265
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8266
0
         "attributes construct error\n");
8267
0
  }
8268
0
  SHRINK;
8269
0
        GROW;
8270
0
    }
8271
8272
    /*
8273
     * SAX: Start of Element !
8274
     */
8275
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8276
0
  (!ctxt->disableSAX)) {
8277
0
  if (nbatts > 0)
8278
0
      ctxt->sax->startElement(ctxt->userData, name, atts);
8279
0
  else
8280
0
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8281
0
    }
8282
8283
0
    if (atts != NULL) {
8284
        /* Free only the content strings */
8285
0
        for (i = 1;i < nbatts;i+=2)
8286
0
      if (atts[i] != NULL)
8287
0
         xmlFree((xmlChar *) atts[i]);
8288
0
    }
8289
0
    return(name);
8290
0
}
8291
8292
/**
8293
 * Parse an end tag. Always consumes '</'.
8294
 *
8295
 *     [42] ETag ::= '</' Name S? '>'
8296
 *
8297
 * With namespace
8298
 *
8299
 *     [NS 9] ETag ::= '</' QName S? '>'
8300
 * @param ctxt  an XML parser context
8301
 * @param line  line of the start tag
8302
 */
8303
8304
static void
8305
0
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8306
0
    const xmlChar *name;
8307
8308
0
    GROW;
8309
0
    if ((RAW != '<') || (NXT(1) != '/')) {
8310
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8311
0
           "xmlParseEndTag: '</' not found\n");
8312
0
  return;
8313
0
    }
8314
0
    SKIP(2);
8315
8316
0
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8317
8318
    /*
8319
     * We should definitely be at the ending "S? '>'" part
8320
     */
8321
0
    GROW;
8322
0
    SKIP_BLANKS;
8323
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8324
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8325
0
    } else
8326
0
  NEXT1;
8327
8328
    /*
8329
     * [ WFC: Element Type Match ]
8330
     * The Name in an element's end-tag must match the element type in the
8331
     * start-tag.
8332
     *
8333
     */
8334
0
    if (name != (xmlChar*)1) {
8335
0
        if (name == NULL) name = BAD_CAST "unparsable";
8336
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8337
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
8338
0
                    ctxt->name, line, name);
8339
0
    }
8340
8341
    /*
8342
     * SAX: End of Tag
8343
     */
8344
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8345
0
  (!ctxt->disableSAX))
8346
0
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8347
8348
0
    namePop(ctxt);
8349
0
    spacePop(ctxt);
8350
0
}
8351
8352
/**
8353
 * Parse an end of tag
8354
 *
8355
 * @deprecated Internal function, don't use.
8356
 *
8357
 *     [42] ETag ::= '</' Name S? '>'
8358
 *
8359
 * With namespace
8360
 *
8361
 *     [NS 9] ETag ::= '</' QName S? '>'
8362
 * @param ctxt  an XML parser context
8363
 */
8364
8365
void
8366
0
xmlParseEndTag(xmlParserCtxt *ctxt) {
8367
0
    xmlParseEndTag1(ctxt, 0);
8368
0
}
8369
#endif /* LIBXML_SAX1_ENABLED */
8370
8371
/************************************************************************
8372
 *                  *
8373
 *          SAX 2 specific operations       *
8374
 *                  *
8375
 ************************************************************************/
8376
8377
/**
8378
 * Parse an XML Namespace QName
8379
 *
8380
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8381
 *     [7]  Prefix  ::= NCName
8382
 *     [8]  LocalPart  ::= NCName
8383
 *
8384
 * @param ctxt  an XML parser context
8385
 * @param prefix  pointer to store the prefix part
8386
 * @returns the Name parsed or NULL
8387
 */
8388
8389
static xmlHashedString
8390
2.05M
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8391
2.05M
    xmlHashedString l, p;
8392
2.05M
    int start, isNCName = 0;
8393
8394
2.05M
    l.name = NULL;
8395
2.05M
    p.name = NULL;
8396
8397
2.05M
    GROW;
8398
2.05M
    start = CUR_PTR - BASE_PTR;
8399
8400
2.05M
    l = xmlParseNCName(ctxt);
8401
2.05M
    if (l.name != NULL) {
8402
1.93M
        isNCName = 1;
8403
1.93M
        if (CUR == ':') {
8404
694k
            NEXT;
8405
694k
            p = l;
8406
694k
            l = xmlParseNCName(ctxt);
8407
694k
        }
8408
1.93M
    }
8409
2.05M
    if ((l.name == NULL) || (CUR == ':')) {
8410
122k
        xmlChar *tmp;
8411
8412
122k
        l.name = NULL;
8413
122k
        p.name = NULL;
8414
122k
        if ((isNCName == 0) && (CUR != ':'))
8415
89.8k
            return(l);
8416
32.3k
        tmp = xmlParseNmtoken(ctxt);
8417
32.3k
        if (tmp != NULL)
8418
28.6k
            xmlFree(tmp);
8419
32.3k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8420
32.3k
                                CUR_PTR - (BASE_PTR + start));
8421
32.3k
        if (l.name == NULL) {
8422
1
            xmlErrMemory(ctxt);
8423
1
            return(l);
8424
1
        }
8425
32.3k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8426
32.3k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8427
32.3k
    }
8428
8429
1.96M
    *prefix = p;
8430
1.96M
    return(l);
8431
2.05M
}
8432
8433
/**
8434
 * Parse an XML Namespace QName
8435
 *
8436
 *     [6]  QName  ::= (Prefix ':')? LocalPart
8437
 *     [7]  Prefix  ::= NCName
8438
 *     [8]  LocalPart  ::= NCName
8439
 *
8440
 * @param ctxt  an XML parser context
8441
 * @param prefix  pointer to store the prefix part
8442
 * @returns the Name parsed or NULL
8443
 */
8444
8445
static const xmlChar *
8446
4.05k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8447
4.05k
    xmlHashedString n, p;
8448
8449
4.05k
    n = xmlParseQNameHashed(ctxt, &p);
8450
4.05k
    if (n.name == NULL)
8451
720
        return(NULL);
8452
3.33k
    *prefix = p.name;
8453
3.33k
    return(n.name);
8454
4.05k
}
8455
8456
/**
8457
 * Parse an XML name and compares for match
8458
 * (specialized for endtag parsing)
8459
 *
8460
 * @param ctxt  an XML parser context
8461
 * @param name  the localname
8462
 * @param prefix  the prefix, if any.
8463
 * @returns NULL for an illegal name, (xmlChar*) 1 for success
8464
 * and the name for mismatch
8465
 */
8466
8467
static const xmlChar *
8468
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8469
157k
                        xmlChar const *prefix) {
8470
157k
    const xmlChar *cmp;
8471
157k
    const xmlChar *in;
8472
157k
    const xmlChar *ret;
8473
157k
    const xmlChar *prefix2;
8474
8475
157k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8476
8477
157k
    GROW;
8478
157k
    in = ctxt->input->cur;
8479
8480
157k
    cmp = prefix;
8481
539k
    while (*in != 0 && *in == *cmp) {
8482
381k
  ++in;
8483
381k
  ++cmp;
8484
381k
    }
8485
157k
    if ((*cmp == 0) && (*in == ':')) {
8486
155k
        in++;
8487
155k
  cmp = name;
8488
1.51M
  while (*in != 0 && *in == *cmp) {
8489
1.36M
      ++in;
8490
1.36M
      ++cmp;
8491
1.36M
  }
8492
155k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8493
      /* success */
8494
153k
            ctxt->input->col += in - ctxt->input->cur;
8495
153k
      ctxt->input->cur = in;
8496
153k
      return((const xmlChar*) 1);
8497
153k
  }
8498
155k
    }
8499
    /*
8500
     * all strings coms from the dictionary, equality can be done directly
8501
     */
8502
4.05k
    ret = xmlParseQName (ctxt, &prefix2);
8503
4.05k
    if (ret == NULL)
8504
720
        return(NULL);
8505
3.33k
    if ((ret == name) && (prefix == prefix2))
8506
242
  return((const xmlChar*) 1);
8507
3.08k
    return ret;
8508
3.33k
}
8509
8510
/**
8511
 * Parse an attribute in the new SAX2 framework.
8512
 *
8513
 * @param ctxt  an XML parser context
8514
 * @param pref  the element prefix
8515
 * @param elem  the element name
8516
 * @param hprefix  resulting attribute prefix
8517
 * @param value  resulting value of the attribute
8518
 * @param len  resulting length of the attribute
8519
 * @param alloc  resulting indicator if the attribute was allocated
8520
 * @returns the attribute name, and the value in *value, .
8521
 */
8522
8523
static xmlHashedString
8524
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8525
                   const xmlChar * pref, const xmlChar * elem,
8526
                   xmlHashedString * hprefix, xmlChar ** value,
8527
                   int *len, int *alloc)
8528
978k
{
8529
978k
    xmlHashedString hname;
8530
978k
    const xmlChar *prefix, *name;
8531
978k
    xmlChar *val = NULL, *internal_val = NULL;
8532
978k
    int special = 0;
8533
978k
    int isNamespace;
8534
978k
    int flags;
8535
8536
978k
    *value = NULL;
8537
978k
    GROW;
8538
978k
    hname = xmlParseQNameHashed(ctxt, hprefix);
8539
978k
    if (hname.name == NULL) {
8540
27.7k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8541
27.7k
                       "error parsing attribute name\n");
8542
27.7k
        return(hname);
8543
27.7k
    }
8544
950k
    name = hname.name;
8545
950k
    prefix = hprefix->name;
8546
8547
    /*
8548
     * get the type if needed
8549
     */
8550
950k
    if (ctxt->attsSpecial != NULL) {
8551
16.8k
        special = XML_PTR_TO_INT(xmlHashQLookup2(ctxt->attsSpecial, pref, elem,
8552
16.8k
                                              prefix, name));
8553
16.8k
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
950k
    SKIP_BLANKS;
8559
950k
    if (RAW != '=') {
8560
16.3k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8561
16.3k
                          "Specification mandates value for attribute %s\n",
8562
16.3k
                          name);
8563
16.3k
        goto error;
8564
16.3k
    }
8565
8566
8567
934k
    NEXT;
8568
934k
    SKIP_BLANKS;
8569
934k
    flags = 0;
8570
934k
    isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8571
887k
                   (prefix == ctxt->str_xmlns));
8572
934k
    val = xmlParseAttValueInternal(ctxt, len, &flags, special,
8573
934k
                                   isNamespace);
8574
934k
    if (val == NULL)
8575
4.51k
        goto error;
8576
8577
929k
    *alloc = (flags & XML_ATTVAL_ALLOC) != 0;
8578
8579
929k
#ifdef LIBXML_VALID_ENABLED
8580
929k
    if ((ctxt->validate) &&
8581
0
        (ctxt->standalone == 1) &&
8582
0
        (special & XML_SPECIAL_EXTERNAL) &&
8583
0
        (flags & XML_ATTVAL_NORM_CHANGE)) {
8584
0
        xmlValidityError(ctxt, XML_DTD_NOT_STANDALONE,
8585
0
                         "standalone: normalization of attribute %s on %s "
8586
0
                         "by external subset declaration\n",
8587
0
                         name, elem);
8588
0
    }
8589
929k
#endif
8590
8591
929k
    if (prefix == ctxt->str_xml) {
8592
        /*
8593
         * Check that xml:lang conforms to the specification
8594
         * No more registered as an error, just generate a warning now
8595
         * since this was deprecated in XML second edition
8596
         */
8597
33.4k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8598
0
            internal_val = xmlStrndup(val, *len);
8599
0
            if (internal_val == NULL)
8600
0
                goto mem_error;
8601
0
            if (!xmlCheckLanguageID(internal_val)) {
8602
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8603
0
                              "Malformed value for xml:lang : %s\n",
8604
0
                              internal_val, NULL);
8605
0
            }
8606
0
        }
8607
8608
        /*
8609
         * Check that xml:space conforms to the specification
8610
         */
8611
33.4k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8612
658
            internal_val = xmlStrndup(val, *len);
8613
658
            if (internal_val == NULL)
8614
3
                goto mem_error;
8615
655
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8616
190
                *(ctxt->space) = 0;
8617
465
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8618
197
                *(ctxt->space) = 1;
8619
268
            else {
8620
268
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8621
268
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8622
268
                              internal_val, NULL);
8623
268
            }
8624
655
        }
8625
33.4k
        if (internal_val) {
8626
655
            xmlFree(internal_val);
8627
655
        }
8628
33.4k
    }
8629
8630
929k
    *value = val;
8631
929k
    return (hname);
8632
8633
3
mem_error:
8634
3
    xmlErrMemory(ctxt);
8635
20.8k
error:
8636
20.8k
    if ((val != NULL) && (*alloc != 0))
8637
2
        xmlFree(val);
8638
20.8k
    return(hname);
8639
3
}
8640
8641
/**
8642
 * Inserts a new attribute into the hash table.
8643
 *
8644
 * @param ctxt  parser context
8645
 * @param size  size of the hash table
8646
 * @param name  attribute name
8647
 * @param uri  namespace uri
8648
 * @param hashValue  combined hash value of name and uri
8649
 * @param aindex  attribute index (this is a multiple of 5)
8650
 * @returns INT_MAX if no existing attribute was found, the attribute
8651
 * index if an attribute was found, -1 if a memory allocation failed.
8652
 */
8653
static int
8654
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8655
472k
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8656
472k
    xmlAttrHashBucket *table = ctxt->attrHash;
8657
472k
    xmlAttrHashBucket *bucket;
8658
472k
    unsigned hindex;
8659
8660
472k
    hindex = hashValue & (size - 1);
8661
472k
    bucket = &table[hindex];
8662
8663
559k
    while (bucket->index >= 0) {
8664
98.2k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8665
8666
98.2k
        if (name == atts[0]) {
8667
12.7k
            int nsIndex = XML_PTR_TO_INT(atts[2]);
8668
8669
12.7k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8670
12.7k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8671
4.76k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8672
10.8k
                return(bucket->index);
8673
12.7k
        }
8674
8675
87.4k
        hindex++;
8676
87.4k
        bucket++;
8677
87.4k
        if (hindex >= size) {
8678
18.9k
            hindex = 0;
8679
18.9k
            bucket = table;
8680
18.9k
        }
8681
87.4k
    }
8682
8683
461k
    bucket->index = aindex;
8684
8685
461k
    return(INT_MAX);
8686
472k
}
8687
8688
static int
8689
xmlAttrHashInsertQName(xmlParserCtxtPtr ctxt, unsigned size,
8690
                       const xmlChar *name, const xmlChar *prefix,
8691
2.89k
                       unsigned hashValue, int aindex) {
8692
2.89k
    xmlAttrHashBucket *table = ctxt->attrHash;
8693
2.89k
    xmlAttrHashBucket *bucket;
8694
2.89k
    unsigned hindex;
8695
8696
2.89k
    hindex = hashValue & (size - 1);
8697
2.89k
    bucket = &table[hindex];
8698
8699
3.49k
    while (bucket->index >= 0) {
8700
1.90k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8701
8702
1.90k
        if ((name == atts[0]) && (prefix == atts[1]))
8703
1.29k
            return(bucket->index);
8704
8705
604
        hindex++;
8706
604
        bucket++;
8707
604
        if (hindex >= size) {
8708
98
            hindex = 0;
8709
98
            bucket = table;
8710
98
        }
8711
604
    }
8712
8713
1.59k
    bucket->index = aindex;
8714
8715
1.59k
    return(INT_MAX);
8716
2.89k
}
8717
/**
8718
 * Parse a start tag. Always consumes '<'.
8719
 *
8720
 * This routine is called when running SAX2 parsing
8721
 *
8722
 *     [40] STag ::= '<' Name (S Attribute)* S? '>'
8723
 *
8724
 * [ WFC: Unique Att Spec ]
8725
 * No attribute name may appear more than once in the same start-tag or
8726
 * empty-element tag.
8727
 *
8728
 *     [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8729
 *
8730
 * [ WFC: Unique Att Spec ]
8731
 * No attribute name may appear more than once in the same start-tag or
8732
 * empty-element tag.
8733
 *
8734
 * With namespace:
8735
 *
8736
 *     [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8737
 *
8738
 *     [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8739
 *
8740
 * @param ctxt  an XML parser context
8741
 * @param pref  resulting namespace prefix
8742
 * @param URI  resulting namespace URI
8743
 * @param nbNsPtr  resulting number of namespace declarations
8744
 * @returns the element name parsed
8745
 */
8746
8747
static const xmlChar *
8748
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8749
1.06M
                  const xmlChar **URI, int *nbNsPtr) {
8750
1.06M
    xmlHashedString hlocalname;
8751
1.06M
    xmlHashedString hprefix;
8752
1.06M
    xmlHashedString hattname;
8753
1.06M
    xmlHashedString haprefix;
8754
1.06M
    const xmlChar *localname;
8755
1.06M
    const xmlChar *prefix;
8756
1.06M
    const xmlChar *attname;
8757
1.06M
    const xmlChar *aprefix;
8758
1.06M
    const xmlChar *uri;
8759
1.06M
    xmlChar *attvalue = NULL;
8760
1.06M
    const xmlChar **atts = ctxt->atts;
8761
1.06M
    unsigned attrHashSize = 0;
8762
1.06M
    int maxatts = ctxt->maxatts;
8763
1.06M
    int nratts, nbatts, nbdef;
8764
1.06M
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8765
1.06M
    int alloc = 0;
8766
1.06M
    int numNsErr = 0;
8767
1.06M
    int numDupErr = 0;
8768
8769
1.06M
    if (RAW != '<') return(NULL);
8770
1.06M
    NEXT1;
8771
8772
1.06M
    nbatts = 0;
8773
1.06M
    nratts = 0;
8774
1.06M
    nbdef = 0;
8775
1.06M
    nbNs = 0;
8776
1.06M
    nbTotalDef = 0;
8777
1.06M
    attval = 0;
8778
8779
1.06M
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8780
0
        xmlErrMemory(ctxt);
8781
0
        return(NULL);
8782
0
    }
8783
8784
1.06M
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8785
1.06M
    if (hlocalname.name == NULL) {
8786
61.3k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8787
61.3k
           "StartTag: invalid element name\n");
8788
61.3k
        return(NULL);
8789
61.3k
    }
8790
1.00M
    localname = hlocalname.name;
8791
1.00M
    prefix = hprefix.name;
8792
8793
    /*
8794
     * Now parse the attributes, it ends up with the ending
8795
     *
8796
     * (S Attribute)* S?
8797
     */
8798
1.00M
    SKIP_BLANKS;
8799
1.00M
    GROW;
8800
8801
    /*
8802
     * The ctxt->atts array will be ultimately passed to the SAX callback
8803
     * containing five xmlChar pointers for each attribute:
8804
     *
8805
     * [0] attribute name
8806
     * [1] attribute prefix
8807
     * [2] namespace URI
8808
     * [3] attribute value
8809
     * [4] end of attribute value
8810
     *
8811
     * To save memory, we reuse this array temporarily and store integers
8812
     * in these pointer variables.
8813
     *
8814
     * [0] attribute name
8815
     * [1] attribute prefix
8816
     * [2] hash value of attribute prefix, and later namespace index
8817
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
8818
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
8819
     *
8820
     * The ctxt->attallocs array contains an additional unsigned int for
8821
     * each attribute, containing the hash value of the attribute name
8822
     * and the alloc flag in bit 31.
8823
     */
8824
8825
1.41M
    while (((RAW != '>') &&
8826
1.10M
     ((RAW != '/') || (NXT(1) != '>')) &&
8827
981k
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8828
978k
  int len = -1;
8829
8830
978k
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
8831
978k
                                          &haprefix, &attvalue, &len,
8832
978k
                                          &alloc);
8833
978k
        if (hattname.name == NULL)
8834
27.7k
      break;
8835
950k
        if (attvalue == NULL)
8836
20.8k
            goto next_attr;
8837
929k
        attname = hattname.name;
8838
929k
        aprefix = haprefix.name;
8839
929k
  if (len < 0) len = xmlStrlen(attvalue);
8840
8841
929k
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8842
46.0k
            xmlHashedString huri;
8843
46.0k
            xmlURIPtr parsedUri;
8844
8845
46.0k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8846
46.0k
            uri = huri.name;
8847
46.0k
            if (uri == NULL) {
8848
1
                xmlErrMemory(ctxt);
8849
1
                goto next_attr;
8850
1
            }
8851
46.0k
            if (*uri != 0) {
8852
45.8k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8853
11
                    xmlErrMemory(ctxt);
8854
11
                    goto next_attr;
8855
11
                }
8856
45.8k
                if (parsedUri == NULL) {
8857
32.2k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8858
32.2k
                             "xmlns: '%s' is not a valid URI\n",
8859
32.2k
                                       uri, NULL, NULL);
8860
32.2k
                } else {
8861
13.5k
                    if (parsedUri->scheme == NULL) {
8862
8.26k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8863
8.26k
                                  "xmlns: URI %s is not absolute\n",
8864
8.26k
                                  uri, NULL, NULL);
8865
8.26k
                    }
8866
13.5k
                    xmlFreeURI(parsedUri);
8867
13.5k
                }
8868
45.8k
                if (uri == ctxt->str_xml_ns) {
8869
158
                    if (attname != ctxt->str_xml) {
8870
158
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8871
158
                     "xml namespace URI cannot be the default namespace\n",
8872
158
                                 NULL, NULL, NULL);
8873
158
                    }
8874
158
                    goto next_attr;
8875
158
                }
8876
45.6k
                if ((len == 29) &&
8877
1.26k
                    (xmlStrEqual(uri,
8878
1.26k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8879
221
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8880
221
                         "reuse of the xmlns namespace name is forbidden\n",
8881
221
                             NULL, NULL, NULL);
8882
221
                    goto next_attr;
8883
221
                }
8884
45.6k
            }
8885
8886
45.6k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
8887
43.6k
                nbNs++;
8888
883k
        } else if (aprefix == ctxt->str_xmlns) {
8889
58.4k
            xmlHashedString huri;
8890
58.4k
            xmlURIPtr parsedUri;
8891
8892
58.4k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
8893
58.4k
            uri = huri.name;
8894
58.4k
            if (uri == NULL) {
8895
1
                xmlErrMemory(ctxt);
8896
1
                goto next_attr;
8897
1
            }
8898
8899
58.4k
            if (attname == ctxt->str_xml) {
8900
217
                if (uri != ctxt->str_xml_ns) {
8901
217
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8902
217
                             "xml namespace prefix mapped to wrong URI\n",
8903
217
                             NULL, NULL, NULL);
8904
217
                }
8905
                /*
8906
                 * Do not keep a namespace definition node
8907
                 */
8908
217
                goto next_attr;
8909
217
            }
8910
58.2k
            if (uri == ctxt->str_xml_ns) {
8911
34
                if (attname != ctxt->str_xml) {
8912
34
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8913
34
                             "xml namespace URI mapped to wrong prefix\n",
8914
34
                             NULL, NULL, NULL);
8915
34
                }
8916
34
                goto next_attr;
8917
34
            }
8918
58.2k
            if (attname == ctxt->str_xmlns) {
8919
363
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8920
363
                         "redefinition of the xmlns prefix is forbidden\n",
8921
363
                         NULL, NULL, NULL);
8922
363
                goto next_attr;
8923
363
            }
8924
57.8k
            if ((len == 29) &&
8925
534
                (xmlStrEqual(uri,
8926
534
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8927
186
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8928
186
                         "reuse of the xmlns namespace name is forbidden\n",
8929
186
                         NULL, NULL, NULL);
8930
186
                goto next_attr;
8931
186
            }
8932
57.6k
            if ((uri == NULL) || (uri[0] == 0)) {
8933
370
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8934
370
                         "xmlns:%s: Empty XML namespace is not allowed\n",
8935
370
                              attname, NULL, NULL);
8936
370
                goto next_attr;
8937
57.3k
            } else {
8938
57.3k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
8939
13
                    xmlErrMemory(ctxt);
8940
13
                    goto next_attr;
8941
13
                }
8942
57.2k
                if (parsedUri == NULL) {
8943
24.9k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
8944
24.9k
                         "xmlns:%s: '%s' is not a valid URI\n",
8945
24.9k
                                       attname, uri, NULL);
8946
32.3k
                } else {
8947
32.3k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
8948
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8949
0
                                  "xmlns:%s: URI %s is not absolute\n",
8950
0
                                  attname, uri, NULL);
8951
0
                    }
8952
32.3k
                    xmlFreeURI(parsedUri);
8953
32.3k
                }
8954
57.2k
            }
8955
8956
57.2k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
8957
56.9k
                nbNs++;
8958
825k
        } else {
8959
            /*
8960
             * Populate attributes array, see above for repurposing
8961
             * of xmlChar pointers.
8962
             */
8963
825k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8964
47.0k
                int res = xmlCtxtGrowAttrs(ctxt);
8965
8966
47.0k
                maxatts = ctxt->maxatts;
8967
47.0k
                atts = ctxt->atts;
8968
8969
47.0k
                if (res < 0)
8970
3
                    goto next_attr;
8971
47.0k
            }
8972
825k
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
8973
825k
                                        ((unsigned) alloc << 31);
8974
825k
            atts[nbatts++] = attname;
8975
825k
            atts[nbatts++] = aprefix;
8976
825k
            atts[nbatts++] = XML_INT_TO_PTR(haprefix.hashValue);
8977
825k
            if (alloc) {
8978
87.3k
                atts[nbatts++] = attvalue;
8979
87.3k
                attvalue += len;
8980
87.3k
                atts[nbatts++] = attvalue;
8981
738k
            } else {
8982
                /*
8983
                 * attvalue points into the input buffer which can be
8984
                 * reallocated. Store differences to input->base instead.
8985
                 * The pointers will be reconstructed later.
8986
                 */
8987
738k
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
8988
738k
                attvalue += len;
8989
738k
                atts[nbatts++] = XML_INT_TO_PTR(attvalue - BASE_PTR);
8990
738k
            }
8991
            /*
8992
             * tag if some deallocation is needed
8993
             */
8994
825k
            if (alloc != 0) attval = 1;
8995
825k
            attvalue = NULL; /* moved into atts */
8996
825k
        }
8997
8998
950k
next_attr:
8999
950k
        if ((attvalue != NULL) && (alloc != 0)) {
9000
23.9k
            xmlFree(attvalue);
9001
23.9k
            attvalue = NULL;
9002
23.9k
        }
9003
9004
950k
  GROW
9005
950k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9006
503k
      break;
9007
447k
  if (SKIP_BLANKS == 0) {
9008
37.2k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9009
37.2k
         "attributes construct error\n");
9010
37.2k
      break;
9011
37.2k
  }
9012
410k
        GROW;
9013
410k
    }
9014
9015
    /*
9016
     * Namespaces from default attributes
9017
     */
9018
1.00M
    if (ctxt->attsDefault != NULL) {
9019
24.2k
        xmlDefAttrsPtr defaults;
9020
9021
24.2k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9022
24.2k
  if (defaults != NULL) {
9023
196k
      for (i = 0; i < defaults->nbAttrs; i++) {
9024
177k
                xmlDefAttr *attr = &defaults->attrs[i];
9025
9026
177k
          attname = attr->name.name;
9027
177k
    aprefix = attr->prefix.name;
9028
9029
177k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9030
4.40k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9031
9032
4.40k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9033
4.19k
                        nbNs++;
9034
173k
    } else if (aprefix == ctxt->str_xmlns) {
9035
107k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9036
9037
107k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9038
107k
                                      NULL, 1) > 0)
9039
106k
                        nbNs++;
9040
107k
    } else {
9041
66.0k
                    if (nratts + nbTotalDef >= XML_MAX_ATTRS) {
9042
0
                        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
9043
0
                                    "Maximum number of attributes exceeded");
9044
0
                        break;
9045
0
                    }
9046
66.0k
                    nbTotalDef += 1;
9047
66.0k
                }
9048
177k
      }
9049
18.3k
  }
9050
24.2k
    }
9051
9052
    /*
9053
     * Resolve attribute namespaces
9054
     */
9055
1.83M
    for (i = 0; i < nbatts; i += 5) {
9056
825k
        attname = atts[i];
9057
825k
        aprefix = atts[i+1];
9058
9059
        /*
9060
  * The default namespace does not apply to attribute names.
9061
  */
9062
825k
  if (aprefix == NULL) {
9063
769k
            nsIndex = NS_INDEX_EMPTY;
9064
769k
        } else if (aprefix == ctxt->str_xml) {
9065
33.4k
            nsIndex = NS_INDEX_XML;
9066
33.4k
        } else {
9067
22.4k
            haprefix.name = aprefix;
9068
22.4k
            haprefix.hashValue = (size_t) atts[i+2];
9069
22.4k
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9070
9071
22.4k
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9072
13.8k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9073
13.8k
        "Namespace prefix %s for %s on %s is not defined\n",
9074
13.8k
        aprefix, attname, localname);
9075
13.8k
                nsIndex = NS_INDEX_EMPTY;
9076
13.8k
            }
9077
22.4k
        }
9078
9079
825k
        atts[i+2] = XML_INT_TO_PTR(nsIndex);
9080
825k
    }
9081
9082
    /*
9083
     * Maximum number of attributes including default attributes.
9084
     */
9085
1.00M
    maxAtts = nratts + nbTotalDef;
9086
9087
    /*
9088
     * Verify that attribute names are unique.
9089
     */
9090
1.00M
    if (maxAtts > 1) {
9091
190k
        attrHashSize = 4;
9092
252k
        while (attrHashSize / 2 < (unsigned) maxAtts)
9093
62.2k
            attrHashSize *= 2;
9094
9095
190k
        if (attrHashSize > ctxt->attrHashMax) {
9096
16.9k
            xmlAttrHashBucket *tmp;
9097
9098
16.9k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9099
16.9k
            if (tmp == NULL) {
9100
1
                xmlErrMemory(ctxt);
9101
1
                goto done;
9102
1
            }
9103
9104
16.9k
            ctxt->attrHash = tmp;
9105
16.9k
            ctxt->attrHashMax = attrHashSize;
9106
16.9k
        }
9107
9108
190k
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9109
9110
612k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9111
422k
            const xmlChar *nsuri;
9112
422k
            unsigned hashValue, nameHashValue, uriHashValue;
9113
422k
            int res;
9114
9115
422k
            attname = atts[i];
9116
422k
            aprefix = atts[i+1];
9117
422k
            nsIndex = XML_PTR_TO_INT(atts[i+2]);
9118
            /* Hash values always have bit 31 set, see dict.c */
9119
422k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9120
9121
422k
            if (nsIndex == NS_INDEX_EMPTY) {
9122
                /*
9123
                 * Prefix with empty namespace means an undeclared
9124
                 * prefix which was already reported above.
9125
                 */
9126
385k
                if (aprefix != NULL)
9127
12.4k
                    continue;
9128
372k
                nsuri = NULL;
9129
372k
                uriHashValue = URI_HASH_EMPTY;
9130
372k
            } else if (nsIndex == NS_INDEX_XML) {
9131
29.9k
                nsuri = ctxt->str_xml_ns;
9132
29.9k
                uriHashValue = URI_HASH_XML;
9133
29.9k
            } else {
9134
6.96k
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9135
6.96k
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9136
6.96k
            }
9137
9138
409k
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9139
409k
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9140
409k
                                    hashValue, i);
9141
409k
            if (res < 0)
9142
0
                continue;
9143
9144
            /*
9145
             * [ WFC: Unique Att Spec ]
9146
             * No attribute name may appear more than once in the same
9147
             * start-tag or empty-element tag.
9148
             * As extended by the Namespace in XML REC.
9149
             */
9150
409k
            if (res < INT_MAX) {
9151
5.13k
                if (aprefix == atts[res+1]) {
9152
3.55k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9153
3.55k
                    numDupErr += 1;
9154
3.55k
                } else {
9155
1.57k
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9156
1.57k
                             "Namespaced Attribute %s in '%s' redefined\n",
9157
1.57k
                             attname, nsuri, NULL);
9158
1.57k
                    numNsErr += 1;
9159
1.57k
                }
9160
5.13k
            }
9161
409k
        }
9162
190k
    }
9163
9164
    /*
9165
     * Default attributes
9166
     */
9167
1.00M
    if (ctxt->attsDefault != NULL) {
9168
24.2k
        xmlDefAttrsPtr defaults;
9169
9170
24.2k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9171
24.2k
  if (defaults != NULL) {
9172
196k
      for (i = 0; i < defaults->nbAttrs; i++) {
9173
177k
                xmlDefAttr *attr = &defaults->attrs[i];
9174
177k
                const xmlChar *nsuri = NULL;
9175
177k
                unsigned hashValue, uriHashValue = 0;
9176
177k
                int res;
9177
9178
177k
          attname = attr->name.name;
9179
177k
    aprefix = attr->prefix.name;
9180
9181
177k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9182
4.40k
                    continue;
9183
173k
    if (aprefix == ctxt->str_xmlns)
9184
107k
                    continue;
9185
9186
66.0k
                if (aprefix == NULL) {
9187
17.5k
                    nsIndex = NS_INDEX_EMPTY;
9188
17.5k
                    nsuri = NULL;
9189
17.5k
                    uriHashValue = URI_HASH_EMPTY;
9190
48.5k
                } else if (aprefix == ctxt->str_xml) {
9191
9.61k
                    nsIndex = NS_INDEX_XML;
9192
9.61k
                    nsuri = ctxt->str_xml_ns;
9193
9.61k
                    uriHashValue = URI_HASH_XML;
9194
38.9k
                } else {
9195
38.9k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9196
38.9k
                    if ((nsIndex == INT_MAX) ||
9197
34.7k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9198
34.7k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9199
34.7k
                                 "Namespace prefix %s for %s on %s is not "
9200
34.7k
                                 "defined\n",
9201
34.7k
                                 aprefix, attname, localname);
9202
34.7k
                        nsIndex = NS_INDEX_EMPTY;
9203
34.7k
                        nsuri = NULL;
9204
34.7k
                        uriHashValue = URI_HASH_EMPTY;
9205
34.7k
                    } else {
9206
4.11k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9207
4.11k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9208
4.11k
                    }
9209
38.9k
                }
9210
9211
                /*
9212
                 * Check whether the attribute exists
9213
                 */
9214
66.0k
                if (maxAtts > 1) {
9215
62.6k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9216
62.6k
                                                   uriHashValue);
9217
62.6k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9218
62.6k
                                            hashValue, nbatts);
9219
62.6k
                    if (res < 0)
9220
0
                        continue;
9221
62.6k
                    if (res < INT_MAX) {
9222
5.69k
                        if (aprefix == atts[res+1])
9223
1.89k
                            continue;
9224
3.79k
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9225
3.79k
                                 "Namespaced Attribute %s in '%s' redefined\n",
9226
3.79k
                                 attname, nsuri, NULL);
9227
3.79k
                    }
9228
62.6k
                }
9229
9230
64.1k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9231
9232
64.1k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9233
977
                    res = xmlCtxtGrowAttrs(ctxt);
9234
9235
977
                    maxatts = ctxt->maxatts;
9236
977
                    atts = ctxt->atts;
9237
9238
977
                    if (res < 0) {
9239
4
                        localname = NULL;
9240
4
                        goto done;
9241
4
                    }
9242
977
                }
9243
9244
64.1k
                atts[nbatts++] = attname;
9245
64.1k
                atts[nbatts++] = aprefix;
9246
64.1k
                atts[nbatts++] = XML_INT_TO_PTR(nsIndex);
9247
64.1k
                atts[nbatts++] = attr->value.name;
9248
64.1k
                atts[nbatts++] = attr->valueEnd;
9249
9250
64.1k
#ifdef LIBXML_VALID_ENABLED
9251
                /*
9252
                 * This should be moved to valid.c, but we don't keep track
9253
                 * whether an attribute was defaulted.
9254
                 */
9255
64.1k
                if ((ctxt->validate) &&
9256
0
                    (ctxt->standalone == 1) &&
9257
0
                    (attr->external != 0)) {
9258
0
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9259
0
                            "standalone: attribute %s on %s defaulted "
9260
0
                            "from external subset\n",
9261
0
                            attname, localname);
9262
0
                }
9263
64.1k
#endif
9264
64.1k
                nbdef++;
9265
64.1k
      }
9266
18.3k
  }
9267
24.2k
    }
9268
9269
    /*
9270
     * Using a single hash table for nsUri/localName pairs cannot
9271
     * detect duplicate QNames reliably. The following example will
9272
     * only result in two namespace errors.
9273
     *
9274
     * <doc xmlns:a="a" xmlns:b="a">
9275
     *   <elem a:a="" b:a="" b:a=""/>
9276
     * </doc>
9277
     *
9278
     * If we saw more than one namespace error but no duplicate QNames
9279
     * were found, we have to scan for duplicate QNames.
9280
     */
9281
1.00M
    if ((numDupErr == 0) && (numNsErr > 1)) {
9282
427
        memset(ctxt->attrHash, -1,
9283
427
               attrHashSize * sizeof(ctxt->attrHash[0]));
9284
9285
3.86k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9286
3.43k
            unsigned hashValue, nameHashValue, prefixHashValue;
9287
3.43k
            int res;
9288
9289
3.43k
            aprefix = atts[i+1];
9290
3.43k
            if (aprefix == NULL)
9291
543
                continue;
9292
9293
2.89k
            attname = atts[i];
9294
            /* Hash values always have bit 31 set, see dict.c */
9295
2.89k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9296
2.89k
            prefixHashValue = xmlDictComputeHash(ctxt->dict, aprefix);
9297
9298
2.89k
            hashValue = xmlDictCombineHash(nameHashValue, prefixHashValue);
9299
2.89k
            res = xmlAttrHashInsertQName(ctxt, attrHashSize, attname,
9300
2.89k
                                         aprefix, hashValue, i);
9301
2.89k
            if (res < INT_MAX)
9302
1.29k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9303
2.89k
        }
9304
427
    }
9305
9306
    /*
9307
     * Reconstruct attribute pointers
9308
     */
9309
1.89M
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9310
        /* namespace URI */
9311
889k
        nsIndex = XML_PTR_TO_INT(atts[i+2]);
9312
889k
        if (nsIndex == INT_MAX)
9313
834k
            atts[i+2] = NULL;
9314
55.3k
        else if (nsIndex == INT_MAX - 1)
9315
42.6k
            atts[i+2] = ctxt->str_xml_ns;
9316
12.7k
        else
9317
12.7k
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9318
9319
889k
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9320
738k
            atts[i+3] = BASE_PTR + XML_PTR_TO_INT(atts[i+3]);  /* value */
9321
738k
            atts[i+4] = BASE_PTR + XML_PTR_TO_INT(atts[i+4]);  /* valuend */
9322
738k
        }
9323
889k
    }
9324
9325
1.00M
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9326
1.00M
    if ((prefix != NULL) && (uri == NULL)) {
9327
31.3k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9328
31.3k
           "Namespace prefix %s on %s is not defined\n",
9329
31.3k
     prefix, localname, NULL);
9330
31.3k
    }
9331
1.00M
    *pref = prefix;
9332
1.00M
    *URI = uri;
9333
9334
    /*
9335
     * SAX callback
9336
     */
9337
1.00M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9338
1.00M
  (!ctxt->disableSAX)) {
9339
801k
  if (nbNs > 0)
9340
25.2k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9341
25.2k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9342
25.2k
        nbatts / 5, nbdef, atts);
9343
776k
  else
9344
776k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9345
776k
                          0, NULL, nbatts / 5, nbdef, atts);
9346
801k
    }
9347
9348
1.00M
done:
9349
    /*
9350
     * Free allocated attribute values
9351
     */
9352
1.00M
    if (attval != 0) {
9353
206k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9354
123k
      if (ctxt->attallocs[j] & 0x80000000)
9355
87.3k
          xmlFree((xmlChar *) atts[i+3]);
9356
83.0k
    }
9357
9358
1.00M
    *nbNsPtr = nbNs;
9359
1.00M
    return(localname);
9360
1.00M
}
9361
9362
/**
9363
 * Parse an end tag. Always consumes '</'.
9364
 *
9365
 *     [42] ETag ::= '</' Name S? '>'
9366
 *
9367
 * With namespace
9368
 *
9369
 *     [NS 9] ETag ::= '</' QName S? '>'
9370
 * @param ctxt  an XML parser context
9371
 * @param tag  the corresponding start tag
9372
 */
9373
9374
static void
9375
401k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9376
401k
    const xmlChar *name;
9377
9378
401k
    GROW;
9379
401k
    if ((RAW != '<') || (NXT(1) != '/')) {
9380
480
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9381
480
  return;
9382
480
    }
9383
400k
    SKIP(2);
9384
9385
400k
    if (tag->prefix == NULL)
9386
243k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9387
157k
    else
9388
157k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9389
9390
    /*
9391
     * We should definitely be at the ending "S? '>'" part
9392
     */
9393
400k
    GROW;
9394
400k
    SKIP_BLANKS;
9395
400k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9396
10.0k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9397
10.0k
    } else
9398
390k
  NEXT1;
9399
9400
    /*
9401
     * [ WFC: Element Type Match ]
9402
     * The Name in an element's end-tag must match the element type in the
9403
     * start-tag.
9404
     *
9405
     */
9406
400k
    if (name != (xmlChar*)1) {
9407
10.7k
        if (name == NULL) name = BAD_CAST "unparsable";
9408
10.7k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9409
10.7k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9410
10.7k
                    ctxt->name, tag->line, name);
9411
10.7k
    }
9412
9413
    /*
9414
     * SAX: End of Tag
9415
     */
9416
400k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9417
400k
  (!ctxt->disableSAX))
9418
375k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9419
375k
                                tag->URI);
9420
9421
400k
    spacePop(ctxt);
9422
400k
    if (tag->nsNr != 0)
9423
22.4k
  xmlParserNsPop(ctxt, tag->nsNr);
9424
400k
}
9425
9426
/**
9427
 * Parse escaped pure raw content. Always consumes '<!['.
9428
 *
9429
 * @deprecated Internal function, don't use.
9430
 *
9431
 *     [18] CDSect ::= CDStart CData CDEnd
9432
 *
9433
 *     [19] CDStart ::= '<![CDATA['
9434
 *
9435
 *     [20] Data ::= (Char* - (Char* ']]>' Char*))
9436
 *
9437
 *     [21] CDEnd ::= ']]>'
9438
 * @param ctxt  an XML parser context
9439
 */
9440
void
9441
6.55k
xmlParseCDSect(xmlParserCtxt *ctxt) {
9442
6.55k
    xmlChar *buf = NULL;
9443
6.55k
    int len = 0;
9444
6.55k
    int size = XML_PARSER_BUFFER_SIZE;
9445
6.55k
    int r, rl;
9446
6.55k
    int s, sl;
9447
6.55k
    int cur, l;
9448
6.55k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9449
0
                    XML_MAX_HUGE_LENGTH :
9450
6.55k
                    XML_MAX_TEXT_LENGTH;
9451
9452
6.55k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9453
0
        return;
9454
6.55k
    SKIP(3);
9455
9456
6.55k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9457
0
        return;
9458
6.55k
    SKIP(6);
9459
9460
6.55k
    r = xmlCurrentCharRecover(ctxt, &rl);
9461
6.55k
    if (!IS_CHAR(r)) {
9462
651
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9463
651
        goto out;
9464
651
    }
9465
5.90k
    NEXTL(rl);
9466
5.90k
    s = xmlCurrentCharRecover(ctxt, &sl);
9467
5.90k
    if (!IS_CHAR(s)) {
9468
475
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9469
475
        goto out;
9470
475
    }
9471
5.43k
    NEXTL(sl);
9472
5.43k
    cur = xmlCurrentCharRecover(ctxt, &l);
9473
5.43k
    buf = xmlMalloc(size);
9474
5.43k
    if (buf == NULL) {
9475
1
  xmlErrMemory(ctxt);
9476
1
        goto out;
9477
1
    }
9478
1.27M
    while (IS_CHAR(cur) &&
9479
1.26M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9480
1.26M
  if (len + 5 >= size) {
9481
1.33k
      xmlChar *tmp;
9482
1.33k
            int newSize;
9483
9484
1.33k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9485
1.33k
            if (newSize < 0) {
9486
0
                xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9487
0
                               "CData section too big found\n");
9488
0
                goto out;
9489
0
            }
9490
1.33k
      tmp = xmlRealloc(buf, newSize);
9491
1.33k
      if (tmp == NULL) {
9492
1
    xmlErrMemory(ctxt);
9493
1
                goto out;
9494
1
      }
9495
1.33k
      buf = tmp;
9496
1.33k
      size = newSize;
9497
1.33k
  }
9498
1.26M
  COPY_BUF(buf, len, r);
9499
1.26M
  r = s;
9500
1.26M
  rl = sl;
9501
1.26M
  s = cur;
9502
1.26M
  sl = l;
9503
1.26M
  NEXTL(l);
9504
1.26M
  cur = xmlCurrentCharRecover(ctxt, &l);
9505
1.26M
    }
9506
5.43k
    buf[len] = 0;
9507
5.43k
    if (cur != '>') {
9508
2.80k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9509
2.80k
                       "CData section not finished\n%.50s\n", buf);
9510
2.80k
        goto out;
9511
2.80k
    }
9512
2.62k
    NEXTL(l);
9513
9514
    /*
9515
     * OK the buffer is to be consumed as cdata.
9516
     */
9517
2.62k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9518
1.94k
        if ((ctxt->sax->cdataBlock != NULL) &&
9519
1.94k
            ((ctxt->options & XML_PARSE_NOCDATA) == 0)) {
9520
1.94k
            ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9521
1.94k
        } else if (ctxt->sax->characters != NULL) {
9522
0
            ctxt->sax->characters(ctxt->userData, buf, len);
9523
0
        }
9524
1.94k
    }
9525
9526
6.55k
out:
9527
6.55k
    xmlFree(buf);
9528
6.55k
}
9529
9530
/**
9531
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9532
 * unexpected EOF to the caller.
9533
 *
9534
 * @param ctxt  an XML parser context
9535
 */
9536
9537
static void
9538
30.4k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9539
30.4k
    int oldNameNr = ctxt->nameNr;
9540
30.4k
    int oldSpaceNr = ctxt->spaceNr;
9541
30.4k
    int oldNodeNr = ctxt->nodeNr;
9542
9543
30.4k
    GROW;
9544
2.95M
    while ((ctxt->input->cur < ctxt->input->end) &&
9545
2.94M
     (PARSER_STOPPED(ctxt) == 0)) {
9546
2.94M
  const xmlChar *cur = ctxt->input->cur;
9547
9548
  /*
9549
   * First case : a Processing Instruction.
9550
   */
9551
2.94M
  if ((*cur == '<') && (cur[1] == '?')) {
9552
5.81k
      xmlParsePI(ctxt);
9553
5.81k
  }
9554
9555
  /*
9556
   * Second case : a CDSection
9557
   */
9558
  /* 2.6.0 test was *cur not RAW */
9559
2.93M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9560
6.55k
      xmlParseCDSect(ctxt);
9561
6.55k
  }
9562
9563
  /*
9564
   * Third case :  a comment
9565
   */
9566
2.93M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9567
53.8k
     (NXT(2) == '-') && (NXT(3) == '-')) {
9568
46.5k
      xmlParseComment(ctxt);
9569
46.5k
  }
9570
9571
  /*
9572
   * Fourth case :  a sub-element.
9573
   */
9574
2.88M
  else if (*cur == '<') {
9575
1.43M
            if (NXT(1) == '/') {
9576
400k
                if (ctxt->nameNr <= oldNameNr)
9577
19.6k
                    break;
9578
380k
          xmlParseElementEnd(ctxt);
9579
1.03M
            } else {
9580
1.03M
          xmlParseElementStart(ctxt);
9581
1.03M
            }
9582
1.43M
  }
9583
9584
  /*
9585
   * Fifth case : a reference. If if has not been resolved,
9586
   *    parsing returns it's Name, create the node
9587
   */
9588
9589
1.44M
  else if (*cur == '&') {
9590
47.1k
      xmlParseReference(ctxt);
9591
47.1k
  }
9592
9593
  /*
9594
   * Last case, text. Note that References are handled directly.
9595
   */
9596
1.40M
  else {
9597
1.40M
      xmlParseCharDataInternal(ctxt, 0);
9598
1.40M
  }
9599
9600
2.92M
  SHRINK;
9601
2.92M
  GROW;
9602
2.92M
    }
9603
9604
30.4k
    if ((ctxt->nameNr > oldNameNr) &&
9605
5.31k
        (ctxt->input->cur >= ctxt->input->end) &&
9606
5.00k
        (ctxt->wellFormed)) {
9607
245
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9608
245
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9609
245
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9610
245
                "Premature end of data in tag %s line %d\n",
9611
245
                name, line, NULL);
9612
245
    }
9613
9614
    /*
9615
     * Clean up in error case
9616
     */
9617
9618
45.8k
    while (ctxt->nodeNr > oldNodeNr)
9619
15.4k
        nodePop(ctxt);
9620
9621
116k
    while (ctxt->nameNr > oldNameNr) {
9622
85.9k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9623
9624
85.9k
        if (tag->nsNr != 0)
9625
40.5k
            xmlParserNsPop(ctxt, tag->nsNr);
9626
9627
85.9k
        namePop(ctxt);
9628
85.9k
    }
9629
9630
116k
    while (ctxt->spaceNr > oldSpaceNr)
9631
85.9k
        spacePop(ctxt);
9632
30.4k
}
9633
9634
/**
9635
 * Parse XML element content. This is useful if you're only interested
9636
 * in custom SAX callbacks. If you want a node list, use
9637
 * #xmlCtxtParseContent.
9638
 *
9639
 * @param ctxt  an XML parser context
9640
 */
9641
void
9642
0
xmlParseContent(xmlParserCtxt *ctxt) {
9643
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9644
0
        return;
9645
9646
0
    xmlCtxtInitializeLate(ctxt);
9647
9648
0
    xmlParseContentInternal(ctxt);
9649
9650
0
    xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
9651
0
}
9652
9653
/**
9654
 * Parse an XML element
9655
 *
9656
 * @deprecated Internal function, don't use.
9657
 *
9658
 *     [39] element ::= EmptyElemTag | STag content ETag
9659
 *
9660
 * [ WFC: Element Type Match ]
9661
 * The Name in an element's end-tag must match the element type in the
9662
 * start-tag.
9663
 *
9664
 * @param ctxt  an XML parser context
9665
 */
9666
9667
void
9668
32.1k
xmlParseElement(xmlParserCtxt *ctxt) {
9669
32.1k
    if (xmlParseElementStart(ctxt) != 0)
9670
3.64k
        return;
9671
9672
28.4k
    xmlParseContentInternal(ctxt);
9673
9674
28.4k
    if (ctxt->input->cur >= ctxt->input->end) {
9675
8.28k
        if (ctxt->wellFormed) {
9676
304
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9677
304
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9678
304
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9679
304
                    "Premature end of data in tag %s line %d\n",
9680
304
                    name, line, NULL);
9681
304
        }
9682
8.28k
        return;
9683
8.28k
    }
9684
9685
20.1k
    xmlParseElementEnd(ctxt);
9686
20.1k
}
9687
9688
/**
9689
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9690
 * opening tag was parsed, 1 if an empty element was parsed.
9691
 *
9692
 * Always consumes '<'.
9693
 *
9694
 * @param ctxt  an XML parser context
9695
 */
9696
static int
9697
1.06M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9698
1.06M
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9699
1.06M
    const xmlChar *name;
9700
1.06M
    const xmlChar *prefix = NULL;
9701
1.06M
    const xmlChar *URI = NULL;
9702
1.06M
    xmlParserNodeInfo node_info;
9703
1.06M
    int line;
9704
1.06M
    xmlNodePtr cur;
9705
1.06M
    int nbNs = 0;
9706
9707
1.06M
    if (ctxt->nameNr > maxDepth) {
9708
13
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9709
13
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9710
13
                ctxt->nameNr);
9711
13
  return(-1);
9712
13
    }
9713
9714
    /* Capture start position */
9715
1.06M
    if (ctxt->record_info) {
9716
0
        node_info.begin_pos = ctxt->input->consumed +
9717
0
                          (CUR_PTR - ctxt->input->base);
9718
0
  node_info.begin_line = ctxt->input->line;
9719
0
    }
9720
9721
1.06M
    if (ctxt->spaceNr == 0)
9722
32.1k
  spacePush(ctxt, -1);
9723
1.03M
    else if (*ctxt->space == -2)
9724
0
  spacePush(ctxt, -1);
9725
1.03M
    else
9726
1.03M
  spacePush(ctxt, *ctxt->space);
9727
9728
1.06M
    line = ctxt->input->line;
9729
1.06M
#ifdef LIBXML_SAX1_ENABLED
9730
1.06M
    if (ctxt->sax2)
9731
1.06M
#endif /* LIBXML_SAX1_ENABLED */
9732
1.06M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9733
0
#ifdef LIBXML_SAX1_ENABLED
9734
0
    else
9735
0
  name = xmlParseStartTag(ctxt);
9736
1.06M
#endif /* LIBXML_SAX1_ENABLED */
9737
1.06M
    if (name == NULL) {
9738
61.3k
  spacePop(ctxt);
9739
61.3k
        return(-1);
9740
61.3k
    }
9741
1.00M
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9742
1.00M
    cur = ctxt->node;
9743
9744
1.00M
#ifdef LIBXML_VALID_ENABLED
9745
    /*
9746
     * [ VC: Root Element Type ]
9747
     * The Name in the document type declaration must match the element
9748
     * type of the root element.
9749
     */
9750
1.00M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9751
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9752
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9753
1.00M
#endif /* LIBXML_VALID_ENABLED */
9754
9755
    /*
9756
     * Check for an Empty Element.
9757
     */
9758
1.00M
    if ((RAW == '/') && (NXT(1) == '>')) {
9759
442k
        SKIP(2);
9760
442k
  if (ctxt->sax2) {
9761
442k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9762
442k
    (!ctxt->disableSAX))
9763
403k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9764
442k
#ifdef LIBXML_SAX1_ENABLED
9765
442k
  } else {
9766
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9767
0
    (!ctxt->disableSAX))
9768
0
    ctxt->sax->endElement(ctxt->userData, name);
9769
0
#endif /* LIBXML_SAX1_ENABLED */
9770
0
  }
9771
442k
  namePop(ctxt);
9772
442k
  spacePop(ctxt);
9773
442k
  if (nbNs > 0)
9774
4.81k
      xmlParserNsPop(ctxt, nbNs);
9775
442k
  if (cur != NULL && ctxt->record_info) {
9776
0
            node_info.node = cur;
9777
0
            node_info.end_pos = ctxt->input->consumed +
9778
0
                                (CUR_PTR - ctxt->input->base);
9779
0
            node_info.end_line = ctxt->input->line;
9780
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9781
0
  }
9782
442k
  return(1);
9783
442k
    }
9784
563k
    if (RAW == '>') {
9785
495k
        NEXT1;
9786
495k
        if (cur != NULL && ctxt->record_info) {
9787
0
            node_info.node = cur;
9788
0
            node_info.end_pos = 0;
9789
0
            node_info.end_line = 0;
9790
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9791
0
        }
9792
495k
    } else {
9793
68.2k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9794
68.2k
         "Couldn't find end of Start Tag %s line %d\n",
9795
68.2k
                    name, line, NULL);
9796
9797
  /*
9798
   * end of parsing of this node.
9799
   */
9800
68.2k
  nodePop(ctxt);
9801
68.2k
  namePop(ctxt);
9802
68.2k
  spacePop(ctxt);
9803
68.2k
  if (nbNs > 0)
9804
19.1k
      xmlParserNsPop(ctxt, nbNs);
9805
68.2k
  return(-1);
9806
68.2k
    }
9807
9808
495k
    return(0);
9809
563k
}
9810
9811
/**
9812
 * Parse the end of an XML element. Always consumes '</'.
9813
 *
9814
 * @param ctxt  an XML parser context
9815
 */
9816
static void
9817
401k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9818
401k
    xmlNodePtr cur = ctxt->node;
9819
9820
401k
    if (ctxt->nameNr <= 0) {
9821
9
        if ((RAW == '<') && (NXT(1) == '/'))
9822
2
            SKIP(2);
9823
9
        return;
9824
9
    }
9825
9826
    /*
9827
     * parse the end of tag: '</' should be here.
9828
     */
9829
401k
    if (ctxt->sax2) {
9830
401k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9831
401k
  namePop(ctxt);
9832
401k
    }
9833
0
#ifdef LIBXML_SAX1_ENABLED
9834
0
    else
9835
0
  xmlParseEndTag1(ctxt, 0);
9836
401k
#endif /* LIBXML_SAX1_ENABLED */
9837
9838
    /*
9839
     * Capture end position
9840
     */
9841
401k
    if (cur != NULL && ctxt->record_info) {
9842
0
        xmlParserNodeInfoPtr node_info;
9843
9844
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9845
0
        if (node_info != NULL) {
9846
0
            node_info->end_pos = ctxt->input->consumed +
9847
0
                                 (CUR_PTR - ctxt->input->base);
9848
0
            node_info->end_line = ctxt->input->line;
9849
0
        }
9850
0
    }
9851
401k
}
9852
9853
/**
9854
 * Parse the XML version value.
9855
 *
9856
 * @deprecated Internal function, don't use.
9857
 *
9858
 *     [26] VersionNum ::= '1.' [0-9]+
9859
 *
9860
 * In practice allow [0-9].[0-9]+ at that level
9861
 *
9862
 * @param ctxt  an XML parser context
9863
 * @returns the string giving the XML version number, or NULL
9864
 */
9865
xmlChar *
9866
48.6k
xmlParseVersionNum(xmlParserCtxt *ctxt) {
9867
48.6k
    xmlChar *buf = NULL;
9868
48.6k
    int len = 0;
9869
48.6k
    int size = 10;
9870
48.6k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
0
                    XML_MAX_TEXT_LENGTH :
9872
48.6k
                    XML_MAX_NAME_LENGTH;
9873
48.6k
    xmlChar cur;
9874
9875
48.6k
    buf = xmlMalloc(size);
9876
48.6k
    if (buf == NULL) {
9877
4
  xmlErrMemory(ctxt);
9878
4
  return(NULL);
9879
4
    }
9880
48.6k
    cur = CUR;
9881
48.6k
    if (!((cur >= '0') && (cur <= '9'))) {
9882
1.14k
  xmlFree(buf);
9883
1.14k
  return(NULL);
9884
1.14k
    }
9885
47.4k
    buf[len++] = cur;
9886
47.4k
    NEXT;
9887
47.4k
    cur=CUR;
9888
47.4k
    if (cur != '.') {
9889
972
  xmlFree(buf);
9890
972
  return(NULL);
9891
972
    }
9892
46.5k
    buf[len++] = cur;
9893
46.5k
    NEXT;
9894
46.5k
    cur=CUR;
9895
164k
    while ((cur >= '0') && (cur <= '9')) {
9896
117k
  if (len + 1 >= size) {
9897
1.22k
      xmlChar *tmp;
9898
1.22k
            int newSize;
9899
9900
1.22k
            newSize = xmlGrowCapacity(size, 1, 1, maxLength);
9901
1.22k
            if (newSize < 0) {
9902
1
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "VersionNum");
9903
1
                xmlFree(buf);
9904
1
                return(NULL);
9905
1
            }
9906
1.22k
      tmp = xmlRealloc(buf, newSize);
9907
1.22k
      if (tmp == NULL) {
9908
1
    xmlErrMemory(ctxt);
9909
1
          xmlFree(buf);
9910
1
    return(NULL);
9911
1
      }
9912
1.22k
      buf = tmp;
9913
1.22k
            size = newSize;
9914
1.22k
  }
9915
117k
  buf[len++] = cur;
9916
117k
  NEXT;
9917
117k
  cur=CUR;
9918
117k
    }
9919
46.5k
    buf[len] = 0;
9920
46.5k
    return(buf);
9921
46.5k
}
9922
9923
/**
9924
 * Parse the XML version.
9925
 *
9926
 * @deprecated Internal function, don't use.
9927
 *
9928
 *     [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9929
 *
9930
 *     [25] Eq ::= S? '=' S?
9931
 *
9932
 * @param ctxt  an XML parser context
9933
 * @returns the version string, e.g. "1.0"
9934
 */
9935
9936
xmlChar *
9937
73.1k
xmlParseVersionInfo(xmlParserCtxt *ctxt) {
9938
73.1k
    xmlChar *version = NULL;
9939
9940
73.1k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
9941
50.3k
  SKIP(7);
9942
50.3k
  SKIP_BLANKS;
9943
50.3k
  if (RAW != '=') {
9944
1.01k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
9945
1.01k
      return(NULL);
9946
1.01k
        }
9947
49.3k
  NEXT;
9948
49.3k
  SKIP_BLANKS;
9949
49.3k
  if (RAW == '"') {
9950
47.4k
      NEXT;
9951
47.4k
      version = xmlParseVersionNum(ctxt);
9952
47.4k
      if (RAW != '"') {
9953
2.30k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9954
2.30k
      } else
9955
45.1k
          NEXT;
9956
47.4k
  } else if (RAW == '\''){
9957
1.16k
      NEXT;
9958
1.16k
      version = xmlParseVersionNum(ctxt);
9959
1.16k
      if (RAW != '\'') {
9960
384
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
9961
384
      } else
9962
780
          NEXT;
9963
1.16k
  } else {
9964
663
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
9965
663
  }
9966
49.3k
    }
9967
72.1k
    return(version);
9968
73.1k
}
9969
9970
/**
9971
 * Parse the XML encoding name
9972
 *
9973
 * @deprecated Internal function, don't use.
9974
 *
9975
 *     [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9976
 *
9977
 * @param ctxt  an XML parser context
9978
 * @returns the encoding name value or NULL
9979
 */
9980
xmlChar *
9981
38.3k
xmlParseEncName(xmlParserCtxt *ctxt) {
9982
38.3k
    xmlChar *buf = NULL;
9983
38.3k
    int len = 0;
9984
38.3k
    int size = 10;
9985
38.3k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9986
0
                    XML_MAX_TEXT_LENGTH :
9987
38.3k
                    XML_MAX_NAME_LENGTH;
9988
38.3k
    xmlChar cur;
9989
9990
38.3k
    cur = CUR;
9991
38.3k
    if (((cur >= 'a') && (cur <= 'z')) ||
9992
37.9k
        ((cur >= 'A') && (cur <= 'Z'))) {
9993
37.9k
  buf = xmlMalloc(size);
9994
37.9k
  if (buf == NULL) {
9995
4
      xmlErrMemory(ctxt);
9996
4
      return(NULL);
9997
4
  }
9998
9999
37.9k
  buf[len++] = cur;
10000
37.9k
  NEXT;
10001
37.9k
  cur = CUR;
10002
17.4M
  while (((cur >= 'a') && (cur <= 'z')) ||
10003
17.4M
         ((cur >= 'A') && (cur <= 'Z')) ||
10004
288k
         ((cur >= '0') && (cur <= '9')) ||
10005
109k
         (cur == '.') || (cur == '_') ||
10006
17.4M
         (cur == '-')) {
10007
17.4M
      if (len + 1 >= size) {
10008
45.0k
          xmlChar *tmp;
10009
45.0k
                int newSize;
10010
10011
45.0k
                newSize = xmlGrowCapacity(size, 1, 1, maxLength);
10012
45.0k
                if (newSize < 0) {
10013
163
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10014
163
                    xmlFree(buf);
10015
163
                    return(NULL);
10016
163
                }
10017
44.8k
    tmp = xmlRealloc(buf, newSize);
10018
44.8k
    if (tmp == NULL) {
10019
4
        xmlErrMemory(ctxt);
10020
4
        xmlFree(buf);
10021
4
        return(NULL);
10022
4
    }
10023
44.8k
    buf = tmp;
10024
44.8k
                size = newSize;
10025
44.8k
      }
10026
17.4M
      buf[len++] = cur;
10027
17.4M
      NEXT;
10028
17.4M
      cur = CUR;
10029
17.4M
        }
10030
37.7k
  buf[len] = 0;
10031
37.7k
    } else {
10032
477
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10033
477
    }
10034
38.2k
    return(buf);
10035
38.3k
}
10036
10037
/**
10038
 * Parse the XML encoding declaration
10039
 *
10040
 * @deprecated Internal function, don't use.
10041
 *
10042
 *     [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | 
10043
 *                           "'" EncName "'")
10044
 *
10045
 * this setups the conversion filters.
10046
 *
10047
 * @param ctxt  an XML parser context
10048
 * @returns the encoding value or NULL
10049
 */
10050
10051
const xmlChar *
10052
67.8k
xmlParseEncodingDecl(xmlParserCtxt *ctxt) {
10053
67.8k
    xmlChar *encoding = NULL;
10054
10055
67.8k
    SKIP_BLANKS;
10056
67.8k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10057
28.7k
        return(NULL);
10058
10059
39.0k
    SKIP(8);
10060
39.0k
    SKIP_BLANKS;
10061
39.0k
    if (RAW != '=') {
10062
479
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10063
479
        return(NULL);
10064
479
    }
10065
38.6k
    NEXT;
10066
38.6k
    SKIP_BLANKS;
10067
38.6k
    if (RAW == '"') {
10068
37.9k
        NEXT;
10069
37.9k
        encoding = xmlParseEncName(ctxt);
10070
37.9k
        if (RAW != '"') {
10071
1.38k
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10072
1.38k
            xmlFree(encoding);
10073
1.38k
            return(NULL);
10074
1.38k
        } else
10075
36.5k
            NEXT;
10076
37.9k
    } else if (RAW == '\''){
10077
440
        NEXT;
10078
440
        encoding = xmlParseEncName(ctxt);
10079
440
        if (RAW != '\'') {
10080
217
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10081
217
            xmlFree(encoding);
10082
217
            return(NULL);
10083
217
        } else
10084
223
            NEXT;
10085
440
    } else {
10086
221
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10087
221
    }
10088
10089
37.0k
    if (encoding == NULL)
10090
222
        return(NULL);
10091
10092
36.7k
    xmlSetDeclaredEncoding(ctxt, encoding);
10093
10094
36.7k
    return(ctxt->encoding);
10095
37.0k
}
10096
10097
/**
10098
 * Parse the XML standalone declaration
10099
 *
10100
 * @deprecated Internal function, don't use.
10101
 *
10102
 *     [32] SDDecl ::= S 'standalone' Eq
10103
 *                     (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10104
 *
10105
 * [ VC: Standalone Document Declaration ]
10106
 * TODO The standalone document declaration must have the value "no"
10107
 * if any external markup declarations contain declarations of:
10108
 *  - attributes with default values, if elements to which these
10109
 *    attributes apply appear in the document without specifications
10110
 *    of values for these attributes, or
10111
 *  - entities (other than amp, lt, gt, apos, quot), if references
10112
 *    to those entities appear in the document, or
10113
 *  - attributes with values subject to normalization, where the
10114
 *    attribute appears in the document with a value which will change
10115
 *    as a result of normalization, or
10116
 *  - element types with element content, if white space occurs directly
10117
 *    within any instance of those types.
10118
 *
10119
 * @param ctxt  an XML parser context
10120
 * @returns
10121
 *   1 if standalone="yes"
10122
 *   0 if standalone="no"
10123
 *  -2 if standalone attribute is missing or invalid
10124
 *    (A standalone value of -2 means that the XML declaration was found,
10125
 *     but no value was specified for the standalone attribute).
10126
 */
10127
10128
int
10129
5.52k
xmlParseSDDecl(xmlParserCtxt *ctxt) {
10130
5.52k
    int standalone = -2;
10131
10132
5.52k
    SKIP_BLANKS;
10133
5.52k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10134
271
  SKIP(10);
10135
271
        SKIP_BLANKS;
10136
271
  if (RAW != '=') {
10137
1
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10138
1
      return(standalone);
10139
1
        }
10140
270
  NEXT;
10141
270
  SKIP_BLANKS;
10142
270
        if (RAW == '\''){
10143
32
      NEXT;
10144
32
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10145
2
          standalone = 0;
10146
2
                SKIP(2);
10147
30
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10148
26
                 (NXT(2) == 's')) {
10149
25
          standalone = 1;
10150
25
    SKIP(3);
10151
25
            } else {
10152
5
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10153
5
      }
10154
32
      if (RAW != '\'') {
10155
9
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10156
9
      } else
10157
23
          NEXT;
10158
238
  } else if (RAW == '"'){
10159
237
      NEXT;
10160
237
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10161
232
          standalone = 0;
10162
232
    SKIP(2);
10163
232
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10164
3
                 (NXT(2) == 's')) {
10165
2
          standalone = 1;
10166
2
                SKIP(3);
10167
3
            } else {
10168
3
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10169
3
      }
10170
237
      if (RAW != '"') {
10171
6
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10172
6
      } else
10173
231
          NEXT;
10174
237
  } else {
10175
1
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10176
1
        }
10177
270
    }
10178
5.52k
    return(standalone);
10179
5.52k
}
10180
10181
/**
10182
 * Parse an XML declaration header
10183
 *
10184
 * @deprecated Internal function, don't use.
10185
 *
10186
 *     [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10187
 * @param ctxt  an XML parser context
10188
 */
10189
10190
void
10191
15.7k
xmlParseXMLDecl(xmlParserCtxt *ctxt) {
10192
15.7k
    xmlChar *version;
10193
10194
    /*
10195
     * This value for standalone indicates that the document has an
10196
     * XML declaration but it does not have a standalone attribute.
10197
     * It will be overwritten later if a standalone attribute is found.
10198
     */
10199
10200
15.7k
    ctxt->standalone = -2;
10201
10202
    /*
10203
     * We know that '<?xml' is here.
10204
     */
10205
15.7k
    SKIP(5);
10206
10207
15.7k
    if (!IS_BLANK_CH(RAW)) {
10208
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10209
0
                 "Blank needed after '<?xml'\n");
10210
0
    }
10211
15.7k
    SKIP_BLANKS;
10212
10213
    /*
10214
     * We must have the VersionInfo here.
10215
     */
10216
15.7k
    version = xmlParseVersionInfo(ctxt);
10217
15.7k
    if (version == NULL) {
10218
2.77k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10219
12.9k
    } else {
10220
12.9k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10221
      /*
10222
       * Changed here for XML-1.0 5th edition
10223
       */
10224
3.50k
      if (ctxt->options & XML_PARSE_OLD10) {
10225
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10226
0
                "Unsupported version '%s'\n",
10227
0
                version);
10228
3.50k
      } else {
10229
3.50k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10230
3.12k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10231
3.12k
                      "Unsupported version '%s'\n",
10232
3.12k
          version, NULL);
10233
3.12k
    } else {
10234
381
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10235
381
              "Unsupported version '%s'\n",
10236
381
              version);
10237
381
    }
10238
3.50k
      }
10239
3.50k
  }
10240
12.9k
  if (ctxt->version != NULL)
10241
0
      xmlFree(ctxt->version);
10242
12.9k
  ctxt->version = version;
10243
12.9k
    }
10244
10245
    /*
10246
     * We may have the encoding declaration
10247
     */
10248
15.7k
    if (!IS_BLANK_CH(RAW)) {
10249
8.25k
        if ((RAW == '?') && (NXT(1) == '>')) {
10250
5.29k
      SKIP(2);
10251
5.29k
      return;
10252
5.29k
  }
10253
2.95k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10254
2.95k
    }
10255
10.4k
    xmlParseEncodingDecl(ctxt);
10256
10257
    /*
10258
     * We may have the standalone status.
10259
     */
10260
10.4k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10261
4.95k
        if ((RAW == '?') && (NXT(1) == '>')) {
10262
4.87k
      SKIP(2);
10263
4.87k
      return;
10264
4.87k
  }
10265
79
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10266
79
    }
10267
10268
    /*
10269
     * We can grow the input buffer freely at that point
10270
     */
10271
5.52k
    GROW;
10272
10273
5.52k
    SKIP_BLANKS;
10274
5.52k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10275
10276
5.52k
    SKIP_BLANKS;
10277
5.52k
    if ((RAW == '?') && (NXT(1) == '>')) {
10278
2.06k
        SKIP(2);
10279
3.46k
    } else if (RAW == '>') {
10280
        /* Deprecated old WD ... */
10281
1.04k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10282
1.04k
  NEXT;
10283
2.41k
    } else {
10284
2.41k
        int c;
10285
10286
2.41k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10287
70.1k
        while ((PARSER_STOPPED(ctxt) == 0) &&
10288
70.1k
               ((c = CUR) != 0)) {
10289
69.6k
            NEXT;
10290
69.6k
            if (c == '>')
10291
1.89k
                break;
10292
69.6k
        }
10293
2.41k
    }
10294
5.52k
}
10295
10296
/**
10297
 * @since 2.14.0
10298
 *
10299
 * @param ctxt  parser context
10300
 * @returns the version from the XML declaration.
10301
 */
10302
const xmlChar *
10303
0
xmlCtxtGetVersion(xmlParserCtxt *ctxt) {
10304
0
    if (ctxt == NULL)
10305
0
        return(NULL);
10306
10307
0
    return(ctxt->version);
10308
0
}
10309
10310
/**
10311
 * @since 2.14.0
10312
 *
10313
 * @param ctxt  parser context
10314
 * @returns the value from the standalone document declaration.
10315
 */
10316
int
10317
0
xmlCtxtGetStandalone(xmlParserCtxt *ctxt) {
10318
0
    if (ctxt == NULL)
10319
0
        return(0);
10320
10321
0
    return(ctxt->standalone);
10322
0
}
10323
10324
/**
10325
 * Parse an XML Misc* optional field.
10326
 *
10327
 * @deprecated Internal function, don't use.
10328
 *
10329
 *     [27] Misc ::= Comment | PI |  S
10330
 * @param ctxt  an XML parser context
10331
 */
10332
10333
void
10334
79.8k
xmlParseMisc(xmlParserCtxt *ctxt) {
10335
93.8k
    while (PARSER_STOPPED(ctxt) == 0) {
10336
92.2k
        SKIP_BLANKS;
10337
92.2k
        GROW;
10338
92.2k
        if ((RAW == '<') && (NXT(1) == '?')) {
10339
10.3k
      xmlParsePI(ctxt);
10340
81.9k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10341
3.72k
      xmlParseComment(ctxt);
10342
78.2k
        } else {
10343
78.2k
            break;
10344
78.2k
        }
10345
92.2k
    }
10346
79.8k
}
10347
10348
static void
10349
39.5k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10350
39.5k
    xmlDocPtr doc;
10351
10352
    /*
10353
     * SAX: end of the document processing.
10354
     */
10355
39.5k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10356
39.5k
        ctxt->sax->endDocument(ctxt->userData);
10357
10358
    /*
10359
     * Remove locally kept entity definitions if the tree was not built
10360
     */
10361
39.5k
    doc = ctxt->myDoc;
10362
39.5k
    if ((doc != NULL) &&
10363
37.5k
        (xmlStrEqual(doc->version, SAX_COMPAT_MODE))) {
10364
1.72k
        xmlFreeDoc(doc);
10365
1.72k
        ctxt->myDoc = NULL;
10366
1.72k
    }
10367
39.5k
}
10368
10369
/**
10370
 * Parse an XML document and invoke the SAX handlers. This is useful
10371
 * if you're only interested in custom SAX callbacks. If you want a
10372
 * document tree, use #xmlCtxtParseDocument.
10373
 *
10374
 * @param ctxt  an XML parser context
10375
 * @returns 0, -1 in case of error.
10376
 */
10377
10378
int
10379
39.6k
xmlParseDocument(xmlParserCtxt *ctxt) {
10380
39.6k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10381
0
        return(-1);
10382
10383
39.6k
    GROW;
10384
10385
    /*
10386
     * SAX: detecting the level.
10387
     */
10388
39.6k
    xmlCtxtInitializeLate(ctxt);
10389
10390
39.6k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10391
39.6k
        ctxt->sax->setDocumentLocator(ctxt->userData,
10392
39.6k
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10393
39.6k
    }
10394
10395
39.6k
    xmlDetectEncoding(ctxt);
10396
10397
39.6k
    if (CUR == 0) {
10398
71
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10399
71
  return(-1);
10400
71
    }
10401
10402
39.5k
    GROW;
10403
39.5k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10404
10405
  /*
10406
   * Note that we will switch encoding on the fly.
10407
   */
10408
15.7k
  xmlParseXMLDecl(ctxt);
10409
15.7k
  SKIP_BLANKS;
10410
23.8k
    } else {
10411
23.8k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10412
23.8k
        if (ctxt->version == NULL) {
10413
7
            xmlErrMemory(ctxt);
10414
7
            return(-1);
10415
7
        }
10416
23.8k
    }
10417
39.5k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10418
35.8k
        ctxt->sax->startDocument(ctxt->userData);
10419
39.5k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10420
35.8k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10421
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10422
0
    }
10423
10424
    /*
10425
     * The Misc part of the Prolog
10426
     */
10427
39.5k
    xmlParseMisc(ctxt);
10428
10429
    /*
10430
     * Then possibly doc type declaration(s) and more Misc
10431
     * (doctypedecl Misc*)?
10432
     */
10433
39.5k
    GROW;
10434
39.5k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10435
10436
8.16k
  ctxt->inSubset = 1;
10437
8.16k
  xmlParseDocTypeDecl(ctxt);
10438
8.16k
  if (RAW == '[') {
10439
7.98k
      xmlParseInternalSubset(ctxt);
10440
7.98k
  } else if (RAW == '>') {
10441
64
            NEXT;
10442
64
        }
10443
10444
  /*
10445
   * Create and update the external subset.
10446
   */
10447
8.16k
  ctxt->inSubset = 2;
10448
8.16k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10449
8.16k
      (!ctxt->disableSAX))
10450
1.33k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10451
1.33k
                                ctxt->extSubSystem, ctxt->extSubURI);
10452
8.16k
  ctxt->inSubset = 0;
10453
10454
8.16k
        xmlCleanSpecialAttr(ctxt);
10455
10456
8.16k
  xmlParseMisc(ctxt);
10457
8.16k
    }
10458
10459
    /*
10460
     * Time to start parsing the tree itself
10461
     */
10462
39.5k
    GROW;
10463
39.5k
    if (RAW != '<') {
10464
7.44k
        if (ctxt->wellFormed)
10465
380
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10466
380
                           "Start tag expected, '<' not found\n");
10467
32.1k
    } else {
10468
32.1k
  xmlParseElement(ctxt);
10469
10470
  /*
10471
   * The Misc part at the end
10472
   */
10473
32.1k
  xmlParseMisc(ctxt);
10474
10475
32.1k
        xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
10476
32.1k
    }
10477
10478
39.5k
    ctxt->instate = XML_PARSER_EOF;
10479
39.5k
    xmlFinishDocument(ctxt);
10480
10481
39.5k
    if (! ctxt->wellFormed) {
10482
20.4k
  ctxt->valid = 0;
10483
20.4k
  return(-1);
10484
20.4k
    }
10485
10486
19.1k
    return(0);
10487
39.5k
}
10488
10489
/**
10490
 * Parse a general parsed entity
10491
 * An external general parsed entity is well-formed if it matches the
10492
 * production labeled extParsedEnt.
10493
 *
10494
 * @deprecated Internal function, don't use.
10495
 *
10496
 *     [78] extParsedEnt ::= TextDecl? content
10497
 *
10498
 * @param ctxt  an XML parser context
10499
 * @returns 0, -1 in case of error. the parser context is augmented
10500
 *                as a result of the parsing.
10501
 */
10502
10503
int
10504
0
xmlParseExtParsedEnt(xmlParserCtxt *ctxt) {
10505
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10506
0
        return(-1);
10507
10508
0
    xmlCtxtInitializeLate(ctxt);
10509
10510
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10511
0
        ctxt->sax->setDocumentLocator(ctxt->userData,
10512
0
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
10513
0
    }
10514
10515
0
    xmlDetectEncoding(ctxt);
10516
10517
0
    if (CUR == 0) {
10518
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10519
0
    }
10520
10521
    /*
10522
     * Check for the XMLDecl in the Prolog.
10523
     */
10524
0
    GROW;
10525
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10526
10527
  /*
10528
   * Note that we will switch encoding on the fly.
10529
   */
10530
0
  xmlParseXMLDecl(ctxt);
10531
0
  SKIP_BLANKS;
10532
0
    } else {
10533
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10534
0
    }
10535
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10536
0
        ctxt->sax->startDocument(ctxt->userData);
10537
10538
    /*
10539
     * Doing validity checking on chunk doesn't make sense
10540
     */
10541
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10542
0
    ctxt->validate = 0;
10543
0
    ctxt->depth = 0;
10544
10545
0
    xmlParseContentInternal(ctxt);
10546
10547
0
    if (ctxt->input->cur < ctxt->input->end)
10548
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10549
10550
    /*
10551
     * SAX: end of the document processing.
10552
     */
10553
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10554
0
        ctxt->sax->endDocument(ctxt->userData);
10555
10556
0
    if (! ctxt->wellFormed) return(-1);
10557
0
    return(0);
10558
0
}
10559
10560
#ifdef LIBXML_PUSH_ENABLED
10561
/************************************************************************
10562
 *                  *
10563
 *    Progressive parsing interfaces        *
10564
 *                  *
10565
 ************************************************************************/
10566
10567
/**
10568
 * Check whether the input buffer contains a character.
10569
 *
10570
 * @param ctxt  an XML parser context
10571
 * @param c  character
10572
 */
10573
static int
10574
0
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10575
0
    const xmlChar *cur;
10576
10577
0
    if (ctxt->checkIndex == 0) {
10578
0
        cur = ctxt->input->cur + 1;
10579
0
    } else {
10580
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10581
0
    }
10582
10583
0
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10584
0
        size_t index = ctxt->input->end - ctxt->input->cur;
10585
10586
0
        if (index > LONG_MAX) {
10587
0
            ctxt->checkIndex = 0;
10588
0
            return(1);
10589
0
        }
10590
0
        ctxt->checkIndex = index;
10591
0
        return(0);
10592
0
    } else {
10593
0
        ctxt->checkIndex = 0;
10594
0
        return(1);
10595
0
    }
10596
0
}
10597
10598
/**
10599
 * Check whether the input buffer contains a string.
10600
 *
10601
 * @param ctxt  an XML parser context
10602
 * @param startDelta  delta to apply at the start
10603
 * @param str  string
10604
 * @param strLen  length of string
10605
 */
10606
static const xmlChar *
10607
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10608
0
                     const char *str, size_t strLen) {
10609
0
    const xmlChar *cur, *term;
10610
10611
0
    if (ctxt->checkIndex == 0) {
10612
0
        cur = ctxt->input->cur + startDelta;
10613
0
    } else {
10614
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10615
0
    }
10616
10617
0
    term = BAD_CAST strstr((const char *) cur, str);
10618
0
    if (term == NULL) {
10619
0
        const xmlChar *end = ctxt->input->end;
10620
0
        size_t index;
10621
10622
        /* Rescan (strLen - 1) characters. */
10623
0
        if ((size_t) (end - cur) < strLen)
10624
0
            end = cur;
10625
0
        else
10626
0
            end -= strLen - 1;
10627
0
        index = end - ctxt->input->cur;
10628
0
        if (index > LONG_MAX) {
10629
0
            ctxt->checkIndex = 0;
10630
0
            return(ctxt->input->end - strLen);
10631
0
        }
10632
0
        ctxt->checkIndex = index;
10633
0
    } else {
10634
0
        ctxt->checkIndex = 0;
10635
0
    }
10636
10637
0
    return(term);
10638
0
}
10639
10640
/**
10641
 * Check whether the input buffer contains terminated char data.
10642
 *
10643
 * @param ctxt  an XML parser context
10644
 */
10645
static int
10646
0
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10647
0
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10648
0
    const xmlChar *end = ctxt->input->end;
10649
0
    size_t index;
10650
10651
0
    while (cur < end) {
10652
0
        if ((*cur == '<') || (*cur == '&')) {
10653
0
            ctxt->checkIndex = 0;
10654
0
            return(1);
10655
0
        }
10656
0
        cur++;
10657
0
    }
10658
10659
0
    index = cur - ctxt->input->cur;
10660
0
    if (index > LONG_MAX) {
10661
0
        ctxt->checkIndex = 0;
10662
0
        return(1);
10663
0
    }
10664
0
    ctxt->checkIndex = index;
10665
0
    return(0);
10666
0
}
10667
10668
/**
10669
 * Check whether there's enough data in the input buffer to finish parsing
10670
 * a start tag. This has to take quotes into account.
10671
 *
10672
 * @param ctxt  an XML parser context
10673
 */
10674
static int
10675
0
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10676
0
    const xmlChar *cur;
10677
0
    const xmlChar *end = ctxt->input->end;
10678
0
    int state = ctxt->endCheckState;
10679
0
    size_t index;
10680
10681
0
    if (ctxt->checkIndex == 0)
10682
0
        cur = ctxt->input->cur + 1;
10683
0
    else
10684
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10685
10686
0
    while (cur < end) {
10687
0
        if (state) {
10688
0
            if (*cur == state)
10689
0
                state = 0;
10690
0
        } else if (*cur == '\'' || *cur == '"') {
10691
0
            state = *cur;
10692
0
        } else if (*cur == '>') {
10693
0
            ctxt->checkIndex = 0;
10694
0
            ctxt->endCheckState = 0;
10695
0
            return(1);
10696
0
        }
10697
0
        cur++;
10698
0
    }
10699
10700
0
    index = cur - ctxt->input->cur;
10701
0
    if (index > LONG_MAX) {
10702
0
        ctxt->checkIndex = 0;
10703
0
        ctxt->endCheckState = 0;
10704
0
        return(1);
10705
0
    }
10706
0
    ctxt->checkIndex = index;
10707
0
    ctxt->endCheckState = state;
10708
0
    return(0);
10709
0
}
10710
10711
/**
10712
 * Check whether there's enough data in the input buffer to finish parsing
10713
 * the internal subset.
10714
 *
10715
 * @param ctxt  an XML parser context
10716
 */
10717
static int
10718
0
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10719
    /*
10720
     * Sorry, but progressive parsing of the internal subset is not
10721
     * supported. We first check that the full content of the internal
10722
     * subset is available and parsing is launched only at that point.
10723
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10724
     * not in a ']]>' sequence which are conditional sections.
10725
     */
10726
0
    const xmlChar *cur, *start;
10727
0
    const xmlChar *end = ctxt->input->end;
10728
0
    int state = ctxt->endCheckState;
10729
0
    size_t index;
10730
10731
0
    if (ctxt->checkIndex == 0) {
10732
0
        cur = ctxt->input->cur + 1;
10733
0
    } else {
10734
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10735
0
    }
10736
0
    start = cur;
10737
10738
0
    while (cur < end) {
10739
0
        if (state == '-') {
10740
0
            if ((*cur == '-') &&
10741
0
                (cur[1] == '-') &&
10742
0
                (cur[2] == '>')) {
10743
0
                state = 0;
10744
0
                cur += 3;
10745
0
                start = cur;
10746
0
                continue;
10747
0
            }
10748
0
        }
10749
0
        else if (state == ']') {
10750
0
            if (*cur == '>') {
10751
0
                ctxt->checkIndex = 0;
10752
0
                ctxt->endCheckState = 0;
10753
0
                return(1);
10754
0
            }
10755
0
            if (IS_BLANK_CH(*cur)) {
10756
0
                state = ' ';
10757
0
            } else if (*cur != ']') {
10758
0
                state = 0;
10759
0
                start = cur;
10760
0
                continue;
10761
0
            }
10762
0
        }
10763
0
        else if (state == ' ') {
10764
0
            if (*cur == '>') {
10765
0
                ctxt->checkIndex = 0;
10766
0
                ctxt->endCheckState = 0;
10767
0
                return(1);
10768
0
            }
10769
0
            if (!IS_BLANK_CH(*cur)) {
10770
0
                state = 0;
10771
0
                start = cur;
10772
0
                continue;
10773
0
            }
10774
0
        }
10775
0
        else if (state != 0) {
10776
0
            if (*cur == state) {
10777
0
                state = 0;
10778
0
                start = cur + 1;
10779
0
            }
10780
0
        }
10781
0
        else if (*cur == '<') {
10782
0
            if ((cur[1] == '!') &&
10783
0
                (cur[2] == '-') &&
10784
0
                (cur[3] == '-')) {
10785
0
                state = '-';
10786
0
                cur += 4;
10787
                /* Don't treat <!--> as comment */
10788
0
                start = cur;
10789
0
                continue;
10790
0
            }
10791
0
        }
10792
0
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10793
0
            state = *cur;
10794
0
        }
10795
10796
0
        cur++;
10797
0
    }
10798
10799
    /*
10800
     * Rescan the three last characters to detect "<!--" and "-->"
10801
     * split across chunks.
10802
     */
10803
0
    if ((state == 0) || (state == '-')) {
10804
0
        if (cur - start < 3)
10805
0
            cur = start;
10806
0
        else
10807
0
            cur -= 3;
10808
0
    }
10809
0
    index = cur - ctxt->input->cur;
10810
0
    if (index > LONG_MAX) {
10811
0
        ctxt->checkIndex = 0;
10812
0
        ctxt->endCheckState = 0;
10813
0
        return(1);
10814
0
    }
10815
0
    ctxt->checkIndex = index;
10816
0
    ctxt->endCheckState = state;
10817
0
    return(0);
10818
0
}
10819
10820
/**
10821
 * Try to progress on parsing
10822
 *
10823
 * @param ctxt  an XML parser context
10824
 * @param terminate  last chunk indicator
10825
 * @returns zero if no parsing was possible
10826
 */
10827
static int
10828
0
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10829
0
    int ret = 0;
10830
0
    size_t avail;
10831
0
    xmlChar cur, next;
10832
10833
0
    if (ctxt->input == NULL)
10834
0
        return(0);
10835
10836
0
    if ((ctxt->input != NULL) &&
10837
0
        (ctxt->input->cur - ctxt->input->base > 4096)) {
10838
0
        xmlParserShrink(ctxt);
10839
0
    }
10840
10841
0
    while (ctxt->disableSAX == 0) {
10842
0
        avail = ctxt->input->end - ctxt->input->cur;
10843
0
        if (avail < 1)
10844
0
      goto done;
10845
0
        switch (ctxt->instate) {
10846
0
            case XML_PARSER_EOF:
10847
          /*
10848
     * Document parsing is done !
10849
     */
10850
0
          goto done;
10851
0
            case XML_PARSER_START:
10852
                /*
10853
                 * Very first chars read from the document flow.
10854
                 */
10855
0
                if ((!terminate) && (avail < 4))
10856
0
                    goto done;
10857
10858
                /*
10859
                 * We need more bytes to detect EBCDIC code pages.
10860
                 * See xmlDetectEBCDIC.
10861
                 */
10862
0
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
10863
0
                    (!terminate) && (avail < 200))
10864
0
                    goto done;
10865
10866
0
                xmlDetectEncoding(ctxt);
10867
0
                ctxt->instate = XML_PARSER_XML_DECL;
10868
0
    break;
10869
10870
0
            case XML_PARSER_XML_DECL:
10871
0
    if ((!terminate) && (avail < 2))
10872
0
        goto done;
10873
0
    cur = ctxt->input->cur[0];
10874
0
    next = ctxt->input->cur[1];
10875
0
          if ((cur == '<') && (next == '?')) {
10876
        /* PI or XML decl */
10877
0
        if ((!terminate) &&
10878
0
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
10879
0
      goto done;
10880
0
        if ((ctxt->input->cur[2] == 'x') &&
10881
0
      (ctxt->input->cur[3] == 'm') &&
10882
0
      (ctxt->input->cur[4] == 'l') &&
10883
0
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
10884
0
      ret += 5;
10885
0
      xmlParseXMLDecl(ctxt);
10886
0
        } else {
10887
0
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10888
0
                        if (ctxt->version == NULL) {
10889
0
                            xmlErrMemory(ctxt);
10890
0
                            break;
10891
0
                        }
10892
0
        }
10893
0
    } else {
10894
0
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10895
0
        if (ctxt->version == NULL) {
10896
0
            xmlErrMemory(ctxt);
10897
0
      break;
10898
0
        }
10899
0
    }
10900
0
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10901
0
                    ctxt->sax->setDocumentLocator(ctxt->userData,
10902
0
                            (xmlSAXLocator *) &xmlDefaultSAXLocator);
10903
0
                }
10904
0
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10905
0
                    (!ctxt->disableSAX))
10906
0
                    ctxt->sax->startDocument(ctxt->userData);
10907
0
                ctxt->instate = XML_PARSER_MISC;
10908
0
    break;
10909
0
            case XML_PARSER_START_TAG: {
10910
0
          const xmlChar *name;
10911
0
    const xmlChar *prefix = NULL;
10912
0
    const xmlChar *URI = NULL;
10913
0
                int line = ctxt->input->line;
10914
0
    int nbNs = 0;
10915
10916
0
    if ((!terminate) && (avail < 2))
10917
0
        goto done;
10918
0
    cur = ctxt->input->cur[0];
10919
0
          if (cur != '<') {
10920
0
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10921
0
                                   "Start tag expected, '<' not found");
10922
0
                    ctxt->instate = XML_PARSER_EOF;
10923
0
                    xmlFinishDocument(ctxt);
10924
0
        goto done;
10925
0
    }
10926
0
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
10927
0
                    goto done;
10928
0
    if (ctxt->spaceNr == 0)
10929
0
        spacePush(ctxt, -1);
10930
0
    else if (*ctxt->space == -2)
10931
0
        spacePush(ctxt, -1);
10932
0
    else
10933
0
        spacePush(ctxt, *ctxt->space);
10934
0
#ifdef LIBXML_SAX1_ENABLED
10935
0
    if (ctxt->sax2)
10936
0
#endif /* LIBXML_SAX1_ENABLED */
10937
0
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
10938
0
#ifdef LIBXML_SAX1_ENABLED
10939
0
    else
10940
0
        name = xmlParseStartTag(ctxt);
10941
0
#endif /* LIBXML_SAX1_ENABLED */
10942
0
    if (name == NULL) {
10943
0
        spacePop(ctxt);
10944
0
                    ctxt->instate = XML_PARSER_EOF;
10945
0
                    xmlFinishDocument(ctxt);
10946
0
        goto done;
10947
0
    }
10948
0
#ifdef LIBXML_VALID_ENABLED
10949
    /*
10950
     * [ VC: Root Element Type ]
10951
     * The Name in the document type declaration must match
10952
     * the element type of the root element.
10953
     */
10954
0
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10955
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10956
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10957
0
#endif /* LIBXML_VALID_ENABLED */
10958
10959
    /*
10960
     * Check for an Empty Element.
10961
     */
10962
0
    if ((RAW == '/') && (NXT(1) == '>')) {
10963
0
        SKIP(2);
10964
10965
0
        if (ctxt->sax2) {
10966
0
      if ((ctxt->sax != NULL) &&
10967
0
          (ctxt->sax->endElementNs != NULL) &&
10968
0
          (!ctxt->disableSAX))
10969
0
          ctxt->sax->endElementNs(ctxt->userData, name,
10970
0
                                  prefix, URI);
10971
0
      if (nbNs > 0)
10972
0
          xmlParserNsPop(ctxt, nbNs);
10973
0
#ifdef LIBXML_SAX1_ENABLED
10974
0
        } else {
10975
0
      if ((ctxt->sax != NULL) &&
10976
0
          (ctxt->sax->endElement != NULL) &&
10977
0
          (!ctxt->disableSAX))
10978
0
          ctxt->sax->endElement(ctxt->userData, name);
10979
0
#endif /* LIBXML_SAX1_ENABLED */
10980
0
        }
10981
0
        spacePop(ctxt);
10982
0
    } else if (RAW == '>') {
10983
0
        NEXT;
10984
0
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
10985
0
    } else {
10986
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
10987
0
           "Couldn't find end of Start Tag %s\n",
10988
0
           name);
10989
0
        nodePop(ctxt);
10990
0
        spacePop(ctxt);
10991
0
                    if (nbNs > 0)
10992
0
                        xmlParserNsPop(ctxt, nbNs);
10993
0
    }
10994
10995
0
                if (ctxt->nameNr == 0)
10996
0
                    ctxt->instate = XML_PARSER_EPILOG;
10997
0
                else
10998
0
                    ctxt->instate = XML_PARSER_CONTENT;
10999
0
                break;
11000
0
      }
11001
0
            case XML_PARSER_CONTENT: {
11002
0
    cur = ctxt->input->cur[0];
11003
11004
0
    if (cur == '<') {
11005
0
                    if ((!terminate) && (avail < 2))
11006
0
                        goto done;
11007
0
        next = ctxt->input->cur[1];
11008
11009
0
                    if (next == '/') {
11010
0
                        ctxt->instate = XML_PARSER_END_TAG;
11011
0
                        break;
11012
0
                    } else if (next == '?') {
11013
0
                        if ((!terminate) &&
11014
0
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11015
0
                            goto done;
11016
0
                        xmlParsePI(ctxt);
11017
0
                        ctxt->instate = XML_PARSER_CONTENT;
11018
0
                        break;
11019
0
                    } else if (next == '!') {
11020
0
                        if ((!terminate) && (avail < 3))
11021
0
                            goto done;
11022
0
                        next = ctxt->input->cur[2];
11023
11024
0
                        if (next == '-') {
11025
0
                            if ((!terminate) && (avail < 4))
11026
0
                                goto done;
11027
0
                            if (ctxt->input->cur[3] == '-') {
11028
0
                                if ((!terminate) &&
11029
0
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11030
0
                                    goto done;
11031
0
                                xmlParseComment(ctxt);
11032
0
                                ctxt->instate = XML_PARSER_CONTENT;
11033
0
                                break;
11034
0
                            }
11035
0
                        } else if (next == '[') {
11036
0
                            if ((!terminate) && (avail < 9))
11037
0
                                goto done;
11038
0
                            if ((ctxt->input->cur[2] == '[') &&
11039
0
                                (ctxt->input->cur[3] == 'C') &&
11040
0
                                (ctxt->input->cur[4] == 'D') &&
11041
0
                                (ctxt->input->cur[5] == 'A') &&
11042
0
                                (ctxt->input->cur[6] == 'T') &&
11043
0
                                (ctxt->input->cur[7] == 'A') &&
11044
0
                                (ctxt->input->cur[8] == '[')) {
11045
0
                                if ((!terminate) &&
11046
0
                                    (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11047
0
                                    goto done;
11048
0
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11049
0
                                xmlParseCDSect(ctxt);
11050
0
                                ctxt->instate = XML_PARSER_CONTENT;
11051
0
                                break;
11052
0
                            }
11053
0
                        }
11054
0
                    }
11055
0
    } else if (cur == '&') {
11056
0
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11057
0
      goto done;
11058
0
        xmlParseReference(ctxt);
11059
0
                    break;
11060
0
    } else {
11061
        /* TODO Avoid the extra copy, handle directly !!! */
11062
        /*
11063
         * Goal of the following test is:
11064
         *  - minimize calls to the SAX 'character' callback
11065
         *    when they are mergeable
11066
         *  - handle an problem for isBlank when we only parse
11067
         *    a sequence of blank chars and the next one is
11068
         *    not available to check against '<' presence.
11069
         *  - tries to homogenize the differences in SAX
11070
         *    callbacks between the push and pull versions
11071
         *    of the parser.
11072
         */
11073
0
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11074
0
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11075
0
          goto done;
11076
0
                    }
11077
0
                    ctxt->checkIndex = 0;
11078
0
        xmlParseCharDataInternal(ctxt, !terminate);
11079
0
                    break;
11080
0
    }
11081
11082
0
                ctxt->instate = XML_PARSER_START_TAG;
11083
0
    break;
11084
0
      }
11085
0
            case XML_PARSER_END_TAG:
11086
0
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11087
0
        goto done;
11088
0
    if (ctxt->sax2) {
11089
0
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11090
0
        nameNsPop(ctxt);
11091
0
    }
11092
0
#ifdef LIBXML_SAX1_ENABLED
11093
0
      else
11094
0
        xmlParseEndTag1(ctxt, 0);
11095
0
#endif /* LIBXML_SAX1_ENABLED */
11096
0
    if (ctxt->nameNr == 0) {
11097
0
        ctxt->instate = XML_PARSER_EPILOG;
11098
0
    } else {
11099
0
        ctxt->instate = XML_PARSER_CONTENT;
11100
0
    }
11101
0
    break;
11102
0
            case XML_PARSER_MISC:
11103
0
            case XML_PARSER_PROLOG:
11104
0
            case XML_PARSER_EPILOG:
11105
0
    SKIP_BLANKS;
11106
0
                avail = ctxt->input->end - ctxt->input->cur;
11107
0
    if (avail < 1)
11108
0
        goto done;
11109
0
    if (ctxt->input->cur[0] == '<') {
11110
0
                    if ((!terminate) && (avail < 2))
11111
0
                        goto done;
11112
0
                    next = ctxt->input->cur[1];
11113
0
                    if (next == '?') {
11114
0
                        if ((!terminate) &&
11115
0
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11116
0
                            goto done;
11117
0
                        xmlParsePI(ctxt);
11118
0
                        break;
11119
0
                    } else if (next == '!') {
11120
0
                        if ((!terminate) && (avail < 3))
11121
0
                            goto done;
11122
11123
0
                        if (ctxt->input->cur[2] == '-') {
11124
0
                            if ((!terminate) && (avail < 4))
11125
0
                                goto done;
11126
0
                            if (ctxt->input->cur[3] == '-') {
11127
0
                                if ((!terminate) &&
11128
0
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11129
0
                                    goto done;
11130
0
                                xmlParseComment(ctxt);
11131
0
                                break;
11132
0
                            }
11133
0
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11134
0
                            if ((!terminate) && (avail < 9))
11135
0
                                goto done;
11136
0
                            if ((ctxt->input->cur[2] == 'D') &&
11137
0
                                (ctxt->input->cur[3] == 'O') &&
11138
0
                                (ctxt->input->cur[4] == 'C') &&
11139
0
                                (ctxt->input->cur[5] == 'T') &&
11140
0
                                (ctxt->input->cur[6] == 'Y') &&
11141
0
                                (ctxt->input->cur[7] == 'P') &&
11142
0
                                (ctxt->input->cur[8] == 'E')) {
11143
0
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11144
0
                                    goto done;
11145
0
                                ctxt->inSubset = 1;
11146
0
                                xmlParseDocTypeDecl(ctxt);
11147
0
                                if (RAW == '[') {
11148
0
                                    ctxt->instate = XML_PARSER_DTD;
11149
0
                                } else {
11150
0
                                    if (RAW == '>')
11151
0
                                        NEXT;
11152
                                    /*
11153
                                     * Create and update the external subset.
11154
                                     */
11155
0
                                    ctxt->inSubset = 2;
11156
0
                                    if ((ctxt->sax != NULL) &&
11157
0
                                        (!ctxt->disableSAX) &&
11158
0
                                        (ctxt->sax->externalSubset != NULL))
11159
0
                                        ctxt->sax->externalSubset(
11160
0
                                                ctxt->userData,
11161
0
                                                ctxt->intSubName,
11162
0
                                                ctxt->extSubSystem,
11163
0
                                                ctxt->extSubURI);
11164
0
                                    ctxt->inSubset = 0;
11165
0
                                    xmlCleanSpecialAttr(ctxt);
11166
0
                                    ctxt->instate = XML_PARSER_PROLOG;
11167
0
                                }
11168
0
                                break;
11169
0
                            }
11170
0
                        }
11171
0
                    }
11172
0
                }
11173
11174
0
                if (ctxt->instate == XML_PARSER_EPILOG) {
11175
0
                    if (ctxt->errNo == XML_ERR_OK)
11176
0
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11177
0
        ctxt->instate = XML_PARSER_EOF;
11178
0
                    xmlFinishDocument(ctxt);
11179
0
                } else {
11180
0
        ctxt->instate = XML_PARSER_START_TAG;
11181
0
    }
11182
0
    break;
11183
0
            case XML_PARSER_DTD: {
11184
0
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11185
0
                    goto done;
11186
0
    xmlParseInternalSubset(ctxt);
11187
0
    ctxt->inSubset = 2;
11188
0
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11189
0
        (ctxt->sax->externalSubset != NULL))
11190
0
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11191
0
          ctxt->extSubSystem, ctxt->extSubURI);
11192
0
    ctxt->inSubset = 0;
11193
0
    xmlCleanSpecialAttr(ctxt);
11194
0
    ctxt->instate = XML_PARSER_PROLOG;
11195
0
                break;
11196
0
      }
11197
0
            default:
11198
0
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11199
0
      "PP: internal error\n");
11200
0
    ctxt->instate = XML_PARSER_EOF;
11201
0
    break;
11202
0
  }
11203
0
    }
11204
0
done:
11205
0
    return(ret);
11206
0
}
11207
11208
/**
11209
 * Parse a chunk of memory in push parser mode.
11210
 *
11211
 * Assumes that the parser context was initialized with
11212
 * #xmlCreatePushParserCtxt.
11213
 *
11214
 * The last chunk, which will often be empty, must be marked with
11215
 * the `terminate` flag. With the default SAX callbacks, the resulting
11216
 * document will be available in ctxt->myDoc. This pointer will not
11217
 * be freed when calling #xmlFreeParserCtxt and must be freed by the
11218
 * caller. If the document isn't well-formed, it will still be returned
11219
 * in ctxt->myDoc.
11220
 *
11221
 * As an exception, #xmlCtxtResetPush will free the document in
11222
 * ctxt->myDoc. So ctxt->myDoc should be set to NULL after extracting
11223
 * the document.
11224
 *
11225
 * Since 2.14.0, #xmlCtxtGetDocument can be used to retrieve the
11226
 * result document.
11227
 *
11228
 * @param ctxt  an XML parser context
11229
 * @param chunk  chunk of memory
11230
 * @param size  size of chunk in bytes
11231
 * @param terminate  last chunk indicator
11232
 * @returns an xmlParserErrors code (0 on success).
11233
 */
11234
int
11235
xmlParseChunk(xmlParserCtxt *ctxt, const char *chunk, int size,
11236
0
              int terminate) {
11237
0
    size_t curBase;
11238
0
    size_t maxLength;
11239
0
    size_t pos;
11240
0
    int end_in_lf = 0;
11241
0
    int res;
11242
11243
0
    if ((ctxt == NULL) || (size < 0))
11244
0
        return(XML_ERR_ARGUMENT);
11245
0
    if ((chunk == NULL) && (size > 0))
11246
0
        return(XML_ERR_ARGUMENT);
11247
0
    if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11248
0
        return(XML_ERR_ARGUMENT);
11249
0
    if (ctxt->disableSAX != 0)
11250
0
        return(ctxt->errNo);
11251
11252
0
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11253
0
    if (ctxt->instate == XML_PARSER_START)
11254
0
        xmlCtxtInitializeLate(ctxt);
11255
0
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11256
0
        (chunk[size - 1] == '\r')) {
11257
0
  end_in_lf = 1;
11258
0
  size--;
11259
0
    }
11260
11261
    /*
11262
     * Also push an empty chunk to make sure that the raw buffer
11263
     * will be flushed if there is an encoder.
11264
     */
11265
0
    pos = ctxt->input->cur - ctxt->input->base;
11266
0
    res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11267
0
    xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11268
0
    if (res < 0) {
11269
0
        xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11270
0
        return(ctxt->errNo);
11271
0
    }
11272
11273
0
    xmlParseTryOrFinish(ctxt, terminate);
11274
11275
0
    curBase = ctxt->input->cur - ctxt->input->base;
11276
0
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11277
0
                XML_MAX_HUGE_LENGTH :
11278
0
                XML_MAX_LOOKUP_LIMIT;
11279
0
    if (curBase > maxLength) {
11280
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11281
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11282
0
    }
11283
11284
0
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11285
0
        return(ctxt->errNo);
11286
11287
0
    if (end_in_lf == 1) {
11288
0
  pos = ctxt->input->cur - ctxt->input->base;
11289
0
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11290
0
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11291
0
        if (res < 0) {
11292
0
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11293
0
            return(ctxt->errNo);
11294
0
        }
11295
0
    }
11296
0
    if (terminate) {
11297
  /*
11298
   * Check for termination
11299
   */
11300
0
        if ((ctxt->instate != XML_PARSER_EOF) &&
11301
0
            (ctxt->instate != XML_PARSER_EPILOG)) {
11302
0
            if (ctxt->nameNr > 0) {
11303
0
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11304
0
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11305
0
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11306
0
                        "Premature end of data in tag %s line %d\n",
11307
0
                        name, line, NULL);
11308
0
            } else if (ctxt->instate == XML_PARSER_START) {
11309
0
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11310
0
            } else {
11311
0
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11312
0
                               "Start tag expected, '<' not found\n");
11313
0
            }
11314
0
        } else {
11315
0
            xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
11316
0
        }
11317
0
  if (ctxt->instate != XML_PARSER_EOF) {
11318
0
            ctxt->instate = XML_PARSER_EOF;
11319
0
            xmlFinishDocument(ctxt);
11320
0
  }
11321
0
    }
11322
0
    if (ctxt->wellFormed == 0)
11323
0
  return((xmlParserErrors) ctxt->errNo);
11324
0
    else
11325
0
        return(0);
11326
0
}
11327
11328
/************************************************************************
11329
 *                  *
11330
 *    I/O front end functions to the parser     *
11331
 *                  *
11332
 ************************************************************************/
11333
11334
/**
11335
 * Create a parser context for using the XML parser in push mode.
11336
 * See #xmlParseChunk.
11337
 *
11338
 * Passing an initial chunk is useless and deprecated.
11339
 *
11340
 * The push parser doesn't support recovery mode or the
11341
 * XML_PARSE_NOBLANKS option.
11342
 *
11343
 * `filename` is used as base URI to fetch external entities and for
11344
 * error reports.
11345
 *
11346
 * @param sax  a SAX handler (optional)
11347
 * @param user_data  user data for SAX callbacks (optional)
11348
 * @param chunk  initial chunk (optional, deprecated)
11349
 * @param size  size of initial chunk in bytes
11350
 * @param filename  file name or URI (optional)
11351
 * @returns the new parser context or NULL if a memory allocation
11352
 * failed.
11353
 */
11354
11355
xmlParserCtxt *
11356
xmlCreatePushParserCtxt(xmlSAXHandler *sax, void *user_data,
11357
0
                        const char *chunk, int size, const char *filename) {
11358
0
    xmlParserCtxtPtr ctxt;
11359
0
    xmlParserInputPtr input;
11360
11361
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11362
0
    if (ctxt == NULL)
11363
0
  return(NULL);
11364
11365
0
    ctxt->options &= ~XML_PARSE_NODICT;
11366
0
    ctxt->dictNames = 1;
11367
11368
0
    input = xmlNewPushInput(filename, chunk, size);
11369
0
    if (input == NULL) {
11370
0
  xmlFreeParserCtxt(ctxt);
11371
0
  return(NULL);
11372
0
    }
11373
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11374
0
        xmlFreeInputStream(input);
11375
0
        xmlFreeParserCtxt(ctxt);
11376
0
        return(NULL);
11377
0
    }
11378
11379
0
    return(ctxt);
11380
0
}
11381
#endif /* LIBXML_PUSH_ENABLED */
11382
11383
/**
11384
 * Blocks further parser processing
11385
 *
11386
 * @param ctxt  an XML parser context
11387
 */
11388
void
11389
0
xmlStopParser(xmlParserCtxt *ctxt) {
11390
0
    if (ctxt == NULL)
11391
0
        return;
11392
11393
    /* This stops the parser */
11394
0
    ctxt->disableSAX = 2;
11395
11396
    /*
11397
     * xmlStopParser is often called from error handlers,
11398
     * so we can't raise an error here to avoid infinite
11399
     * loops. Just make sure that an error condition is
11400
     * reported.
11401
     */
11402
0
    if (ctxt->errNo == XML_ERR_OK) {
11403
0
        ctxt->errNo = XML_ERR_USER_STOP;
11404
0
        ctxt->lastError.code = XML_ERR_USER_STOP;
11405
0
        ctxt->wellFormed = 0;
11406
0
    }
11407
0
}
11408
11409
/**
11410
 * Create a parser context for using the XML parser with an existing
11411
 * I/O stream
11412
 *
11413
 * @param sax  a SAX handler (optional)
11414
 * @param user_data  user data for SAX callbacks (optional)
11415
 * @param ioread  an I/O read function
11416
 * @param ioclose  an I/O close function (optional)
11417
 * @param ioctx  an I/O handler
11418
 * @param enc  the charset encoding if known (deprecated)
11419
 * @returns the new parser context or NULL
11420
 */
11421
xmlParserCtxt *
11422
xmlCreateIOParserCtxt(xmlSAXHandler *sax, void *user_data,
11423
                      xmlInputReadCallback ioread,
11424
                      xmlInputCloseCallback ioclose,
11425
0
                      void *ioctx, xmlCharEncoding enc) {
11426
0
    xmlParserCtxtPtr ctxt;
11427
0
    xmlParserInputPtr input;
11428
0
    const char *encoding;
11429
11430
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11431
0
    if (ctxt == NULL)
11432
0
  return(NULL);
11433
11434
0
    encoding = xmlGetCharEncodingName(enc);
11435
0
    input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11436
0
                                  encoding, 0);
11437
0
    if (input == NULL) {
11438
0
  xmlFreeParserCtxt(ctxt);
11439
0
        return (NULL);
11440
0
    }
11441
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11442
0
        xmlFreeInputStream(input);
11443
0
        xmlFreeParserCtxt(ctxt);
11444
0
        return(NULL);
11445
0
    }
11446
11447
0
    return(ctxt);
11448
0
}
11449
11450
#ifdef LIBXML_VALID_ENABLED
11451
/************************************************************************
11452
 *                  *
11453
 *    Front ends when parsing a DTD       *
11454
 *                  *
11455
 ************************************************************************/
11456
11457
/**
11458
 * Parse a DTD.
11459
 *
11460
 * Option XML_PARSE_DTDLOAD should be enabled in the parser context
11461
 * to make external entities work.
11462
 *
11463
 * @since 2.14.0
11464
 *
11465
 * @param ctxt  a parser context
11466
 * @param input  a parser input
11467
 * @param publicId  public ID of the DTD (optional)
11468
 * @param systemId  system ID of the DTD (optional)
11469
 * @returns the resulting xmlDtd or NULL in case of error.
11470
 * `input` will be freed by the function in any case.
11471
 */
11472
xmlDtd *
11473
xmlCtxtParseDtd(xmlParserCtxt *ctxt, xmlParserInput *input,
11474
0
                const xmlChar *publicId, const xmlChar *systemId) {
11475
0
    xmlDtdPtr ret = NULL;
11476
11477
0
    if ((ctxt == NULL) || (input == NULL)) {
11478
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
11479
0
        xmlFreeInputStream(input);
11480
0
        return(NULL);
11481
0
    }
11482
11483
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
11484
0
        xmlFreeInputStream(input);
11485
0
        return(NULL);
11486
0
    }
11487
11488
0
    if (publicId == NULL)
11489
0
        publicId = BAD_CAST "none";
11490
0
    if (systemId == NULL)
11491
0
        systemId = BAD_CAST "none";
11492
11493
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11494
0
    if (ctxt->myDoc == NULL) {
11495
0
        xmlErrMemory(ctxt);
11496
0
        goto error;
11497
0
    }
11498
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11499
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11500
0
                                       publicId, systemId);
11501
0
    if (ctxt->myDoc->extSubset == NULL) {
11502
0
        xmlErrMemory(ctxt);
11503
0
        xmlFreeDoc(ctxt->myDoc);
11504
0
        goto error;
11505
0
    }
11506
11507
0
    xmlParseExternalSubset(ctxt, publicId, systemId);
11508
11509
0
    if (ctxt->wellFormed) {
11510
0
        ret = ctxt->myDoc->extSubset;
11511
0
        ctxt->myDoc->extSubset = NULL;
11512
0
        if (ret != NULL) {
11513
0
            xmlNodePtr tmp;
11514
11515
0
            ret->doc = NULL;
11516
0
            tmp = ret->children;
11517
0
            while (tmp != NULL) {
11518
0
                tmp->doc = NULL;
11519
0
                tmp = tmp->next;
11520
0
            }
11521
0
        }
11522
0
    } else {
11523
0
        ret = NULL;
11524
0
    }
11525
0
    xmlFreeDoc(ctxt->myDoc);
11526
0
    ctxt->myDoc = NULL;
11527
11528
0
error:
11529
0
    xmlFreeInputStream(xmlCtxtPopInput(ctxt));
11530
11531
0
    return(ret);
11532
0
}
11533
11534
/**
11535
 * Load and parse a DTD
11536
 *
11537
 * @deprecated Use #xmlCtxtParseDtd.
11538
 *
11539
 * @param sax  the SAX handler block or NULL
11540
 * @param input  an Input Buffer
11541
 * @param enc  the charset encoding if known
11542
 * @returns the resulting xmlDtd or NULL in case of error.
11543
 * `input` will be freed by the function in any case.
11544
 */
11545
11546
xmlDtd *
11547
xmlIOParseDTD(xmlSAXHandler *sax, xmlParserInputBuffer *input,
11548
0
        xmlCharEncoding enc) {
11549
0
    xmlDtdPtr ret = NULL;
11550
0
    xmlParserCtxtPtr ctxt;
11551
0
    xmlParserInputPtr pinput = NULL;
11552
11553
0
    if (input == NULL)
11554
0
  return(NULL);
11555
11556
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11557
0
    if (ctxt == NULL) {
11558
0
        xmlFreeParserInputBuffer(input);
11559
0
  return(NULL);
11560
0
    }
11561
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11562
11563
    /*
11564
     * generate a parser input from the I/O handler
11565
     */
11566
11567
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11568
0
    if (pinput == NULL) {
11569
0
        xmlFreeParserInputBuffer(input);
11570
0
  xmlFreeParserCtxt(ctxt);
11571
0
  return(NULL);
11572
0
    }
11573
11574
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11575
0
        xmlSwitchEncoding(ctxt, enc);
11576
0
    }
11577
11578
0
    ret = xmlCtxtParseDtd(ctxt, pinput, NULL, NULL);
11579
11580
0
    xmlFreeParserCtxt(ctxt);
11581
0
    return(ret);
11582
0
}
11583
11584
/**
11585
 * Load and parse an external subset.
11586
 *
11587
 * @deprecated Use #xmlCtxtParseDtd.
11588
 *
11589
 * @param sax  the SAX handler block
11590
 * @param publicId  public identifier of the DTD (optional)
11591
 * @param systemId  system identifier (URL) of the DTD
11592
 * @returns the resulting xmlDtd or NULL in case of error.
11593
 */
11594
11595
xmlDtd *
11596
xmlSAXParseDTD(xmlSAXHandler *sax, const xmlChar *publicId,
11597
0
               const xmlChar *systemId) {
11598
0
    xmlDtdPtr ret = NULL;
11599
0
    xmlParserCtxtPtr ctxt;
11600
0
    xmlParserInputPtr input = NULL;
11601
0
    xmlChar* systemIdCanonic;
11602
11603
0
    if ((publicId == NULL) && (systemId == NULL)) return(NULL);
11604
11605
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11606
0
    if (ctxt == NULL) {
11607
0
  return(NULL);
11608
0
    }
11609
0
    xmlCtxtSetOptions(ctxt, XML_PARSE_DTDLOAD);
11610
11611
    /*
11612
     * Canonicalise the system ID
11613
     */
11614
0
    systemIdCanonic = xmlCanonicPath(systemId);
11615
0
    if ((systemId != NULL) && (systemIdCanonic == NULL)) {
11616
0
  xmlFreeParserCtxt(ctxt);
11617
0
  return(NULL);
11618
0
    }
11619
11620
    /*
11621
     * Ask the Entity resolver to load the damn thing
11622
     */
11623
11624
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11625
0
  input = ctxt->sax->resolveEntity(ctxt->userData, publicId,
11626
0
                                   systemIdCanonic);
11627
0
    if (input == NULL) {
11628
0
  xmlFreeParserCtxt(ctxt);
11629
0
  if (systemIdCanonic != NULL)
11630
0
      xmlFree(systemIdCanonic);
11631
0
  return(NULL);
11632
0
    }
11633
11634
0
    if (input->filename == NULL)
11635
0
  input->filename = (char *) systemIdCanonic;
11636
0
    else
11637
0
  xmlFree(systemIdCanonic);
11638
11639
0
    ret = xmlCtxtParseDtd(ctxt, input, publicId, systemId);
11640
11641
0
    xmlFreeParserCtxt(ctxt);
11642
0
    return(ret);
11643
0
}
11644
11645
11646
/**
11647
 * Load and parse an external subset.
11648
 *
11649
 * @param publicId  public identifier of the DTD (optional)
11650
 * @param systemId  system identifier (URL) of the DTD
11651
 * @returns the resulting xmlDtd or NULL in case of error.
11652
 */
11653
11654
xmlDtd *
11655
0
xmlParseDTD(const xmlChar *publicId, const xmlChar *systemId) {
11656
0
    return(xmlSAXParseDTD(NULL, publicId, systemId));
11657
0
}
11658
#endif /* LIBXML_VALID_ENABLED */
11659
11660
/************************************************************************
11661
 *                  *
11662
 *    Front ends when parsing an Entity     *
11663
 *                  *
11664
 ************************************************************************/
11665
11666
static xmlNodePtr
11667
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11668
1.97k
                            int hasTextDecl, int buildTree) {
11669
1.97k
    xmlNodePtr root = NULL;
11670
1.97k
    xmlNodePtr list = NULL;
11671
1.97k
    xmlChar *rootName = BAD_CAST "#root";
11672
1.97k
    int result;
11673
11674
1.97k
    if (buildTree) {
11675
1.97k
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11676
1.97k
        if (root == NULL) {
11677
2
            xmlErrMemory(ctxt);
11678
2
            goto error;
11679
2
        }
11680
1.97k
    }
11681
11682
1.97k
    if (xmlCtxtPushInput(ctxt, input) < 0)
11683
2
        goto error;
11684
11685
1.96k
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11686
1.96k
    spacePush(ctxt, -1);
11687
11688
1.96k
    if (buildTree)
11689
1.96k
        nodePush(ctxt, root);
11690
11691
1.96k
    if (hasTextDecl) {
11692
735
        xmlDetectEncoding(ctxt);
11693
11694
        /*
11695
         * Parse a possible text declaration first
11696
         */
11697
735
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11698
26
            (IS_BLANK_CH(NXT(5)))) {
11699
16
            xmlParseTextDecl(ctxt);
11700
            /*
11701
             * An XML-1.0 document can't reference an entity not XML-1.0
11702
             */
11703
16
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11704
15
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11705
8
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11706
8
                               "Version mismatch between document and "
11707
8
                               "entity\n");
11708
8
            }
11709
16
        }
11710
735
    }
11711
11712
1.96k
    xmlParseContentInternal(ctxt);
11713
11714
1.96k
    if (ctxt->input->cur < ctxt->input->end)
11715
174
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11716
11717
1.96k
    if ((ctxt->wellFormed) ||
11718
1.13k
        ((ctxt->recovery) && (!xmlCtxtIsCatastrophicError(ctxt)))) {
11719
1.13k
        if (root != NULL) {
11720
1.13k
            xmlNodePtr cur;
11721
11722
            /*
11723
             * Unlink newly created node list.
11724
             */
11725
1.13k
            list = root->children;
11726
1.13k
            root->children = NULL;
11727
1.13k
            root->last = NULL;
11728
3.83k
            for (cur = list; cur != NULL; cur = cur->next)
11729
2.70k
                cur->parent = NULL;
11730
1.13k
        }
11731
1.13k
    }
11732
11733
    /*
11734
     * Read the rest of the stream in case of errors. We want
11735
     * to account for the whole entity size.
11736
     */
11737
2.06k
    do {
11738
2.06k
        ctxt->input->cur = ctxt->input->end;
11739
2.06k
        xmlParserShrink(ctxt);
11740
2.06k
        result = xmlParserGrow(ctxt);
11741
2.06k
    } while (result > 0);
11742
11743
1.96k
    if (buildTree)
11744
1.96k
        nodePop(ctxt);
11745
11746
1.96k
    namePop(ctxt);
11747
1.96k
    spacePop(ctxt);
11748
11749
1.96k
    xmlCtxtPopInput(ctxt);
11750
11751
1.97k
error:
11752
1.97k
    xmlFreeNode(root);
11753
11754
1.97k
    return(list);
11755
1.96k
}
11756
11757
static void
11758
2.23k
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
11759
2.23k
    xmlParserInputPtr input;
11760
2.23k
    xmlNodePtr list;
11761
2.23k
    unsigned long consumed;
11762
2.23k
    int isExternal;
11763
2.23k
    int buildTree;
11764
2.23k
    int oldMinNsIndex;
11765
2.23k
    int oldNodelen, oldNodemem;
11766
11767
2.23k
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
11768
2.23k
    buildTree = (ctxt->node != NULL);
11769
11770
    /*
11771
     * Recursion check
11772
     */
11773
2.23k
    if (ent->flags & XML_ENT_EXPANDING) {
11774
13
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
11775
13
        goto error;
11776
13
    }
11777
11778
    /*
11779
     * Load entity
11780
     */
11781
2.21k
    input = xmlNewEntityInputStream(ctxt, ent);
11782
2.21k
    if (input == NULL)
11783
245
        goto error;
11784
11785
    /*
11786
     * When building a tree, we need to limit the scope of namespace
11787
     * declarations, so that entities don't reference xmlNs structs
11788
     * from the parent of a reference.
11789
     */
11790
1.97k
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
11791
1.97k
    if (buildTree)
11792
1.97k
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
11793
11794
1.97k
    oldNodelen = ctxt->nodelen;
11795
1.97k
    oldNodemem = ctxt->nodemem;
11796
1.97k
    ctxt->nodelen = 0;
11797
1.97k
    ctxt->nodemem = 0;
11798
11799
    /*
11800
     * Parse content
11801
     *
11802
     * This initiates a recursive call chain:
11803
     *
11804
     * - xmlCtxtParseContentInternal
11805
     * - xmlParseContentInternal
11806
     * - xmlParseReference
11807
     * - xmlCtxtParseEntity
11808
     *
11809
     * The nesting depth is limited by the maximum number of inputs,
11810
     * see xmlCtxtPushInput.
11811
     *
11812
     * It's possible to make this non-recursive (minNsIndex must be
11813
     * stored in the input struct) at the expense of code readability.
11814
     */
11815
11816
1.97k
    ent->flags |= XML_ENT_EXPANDING;
11817
11818
1.97k
    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
11819
11820
1.97k
    ent->flags &= ~XML_ENT_EXPANDING;
11821
11822
1.97k
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
11823
1.97k
    ctxt->nodelen = oldNodelen;
11824
1.97k
    ctxt->nodemem = oldNodemem;
11825
11826
    /*
11827
     * Entity size accounting
11828
     */
11829
1.97k
    consumed = input->consumed;
11830
1.97k
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
11831
11832
1.97k
    if ((ent->flags & XML_ENT_CHECKED) == 0)
11833
1.64k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
11834
11835
1.97k
    if ((ent->flags & XML_ENT_PARSED) == 0) {
11836
1.64k
        if (isExternal)
11837
667
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
11838
11839
1.64k
        ent->children = list;
11840
11841
4.34k
        while (list != NULL) {
11842
2.70k
            list->parent = (xmlNodePtr) ent;
11843
11844
            /*
11845
             * Downstream code like the nginx xslt module can set
11846
             * ctxt->myDoc->extSubset to a separate DTD, so the entity
11847
             * might have a different or a NULL document.
11848
             */
11849
2.70k
            if (list->doc != ent->doc)
11850
0
                xmlSetTreeDoc(list, ent->doc);
11851
11852
2.70k
            if (list->next == NULL)
11853
794
                ent->last = list;
11854
2.70k
            list = list->next;
11855
2.70k
        }
11856
1.64k
    } else {
11857
328
        xmlFreeNodeList(list);
11858
328
    }
11859
11860
1.97k
    xmlFreeInputStream(input);
11861
11862
2.23k
error:
11863
2.23k
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
11864
2.23k
}
11865
11866
/**
11867
 * Parse an external general entity within an existing parsing context
11868
 * An external general parsed entity is well-formed if it matches the
11869
 * production labeled extParsedEnt.
11870
 *
11871
 *     [78] extParsedEnt ::= TextDecl? content
11872
 *
11873
 * @param ctxt  the existing parsing context
11874
 * @param URL  the URL for the entity to load
11875
 * @param ID  the System ID for the entity to load
11876
 * @param listOut  the return value for the set of parsed nodes
11877
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11878
 *    the parser error code otherwise
11879
 */
11880
11881
int
11882
xmlParseCtxtExternalEntity(xmlParserCtxt *ctxt, const xmlChar *URL,
11883
0
                           const xmlChar *ID, xmlNode **listOut) {
11884
0
    xmlParserInputPtr input;
11885
0
    xmlNodePtr list;
11886
11887
0
    if (listOut != NULL)
11888
0
        *listOut = NULL;
11889
11890
0
    if (ctxt == NULL)
11891
0
        return(XML_ERR_ARGUMENT);
11892
11893
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
11894
0
                            XML_RESOURCE_GENERAL_ENTITY);
11895
0
    if (input == NULL)
11896
0
        return(ctxt->errNo);
11897
11898
0
    xmlCtxtInitializeLate(ctxt);
11899
11900
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
11901
0
    if (listOut != NULL)
11902
0
        *listOut = list;
11903
0
    else
11904
0
        xmlFreeNodeList(list);
11905
11906
0
    xmlFreeInputStream(input);
11907
0
    return(ctxt->errNo);
11908
0
}
11909
11910
#ifdef LIBXML_SAX1_ENABLED
11911
/**
11912
 * Parse an external general entity
11913
 * An external general parsed entity is well-formed if it matches the
11914
 * production labeled extParsedEnt.
11915
 *
11916
 * This function uses deprecated global variables to set parser options
11917
 * which default to XML_PARSE_NODICT.
11918
 *
11919
 * @deprecated Use #xmlParseCtxtExternalEntity.
11920
 *
11921
 *     [78] extParsedEnt ::= TextDecl? content
11922
 *
11923
 * @param doc  the document the chunk pertains to
11924
 * @param sax  the SAX handler block (possibly NULL)
11925
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11926
 * @param depth  Used for loop detection, use 0
11927
 * @param URL  the URL for the entity to load
11928
 * @param ID  the System ID for the entity to load
11929
 * @param list  the return value for the set of parsed nodes
11930
 * @returns 0 if the entity is well formed, -1 in case of args problem and
11931
 *    the parser error code otherwise
11932
 */
11933
11934
int
11935
xmlParseExternalEntity(xmlDoc *doc, xmlSAXHandler *sax, void *user_data,
11936
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNode **list) {
11937
0
    xmlParserCtxtPtr ctxt;
11938
0
    int ret;
11939
11940
0
    if (list != NULL)
11941
0
        *list = NULL;
11942
11943
0
    if (doc == NULL)
11944
0
        return(XML_ERR_ARGUMENT);
11945
11946
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11947
0
    if (ctxt == NULL)
11948
0
        return(XML_ERR_NO_MEMORY);
11949
11950
0
    ctxt->depth = depth;
11951
0
    ctxt->myDoc = doc;
11952
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
11953
11954
0
    xmlFreeParserCtxt(ctxt);
11955
0
    return(ret);
11956
0
}
11957
11958
/**
11959
 * Parse a well-balanced chunk of an XML document
11960
 * called by the parser
11961
 * The allowed sequence for the Well Balanced Chunk is the one defined by
11962
 * the content production in the XML grammar:
11963
 *
11964
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
11965
 *                       Comment)*
11966
 *
11967
 * This function uses deprecated global variables to set parser options
11968
 * which default to XML_PARSE_NODICT.
11969
 *
11970
 * @param doc  the document the chunk pertains to (must not be NULL)
11971
 * @param sax  the SAX handler block (possibly NULL)
11972
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
11973
 * @param depth  Used for loop detection, use 0
11974
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
11975
 * @param lst  the return value for the set of parsed nodes
11976
 * @returns 0 if the chunk is well balanced, -1 in case of args problem and
11977
 *    the parser error code otherwise
11978
 */
11979
11980
int
11981
xmlParseBalancedChunkMemory(xmlDoc *doc, xmlSAXHandler *sax,
11982
0
     void *user_data, int depth, const xmlChar *string, xmlNode **lst) {
11983
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11984
0
                                                depth, string, lst, 0 );
11985
0
}
11986
#endif /* LIBXML_SAX1_ENABLED */
11987
11988
/**
11989
 * Parse a well-balanced chunk of XML matching the 'content' production.
11990
 *
11991
 * Namespaces in scope of `node` and entities of `node`'s document are
11992
 * recognized. When validating, the DTD of `node`'s document is used.
11993
 *
11994
 * Always consumes `input` even in error case.
11995
 *
11996
 * @since 2.14.0
11997
 *
11998
 * @param ctxt  parser context
11999
 * @param input  parser input
12000
 * @param node  target node or document
12001
 * @param hasTextDecl  whether to parse text declaration
12002
 * @returns a node list or NULL in case of error.
12003
 */
12004
xmlNode *
12005
xmlCtxtParseContent(xmlParserCtxt *ctxt, xmlParserInput *input,
12006
0
                    xmlNode *node, int hasTextDecl) {
12007
0
    xmlDocPtr doc;
12008
0
    xmlNodePtr cur, list = NULL;
12009
0
    int nsnr = 0;
12010
0
    xmlDictPtr oldDict;
12011
0
    int oldOptions, oldDictNames, oldLoadSubset;
12012
12013
0
    if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12014
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12015
0
        goto exit;
12016
0
    }
12017
12018
0
    doc = node->doc;
12019
0
    if (doc == NULL) {
12020
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12021
0
        goto exit;
12022
0
    }
12023
12024
0
    switch (node->type) {
12025
0
        case XML_ELEMENT_NODE:
12026
0
        case XML_DOCUMENT_NODE:
12027
0
        case XML_HTML_DOCUMENT_NODE:
12028
0
            break;
12029
12030
0
        case XML_ATTRIBUTE_NODE:
12031
0
        case XML_TEXT_NODE:
12032
0
        case XML_CDATA_SECTION_NODE:
12033
0
        case XML_ENTITY_REF_NODE:
12034
0
        case XML_PI_NODE:
12035
0
        case XML_COMMENT_NODE:
12036
0
            for (cur = node->parent; cur != NULL; cur = cur->parent) {
12037
0
                if ((cur->type == XML_ELEMENT_NODE) ||
12038
0
                    (cur->type == XML_DOCUMENT_NODE) ||
12039
0
                    (cur->type == XML_HTML_DOCUMENT_NODE)) {
12040
0
                    node = cur;
12041
0
                    break;
12042
0
                }
12043
0
            }
12044
0
            break;
12045
12046
0
        default:
12047
0
            xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12048
0
            goto exit;
12049
0
    }
12050
12051
0
    xmlCtxtReset(ctxt);
12052
12053
0
    oldDict = ctxt->dict;
12054
0
    oldOptions = ctxt->options;
12055
0
    oldDictNames = ctxt->dictNames;
12056
0
    oldLoadSubset = ctxt->loadsubset;
12057
12058
    /*
12059
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12060
     */
12061
0
    if (doc->dict != NULL) {
12062
0
        ctxt->dict = doc->dict;
12063
0
    } else {
12064
0
        ctxt->options |= XML_PARSE_NODICT;
12065
0
        ctxt->dictNames = 0;
12066
0
    }
12067
12068
    /*
12069
     * Disable IDs
12070
     */
12071
0
    ctxt->loadsubset |= XML_SKIP_IDS;
12072
0
    ctxt->options |= XML_PARSE_SKIP_IDS;
12073
12074
0
    ctxt->myDoc = doc;
12075
12076
0
#ifdef LIBXML_HTML_ENABLED
12077
0
    if (ctxt->html) {
12078
        /*
12079
         * When parsing in context, it makes no sense to add implied
12080
         * elements like html/body/etc...
12081
         */
12082
0
        ctxt->options |= HTML_PARSE_NOIMPLIED;
12083
12084
0
        list = htmlCtxtParseContentInternal(ctxt, input);
12085
0
    } else
12086
0
#endif
12087
0
    {
12088
0
        xmlCtxtInitializeLate(ctxt);
12089
12090
        /*
12091
         * initialize the SAX2 namespaces stack
12092
         */
12093
0
        cur = node;
12094
0
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12095
0
            xmlNsPtr ns = cur->nsDef;
12096
0
            xmlHashedString hprefix, huri;
12097
12098
0
            while (ns != NULL) {
12099
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12100
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12101
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12102
0
                    nsnr++;
12103
0
                ns = ns->next;
12104
0
            }
12105
0
            cur = cur->parent;
12106
0
        }
12107
12108
0
        list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12109
12110
0
        if (nsnr > 0)
12111
0
            xmlParserNsPop(ctxt, nsnr);
12112
0
    }
12113
12114
0
    ctxt->dict = oldDict;
12115
0
    ctxt->options = oldOptions;
12116
0
    ctxt->dictNames = oldDictNames;
12117
0
    ctxt->loadsubset = oldLoadSubset;
12118
0
    ctxt->myDoc = NULL;
12119
0
    ctxt->node = NULL;
12120
12121
0
exit:
12122
0
    xmlFreeInputStream(input);
12123
0
    return(list);
12124
0
}
12125
12126
/**
12127
 * Parse a well-balanced chunk of an XML document
12128
 * within the context (DTD, namespaces, etc ...) of the given node.
12129
 *
12130
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12131
 * the content production in the XML grammar:
12132
 *
12133
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12134
 *                       Comment)*
12135
 *
12136
 * This function assumes the encoding of `node`'s document which is
12137
 * typically not what you want. A better alternative is
12138
 * #xmlCtxtParseContent.
12139
 *
12140
 * @param node  the context node
12141
 * @param data  the input string
12142
 * @param datalen  the input string length in bytes
12143
 * @param options  a combination of xmlParserOption
12144
 * @param listOut  the return value for the set of parsed nodes
12145
 * @returns XML_ERR_OK if the chunk is well balanced, and the parser
12146
 * error code otherwise
12147
 */
12148
xmlParserErrors
12149
xmlParseInNodeContext(xmlNode *node, const char *data, int datalen,
12150
0
                      int options, xmlNode **listOut) {
12151
0
    xmlParserCtxtPtr ctxt;
12152
0
    xmlParserInputPtr input;
12153
0
    xmlDocPtr doc;
12154
0
    xmlNodePtr list;
12155
0
    xmlParserErrors ret;
12156
12157
0
    if (listOut == NULL)
12158
0
        return(XML_ERR_INTERNAL_ERROR);
12159
0
    *listOut = NULL;
12160
12161
0
    if ((node == NULL) || (data == NULL) || (datalen < 0))
12162
0
        return(XML_ERR_INTERNAL_ERROR);
12163
12164
0
    doc = node->doc;
12165
0
    if (doc == NULL)
12166
0
        return(XML_ERR_INTERNAL_ERROR);
12167
12168
0
#ifdef LIBXML_HTML_ENABLED
12169
0
    if (doc->type == XML_HTML_DOCUMENT_NODE) {
12170
0
        ctxt = htmlNewParserCtxt();
12171
0
    }
12172
0
    else
12173
0
#endif
12174
0
        ctxt = xmlNewParserCtxt();
12175
12176
0
    if (ctxt == NULL)
12177
0
        return(XML_ERR_NO_MEMORY);
12178
12179
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12180
0
                                      (const char *) doc->encoding,
12181
0
                                      XML_INPUT_BUF_STATIC);
12182
0
    if (input == NULL) {
12183
0
        xmlFreeParserCtxt(ctxt);
12184
0
        return(XML_ERR_NO_MEMORY);
12185
0
    }
12186
12187
0
    xmlCtxtUseOptions(ctxt, options);
12188
12189
0
    list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12190
12191
0
    if (list == NULL) {
12192
0
        ret = ctxt->errNo;
12193
0
        if (ret == XML_ERR_ARGUMENT)
12194
0
            ret = XML_ERR_INTERNAL_ERROR;
12195
0
    } else {
12196
0
        ret = XML_ERR_OK;
12197
0
        *listOut = list;
12198
0
    }
12199
12200
0
    xmlFreeParserCtxt(ctxt);
12201
12202
0
    return(ret);
12203
0
}
12204
12205
#ifdef LIBXML_SAX1_ENABLED
12206
/**
12207
 * Parse a well-balanced chunk of an XML document
12208
 *
12209
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12210
 * the content production in the XML grammar:
12211
 *
12212
 *     [43] content ::= (element | CharData | Reference | CDSect | PI |
12213
 *                       Comment)*
12214
 *
12215
 * In case recover is set to 1, the nodelist will not be empty even if
12216
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12217
 * some extent.
12218
 *
12219
 * This function uses deprecated global variables to set parser options
12220
 * which default to XML_PARSE_NODICT.
12221
 *
12222
 * @param doc  the document the chunk pertains to (must not be NULL)
12223
 * @param sax  the SAX handler block (possibly NULL)
12224
 * @param user_data  The user data returned on SAX callbacks (possibly NULL)
12225
 * @param depth  Used for loop detection, use 0
12226
 * @param string  the input string in UTF8 or ISO-Latin (zero terminated)
12227
 * @param listOut  the return value for the set of parsed nodes
12228
 * @param recover  return nodes even if the data is broken (use 0)
12229
 * @returns 0 if the chunk is well balanced, or thehe parser error code
12230
 * otherwise.
12231
 */
12232
int
12233
xmlParseBalancedChunkMemoryRecover(xmlDoc *doc, xmlSAXHandler *sax,
12234
     void *user_data, int depth, const xmlChar *string, xmlNode **listOut,
12235
0
     int recover) {
12236
0
    xmlParserCtxtPtr ctxt;
12237
0
    xmlParserInputPtr input;
12238
0
    xmlNodePtr list;
12239
0
    int ret;
12240
12241
0
    if (listOut != NULL)
12242
0
        *listOut = NULL;
12243
12244
0
    if (string == NULL)
12245
0
        return(XML_ERR_ARGUMENT);
12246
12247
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12248
0
    if (ctxt == NULL)
12249
0
        return(XML_ERR_NO_MEMORY);
12250
12251
0
    xmlCtxtInitializeLate(ctxt);
12252
12253
0
    ctxt->depth = depth;
12254
0
    ctxt->myDoc = doc;
12255
0
    if (recover) {
12256
0
        ctxt->options |= XML_PARSE_RECOVER;
12257
0
        ctxt->recovery = 1;
12258
0
    }
12259
12260
0
    input = xmlNewStringInputStream(ctxt, string);
12261
0
    if (input == NULL) {
12262
0
        ret = ctxt->errNo;
12263
0
        goto error;
12264
0
    }
12265
12266
0
    list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12267
0
    if (listOut != NULL)
12268
0
        *listOut = list;
12269
0
    else
12270
0
        xmlFreeNodeList(list);
12271
12272
0
    if (!ctxt->wellFormed)
12273
0
        ret = ctxt->errNo;
12274
0
    else
12275
0
        ret = XML_ERR_OK;
12276
12277
0
error:
12278
0
    xmlFreeInputStream(input);
12279
0
    xmlFreeParserCtxt(ctxt);
12280
0
    return(ret);
12281
0
}
12282
12283
/**
12284
 * Parse an XML external entity out of context and build a tree.
12285
 * It use the given SAX function block to handle the parsing callback.
12286
 * If sax is NULL, fallback to the default DOM tree building routines.
12287
 *
12288
 * @deprecated Don't use.
12289
 *
12290
 *     [78] extParsedEnt ::= TextDecl? content
12291
 *
12292
 * This correspond to a "Well Balanced" chunk
12293
 *
12294
 * This function uses deprecated global variables to set parser options
12295
 * which default to XML_PARSE_NODICT.
12296
 *
12297
 * @param sax  the SAX handler block
12298
 * @param filename  the filename
12299
 * @returns the resulting document tree
12300
 */
12301
12302
xmlDoc *
12303
0
xmlSAXParseEntity(xmlSAXHandler *sax, const char *filename) {
12304
0
    xmlDocPtr ret;
12305
0
    xmlParserCtxtPtr ctxt;
12306
12307
0
    ctxt = xmlCreateFileParserCtxt(filename);
12308
0
    if (ctxt == NULL) {
12309
0
  return(NULL);
12310
0
    }
12311
0
    if (sax != NULL) {
12312
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12313
0
            *ctxt->sax = *sax;
12314
0
        } else {
12315
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12316
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12317
0
        }
12318
0
        ctxt->userData = NULL;
12319
0
    }
12320
12321
0
    xmlParseExtParsedEnt(ctxt);
12322
12323
0
    if (ctxt->wellFormed) {
12324
0
  ret = ctxt->myDoc;
12325
0
    } else {
12326
0
        ret = NULL;
12327
0
        xmlFreeDoc(ctxt->myDoc);
12328
0
    }
12329
12330
0
    xmlFreeParserCtxt(ctxt);
12331
12332
0
    return(ret);
12333
0
}
12334
12335
/**
12336
 * Parse an XML external entity out of context and build a tree.
12337
 *
12338
 *     [78] extParsedEnt ::= TextDecl? content
12339
 *
12340
 * This correspond to a "Well Balanced" chunk
12341
 *
12342
 * This function uses deprecated global variables to set parser options
12343
 * which default to XML_PARSE_NODICT.
12344
 *
12345
 * @deprecated Don't use.
12346
 *
12347
 * @param filename  the filename
12348
 * @returns the resulting document tree
12349
 */
12350
12351
xmlDoc *
12352
0
xmlParseEntity(const char *filename) {
12353
0
    return(xmlSAXParseEntity(NULL, filename));
12354
0
}
12355
#endif /* LIBXML_SAX1_ENABLED */
12356
12357
/**
12358
 * Create a parser context for an external entity
12359
 * Automatic support for ZLIB/Compress compressed document is provided
12360
 * by default if found at compile-time.
12361
 *
12362
 * @deprecated Don't use.
12363
 *
12364
 * @param URL  the entity URL
12365
 * @param ID  the entity PUBLIC ID
12366
 * @param base  a possible base for the target URI
12367
 * @returns the new parser context or NULL
12368
 */
12369
xmlParserCtxt *
12370
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12371
0
                    const xmlChar *base) {
12372
0
    xmlParserCtxtPtr ctxt;
12373
0
    xmlParserInputPtr input;
12374
0
    xmlChar *uri = NULL;
12375
12376
0
    ctxt = xmlNewParserCtxt();
12377
0
    if (ctxt == NULL)
12378
0
  return(NULL);
12379
12380
0
    if (base != NULL) {
12381
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12382
0
            goto error;
12383
0
        if (uri != NULL)
12384
0
            URL = uri;
12385
0
    }
12386
12387
0
    input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12388
0
                            XML_RESOURCE_UNKNOWN);
12389
0
    if (input == NULL)
12390
0
        goto error;
12391
12392
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12393
0
        xmlFreeInputStream(input);
12394
0
        goto error;
12395
0
    }
12396
12397
0
    xmlFree(uri);
12398
0
    return(ctxt);
12399
12400
0
error:
12401
0
    xmlFree(uri);
12402
0
    xmlFreeParserCtxt(ctxt);
12403
0
    return(NULL);
12404
0
}
12405
12406
/************************************************************************
12407
 *                  *
12408
 *    Front ends when parsing from a file     *
12409
 *                  *
12410
 ************************************************************************/
12411
12412
/**
12413
 * Create a parser context for a file or URL content.
12414
 * Automatic support for ZLIB/Compress compressed document is provided
12415
 * by default if found at compile-time and for file accesses
12416
 *
12417
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12418
 *
12419
 * @param filename  the filename or URL
12420
 * @param options  a combination of xmlParserOption
12421
 * @returns the new parser context or NULL
12422
 */
12423
xmlParserCtxt *
12424
xmlCreateURLParserCtxt(const char *filename, int options)
12425
0
{
12426
0
    xmlParserCtxtPtr ctxt;
12427
0
    xmlParserInputPtr input;
12428
12429
0
    ctxt = xmlNewParserCtxt();
12430
0
    if (ctxt == NULL)
12431
0
  return(NULL);
12432
12433
0
    xmlCtxtUseOptions(ctxt, options);
12434
12435
0
    input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12436
0
    if (input == NULL) {
12437
0
  xmlFreeParserCtxt(ctxt);
12438
0
  return(NULL);
12439
0
    }
12440
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12441
0
        xmlFreeInputStream(input);
12442
0
        xmlFreeParserCtxt(ctxt);
12443
0
        return(NULL);
12444
0
    }
12445
12446
0
    return(ctxt);
12447
0
}
12448
12449
/**
12450
 * Create a parser context for a file content.
12451
 * Automatic support for ZLIB/Compress compressed document is provided
12452
 * by default if found at compile-time.
12453
 *
12454
 * @deprecated Use #xmlNewParserCtxt and #xmlCtxtReadFile.
12455
 *
12456
 * @param filename  the filename
12457
 * @returns the new parser context or NULL
12458
 */
12459
xmlParserCtxt *
12460
xmlCreateFileParserCtxt(const char *filename)
12461
0
{
12462
0
    return(xmlCreateURLParserCtxt(filename, 0));
12463
0
}
12464
12465
#ifdef LIBXML_SAX1_ENABLED
12466
/**
12467
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12468
 * compressed document is provided by default if found at compile-time.
12469
 * It use the given SAX function block to handle the parsing callback.
12470
 * If sax is NULL, fallback to the default DOM tree building routines.
12471
 *
12472
 * This function uses deprecated global variables to set parser options
12473
 * which default to XML_PARSE_NODICT.
12474
 *
12475
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12476
 *
12477
 * User data (void *) is stored within the parser context in the
12478
 * context's _private member, so it is available nearly everywhere in libxml
12479
 *
12480
 * @param sax  the SAX handler block
12481
 * @param filename  the filename
12482
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12483
 *             documents
12484
 * @param data  the userdata
12485
 * @returns the resulting document tree
12486
 */
12487
12488
xmlDoc *
12489
xmlSAXParseFileWithData(xmlSAXHandler *sax, const char *filename,
12490
0
                        int recovery, void *data) {
12491
0
    xmlDocPtr ret = NULL;
12492
0
    xmlParserCtxtPtr ctxt;
12493
0
    xmlParserInputPtr input;
12494
12495
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12496
0
    if (ctxt == NULL)
12497
0
  return(NULL);
12498
12499
0
    if (data != NULL)
12500
0
  ctxt->_private = data;
12501
12502
0
    if (recovery) {
12503
0
        ctxt->options |= XML_PARSE_RECOVER;
12504
0
        ctxt->recovery = 1;
12505
0
    }
12506
12507
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12508
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12509
0
    else
12510
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12511
12512
0
    if (input != NULL)
12513
0
        ret = xmlCtxtParseDocument(ctxt, input);
12514
12515
0
    xmlFreeParserCtxt(ctxt);
12516
0
    return(ret);
12517
0
}
12518
12519
/**
12520
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12521
 * compressed document is provided by default if found at compile-time.
12522
 * It use the given SAX function block to handle the parsing callback.
12523
 * If sax is NULL, fallback to the default DOM tree building routines.
12524
 *
12525
 * This function uses deprecated global variables to set parser options
12526
 * which default to XML_PARSE_NODICT.
12527
 *
12528
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12529
 *
12530
 * @param sax  the SAX handler block
12531
 * @param filename  the filename
12532
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12533
 *             documents
12534
 * @returns the resulting document tree
12535
 */
12536
12537
xmlDoc *
12538
xmlSAXParseFile(xmlSAXHandler *sax, const char *filename,
12539
0
                          int recovery) {
12540
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12541
0
}
12542
12543
/**
12544
 * Parse an XML in-memory document and build a tree.
12545
 * In the case the document is not Well Formed, a attempt to build a
12546
 * tree is tried anyway
12547
 *
12548
 * This function uses deprecated global variables to set parser options
12549
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12550
 *
12551
 * @deprecated Use #xmlReadDoc with XML_PARSE_RECOVER.
12552
 *
12553
 * @param cur  a pointer to an array of xmlChar
12554
 * @returns the resulting document tree or NULL in case of failure
12555
 */
12556
12557
xmlDoc *
12558
0
xmlRecoverDoc(const xmlChar *cur) {
12559
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12560
0
}
12561
12562
/**
12563
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12564
 * compressed document is provided by default if found at compile-time.
12565
 *
12566
 * This function uses deprecated global variables to set parser options
12567
 * which default to XML_PARSE_NODICT.
12568
 *
12569
 * @deprecated Use #xmlReadFile.
12570
 *
12571
 * @param filename  the filename
12572
 * @returns the resulting document tree if the file was wellformed,
12573
 * NULL otherwise.
12574
 */
12575
12576
xmlDoc *
12577
0
xmlParseFile(const char *filename) {
12578
0
    return(xmlSAXParseFile(NULL, filename, 0));
12579
0
}
12580
12581
/**
12582
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
12583
 * compressed document is provided by default if found at compile-time.
12584
 * In the case the document is not Well Formed, it attempts to build
12585
 * a tree anyway
12586
 *
12587
 * This function uses deprecated global variables to set parser options
12588
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12589
 *
12590
 * @deprecated Use #xmlReadFile with XML_PARSE_RECOVER.
12591
 *
12592
 * @param filename  the filename
12593
 * @returns the resulting document tree or NULL in case of failure
12594
 */
12595
12596
xmlDoc *
12597
0
xmlRecoverFile(const char *filename) {
12598
0
    return(xmlSAXParseFile(NULL, filename, 1));
12599
0
}
12600
12601
12602
/**
12603
 * Setup the parser context to parse a new buffer; Clears any prior
12604
 * contents from the parser context. The buffer parameter must not be
12605
 * NULL, but the filename parameter can be
12606
 *
12607
 * @deprecated Don't use.
12608
 *
12609
 * @param ctxt  an XML parser context
12610
 * @param buffer  a xmlChar * buffer
12611
 * @param filename  a file name
12612
 */
12613
void
12614
xmlSetupParserForBuffer(xmlParserCtxt *ctxt, const xmlChar* buffer,
12615
                             const char* filename)
12616
0
{
12617
0
    xmlParserInputPtr input;
12618
12619
0
    if ((ctxt == NULL) || (buffer == NULL))
12620
0
        return;
12621
12622
0
    xmlCtxtReset(ctxt);
12623
12624
0
    input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12625
0
                                      NULL, 0);
12626
0
    if (input == NULL)
12627
0
        return;
12628
0
    if (xmlCtxtPushInput(ctxt, input) < 0)
12629
0
        xmlFreeInputStream(input);
12630
0
}
12631
12632
/**
12633
 * Parse an XML file and call the given SAX handler routines.
12634
 * Automatic support for ZLIB/Compress compressed document is provided
12635
 *
12636
 * This function uses deprecated global variables to set parser options
12637
 * which default to XML_PARSE_NODICT.
12638
 *
12639
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadFile.
12640
 *
12641
 * @param sax  a SAX handler
12642
 * @param user_data  The user data returned on SAX callbacks
12643
 * @param filename  a file name
12644
 * @returns 0 in case of success or a error number otherwise
12645
 */
12646
int
12647
xmlSAXUserParseFile(xmlSAXHandler *sax, void *user_data,
12648
0
                    const char *filename) {
12649
0
    int ret = 0;
12650
0
    xmlParserCtxtPtr ctxt;
12651
12652
0
    ctxt = xmlCreateFileParserCtxt(filename);
12653
0
    if (ctxt == NULL) return -1;
12654
0
    if (sax != NULL) {
12655
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12656
0
            *ctxt->sax = *sax;
12657
0
        } else {
12658
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12659
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12660
0
        }
12661
0
  ctxt->userData = user_data;
12662
0
    }
12663
12664
0
    xmlParseDocument(ctxt);
12665
12666
0
    if (ctxt->wellFormed)
12667
0
  ret = 0;
12668
0
    else {
12669
0
        if (ctxt->errNo != 0)
12670
0
      ret = ctxt->errNo;
12671
0
  else
12672
0
      ret = -1;
12673
0
    }
12674
0
    if (ctxt->myDoc != NULL) {
12675
0
        xmlFreeDoc(ctxt->myDoc);
12676
0
  ctxt->myDoc = NULL;
12677
0
    }
12678
0
    xmlFreeParserCtxt(ctxt);
12679
12680
0
    return ret;
12681
0
}
12682
#endif /* LIBXML_SAX1_ENABLED */
12683
12684
/************************************************************************
12685
 *                  *
12686
 *    Front ends when parsing from memory     *
12687
 *                  *
12688
 ************************************************************************/
12689
12690
/**
12691
 * Create a parser context for an XML in-memory document. The input buffer
12692
 * must not contain a terminating null byte.
12693
 *
12694
 * @param buffer  a pointer to a char array
12695
 * @param size  the size of the array
12696
 * @returns the new parser context or NULL
12697
 */
12698
xmlParserCtxt *
12699
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12700
0
    xmlParserCtxtPtr ctxt;
12701
0
    xmlParserInputPtr input;
12702
12703
0
    if (size < 0)
12704
0
  return(NULL);
12705
12706
0
    ctxt = xmlNewParserCtxt();
12707
0
    if (ctxt == NULL)
12708
0
  return(NULL);
12709
12710
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
12711
0
    if (input == NULL) {
12712
0
  xmlFreeParserCtxt(ctxt);
12713
0
  return(NULL);
12714
0
    }
12715
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12716
0
        xmlFreeInputStream(input);
12717
0
        xmlFreeParserCtxt(ctxt);
12718
0
        return(NULL);
12719
0
    }
12720
12721
0
    return(ctxt);
12722
0
}
12723
12724
#ifdef LIBXML_SAX1_ENABLED
12725
/**
12726
 * Parse an XML in-memory block and use the given SAX function block
12727
 * to handle the parsing callback. If sax is NULL, fallback to the default
12728
 * DOM tree building routines.
12729
 *
12730
 * This function uses deprecated global variables to set parser options
12731
 * which default to XML_PARSE_NODICT.
12732
 *
12733
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12734
 *
12735
 * User data (void *) is stored within the parser context in the
12736
 * context's _private member, so it is available nearly everywhere in libxml
12737
 *
12738
 * @param sax  the SAX handler block
12739
 * @param buffer  an pointer to a char array
12740
 * @param size  the size of the array
12741
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12742
 *             documents
12743
 * @param data  the userdata
12744
 * @returns the resulting document tree
12745
 */
12746
12747
xmlDoc *
12748
xmlSAXParseMemoryWithData(xmlSAXHandler *sax, const char *buffer,
12749
0
                          int size, int recovery, void *data) {
12750
0
    xmlDocPtr ret = NULL;
12751
0
    xmlParserCtxtPtr ctxt;
12752
0
    xmlParserInputPtr input;
12753
12754
0
    if (size < 0)
12755
0
        return(NULL);
12756
12757
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12758
0
    if (ctxt == NULL)
12759
0
        return(NULL);
12760
12761
0
    if (data != NULL)
12762
0
  ctxt->_private=data;
12763
12764
0
    if (recovery) {
12765
0
        ctxt->options |= XML_PARSE_RECOVER;
12766
0
        ctxt->recovery = 1;
12767
0
    }
12768
12769
0
    input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
12770
0
                                      XML_INPUT_BUF_STATIC);
12771
12772
0
    if (input != NULL)
12773
0
        ret = xmlCtxtParseDocument(ctxt, input);
12774
12775
0
    xmlFreeParserCtxt(ctxt);
12776
0
    return(ret);
12777
0
}
12778
12779
/**
12780
 * Parse an XML in-memory block and use the given SAX function block
12781
 * to handle the parsing callback. If sax is NULL, fallback to the default
12782
 * DOM tree building routines.
12783
 *
12784
 * This function uses deprecated global variables to set parser options
12785
 * which default to XML_PARSE_NODICT.
12786
 *
12787
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12788
 *
12789
 * @param sax  the SAX handler block
12790
 * @param buffer  an pointer to a char array
12791
 * @param size  the size of the array
12792
 * @param recovery  work in recovery mode, i.e. tries to read not Well Formed
12793
 *             documents
12794
 * @returns the resulting document tree
12795
 */
12796
xmlDoc *
12797
xmlSAXParseMemory(xmlSAXHandler *sax, const char *buffer,
12798
0
            int size, int recovery) {
12799
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
12800
0
}
12801
12802
/**
12803
 * Parse an XML in-memory block and build a tree.
12804
 *
12805
 * This function uses deprecated global variables to set parser options
12806
 * which default to XML_PARSE_NODICT.
12807
 *
12808
 * @deprecated Use #xmlReadMemory.
12809
 *
12810
 * @param buffer  an pointer to a char array
12811
 * @param size  the size of the array
12812
 * @returns the resulting document tree
12813
 */
12814
12815
0
xmlDoc *xmlParseMemory(const char *buffer, int size) {
12816
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
12817
0
}
12818
12819
/**
12820
 * Parse an XML in-memory block and build a tree.
12821
 * In the case the document is not Well Formed, an attempt to
12822
 * build a tree is tried anyway
12823
 *
12824
 * This function uses deprecated global variables to set parser options
12825
 * which default to XML_PARSE_NODICT | XML_PARSE_RECOVER.
12826
 *
12827
 * @deprecated Use #xmlReadMemory with XML_PARSE_RECOVER.
12828
 *
12829
 * @param buffer  an pointer to a char array
12830
 * @param size  the size of the array
12831
 * @returns the resulting document tree or NULL in case of error
12832
 */
12833
12834
0
xmlDoc *xmlRecoverMemory(const char *buffer, int size) {
12835
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
12836
0
}
12837
12838
/**
12839
 * Parse an XML in-memory buffer and call the given SAX handler routines.
12840
 *
12841
 * This function uses deprecated global variables to set parser options
12842
 * which default to XML_PARSE_NODICT.
12843
 *
12844
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadMemory.
12845
 *
12846
 * @param sax  a SAX handler
12847
 * @param user_data  The user data returned on SAX callbacks
12848
 * @param buffer  an in-memory XML document input
12849
 * @param size  the length of the XML document in bytes
12850
 * @returns 0 in case of success or a error number otherwise
12851
 */
12852
int xmlSAXUserParseMemory(xmlSAXHandler *sax, void *user_data,
12853
0
        const char *buffer, int size) {
12854
0
    int ret = 0;
12855
0
    xmlParserCtxtPtr ctxt;
12856
12857
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12858
0
    if (ctxt == NULL) return -1;
12859
0
    if (sax != NULL) {
12860
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12861
0
            *ctxt->sax = *sax;
12862
0
        } else {
12863
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12864
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12865
0
        }
12866
0
  ctxt->userData = user_data;
12867
0
    }
12868
12869
0
    xmlParseDocument(ctxt);
12870
12871
0
    if (ctxt->wellFormed)
12872
0
  ret = 0;
12873
0
    else {
12874
0
        if (ctxt->errNo != 0)
12875
0
      ret = ctxt->errNo;
12876
0
  else
12877
0
      ret = -1;
12878
0
    }
12879
0
    if (ctxt->myDoc != NULL) {
12880
0
        xmlFreeDoc(ctxt->myDoc);
12881
0
  ctxt->myDoc = NULL;
12882
0
    }
12883
0
    xmlFreeParserCtxt(ctxt);
12884
12885
0
    return ret;
12886
0
}
12887
#endif /* LIBXML_SAX1_ENABLED */
12888
12889
/**
12890
 * Creates a parser context for an XML in-memory document.
12891
 *
12892
 * @param str  a pointer to an array of xmlChar
12893
 * @returns the new parser context or NULL
12894
 */
12895
xmlParserCtxt *
12896
0
xmlCreateDocParserCtxt(const xmlChar *str) {
12897
0
    xmlParserCtxtPtr ctxt;
12898
0
    xmlParserInputPtr input;
12899
12900
0
    ctxt = xmlNewParserCtxt();
12901
0
    if (ctxt == NULL)
12902
0
  return(NULL);
12903
12904
0
    input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
12905
0
    if (input == NULL) {
12906
0
  xmlFreeParserCtxt(ctxt);
12907
0
  return(NULL);
12908
0
    }
12909
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
12910
0
        xmlFreeInputStream(input);
12911
0
        xmlFreeParserCtxt(ctxt);
12912
0
        return(NULL);
12913
0
    }
12914
12915
0
    return(ctxt);
12916
0
}
12917
12918
#ifdef LIBXML_SAX1_ENABLED
12919
/**
12920
 * Parse an XML in-memory document and build a tree.
12921
 * It use the given SAX function block to handle the parsing callback.
12922
 * If sax is NULL, fallback to the default DOM tree building routines.
12923
 *
12924
 * This function uses deprecated global variables to set parser options
12925
 * which default to XML_PARSE_NODICT.
12926
 *
12927
 * @deprecated Use #xmlNewSAXParserCtxt and #xmlCtxtReadDoc.
12928
 *
12929
 * @param sax  the SAX handler block
12930
 * @param cur  a pointer to an array of xmlChar
12931
 * @param recovery  work in recovery mode, i.e. tries to read no Well Formed
12932
 *             documents
12933
 * @returns the resulting document tree
12934
 */
12935
12936
xmlDoc *
12937
0
xmlSAXParseDoc(xmlSAXHandler *sax, const xmlChar *cur, int recovery) {
12938
0
    xmlDocPtr ret;
12939
0
    xmlParserCtxtPtr ctxt;
12940
0
    xmlSAXHandlerPtr oldsax = NULL;
12941
12942
0
    if (cur == NULL) return(NULL);
12943
12944
12945
0
    ctxt = xmlCreateDocParserCtxt(cur);
12946
0
    if (ctxt == NULL) return(NULL);
12947
0
    if (sax != NULL) {
12948
0
        oldsax = ctxt->sax;
12949
0
        ctxt->sax = sax;
12950
0
        ctxt->userData = NULL;
12951
0
    }
12952
12953
0
    xmlParseDocument(ctxt);
12954
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12955
0
    else {
12956
0
       ret = NULL;
12957
0
       xmlFreeDoc(ctxt->myDoc);
12958
0
       ctxt->myDoc = NULL;
12959
0
    }
12960
0
    if (sax != NULL)
12961
0
  ctxt->sax = oldsax;
12962
0
    xmlFreeParserCtxt(ctxt);
12963
12964
0
    return(ret);
12965
0
}
12966
12967
/**
12968
 * Parse an XML in-memory document and build a tree.
12969
 *
12970
 * This function uses deprecated global variables to set parser options
12971
 * which default to XML_PARSE_NODICT.
12972
 *
12973
 * @deprecated Use #xmlReadDoc.
12974
 *
12975
 * @param cur  a pointer to an array of xmlChar
12976
 * @returns the resulting document tree
12977
 */
12978
12979
xmlDoc *
12980
0
xmlParseDoc(const xmlChar *cur) {
12981
0
    return(xmlSAXParseDoc(NULL, cur, 0));
12982
0
}
12983
#endif /* LIBXML_SAX1_ENABLED */
12984
12985
/************************************************************************
12986
 *                  *
12987
 *  New set (2.6.0) of simpler and more flexible APIs   *
12988
 *                  *
12989
 ************************************************************************/
12990
12991
/**
12992
 * Reset a parser context
12993
 *
12994
 * @param ctxt  an XML parser context
12995
 */
12996
void
12997
xmlCtxtReset(xmlParserCtxt *ctxt)
12998
47.2k
{
12999
47.2k
    xmlParserInputPtr input;
13000
13001
47.2k
    if (ctxt == NULL)
13002
0
        return;
13003
13004
47.2k
    while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
13005
0
        xmlFreeInputStream(input);
13006
0
    }
13007
47.2k
    ctxt->inputNr = 0;
13008
47.2k
    ctxt->input = NULL;
13009
13010
47.2k
    ctxt->spaceNr = 0;
13011
47.2k
    if (ctxt->spaceTab != NULL) {
13012
47.2k
  ctxt->spaceTab[0] = -1;
13013
47.2k
  ctxt->space = &ctxt->spaceTab[0];
13014
47.2k
    } else {
13015
0
        ctxt->space = NULL;
13016
0
    }
13017
13018
13019
47.2k
    ctxt->nodeNr = 0;
13020
47.2k
    ctxt->node = NULL;
13021
13022
47.2k
    ctxt->nameNr = 0;
13023
47.2k
    ctxt->name = NULL;
13024
13025
47.2k
    ctxt->nsNr = 0;
13026
47.2k
    xmlParserNsReset(ctxt->nsdb);
13027
13028
47.2k
    if (ctxt->version != NULL) {
13029
0
        xmlFree(ctxt->version);
13030
0
        ctxt->version = NULL;
13031
0
    }
13032
47.2k
    if (ctxt->encoding != NULL) {
13033
0
        xmlFree(ctxt->encoding);
13034
0
        ctxt->encoding = NULL;
13035
0
    }
13036
47.2k
    if (ctxt->extSubURI != NULL) {
13037
0
        xmlFree(ctxt->extSubURI);
13038
0
        ctxt->extSubURI = NULL;
13039
0
    }
13040
47.2k
    if (ctxt->extSubSystem != NULL) {
13041
0
        xmlFree(ctxt->extSubSystem);
13042
0
        ctxt->extSubSystem = NULL;
13043
0
    }
13044
47.2k
    if (ctxt->directory != NULL) {
13045
0
        xmlFree(ctxt->directory);
13046
0
        ctxt->directory = NULL;
13047
0
    }
13048
13049
47.2k
    if (ctxt->myDoc != NULL)
13050
0
        xmlFreeDoc(ctxt->myDoc);
13051
47.2k
    ctxt->myDoc = NULL;
13052
13053
47.2k
    ctxt->standalone = -1;
13054
47.2k
    ctxt->hasExternalSubset = 0;
13055
47.2k
    ctxt->hasPErefs = 0;
13056
47.2k
    ctxt->html = ctxt->html ? 1 : 0;
13057
47.2k
    ctxt->instate = XML_PARSER_START;
13058
13059
47.2k
    ctxt->wellFormed = 1;
13060
47.2k
    ctxt->nsWellFormed = 1;
13061
47.2k
    ctxt->disableSAX = 0;
13062
47.2k
    ctxt->valid = 1;
13063
47.2k
    ctxt->record_info = 0;
13064
47.2k
    ctxt->checkIndex = 0;
13065
47.2k
    ctxt->endCheckState = 0;
13066
47.2k
    ctxt->inSubset = 0;
13067
47.2k
    ctxt->errNo = XML_ERR_OK;
13068
47.2k
    ctxt->depth = 0;
13069
47.2k
    ctxt->catalogs = NULL;
13070
47.2k
    ctxt->sizeentities = 0;
13071
47.2k
    ctxt->sizeentcopy = 0;
13072
47.2k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13073
13074
47.2k
    if (ctxt->attsDefault != NULL) {
13075
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13076
0
        ctxt->attsDefault = NULL;
13077
0
    }
13078
47.2k
    if (ctxt->attsSpecial != NULL) {
13079
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13080
0
        ctxt->attsSpecial = NULL;
13081
0
    }
13082
13083
47.2k
#ifdef LIBXML_CATALOG_ENABLED
13084
47.2k
    if (ctxt->catalogs != NULL)
13085
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13086
47.2k
#endif
13087
47.2k
    ctxt->nbErrors = 0;
13088
47.2k
    ctxt->nbWarnings = 0;
13089
47.2k
    if (ctxt->lastError.code != XML_ERR_OK)
13090
0
        xmlResetError(&ctxt->lastError);
13091
47.2k
}
13092
13093
/**
13094
 * Reset a push parser context
13095
 *
13096
 * @param ctxt  an XML parser context
13097
 * @param chunk  a pointer to an array of chars
13098
 * @param size  number of chars in the array
13099
 * @param filename  an optional file name or URI
13100
 * @param encoding  the document encoding, or NULL
13101
 * @returns 0 in case of success and 1 in case of error
13102
 */
13103
int
13104
xmlCtxtResetPush(xmlParserCtxt *ctxt, const char *chunk,
13105
                 int size, const char *filename, const char *encoding)
13106
0
{
13107
0
    xmlParserInputPtr input;
13108
13109
0
    if (ctxt == NULL)
13110
0
        return(1);
13111
13112
0
    xmlCtxtReset(ctxt);
13113
13114
0
    input = xmlNewPushInput(filename, chunk, size);
13115
0
    if (input == NULL)
13116
0
        return(1);
13117
13118
0
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13119
0
        xmlFreeInputStream(input);
13120
0
        return(1);
13121
0
    }
13122
13123
0
    if (encoding != NULL)
13124
0
        xmlSwitchEncodingName(ctxt, encoding);
13125
13126
0
    return(0);
13127
0
}
13128
13129
static int
13130
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13131
47.2k
{
13132
47.2k
    int allMask;
13133
13134
47.2k
    if (ctxt == NULL)
13135
0
        return(-1);
13136
13137
    /*
13138
     * XInclude options aren't handled by the parser.
13139
     *
13140
     * XML_PARSE_XINCLUDE
13141
     * XML_PARSE_NOXINCNODE
13142
     * XML_PARSE_NOBASEFIX
13143
     */
13144
47.2k
    allMask = XML_PARSE_RECOVER |
13145
47.2k
              XML_PARSE_NOENT |
13146
47.2k
              XML_PARSE_DTDLOAD |
13147
47.2k
              XML_PARSE_DTDATTR |
13148
47.2k
              XML_PARSE_DTDVALID |
13149
47.2k
              XML_PARSE_NOERROR |
13150
47.2k
              XML_PARSE_NOWARNING |
13151
47.2k
              XML_PARSE_PEDANTIC |
13152
47.2k
              XML_PARSE_NOBLANKS |
13153
47.2k
#ifdef LIBXML_SAX1_ENABLED
13154
47.2k
              XML_PARSE_SAX1 |
13155
47.2k
#endif
13156
47.2k
              XML_PARSE_NONET |
13157
47.2k
              XML_PARSE_NODICT |
13158
47.2k
              XML_PARSE_NSCLEAN |
13159
47.2k
              XML_PARSE_NOCDATA |
13160
47.2k
              XML_PARSE_COMPACT |
13161
47.2k
              XML_PARSE_OLD10 |
13162
47.2k
              XML_PARSE_HUGE |
13163
47.2k
              XML_PARSE_OLDSAX |
13164
47.2k
              XML_PARSE_IGNORE_ENC |
13165
47.2k
              XML_PARSE_BIG_LINES |
13166
47.2k
              XML_PARSE_NO_XXE |
13167
47.2k
              XML_PARSE_UNZIP |
13168
47.2k
              XML_PARSE_NO_SYS_CATALOG |
13169
47.2k
              XML_PARSE_CATALOG_PI;
13170
13171
47.2k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13172
13173
    /*
13174
     * For some options, struct members are historically the source
13175
     * of truth. The values are initalized from global variables and
13176
     * old code could also modify them directly. Several older API
13177
     * functions that don't take an options argument rely on these
13178
     * deprecated mechanisms.
13179
     *
13180
     * Once public access to struct members and the globals are
13181
     * disabled, we can use the options bitmask as source of
13182
     * truth, making all these struct members obsolete.
13183
     *
13184
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13185
     * loading of the external subset.
13186
     */
13187
47.2k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13188
47.2k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13189
47.2k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13190
47.2k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13191
47.2k
    ctxt->loadsubset |= (options & XML_PARSE_SKIP_IDS) ? XML_SKIP_IDS : 0;
13192
47.2k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13193
47.2k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13194
47.2k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13195
47.2k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13196
13197
47.2k
    return(options & ~allMask);
13198
47.2k
}
13199
13200
/**
13201
 * Applies the options to the parser context. Unset options are
13202
 * cleared.
13203
 *
13204
 * @since 2.13.0
13205
 *
13206
 * With older versions, you can use #xmlCtxtUseOptions.
13207
 *
13208
 * @param ctxt  an XML parser context
13209
 * @param options  a bitmask of xmlParserOption values
13210
 * @returns 0 in case of success, the set of unknown or unimplemented options
13211
 *         in case of error.
13212
 */
13213
int
13214
xmlCtxtSetOptions(xmlParserCtxt *ctxt, int options)
13215
0
{
13216
0
#ifdef LIBXML_HTML_ENABLED
13217
0
    if ((ctxt != NULL) && (ctxt->html))
13218
0
        return(htmlCtxtSetOptions(ctxt, options));
13219
0
#endif
13220
13221
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13222
0
}
13223
13224
/**
13225
 * Get the current options of the parser context.
13226
 *
13227
 * @since 2.14.0
13228
 *
13229
 * @param ctxt  an XML parser context
13230
 * @returns the current options set in the parser context, or -1 if ctxt is NULL.
13231
 */
13232
int
13233
xmlCtxtGetOptions(xmlParserCtxt *ctxt)
13234
0
{
13235
0
    if (ctxt == NULL)
13236
0
        return(-1);
13237
13238
0
    return(ctxt->options);
13239
0
}
13240
13241
/**
13242
 * Applies the options to the parser context. The following options
13243
 * are never cleared and can only be enabled:
13244
 *
13245
 * - XML_PARSE_NOERROR
13246
 * - XML_PARSE_NOWARNING
13247
 * - XML_PARSE_NONET
13248
 * - XML_PARSE_NSCLEAN
13249
 * - XML_PARSE_NOCDATA
13250
 * - XML_PARSE_COMPACT
13251
 * - XML_PARSE_OLD10
13252
 * - XML_PARSE_HUGE
13253
 * - XML_PARSE_OLDSAX
13254
 * - XML_PARSE_IGNORE_ENC
13255
 * - XML_PARSE_BIG_LINES
13256
 *
13257
 * @deprecated Use #xmlCtxtSetOptions.
13258
 *
13259
 * @param ctxt  an XML parser context
13260
 * @param options  a combination of xmlParserOption
13261
 * @returns 0 in case of success, the set of unknown or unimplemented options
13262
 *         in case of error.
13263
 */
13264
int
13265
xmlCtxtUseOptions(xmlParserCtxt *ctxt, int options)
13266
47.2k
{
13267
47.2k
    int keepMask;
13268
13269
47.2k
#ifdef LIBXML_HTML_ENABLED
13270
47.2k
    if ((ctxt != NULL) && (ctxt->html))
13271
0
        return(htmlCtxtUseOptions(ctxt, options));
13272
47.2k
#endif
13273
13274
    /*
13275
     * For historic reasons, some options can only be enabled.
13276
     */
13277
47.2k
    keepMask = XML_PARSE_NOERROR |
13278
47.2k
               XML_PARSE_NOWARNING |
13279
47.2k
               XML_PARSE_NONET |
13280
47.2k
               XML_PARSE_NSCLEAN |
13281
47.2k
               XML_PARSE_NOCDATA |
13282
47.2k
               XML_PARSE_COMPACT |
13283
47.2k
               XML_PARSE_OLD10 |
13284
47.2k
               XML_PARSE_HUGE |
13285
47.2k
               XML_PARSE_OLDSAX |
13286
47.2k
               XML_PARSE_IGNORE_ENC |
13287
47.2k
               XML_PARSE_BIG_LINES;
13288
13289
47.2k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13290
47.2k
}
13291
13292
/**
13293
 * To protect against exponential entity expansion ("billion laughs"), the
13294
 * size of serialized output is (roughly) limited to the input size
13295
 * multiplied by this factor. The default value is 5.
13296
 *
13297
 * When working with documents making heavy use of entity expansion, it can
13298
 * be necessary to increase the value. For security reasons, this should only
13299
 * be considered when processing trusted input.
13300
 *
13301
 * @param ctxt  an XML parser context
13302
 * @param maxAmpl  maximum amplification factor
13303
 */
13304
void
13305
xmlCtxtSetMaxAmplification(xmlParserCtxt *ctxt, unsigned maxAmpl)
13306
0
{
13307
0
    if (ctxt == NULL)
13308
0
        return;
13309
0
    ctxt->maxAmpl = maxAmpl;
13310
0
}
13311
13312
/**
13313
 * Parse an XML document and return the resulting document tree.
13314
 * Takes ownership of the input object.
13315
 *
13316
 * @since 2.13.0
13317
 *
13318
 * @param ctxt  an XML parser context
13319
 * @param input  parser input
13320
 * @returns the resulting document tree or NULL
13321
 */
13322
xmlDoc *
13323
xmlCtxtParseDocument(xmlParserCtxt *ctxt, xmlParserInput *input)
13324
39.6k
{
13325
39.6k
    xmlDocPtr ret = NULL;
13326
13327
39.6k
    if ((ctxt == NULL) || (input == NULL)) {
13328
0
        xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
13329
0
        xmlFreeInputStream(input);
13330
0
        return(NULL);
13331
0
    }
13332
13333
    /* assert(ctxt->inputNr == 0); */
13334
39.6k
    while (ctxt->inputNr > 0)
13335
0
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13336
13337
39.6k
    if (xmlCtxtPushInput(ctxt, input) < 0) {
13338
3
        xmlFreeInputStream(input);
13339
3
        return(NULL);
13340
3
    }
13341
13342
39.6k
    xmlParseDocument(ctxt);
13343
13344
39.6k
    ret = xmlCtxtGetDocument(ctxt);
13345
13346
    /* assert(ctxt->inputNr == 1); */
13347
79.7k
    while (ctxt->inputNr > 0)
13348
40.1k
        xmlFreeInputStream(xmlCtxtPopInput(ctxt));
13349
13350
39.6k
    return(ret);
13351
39.6k
}
13352
13353
/**
13354
 * Convenience function to parse an XML document from a
13355
 * zero-terminated string.
13356
 *
13357
 * See #xmlCtxtReadDoc for details.
13358
 *
13359
 * @param cur  a pointer to a zero terminated string
13360
 * @param URL  base URL (optional)
13361
 * @param encoding  the document encoding (optional)
13362
 * @param options  a combination of xmlParserOption
13363
 * @returns the resulting document tree
13364
 */
13365
xmlDoc *
13366
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13367
           int options)
13368
0
{
13369
0
    xmlParserCtxtPtr ctxt;
13370
0
    xmlParserInputPtr input;
13371
0
    xmlDocPtr doc = NULL;
13372
13373
0
    ctxt = xmlNewParserCtxt();
13374
0
    if (ctxt == NULL)
13375
0
        return(NULL);
13376
13377
0
    xmlCtxtUseOptions(ctxt, options);
13378
13379
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13380
0
                                      XML_INPUT_BUF_STATIC);
13381
13382
0
    if (input != NULL)
13383
0
        doc = xmlCtxtParseDocument(ctxt, input);
13384
13385
0
    xmlFreeParserCtxt(ctxt);
13386
0
    return(doc);
13387
0
}
13388
13389
/**
13390
 * Convenience function to parse an XML file from the filesystem
13391
 * or a global, user-defined resource loader.
13392
 *
13393
 * If a "-" filename is passed, the function will read from stdin.
13394
 * This feature is potentially insecure and might be removed from
13395
 * later versions.
13396
 *
13397
 * See #xmlCtxtReadFile for details.
13398
 *
13399
 * @param filename  a file or URL
13400
 * @param encoding  the document encoding (optional)
13401
 * @param options  a combination of xmlParserOption
13402
 * @returns the resulting document tree
13403
 */
13404
xmlDoc *
13405
xmlReadFile(const char *filename, const char *encoding, int options)
13406
0
{
13407
0
    xmlParserCtxtPtr ctxt;
13408
0
    xmlParserInputPtr input;
13409
0
    xmlDocPtr doc = NULL;
13410
13411
0
    ctxt = xmlNewParserCtxt();
13412
0
    if (ctxt == NULL)
13413
0
        return(NULL);
13414
13415
0
    xmlCtxtUseOptions(ctxt, options);
13416
13417
    /*
13418
     * Backward compatibility for users of command line utilities like
13419
     * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13420
     * should be removed at some point.
13421
     */
13422
0
    if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13423
0
        input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13424
0
                                      encoding, 0);
13425
0
    else
13426
0
        input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13427
13428
0
    if (input != NULL)
13429
0
        doc = xmlCtxtParseDocument(ctxt, input);
13430
13431
0
    xmlFreeParserCtxt(ctxt);
13432
0
    return(doc);
13433
0
}
13434
13435
/**
13436
 * Parse an XML in-memory document and build a tree. The input buffer must
13437
 * not contain a terminating null byte.
13438
 *
13439
 * See #xmlCtxtReadMemory for details.
13440
 *
13441
 * @param buffer  a pointer to a char array
13442
 * @param size  the size of the array
13443
 * @param url  base URL (optional)
13444
 * @param encoding  the document encoding (optional)
13445
 * @param options  a combination of xmlParserOption
13446
 * @returns the resulting document tree
13447
 */
13448
xmlDoc *
13449
xmlReadMemory(const char *buffer, int size, const char *url,
13450
              const char *encoding, int options)
13451
0
{
13452
0
    xmlParserCtxtPtr ctxt;
13453
0
    xmlParserInputPtr input;
13454
0
    xmlDocPtr doc = NULL;
13455
13456
0
    if (size < 0)
13457
0
  return(NULL);
13458
13459
0
    ctxt = xmlNewParserCtxt();
13460
0
    if (ctxt == NULL)
13461
0
        return(NULL);
13462
13463
0
    xmlCtxtUseOptions(ctxt, options);
13464
13465
0
    input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13466
0
                                      XML_INPUT_BUF_STATIC);
13467
13468
0
    if (input != NULL)
13469
0
        doc = xmlCtxtParseDocument(ctxt, input);
13470
13471
0
    xmlFreeParserCtxt(ctxt);
13472
0
    return(doc);
13473
0
}
13474
13475
/**
13476
 * Parse an XML from a file descriptor and build a tree.
13477
 *
13478
 * See #xmlCtxtReadFd for details.
13479
 *
13480
 * NOTE that the file descriptor will not be closed when the
13481
 * context is freed or reset.
13482
 *
13483
 * @param fd  an open file descriptor
13484
 * @param URL  base URL (optional)
13485
 * @param encoding  the document encoding (optional)
13486
 * @param options  a combination of xmlParserOption
13487
 * @returns the resulting document tree
13488
 */
13489
xmlDoc *
13490
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13491
0
{
13492
0
    xmlParserCtxtPtr ctxt;
13493
0
    xmlParserInputPtr input;
13494
0
    xmlDocPtr doc = NULL;
13495
13496
0
    ctxt = xmlNewParserCtxt();
13497
0
    if (ctxt == NULL)
13498
0
        return(NULL);
13499
13500
0
    xmlCtxtUseOptions(ctxt, options);
13501
13502
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13503
13504
0
    if (input != NULL)
13505
0
        doc = xmlCtxtParseDocument(ctxt, input);
13506
13507
0
    xmlFreeParserCtxt(ctxt);
13508
0
    return(doc);
13509
0
}
13510
13511
/**
13512
 * Parse an XML document from I/O functions and context and build a tree.
13513
 *
13514
 * See #xmlCtxtReadIO for details.
13515
 *
13516
 * @param ioread  an I/O read function
13517
 * @param ioclose  an I/O close function (optional)
13518
 * @param ioctx  an I/O handler
13519
 * @param URL  base URL (optional)
13520
 * @param encoding  the document encoding (optional)
13521
 * @param options  a combination of xmlParserOption
13522
 * @returns the resulting document tree
13523
 */
13524
xmlDoc *
13525
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13526
          void *ioctx, const char *URL, const char *encoding, int options)
13527
0
{
13528
0
    xmlParserCtxtPtr ctxt;
13529
0
    xmlParserInputPtr input;
13530
0
    xmlDocPtr doc = NULL;
13531
13532
0
    ctxt = xmlNewParserCtxt();
13533
0
    if (ctxt == NULL)
13534
0
        return(NULL);
13535
13536
0
    xmlCtxtUseOptions(ctxt, options);
13537
13538
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13539
0
                                  encoding, 0);
13540
13541
0
    if (input != NULL)
13542
0
        doc = xmlCtxtParseDocument(ctxt, input);
13543
13544
0
    xmlFreeParserCtxt(ctxt);
13545
0
    return(doc);
13546
0
}
13547
13548
/**
13549
 * Parse an XML in-memory document and build a tree.
13550
 *
13551
 * `URL` is used as base to resolve external entities and for error
13552
 * reporting.
13553
 *
13554
 * @param ctxt  an XML parser context
13555
 * @param str  a pointer to a zero terminated string
13556
 * @param URL  base URL (optional)
13557
 * @param encoding  the document encoding (optional)
13558
 * @param options  a combination of xmlParserOption
13559
 * @returns the resulting document tree
13560
 */
13561
xmlDoc *
13562
xmlCtxtReadDoc(xmlParserCtxt *ctxt, const xmlChar *str,
13563
               const char *URL, const char *encoding, int options)
13564
0
{
13565
0
    xmlParserInputPtr input;
13566
13567
0
    if (ctxt == NULL)
13568
0
        return(NULL);
13569
13570
0
    xmlCtxtReset(ctxt);
13571
0
    xmlCtxtUseOptions(ctxt, options);
13572
13573
0
    input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
13574
0
                                      XML_INPUT_BUF_STATIC);
13575
0
    if (input == NULL)
13576
0
        return(NULL);
13577
13578
0
    return(xmlCtxtParseDocument(ctxt, input));
13579
0
}
13580
13581
/**
13582
 * Parse an XML file from the filesystem or a global, user-defined
13583
 * resource loader.
13584
 *
13585
 * @param ctxt  an XML parser context
13586
 * @param filename  a file or URL
13587
 * @param encoding  the document encoding (optional)
13588
 * @param options  a combination of xmlParserOption
13589
 * @returns the resulting document tree
13590
 */
13591
xmlDoc *
13592
xmlCtxtReadFile(xmlParserCtxt *ctxt, const char *filename,
13593
                const char *encoding, int options)
13594
47.3k
{
13595
47.3k
    xmlParserInputPtr input;
13596
13597
47.3k
    if (ctxt == NULL)
13598
67
        return(NULL);
13599
13600
47.2k
    xmlCtxtReset(ctxt);
13601
47.2k
    xmlCtxtUseOptions(ctxt, options);
13602
13603
47.2k
    input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13604
47.2k
    if (input == NULL)
13605
7.66k
        return(NULL);
13606
13607
39.6k
    return(xmlCtxtParseDocument(ctxt, input));
13608
47.2k
}
13609
13610
/**
13611
 * Parse an XML in-memory document and build a tree. The input buffer must
13612
 * not contain a terminating null byte.
13613
 *
13614
 * `URL` is used as base to resolve external entities and for error
13615
 * reporting.
13616
 *
13617
 * @param ctxt  an XML parser context
13618
 * @param buffer  a pointer to a char array
13619
 * @param size  the size of the array
13620
 * @param URL  base URL (optional)
13621
 * @param encoding  the document encoding (optional)
13622
 * @param options  a combination of xmlParserOption
13623
 * @returns the resulting document tree
13624
 */
13625
xmlDoc *
13626
xmlCtxtReadMemory(xmlParserCtxt *ctxt, const char *buffer, int size,
13627
                  const char *URL, const char *encoding, int options)
13628
0
{
13629
0
    xmlParserInputPtr input;
13630
13631
0
    if ((ctxt == NULL) || (size < 0))
13632
0
        return(NULL);
13633
13634
0
    xmlCtxtReset(ctxt);
13635
0
    xmlCtxtUseOptions(ctxt, options);
13636
13637
0
    input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
13638
0
                                      XML_INPUT_BUF_STATIC);
13639
0
    if (input == NULL)
13640
0
        return(NULL);
13641
13642
0
    return(xmlCtxtParseDocument(ctxt, input));
13643
0
}
13644
13645
/**
13646
 * Parse an XML document from a file descriptor and build a tree.
13647
 *
13648
 * NOTE that the file descriptor will not be closed when the
13649
 * context is freed or reset.
13650
 *
13651
 * `URL` is used as base to resolve external entities and for error
13652
 * reporting.
13653
 *
13654
 * @param ctxt  an XML parser context
13655
 * @param fd  an open file descriptor
13656
 * @param URL  base URL (optional)
13657
 * @param encoding  the document encoding (optional)
13658
 * @param options  a combination of xmlParserOption
13659
 * @returns the resulting document tree
13660
 */
13661
xmlDoc *
13662
xmlCtxtReadFd(xmlParserCtxt *ctxt, int fd,
13663
              const char *URL, const char *encoding, int options)
13664
0
{
13665
0
    xmlParserInputPtr input;
13666
13667
0
    if (ctxt == NULL)
13668
0
        return(NULL);
13669
13670
0
    xmlCtxtReset(ctxt);
13671
0
    xmlCtxtUseOptions(ctxt, options);
13672
13673
0
    input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13674
0
    if (input == NULL)
13675
0
        return(NULL);
13676
13677
0
    return(xmlCtxtParseDocument(ctxt, input));
13678
0
}
13679
13680
/**
13681
 * Parse an XML document from I/O functions and source and build a tree.
13682
 * This reuses the existing `ctxt` parser context
13683
 *
13684
 * `URL` is used as base to resolve external entities and for error
13685
 * reporting.
13686
 *
13687
 * @param ctxt  an XML parser context
13688
 * @param ioread  an I/O read function
13689
 * @param ioclose  an I/O close function
13690
 * @param ioctx  an I/O handler
13691
 * @param URL  the base URL to use for the document
13692
 * @param encoding  the document encoding, or NULL
13693
 * @param options  a combination of xmlParserOption
13694
 * @returns the resulting document tree
13695
 */
13696
xmlDoc *
13697
xmlCtxtReadIO(xmlParserCtxt *ctxt, xmlInputReadCallback ioread,
13698
              xmlInputCloseCallback ioclose, void *ioctx,
13699
        const char *URL,
13700
              const char *encoding, int options)
13701
0
{
13702
0
    xmlParserInputPtr input;
13703
13704
0
    if (ctxt == NULL)
13705
0
        return(NULL);
13706
13707
0
    xmlCtxtReset(ctxt);
13708
0
    xmlCtxtUseOptions(ctxt, options);
13709
13710
0
    input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13711
0
                                  encoding, 0);
13712
0
    if (input == NULL)
13713
0
        return(NULL);
13714
13715
0
    return(xmlCtxtParseDocument(ctxt, input));
13716
0
}
13717