Coverage Report

Created: 2024-02-25 06:19

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#ifdef LIBXML_CATALOG_ENABLED
66
#include <libxml/catalog.h>
67
#endif
68
69
#include "private/buf.h"
70
#include "private/dict.h"
71
#include "private/entities.h"
72
#include "private/error.h"
73
#include "private/html.h"
74
#include "private/io.h"
75
#include "private/parser.h"
76
77
2.00M
#define NS_INDEX_EMPTY  INT_MAX
78
199k
#define NS_INDEX_XML    (INT_MAX - 1)
79
787k
#define URI_HASH_EMPTY  0xD943A04E
80
81.9k
#define URI_HASH_XML    0xF0451F02
81
82
/*
 * Record made of a prefix string, a URI string, a line number and an
 * "nsNr" integer.
 * NOTE(review): presumably one record per open element start tag, with
 * nsNr counting namespaces declared on it -- the users of this struct
 * are not visible in this excerpt; confirm before relying on it.
 */
struct _xmlStartTag {
83
    const xmlChar *prefix;
84
    const xmlChar *URI;
85
    int line;
86
    int nsNr;
87
};
88
89
/*
 * Per-namespace side record: an opaque SAX data pointer, two hash
 * values (prefix and URI), an element id and an "old index".
 * NOTE(review): presumably oldIndex remembers the binding shadowed by
 * this one -- the code using it is outside this excerpt; confirm.
 */
typedef struct {
90
    void *saxData;
91
    unsigned prefixHashValue;
92
    unsigned uriHashValue;
93
    unsigned elementId;
94
    int oldIndex;
95
} xmlParserNsExtra;
96
97
/*
 * Hash table slot pairing a hash value with an integer index.
 * NOTE(review): presumably the index refers to an entry of the parser's
 * namespace table -- not visible in this excerpt; confirm.
 */
typedef struct {
98
    unsigned hashValue;
99
    int index;
100
} xmlParserNsBucket;
101
102
/*
 * Namespace bookkeeping: a pointer to xmlParserNsExtra records, a
 * table of xmlParserNsBucket slots with its size and element count,
 * an element id and two namespace indices.
 * NOTE(review): presumably "extra" is an array parallel to the parser's
 * namespace table -- the code maintaining these fields is outside this
 * excerpt; confirm.
 */
struct _xmlParserNsData {
103
    xmlParserNsExtra *extra;
104
105
    unsigned hashSize;
106
    unsigned hashElems;
107
    xmlParserNsBucket *hash;
108
109
    unsigned elementId;
110
    int defaultNsIndex;
111
    int minNsIndex;
112
};
113
114
/*
 * Attribute hash slot holding a single integer index.
 * NOTE(review): presumably an index into the parser's attribute array
 * -- not visible in this excerpt; confirm.
 */
struct _xmlAttrHashBucket {
115
    int index;
116
};
117
118
static int
119
xmlParseElementStart(xmlParserCtxtPtr ctxt);
120
121
static void
122
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
123
124
static xmlEntityPtr
125
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
126
127
static const xmlChar *
128
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
129
130
/************************************************************************
131
 *                  *
132
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
133
 *                  *
134
 ************************************************************************/
135
136
#define XML_PARSER_BIG_ENTITY 1000
137
#define XML_PARSER_LOT_ENTITY 5000
138
139
/*
140
 * Constants for protection against abusive entity expansion
141
 * ("billion laughs").
142
 */
143
144
/*
145
 * A certain amount of entity expansion which is always allowed.
146
 */
147
717k
#define XML_PARSER_ALLOWED_EXPANSION 1000000
148
149
/*
150
 * Fixed cost for each entity reference. This crudely models processing time
151
 * as well to protect, for example, against exponential expansion of empty
152
 * or very short entities.
153
 */
154
722k
#define XML_ENT_FIXED_COST 20
155
156
/**
157
 * xmlParserMaxDepth:
158
 *
159
 * arbitrary depth limit for the XML documents that we allow to
160
 * process. This is not a limitation of the parser but a safety
161
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
162
 * parser option.
163
 */
164
const unsigned int xmlParserMaxDepth = 256;
165
166
167
168
100M
#define XML_PARSER_BIG_BUFFER_SIZE 300
169
253k
#define XML_PARSER_BUFFER_SIZE 100
170
124k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
171
172
/**
173
 * XML_PARSER_CHUNK_SIZE
174
 *
175
 * When calling GROW that's the minimal amount of data
176
 * the parser expects to have received. It is not a hard
177
 * limit but an optimization when reading strings like Names
178
 * It is not strictly needed as long as the input's available characters
179
 * are followed by 0, which should be provided by the I/O level
180
 */
181
#define XML_PARSER_CHUNK_SIZE 100
182
183
/**
184
 * xmlParserVersion:
185
 *
186
 * Constant string describing the internal version of the library
187
 */
188
const char *const
189
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
190
191
/*
192
 * List of XML prefixed PI allowed by W3C specs
193
 */
194
195
static const char* const xmlW3CPIs[] = {
196
    "xml-stylesheet",
197
    "xml-model",
198
    NULL
199
};
200
201
202
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
203
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
204
                                              const xmlChar **str);
205
206
static void
207
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
208
209
static int
210
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
211
212
/************************************************************************
213
 *                  *
214
 *    Some factorized error routines        *
215
 *                  *
216
 ************************************************************************/
217
218
static void
219
443
xmlErrMemory(xmlParserCtxtPtr ctxt) {
220
443
    xmlCtxtErrMemory(ctxt);
221
443
}
222
223
/**
224
 * xmlErrAttributeDup:
225
 * @ctxt:  an XML parser context
226
 * @prefix:  the attribute prefix
227
 * @localname:  the attribute localname
228
 *
229
 * Handle a redefinition of attribute error
230
 */
231
static void
232
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
233
                   const xmlChar * localname)
234
37.5k
{
235
37.5k
    if (prefix == NULL)
236
19.5k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
237
19.5k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
238
19.5k
                   "Attribute %s redefined\n", localname);
239
17.9k
    else
240
17.9k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
17.9k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
242
17.9k
                   "Attribute %s:%s redefined\n", prefix, localname);
243
37.5k
}
244
245
/**
246
 * xmlFatalErrMsg:
247
 * @ctxt:  an XML parser context
248
 * @error:  the error number
249
 * @msg:  the error message
250
 *
251
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
252
 */
253
static void LIBXML_ATTR_FORMAT(3,0)
254
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
255
               const char *msg)
256
33.4M
{
257
33.4M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
258
33.4M
               NULL, NULL, NULL, 0, "%s", msg);
259
33.4M
}
260
261
/**
262
 * xmlWarningMsg:
263
 * @ctxt:  an XML parser context
264
 * @error:  the error number
265
 * @msg:  the error message
266
 * @str1:  extra data
267
 * @str2:  extra data
268
 *
269
 * Handle a warning.
270
 */
271
void LIBXML_ATTR_FORMAT(3,0)
272
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
273
              const char *msg, const xmlChar *str1, const xmlChar *str2)
274
45.7k
{
275
45.7k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
276
45.7k
               str1, str2, NULL, 0, msg, str1, str2);
277
45.7k
}
278
279
/**
280
 * xmlValidityError:
281
 * @ctxt:  an XML parser context
282
 * @error:  the error number
283
 * @msg:  the error message
284
 * @str1:  extra data
285
 *
286
 * Handle a validity error.
287
 */
288
static void LIBXML_ATTR_FORMAT(3,0)
289
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
290
              const char *msg, const xmlChar *str1, const xmlChar *str2)
291
565
{
292
565
    ctxt->valid = 0;
293
294
565
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
295
565
               str1, str2, NULL, 0, msg, str1, str2);
296
565
}
297
298
/**
299
 * xmlFatalErrMsgInt:
300
 * @ctxt:  an XML parser context
301
 * @error:  the error number
302
 * @msg:  the error message
303
 * @val:  an integer value
304
 *
305
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
306
 */
307
static void LIBXML_ATTR_FORMAT(3,0)
308
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
309
                  const char *msg, int val)
310
7.79M
{
311
7.79M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
312
7.79M
               NULL, NULL, NULL, val, msg, val);
313
7.79M
}
314
315
/**
316
 * xmlFatalErrMsgStrIntStr:
317
 * @ctxt:  an XML parser context
318
 * @error:  the error number
319
 * @msg:  the error message
320
 * @str1:  an string info
321
 * @val:  an integer value
322
 * @str2:  an string info
323
 *
324
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
325
 */
326
static void LIBXML_ATTR_FORMAT(3,0)
327
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
328
                  const char *msg, const xmlChar *str1, int val,
329
      const xmlChar *str2)
330
2.39M
{
331
2.39M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
332
2.39M
               str1, str2, NULL, val, msg, str1, val, str2);
333
2.39M
}
334
335
/**
336
 * xmlFatalErrMsgStr:
337
 * @ctxt:  an XML parser context
338
 * @error:  the error number
339
 * @msg:  the error message
340
 * @val:  a string value
341
 *
342
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
343
 */
344
static void LIBXML_ATTR_FORMAT(3,0)
345
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
346
                  const char *msg, const xmlChar * val)
347
1.17M
{
348
1.17M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
349
1.17M
               val, NULL, NULL, 0, msg, val);
350
1.17M
}
351
352
/**
353
 * xmlErrMsgStr:
354
 * @ctxt:  an XML parser context
355
 * @error:  the error number
356
 * @msg:  the error message
357
 * @val:  a string value
358
 *
359
 * Handle a non fatal parser error
360
 */
361
static void LIBXML_ATTR_FORMAT(3,0)
362
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363
                  const char *msg, const xmlChar * val)
364
7.02k
{
365
7.02k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366
7.02k
               val, NULL, NULL, 0, msg, val);
367
7.02k
}
368
369
/**
370
 * xmlNsErr:
371
 * @ctxt:  an XML parser context
372
 * @error:  the error number
373
 * @msg:  the message
374
 * @info1:  extra information string
375
 * @info2:  extra information string
376
 *
377
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378
 */
379
static void LIBXML_ATTR_FORMAT(3,0)
380
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381
         const char *msg,
382
         const xmlChar * info1, const xmlChar * info2,
383
         const xmlChar * info3)
384
1.15M
{
385
1.15M
    ctxt->nsWellFormed = 0;
386
387
1.15M
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388
1.15M
               info1, info2, info3, 0, msg, info1, info2, info3);
389
1.15M
}
390
391
/**
392
 * xmlNsWarn
393
 * @ctxt:  an XML parser context
394
 * @error:  the error number
395
 * @msg:  the message
396
 * @info1:  extra information string
397
 * @info2:  extra information string
398
 *
399
 * Handle a namespace warning error
400
 */
401
static void LIBXML_ATTR_FORMAT(3,0)
402
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403
         const char *msg,
404
         const xmlChar * info1, const xmlChar * info2,
405
         const xmlChar * info3)
406
14.8k
{
407
14.8k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408
14.8k
               info1, info2, info3, 0, msg, info1, info2, info3);
409
14.8k
}
410
411
/**
 * xmlSaturatedAdd:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around on overflow.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* How much can still be added before the accumulator overflows. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : *dst + val;
}
418
419
/**
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator, updated in place
 * @val:  amount to add, as a size_t
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around on overflow.
 *
 * @val is taken as size_t so that, on platforms where size_t is wider
 * than unsigned long, a large value saturates the accumulator instead
 * of being silently truncated before the overflow check.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        /* val <= ULONG_MAX - *dst here, so the cast cannot truncate. */
        *dst += (unsigned long) val;
}
426
427
/**
428
 * xmlParserEntityCheck:
429
 * @ctxt:  parser context
430
 * @extra:  sum of unexpanded entity sizes
431
 *
432
 * Check for non-linear entity expansion behaviour.
433
 *
434
 * In some cases like xmlExpandEntityInAttValue, this function is called
435
 * for each, possibly nested entity and its unexpanded content length.
436
 *
437
 * In other cases like xmlParseReference, it's only called for each
438
 * top-level entity with its unexpanded content length plus the sum of
439
 * the unexpanded content lengths (plus fixed cost) of all nested
440
 * entities.
441
 *
442
 * Summing the unexpanded lengths also adds the length of the reference.
443
 * This is by design. Taking the length of the entity name into account
444
 * discourages attacks that try to waste CPU time with abusively long
445
 * entity names. See test/recurse/lol6.xml for example. Each call also
446
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
447
 * short entities.
448
 *
449
 * Returns 1 on error, 0 on success.
450
 */
451
static int
452
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
453
717k
{
454
717k
    unsigned long consumed;
455
717k
    unsigned long *expandedSize;
456
717k
    xmlParserInputPtr input = ctxt->input;
457
717k
    xmlEntityPtr entity = input->entity;
458
459
717k
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
460
11
        return(0);
461
462
    /*
463
     * Compute total consumed bytes so far, including input streams of
464
     * external entities.
465
     */
466
717k
    consumed = input->consumed;
467
717k
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
468
717k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
469
470
717k
    if (entity)
471
153
        expandedSize = &entity->expandedSize;
472
717k
    else
473
717k
        expandedSize = &ctxt->sizeentcopy;
474
475
    /*
476
     * Add extra cost and some fixed cost.
477
     */
478
717k
    xmlSaturatedAdd(expandedSize, extra);
479
717k
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
480
481
    /*
482
     * It's important to always use saturation arithmetic when tracking
483
     * entity sizes to make the size checks reliable. If "sizeentcopy"
484
     * overflows, we have to abort.
485
     */
486
717k
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
487
717k
        ((*expandedSize >= ULONG_MAX) ||
488
309k
         (*expandedSize / ctxt->maxAmpl > consumed))) {
489
234
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
490
234
                       "Maximum entity amplification factor exceeded, see "
491
234
                       "xmlCtxtSetMaxAmplification.\n");
492
234
        xmlHaltParser(ctxt);
493
234
        return(1);
494
234
    }
495
496
717k
    return(0);
497
717k
}
498
499
/************************************************************************
500
 *                  *
501
 *    Library wide options          *
502
 *                  *
503
 ************************************************************************/
504
505
/**
506
  * xmlHasFeature:
507
  * @feature: the feature to be examined
508
  *
509
  * Examines if the library has been compiled with a given feature.
510
  *
511
  * Returns a non-zero value if the feature exist, otherwise zero.
512
  * Returns zero (0) if the feature does not exist or an unknown
513
  * unknown feature is requested, non-zero otherwise.
514
  */
515
int
516
xmlHasFeature(xmlFeature feature)
517
0
{
518
0
    switch (feature) {
519
0
  case XML_WITH_THREAD:
520
0
#ifdef LIBXML_THREAD_ENABLED
521
0
      return(1);
522
#else
523
      return(0);
524
#endif
525
0
        case XML_WITH_TREE:
526
0
#ifdef LIBXML_TREE_ENABLED
527
0
            return(1);
528
#else
529
            return(0);
530
#endif
531
0
        case XML_WITH_OUTPUT:
532
0
#ifdef LIBXML_OUTPUT_ENABLED
533
0
            return(1);
534
#else
535
            return(0);
536
#endif
537
0
        case XML_WITH_PUSH:
538
#ifdef LIBXML_PUSH_ENABLED
539
            return(1);
540
#else
541
0
            return(0);
542
0
#endif
543
0
        case XML_WITH_READER:
544
#ifdef LIBXML_READER_ENABLED
545
            return(1);
546
#else
547
0
            return(0);
548
0
#endif
549
0
        case XML_WITH_PATTERN:
550
0
#ifdef LIBXML_PATTERN_ENABLED
551
0
            return(1);
552
#else
553
            return(0);
554
#endif
555
0
        case XML_WITH_WRITER:
556
#ifdef LIBXML_WRITER_ENABLED
557
            return(1);
558
#else
559
0
            return(0);
560
0
#endif
561
0
        case XML_WITH_SAX1:
562
#ifdef LIBXML_SAX1_ENABLED
563
            return(1);
564
#else
565
0
            return(0);
566
0
#endif
567
0
        case XML_WITH_FTP:
568
#ifdef LIBXML_FTP_ENABLED
569
            return(1);
570
#else
571
0
            return(0);
572
0
#endif
573
0
        case XML_WITH_HTTP:
574
0
#ifdef LIBXML_HTTP_ENABLED
575
0
            return(1);
576
#else
577
            return(0);
578
#endif
579
0
        case XML_WITH_VALID:
580
#ifdef LIBXML_VALID_ENABLED
581
            return(1);
582
#else
583
0
            return(0);
584
0
#endif
585
0
        case XML_WITH_HTML:
586
0
#ifdef LIBXML_HTML_ENABLED
587
0
            return(1);
588
#else
589
            return(0);
590
#endif
591
0
        case XML_WITH_LEGACY:
592
#ifdef LIBXML_LEGACY_ENABLED
593
            return(1);
594
#else
595
0
            return(0);
596
0
#endif
597
0
        case XML_WITH_C14N:
598
#ifdef LIBXML_C14N_ENABLED
599
            return(1);
600
#else
601
0
            return(0);
602
0
#endif
603
0
        case XML_WITH_CATALOG:
604
0
#ifdef LIBXML_CATALOG_ENABLED
605
0
            return(1);
606
#else
607
            return(0);
608
#endif
609
0
        case XML_WITH_XPATH:
610
0
#ifdef LIBXML_XPATH_ENABLED
611
0
            return(1);
612
#else
613
            return(0);
614
#endif
615
0
        case XML_WITH_XPTR:
616
0
#ifdef LIBXML_XPTR_ENABLED
617
0
            return(1);
618
#else
619
            return(0);
620
#endif
621
0
        case XML_WITH_XINCLUDE:
622
0
#ifdef LIBXML_XINCLUDE_ENABLED
623
0
            return(1);
624
#else
625
            return(0);
626
#endif
627
0
        case XML_WITH_ICONV:
628
0
#ifdef LIBXML_ICONV_ENABLED
629
0
            return(1);
630
#else
631
            return(0);
632
#endif
633
0
        case XML_WITH_ISO8859X:
634
0
#ifdef LIBXML_ISO8859X_ENABLED
635
0
            return(1);
636
#else
637
            return(0);
638
#endif
639
0
        case XML_WITH_UNICODE:
640
#ifdef LIBXML_UNICODE_ENABLED
641
            return(1);
642
#else
643
0
            return(0);
644
0
#endif
645
0
        case XML_WITH_REGEXP:
646
#ifdef LIBXML_REGEXP_ENABLED
647
            return(1);
648
#else
649
0
            return(0);
650
0
#endif
651
0
        case XML_WITH_AUTOMATA:
652
#ifdef LIBXML_AUTOMATA_ENABLED
653
            return(1);
654
#else
655
0
            return(0);
656
0
#endif
657
0
        case XML_WITH_EXPR:
658
#ifdef LIBXML_EXPR_ENABLED
659
            return(1);
660
#else
661
0
            return(0);
662
0
#endif
663
0
        case XML_WITH_SCHEMAS:
664
#ifdef LIBXML_SCHEMAS_ENABLED
665
            return(1);
666
#else
667
0
            return(0);
668
0
#endif
669
0
        case XML_WITH_SCHEMATRON:
670
#ifdef LIBXML_SCHEMATRON_ENABLED
671
            return(1);
672
#else
673
0
            return(0);
674
0
#endif
675
0
        case XML_WITH_MODULES:
676
0
#ifdef LIBXML_MODULES_ENABLED
677
0
            return(1);
678
#else
679
            return(0);
680
#endif
681
0
        case XML_WITH_DEBUG:
682
0
#ifdef LIBXML_DEBUG_ENABLED
683
0
            return(1);
684
#else
685
            return(0);
686
#endif
687
0
        case XML_WITH_DEBUG_MEM:
688
#ifdef DEBUG_MEMORY_LOCATION
689
            return(1);
690
#else
691
0
            return(0);
692
0
#endif
693
0
        case XML_WITH_ZLIB:
694
#ifdef LIBXML_ZLIB_ENABLED
695
            return(1);
696
#else
697
0
            return(0);
698
0
#endif
699
0
        case XML_WITH_LZMA:
700
#ifdef LIBXML_LZMA_ENABLED
701
            return(1);
702
#else
703
0
            return(0);
704
0
#endif
705
0
        case XML_WITH_ICU:
706
#ifdef LIBXML_ICU_ENABLED
707
            return(1);
708
#else
709
0
            return(0);
710
0
#endif
711
0
        default:
712
0
      break;
713
0
     }
714
0
     return(0);
715
0
}
716
717
/************************************************************************
718
 *                  *
719
 *      Simple string buffer        *
720
 *                  *
721
 ************************************************************************/
722
723
/*
 * Simple growable byte buffer with a hard size limit.
 *
 * mem:  heap storage; NULL after xmlSBufInit until the first growth
 * size: number of bytes currently stored
 * cap:  number of bytes allocated for mem
 * max:  size limit; an append that would exceed it records
 *       XML_ERR_RESOURCE_LIMIT instead of storing data
 * code: XML_ERR_OK, or the error recorded by a failed append/growth
 *       (XML_ERR_RESOURCE_LIMIT or XML_ERR_NO_MEMORY)
 */
typedef struct {
724
    xmlChar *mem;
725
    unsigned size;
726
    unsigned cap; /* size < cap */
727
    unsigned max; /* size <= max */
728
    xmlParserErrors code;
729
} xmlSBuf;
730
731
static void
732
1.26M
xmlSBufInit(xmlSBuf *buf, unsigned max) {
733
1.26M
    buf->mem = NULL;
734
1.26M
    buf->size = 0;
735
1.26M
    buf->cap = 0;
736
1.26M
    buf->max = max;
737
1.26M
    buf->code = XML_ERR_OK;
738
1.26M
}
739
740
static int
741
217k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
742
217k
    xmlChar *mem;
743
217k
    unsigned cap;
744
745
217k
    if (len >= UINT_MAX / 2 - buf->size) {
746
0
        buf->code = XML_ERR_RESOURCE_LIMIT;
747
0
        return(-1);
748
0
    }
749
750
217k
    cap = (buf->size + len) * 2;
751
217k
    if (cap < 240)
752
152k
        cap = 240;
753
754
217k
    mem = xmlRealloc(buf->mem, cap);
755
217k
    if (mem == NULL) {
756
22.0k
        buf->code = XML_ERR_NO_MEMORY;
757
22.0k
        return(-1);
758
22.0k
    }
759
760
195k
    buf->mem = mem;
761
195k
    buf->cap = cap;
762
763
195k
    return(0);
764
217k
}
765
766
static void
767
89.6M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
768
89.6M
    if (buf->max - buf->size < len) {
769
62.0k
        buf->code = XML_ERR_RESOURCE_LIMIT;
770
62.0k
        return;
771
62.0k
    }
772
773
89.6M
    if (buf->cap - buf->size <= len) {
774
214k
        if (xmlSBufGrow(buf, len) < 0)
775
21.5k
            return;
776
214k
    }
777
778
89.6M
    if (len > 0)
779
89.6M
        memcpy(buf->mem + buf->size, str, len);
780
89.6M
    buf->size += len;
781
89.6M
}
782
783
static void
784
87.9M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
785
87.9M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
786
87.9M
}
787
788
static void
789
302k
xmlSBufAddChar(xmlSBuf *buf, int c) {
790
302k
    xmlChar *end;
791
792
302k
    if (buf->max - buf->size < 4) {
793
132
        buf->code = XML_ERR_RESOURCE_LIMIT;
794
132
        return;
795
132
    }
796
797
302k
    if (buf->cap - buf->size <= 4) {
798
2.46k
        if (xmlSBufGrow(buf, 4) < 0)
799
447
            return;
800
2.46k
    }
801
802
302k
    end = buf->mem + buf->size;
803
804
302k
    if (c < 0x80) {
805
46.2k
        *end = (xmlChar) c;
806
46.2k
        buf->size += 1;
807
255k
    } else {
808
255k
        buf->size += xmlCopyCharMultiByte(end, c);
809
255k
    }
810
302k
}
811
812
static void
813
61.7M
xmlSBufAddReplChar(xmlSBuf *buf) {
814
61.7M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
815
61.7M
}
816
817
static void
818
67
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
819
67
    if (buf->code == XML_ERR_NO_MEMORY)
820
38
        xmlCtxtErrMemory(ctxt);
821
29
    else
822
29
        xmlFatalErr(ctxt, buf->code, errMsg);
823
67
}
824
825
static xmlChar *
826
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
827
164k
              const char *errMsg) {
828
164k
    if (buf->mem == NULL) {
829
19.9k
        buf->mem = xmlMalloc(1);
830
19.9k
        if (buf->mem == NULL) {
831
8
            buf->code = XML_ERR_NO_MEMORY;
832
19.9k
        } else {
833
19.9k
            buf->mem[0] = 0;
834
19.9k
        }
835
144k
    } else {
836
144k
        buf->mem[buf->size] = 0;
837
144k
    }
838
839
164k
    if (buf->code == XML_ERR_OK) {
840
164k
        if (sizeOut != NULL)
841
90.1k
            *sizeOut = buf->size;
842
164k
        return(buf->mem);
843
164k
    }
844
845
25
    xmlSBufReportError(buf, ctxt, errMsg);
846
847
25
    xmlFree(buf->mem);
848
849
25
    if (sizeOut != NULL)
850
16
        *sizeOut = 0;
851
25
    return(NULL);
852
164k
}
853
854
static void
855
1.05M
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
856
1.05M
    if (buf->code != XML_ERR_OK)
857
42
        xmlSBufReportError(buf, ctxt, errMsg);
858
859
1.05M
    xmlFree(buf->mem);
860
1.05M
}
861
862
static int
863
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
864
219M
                    const char *errMsg) {
865
219M
    int c = str[0];
866
219M
    int c1 = str[1];
867
868
219M
    if ((c1 & 0xC0) != 0x80)
869
22.9M
        goto encoding_error;
870
871
196M
    if (c < 0xE0) {
872
        /* 2-byte sequence */
873
11.8M
        if (c < 0xC2)
874
8.49M
            goto encoding_error;
875
876
3.38M
        return(2);
877
184M
    } else {
878
184M
        int c2 = str[2];
879
880
184M
        if ((c2 & 0xC0) != 0x80)
881
35.3k
            goto encoding_error;
882
883
184M
        if (c < 0xF0) {
884
            /* 3-byte sequence */
885
184M
            if (c == 0xE0) {
886
                /* overlong */
887
179M
                if (c1 < 0xA0)
888
453
                    goto encoding_error;
889
179M
            } else if (c == 0xED) {
890
                /* surrogate */
891
807
                if (c1 >= 0xA0)
892
90
                    goto encoding_error;
893
4.82M
            } else if (c == 0xEF) {
894
                /* U+FFFE and U+FFFF are invalid Chars */
895
371k
                if ((c1 == 0xBF) && (c2 >= 0xBE))
896
4.80k
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
897
371k
            }
898
899
184M
            return(3);
900
184M
        } else {
901
            /* 4-byte sequence */
902
33.0k
            if ((str[3] & 0xC0) != 0x80)
903
4.17k
                goto encoding_error;
904
28.8k
            if (c == 0xF0) {
905
                /* overlong */
906
5.36k
                if (c1 < 0x90)
907
1.18k
                    goto encoding_error;
908
23.4k
            } else if (c >= 0xF4) {
909
                /* greater than 0x10FFFF */
910
6.11k
                if ((c > 0xF4) || (c1 >= 0x90))
911
5.83k
                    goto encoding_error;
912
6.11k
            }
913
914
21.8k
            return(4);
915
28.8k
        }
916
184M
    }
917
918
31.4M
encoding_error:
919
    /* Only report the first error */
920
31.4M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
921
3.36k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
922
3.36k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
923
3.36k
    }
924
925
31.4M
    return(0);
926
196M
}
927
928
/************************************************************************
929
 *                  *
930
 *    SAX2 defaulted attributes handling      *
931
 *                  *
932
 ************************************************************************/
933
934
/**
935
 * xmlCtxtInitializeLate:
936
 * @ctxt:  an XML parser context
937
 *
938
 * Final initialization of the parser context before starting to parse.
939
 *
940
 * This accounts for users modifying struct members of parser context
941
 * directly.
942
 */
943
static void
944
91.6k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
945
91.6k
    xmlSAXHandlerPtr sax;
946
947
    /* Avoid unused variable warning if features are disabled. */
948
91.6k
    (void) sax;
949
950
    /*
951
     * Changing the SAX struct directly is still widespread practice
952
     * in internal and external code.
953
     */
954
91.6k
    if (ctxt == NULL) return;
955
91.6k
    sax = ctxt->sax;
956
#ifdef LIBXML_SAX1_ENABLED
957
    /*
958
     * Only enable SAX2 if there SAX2 element handlers, except when there
959
     * are no element handlers at all.
960
     */
961
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
962
        (sax) &&
963
        (sax->initialized == XML_SAX2_MAGIC) &&
964
        ((sax->startElementNs != NULL) ||
965
         (sax->endElementNs != NULL) ||
966
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
967
        ctxt->sax2 = 1;
968
#else
969
91.6k
    ctxt->sax2 = 1;
970
91.6k
#endif /* LIBXML_SAX1_ENABLED */
971
972
    /*
973
     * Some users replace the dictionary directly in the context struct.
974
     * We really need an API function to do that cleanly.
975
     */
976
91.6k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
977
91.6k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
978
91.6k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
979
91.6k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
980
91.6k
    (ctxt->str_xml_ns == NULL)) {
981
0
        xmlErrMemory(ctxt);
982
0
    }
983
91.6k
}
984
985
/*
 * One defaulted attribute: hashed prefix, name and value strings, a
 * pointer named valueEnd, an "external" flag and an expanded size.
 * NOTE(review): presumably valueEnd points just past the value and
 * "external" marks defaults declared in the external subset -- the
 * code filling these fields is outside this excerpt; confirm.
 */
typedef struct {
986
    xmlHashedString prefix;
987
    xmlHashedString name;
988
    xmlHashedString value;
989
    const xmlChar *valueEnd;
990
    int external;
991
    int expandedSize;
992
} xmlDefAttr;
993
994
/*
 * Variable-length list of defaulted attributes for one element: a
 * count, a capacity and a trailing array of xmlDefAttr entries. The
 * trailing array is a flexible array member when compiling as C99 or
 * later and a one-element array otherwise.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
995
typedef xmlDefAttrs *xmlDefAttrsPtr;
996
struct _xmlDefAttrs {
997
    int nbAttrs;  /* number of defaulted attributes on that element */
998
    int maxAttrs;       /* the size of the array */
999
#if __STDC_VERSION__ >= 199901L
1000
    /* Using a C99 flexible array member avoids UBSan errors. */
1001
    xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
1002
#else
1003
    xmlDefAttr attrs[1];
1004
#endif
1005
};
1006
1007
/**
1008
 * xmlAttrNormalizeSpace:
1009
 * @src: the source string
1010
 * @dst: the target string
1011
 *
1012
 * Normalize the space in non CDATA attribute values:
1013
 * If the attribute type is not CDATA, then the XML processor MUST further
1014
 * process the normalized attribute value by discarding any leading and
1015
 * trailing space (#x20) characters, and by replacing sequences of space
1016
 * (#x20) characters by a single space (#x20) character.
1017
 * Note that the size of dst need to be at least src, and if one doesn't need
1018
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1019
 * passing src as dst is just fine.
1020
 *
1021
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1022
 *         is needed.
1023
 */
1024
static xmlChar *
1025
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1026
24.0k
{
1027
24.0k
    if ((src == NULL) || (dst == NULL))
1028
0
        return(NULL);
1029
1030
30.3k
    while (*src == 0x20) src++;
1031
1.76M
    while (*src != 0) {
1032
1.73M
  if (*src == 0x20) {
1033
1.22M
      while (*src == 0x20) src++;
1034
17.7k
      if (*src != 0)
1035
12.9k
    *dst++ = 0x20;
1036
1.71M
  } else {
1037
1.71M
      *dst++ = *src++;
1038
1.71M
  }
1039
1.73M
    }
1040
24.0k
    *dst = 0;
1041
24.0k
    if (dst == src)
1042
18.2k
       return(NULL);
1043
5.80k
    return(dst);
1044
24.0k
}
1045
1046
/**
1047
 * xmlAddDefAttrs:
1048
 * @ctxt:  an XML parser context
1049
 * @fullname:  the element fullname
1050
 * @fullattr:  the attribute fullname
1051
 * @value:  the attribute value
1052
 *
1053
 * Add a defaulted attribute for an element
1054
 */
1055
static void
1056
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1057
               const xmlChar *fullname,
1058
               const xmlChar *fullattr,
1059
24.7k
               const xmlChar *value) {
1060
24.7k
    xmlDefAttrsPtr defaults;
1061
24.7k
    xmlDefAttr *attr;
1062
24.7k
    int len, expandedSize;
1063
24.7k
    xmlHashedString name;
1064
24.7k
    xmlHashedString prefix;
1065
24.7k
    xmlHashedString hvalue;
1066
24.7k
    const xmlChar *localname;
1067
1068
    /*
1069
     * Allows to detect attribute redefinitions
1070
     */
1071
24.7k
    if (ctxt->attsSpecial != NULL) {
1072
20.8k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1073
8.64k
      return;
1074
20.8k
    }
1075
1076
16.1k
    if (ctxt->attsDefault == NULL) {
1077
3.98k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1078
3.98k
  if (ctxt->attsDefault == NULL)
1079
3
      goto mem_error;
1080
3.98k
    }
1081
1082
    /*
1083
     * split the element name into prefix:localname , the string found
1084
     * are within the DTD and then not associated to namespace names.
1085
     */
1086
16.1k
    localname = xmlSplitQName3(fullname, &len);
1087
16.1k
    if (localname == NULL) {
1088
11.7k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1089
11.7k
  prefix.name = NULL;
1090
11.7k
    } else {
1091
4.34k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1092
4.34k
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1093
4.34k
        if (prefix.name == NULL)
1094
0
            goto mem_error;
1095
4.34k
    }
1096
16.1k
    if (name.name == NULL)
1097
0
        goto mem_error;
1098
1099
    /*
1100
     * make sure there is some storage
1101
     */
1102
16.1k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1103
16.1k
    if ((defaults == NULL) ||
1104
16.1k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1105
6.03k
        xmlDefAttrsPtr temp;
1106
6.03k
        int newSize;
1107
1108
6.03k
        newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1109
6.03k
        temp = xmlRealloc(defaults,
1110
6.03k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1111
6.03k
  if (temp == NULL)
1112
1
      goto mem_error;
1113
6.03k
        if (defaults == NULL)
1114
4.56k
            temp->nbAttrs = 0;
1115
6.03k
  temp->maxAttrs = newSize;
1116
6.03k
        defaults = temp;
1117
6.03k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1118
6.03k
                          defaults, NULL) < 0) {
1119
0
      xmlFree(defaults);
1120
0
      goto mem_error;
1121
0
  }
1122
6.03k
    }
1123
1124
    /*
1125
     * Split the attribute name into prefix:localname , the string found
1126
     * are within the DTD and then not associated to namespace names.
1127
     */
1128
16.1k
    localname = xmlSplitQName3(fullattr, &len);
1129
16.1k
    if (localname == NULL) {
1130
9.05k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1131
9.05k
  prefix.name = NULL;
1132
9.05k
    } else {
1133
7.08k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1134
7.08k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1135
7.08k
        if (prefix.name == NULL)
1136
1
            goto mem_error;
1137
7.08k
    }
1138
16.1k
    if (name.name == NULL)
1139
0
        goto mem_error;
1140
1141
    /* intern the string and precompute the end */
1142
16.1k
    len = strlen((const char *) value);
1143
16.1k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1144
16.1k
    if (hvalue.name == NULL)
1145
0
        goto mem_error;
1146
1147
16.1k
    expandedSize = strlen((const char *) name.name);
1148
16.1k
    if (prefix.name != NULL)
1149
7.08k
        expandedSize += strlen((const char *) prefix.name);
1150
16.1k
    expandedSize += len;
1151
1152
16.1k
    attr = &defaults->attrs[defaults->nbAttrs++];
1153
16.1k
    attr->name = name;
1154
16.1k
    attr->prefix = prefix;
1155
16.1k
    attr->value = hvalue;
1156
16.1k
    attr->valueEnd = hvalue.name + len;
1157
16.1k
    attr->external = PARSER_EXTERNAL(ctxt);
1158
16.1k
    attr->expandedSize = expandedSize;
1159
1160
16.1k
    return;
1161
1162
5
mem_error:
1163
5
    xmlErrMemory(ctxt);
1164
5
    return;
1165
16.1k
}
1166
1167
/**
1168
 * xmlAddSpecialAttr:
1169
 * @ctxt:  an XML parser context
1170
 * @fullname:  the element fullname
1171
 * @fullattr:  the attribute fullname
1172
 * @type:  the attribute type
1173
 *
1174
 * Register this attribute type
1175
 */
1176
static void
1177
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1178
      const xmlChar *fullname,
1179
      const xmlChar *fullattr,
1180
      int type)
1181
25.8k
{
1182
25.8k
    if (ctxt->attsSpecial == NULL) {
1183
4.32k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1184
4.32k
  if (ctxt->attsSpecial == NULL)
1185
5
      goto mem_error;
1186
4.32k
    }
1187
1188
25.7k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1189
25.7k
                    (void *) (ptrdiff_t) type) < 0)
1190
0
        goto mem_error;
1191
25.7k
    return;
1192
1193
25.7k
mem_error:
1194
5
    xmlErrMemory(ctxt);
1195
5
    return;
1196
25.7k
}
1197
1198
/**
1199
 * xmlCleanSpecialAttrCallback:
1200
 *
1201
 * Removes CDATA attributes from the special attribute table
1202
 */
1203
static void
1204
xmlCleanSpecialAttrCallback(void *payload, void *data,
1205
                            const xmlChar *fullname, const xmlChar *fullattr,
1206
16.6k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1207
16.6k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1208
1209
16.6k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1210
1.24k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1211
1.24k
    }
1212
16.6k
}
1213
1214
/**
1215
 * xmlCleanSpecialAttr:
1216
 * @ctxt:  an XML parser context
1217
 *
1218
 * Trim the list of attributes defined to remove all those of type
1219
 * CDATA as they are not special. This call should be done when finishing
1220
 * to parse the DTD and before starting to parse the document root.
1221
 */
1222
static void
1223
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1224
21.2k
{
1225
21.2k
    if (ctxt->attsSpecial == NULL)
1226
16.9k
        return;
1227
1228
4.31k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1229
1230
4.31k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1231
295
        xmlHashFree(ctxt->attsSpecial, NULL);
1232
295
        ctxt->attsSpecial = NULL;
1233
295
    }
1234
4.31k
    return;
1235
21.2k
}
1236
1237
/**
1238
 * xmlCheckLanguageID:
1239
 * @lang:  pointer to the string value
1240
 *
1241
 * DEPRECATED: Internal function, do not use.
1242
 *
1243
 * Checks that the value conforms to the LanguageID production:
1244
 *
1245
 * NOTE: this is somewhat deprecated, those productions were removed from
1246
 *       the XML Second edition.
1247
 *
1248
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1249
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1250
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1251
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1252
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1253
 * [38] Subcode ::= ([a-z] | [A-Z])+
1254
 *
1255
 * The current REC references the successors of RFC 1766, currently RFC 5646
1256
 *
1257
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1258
 * langtag       = language
1259
 *                 ["-" script]
1260
 *                 ["-" region]
1261
 *                 *("-" variant)
1262
 *                 *("-" extension)
1263
 *                 ["-" privateuse]
1264
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1265
 *                 ["-" extlang]       ; sometimes followed by
1266
 *                                     ; extended language subtags
1267
 *               / 4ALPHA              ; or reserved for future use
1268
 *               / 5*8ALPHA            ; or registered language subtag
1269
 *
1270
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1271
 *                 *2("-" 3ALPHA)      ; permanently reserved
1272
 *
1273
 * script        = 4ALPHA              ; ISO 15924 code
1274
 *
1275
 * region        = 2ALPHA              ; ISO 3166-1 code
1276
 *               / 3DIGIT              ; UN M.49 code
1277
 *
1278
 * variant       = 5*8alphanum         ; registered variants
1279
 *               / (DIGIT 3alphanum)
1280
 *
1281
 * extension     = singleton 1*("-" (2*8alphanum))
1282
 *
1283
 *                                     ; Single alphanumerics
1284
 *                                     ; "x" reserved for private use
1285
 * singleton     = DIGIT               ; 0 - 9
1286
 *               / %x41-57             ; A - W
1287
 *               / %x59-5A             ; Y - Z
1288
 *               / %x61-77             ; a - w
1289
 *               / %x79-7A             ; y - z
1290
 *
1291
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1292
 * The parser below doesn't try to cope with extension or privateuse
1293
 * that could be added but that's not interoperable anyway
1294
 *
1295
 * Returns 1 if correct 0 otherwise
1296
 **/
1297
int
1298
xmlCheckLanguageID(const xmlChar * lang)
1299
0
{
1300
0
    const xmlChar *cur = lang, *nxt;
1301
1302
0
    if (cur == NULL)
1303
0
        return (0);
1304
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1305
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1306
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1307
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1308
        /*
1309
         * Still allow IANA code and user code which were coming
1310
         * from the previous version of the XML-1.0 specification
1311
         * it's deprecated but we should not fail
1312
         */
1313
0
        cur += 2;
1314
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1315
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1316
0
            cur++;
1317
0
        return(cur[0] == 0);
1318
0
    }
1319
0
    nxt = cur;
1320
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1321
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1322
0
           nxt++;
1323
0
    if (nxt - cur >= 4) {
1324
        /*
1325
         * Reserved
1326
         */
1327
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1328
0
            return(0);
1329
0
        return(1);
1330
0
    }
1331
0
    if (nxt - cur < 2)
1332
0
        return(0);
1333
    /* we got an ISO 639 code */
1334
0
    if (nxt[0] == 0)
1335
0
        return(1);
1336
0
    if (nxt[0] != '-')
1337
0
        return(0);
1338
1339
0
    nxt++;
1340
0
    cur = nxt;
1341
    /* now we can have extlang or script or region or variant */
1342
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1343
0
        goto region_m49;
1344
1345
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1346
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1347
0
           nxt++;
1348
0
    if (nxt - cur == 4)
1349
0
        goto script;
1350
0
    if (nxt - cur == 2)
1351
0
        goto region;
1352
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1353
0
        goto variant;
1354
0
    if (nxt - cur != 3)
1355
0
        return(0);
1356
    /* we parsed an extlang */
1357
0
    if (nxt[0] == 0)
1358
0
        return(1);
1359
0
    if (nxt[0] != '-')
1360
0
        return(0);
1361
1362
0
    nxt++;
1363
0
    cur = nxt;
1364
    /* now we can have script or region or variant */
1365
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1366
0
        goto region_m49;
1367
1368
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1369
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1370
0
           nxt++;
1371
0
    if (nxt - cur == 2)
1372
0
        goto region;
1373
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1374
0
        goto variant;
1375
0
    if (nxt - cur != 4)
1376
0
        return(0);
1377
    /* we parsed a script */
1378
0
script:
1379
0
    if (nxt[0] == 0)
1380
0
        return(1);
1381
0
    if (nxt[0] != '-')
1382
0
        return(0);
1383
1384
0
    nxt++;
1385
0
    cur = nxt;
1386
    /* now we can have region or variant */
1387
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1388
0
        goto region_m49;
1389
1390
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1391
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1392
0
           nxt++;
1393
1394
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1395
0
        goto variant;
1396
0
    if (nxt - cur != 2)
1397
0
        return(0);
1398
    /* we parsed a region */
1399
0
region:
1400
0
    if (nxt[0] == 0)
1401
0
        return(1);
1402
0
    if (nxt[0] != '-')
1403
0
        return(0);
1404
1405
0
    nxt++;
1406
0
    cur = nxt;
1407
    /* now we can just have a variant */
1408
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1409
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1410
0
           nxt++;
1411
1412
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1413
0
        return(0);
1414
1415
    /* we parsed a variant */
1416
0
variant:
1417
0
    if (nxt[0] == 0)
1418
0
        return(1);
1419
0
    if (nxt[0] != '-')
1420
0
        return(0);
1421
    /* extensions and private use subtags not checked */
1422
0
    return (1);
1423
1424
0
region_m49:
1425
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1426
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1427
0
        nxt += 3;
1428
0
        goto region;
1429
0
    }
1430
0
    return(0);
1431
0
}
1432
1433
/************************************************************************
1434
 *                  *
1435
 *    Parser stacks related functions and macros    *
1436
 *                  *
1437
 ************************************************************************/
1438
1439
static xmlChar *
1440
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1441
1442
/**
1443
 * xmlParserNsCreate:
1444
 *
1445
 * Create a new namespace database.
1446
 *
1447
 * Returns the new object.
1448
 */
1449
xmlParserNsData *
1450
94.0k
xmlParserNsCreate(void) {
1451
94.0k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1452
1453
94.0k
    if (nsdb == NULL)
1454
9
        return(NULL);
1455
94.0k
    memset(nsdb, 0, sizeof(*nsdb));
1456
94.0k
    nsdb->defaultNsIndex = INT_MAX;
1457
1458
94.0k
    return(nsdb);
1459
94.0k
}
1460
1461
/**
1462
 * xmlParserNsFree:
1463
 * @nsdb: namespace database
1464
 *
1465
 * Free a namespace database.
1466
 */
1467
void
1468
94.0k
xmlParserNsFree(xmlParserNsData *nsdb) {
1469
94.0k
    if (nsdb == NULL)
1470
0
        return;
1471
1472
94.0k
    xmlFree(nsdb->extra);
1473
94.0k
    xmlFree(nsdb->hash);
1474
94.0k
    xmlFree(nsdb);
1475
94.0k
}
1476
1477
/**
1478
 * xmlParserNsReset:
1479
 * @nsdb: namespace database
1480
 *
1481
 * Reset a namespace database.
1482
 */
1483
static void
1484
0
xmlParserNsReset(xmlParserNsData *nsdb) {
1485
0
    if (nsdb == NULL)
1486
0
        return;
1487
1488
0
    nsdb->hashElems = 0;
1489
0
    nsdb->elementId = 0;
1490
0
    nsdb->defaultNsIndex = INT_MAX;
1491
1492
0
    if (nsdb->hash)
1493
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1494
0
}
1495
1496
/**
1497
 * xmlParserNsStartElement:
1498
 * @nsdb: namespace database
1499
 *
1500
 * Signal that a new element has started.
1501
 *
1502
 * Returns 0 on success, -1 if the element counter overflowed.
1503
 */
1504
static int
1505
4.55M
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1506
4.55M
    if (nsdb->elementId == UINT_MAX)
1507
0
        return(-1);
1508
4.55M
    nsdb->elementId++;
1509
1510
4.55M
    return(0);
1511
4.55M
}
1512
1513
/**
1514
 * xmlParserNsLookup:
1515
 * @ctxt: parser context
1516
 * @prefix: namespace prefix
1517
 * @bucketPtr: optional bucket (return value)
1518
 *
1519
 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1520
 * be set to the matching bucket, or the first empty bucket if no match
1521
 * was found.
1522
 *
1523
 * Returns the namespace index on success, INT_MAX if no namespace was
1524
 * found.
1525
 */
1526
static int
1527
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1528
5.16M
                  xmlParserNsBucket **bucketPtr) {
1529
5.16M
    xmlParserNsBucket *bucket;
1530
5.16M
    unsigned index, hashValue;
1531
1532
5.16M
    if (prefix->name == NULL)
1533
3.20M
        return(ctxt->nsdb->defaultNsIndex);
1534
1535
1.96M
    if (ctxt->nsdb->hashSize == 0)
1536
177k
        return(INT_MAX);
1537
1538
1.78M
    hashValue = prefix->hashValue;
1539
1.78M
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1540
1.78M
    bucket = &ctxt->nsdb->hash[index];
1541
1542
17.1M
    while (bucket->hashValue) {
1543
16.3M
        if ((bucket->hashValue == hashValue) &&
1544
16.3M
            (bucket->index != INT_MAX)) {
1545
986k
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1546
986k
                if (bucketPtr != NULL)
1547
140k
                    *bucketPtr = bucket;
1548
986k
                return(bucket->index);
1549
986k
            }
1550
986k
        }
1551
1552
15.3M
        index++;
1553
15.3M
        bucket++;
1554
15.3M
        if (index == ctxt->nsdb->hashSize) {
1555
31.0k
            index = 0;
1556
31.0k
            bucket = ctxt->nsdb->hash;
1557
31.0k
        }
1558
15.3M
    }
1559
1560
796k
    if (bucketPtr != NULL)
1561
34.6k
        *bucketPtr = bucket;
1562
796k
    return(INT_MAX);
1563
1.78M
}
1564
1565
/**
1566
 * xmlParserNsLookupUri:
1567
 * @ctxt: parser context
1568
 * @prefix: namespace prefix
1569
 *
1570
 * Lookup namespace URI with given prefix.
1571
 *
1572
 * Returns the namespace URI on success, NULL if no namespace was found.
1573
 */
1574
static const xmlChar *
1575
3.81M
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1576
3.81M
    const xmlChar *ret;
1577
3.81M
    int nsIndex;
1578
1579
3.81M
    if (prefix->name == ctxt->str_xml)
1580
584
        return(ctxt->str_xml_ns);
1581
1582
    /*
1583
     * minNsIndex is used when building an entity tree. We must
1584
     * ignore namespaces declared outside the entity.
1585
     */
1586
3.81M
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1587
3.81M
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1588
3.07M
        return(NULL);
1589
1590
744k
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1591
744k
    if (ret[0] == 0)
1592
13.4k
        ret = NULL;
1593
744k
    return(ret);
1594
3.81M
}
1595
1596
/**
1597
 * xmlParserNsLookupSax:
1598
 * @ctxt: parser context
1599
 * @prefix: namespace prefix
1600
 *
1601
 * Lookup extra data for the given prefix. This returns data stored
1602
 * with xmlParserNsUpdateSax.
1603
 *
1604
 * Returns the data on success, NULL if no namespace was found.
1605
 */
1606
void *
1607
727k
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1608
727k
    xmlHashedString hprefix;
1609
727k
    int nsIndex;
1610
1611
727k
    if (prefix == ctxt->str_xml)
1612
94.8k
        return(NULL);
1613
1614
632k
    hprefix.name = prefix;
1615
632k
    if (prefix != NULL)
1616
339k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1617
292k
    else
1618
292k
        hprefix.hashValue = 0;
1619
632k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1620
632k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1621
0
        return(NULL);
1622
1623
632k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1624
632k
}
1625
1626
/**
1627
 * xmlParserNsUpdateSax:
1628
 * @ctxt: parser context
1629
 * @prefix: namespace prefix
1630
 * @saxData: extra data for SAX handler
1631
 *
1632
 * Sets or updates extra data for the given prefix. This value will be
1633
 * returned by xmlParserNsLookupSax as long as the namespace with the
1634
 * given prefix is in scope.
1635
 *
1636
 * Returns 0 on success, -1 if the prefix is "xml" or no namespace was found.
1637
 */
1638
int
1639
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1640
110k
                     void *saxData) {
1641
110k
    xmlHashedString hprefix;
1642
110k
    int nsIndex;
1643
1644
110k
    if (prefix == ctxt->str_xml)
1645
0
        return(-1);
1646
1647
110k
    hprefix.name = prefix;
1648
110k
    if (prefix != NULL)
1649
92.7k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1650
17.2k
    else
1651
17.2k
        hprefix.hashValue = 0;
1652
110k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1653
110k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1654
0
        return(-1);
1655
1656
110k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1657
110k
    return(0);
1658
110k
}
1659
1660
/**
1661
 * xmlParserNsGrow:
1662
 * @ctxt: parser context
1663
 *
1664
 * Grows the namespace tables.
1665
 *
1666
 * Returns 0 on success, -1 if a memory allocation failed.
1667
 */
1668
static int
1669
51.5k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1670
51.5k
    const xmlChar **table;
1671
51.5k
    xmlParserNsExtra *extra;
1672
51.5k
    int newSize;
1673
1674
51.5k
    if (ctxt->nsMax > INT_MAX / 2)
1675
0
        goto error;
1676
51.5k
    newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1677
1678
51.5k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1679
51.5k
    if (table == NULL)
1680
11
        goto error;
1681
51.5k
    ctxt->nsTab = table;
1682
1683
51.5k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1684
51.5k
    if (extra == NULL)
1685
6
        goto error;
1686
51.5k
    ctxt->nsdb->extra = extra;
1687
1688
51.5k
    ctxt->nsMax = newSize;
1689
51.5k
    return(0);
1690
1691
17
error:
1692
17
    xmlErrMemory(ctxt);
1693
17
    return(-1);
1694
51.5k
}
1695
1696
/**
1697
 * xmlParserNsPush:
1698
 * @ctxt: parser context
1699
 * @prefix: prefix with hash value
1700
 * @uri: uri with hash value
1701
 * @saxData: extra data for SAX handler
1702
 * @defAttr: whether the namespace comes from a default attribute
1703
 *
1704
 * Push a new namespace on the table.
1705
 *
1706
 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1707
 * -1 if a memory allocation failed.
1708
 */
1709
static int
1710
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1711
147k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1712
147k
    xmlParserNsBucket *bucket = NULL;
1713
147k
    xmlParserNsExtra *extra;
1714
147k
    const xmlChar **ns;
1715
147k
    unsigned hashValue, nsIndex, oldIndex;
1716
1717
147k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1718
70
        return(0);
1719
1720
147k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1721
17
        xmlErrMemory(ctxt);
1722
17
        return(-1);
1723
17
    }
1724
1725
    /*
1726
     * Default namespace and 'xml' namespace
1727
     */
1728
147k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1729
26.3k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1730
1731
26.3k
        if (oldIndex != INT_MAX) {
1732
15.2k
            extra = &ctxt->nsdb->extra[oldIndex];
1733
1734
15.2k
            if (extra->elementId == ctxt->nsdb->elementId) {
1735
1.23k
                if (defAttr == 0)
1736
415
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1737
1.23k
                return(0);
1738
1.23k
            }
1739
1740
14.0k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1741
14.0k
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1742
0
                return(0);
1743
14.0k
        }
1744
1745
25.1k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1746
25.1k
        goto populate_entry;
1747
26.3k
    }
1748
1749
    /*
1750
     * Hash table lookup
1751
     */
1752
121k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1753
121k
    if (oldIndex != INT_MAX) {
1754
36.3k
        extra = &ctxt->nsdb->extra[oldIndex];
1755
1756
        /*
1757
         * Check for duplicate definitions on the same element.
1758
         */
1759
36.3k
        if (extra->elementId == ctxt->nsdb->elementId) {
1760
9.04k
            if (defAttr == 0)
1761
8.97k
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1762
9.04k
            return(0);
1763
9.04k
        }
1764
1765
27.2k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1766
27.2k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1767
0
            return(0);
1768
1769
27.2k
        bucket->index = ctxt->nsNr;
1770
27.2k
        goto populate_entry;
1771
27.2k
    }
1772
1773
    /*
1774
     * Insert new bucket
1775
     */
1776
1777
84.8k
    hashValue = prefix->hashValue;
1778
1779
    /*
1780
     * Grow hash table, 50% fill factor
1781
     */
1782
84.8k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1783
50.9k
        xmlParserNsBucket *newHash;
1784
50.9k
        unsigned newSize, i, index;
1785
1786
50.9k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1787
0
            xmlErrMemory(ctxt);
1788
0
            return(-1);
1789
0
        }
1790
50.9k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1791
50.9k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1792
50.9k
        if (newHash == NULL) {
1793
10
            xmlErrMemory(ctxt);
1794
10
            return(-1);
1795
10
        }
1796
50.9k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1797
1798
118k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1799
67.5k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1800
67.5k
            unsigned newIndex;
1801
1802
67.5k
            if (hv == 0)
1803
33.7k
                continue;
1804
33.7k
            newIndex = hv & (newSize - 1);
1805
1806
2.57M
            while (newHash[newIndex].hashValue != 0) {
1807
2.53M
                newIndex++;
1808
2.53M
                if (newIndex == newSize)
1809
3.30k
                    newIndex = 0;
1810
2.53M
            }
1811
1812
33.7k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1813
33.7k
        }
1814
1815
50.9k
        xmlFree(ctxt->nsdb->hash);
1816
50.9k
        ctxt->nsdb->hash = newHash;
1817
50.9k
        ctxt->nsdb->hashSize = newSize;
1818
1819
        /*
1820
         * Relookup
1821
         */
1822
50.9k
        index = hashValue & (newSize - 1);
1823
1824
67.5k
        while (newHash[index].hashValue != 0) {
1825
16.5k
            index++;
1826
16.5k
            if (index == newSize)
1827
174
                index = 0;
1828
16.5k
        }
1829
1830
50.9k
        bucket = &newHash[index];
1831
50.9k
    }
1832
1833
84.8k
    bucket->hashValue = hashValue;
1834
84.8k
    bucket->index = ctxt->nsNr;
1835
84.8k
    ctxt->nsdb->hashElems++;
1836
84.8k
    oldIndex = INT_MAX;
1837
1838
137k
populate_entry:
1839
137k
    nsIndex = ctxt->nsNr;
1840
1841
137k
    ns = &ctxt->nsTab[nsIndex * 2];
1842
137k
    ns[0] = prefix ? prefix->name : NULL;
1843
137k
    ns[1] = uri->name;
1844
1845
137k
    extra = &ctxt->nsdb->extra[nsIndex];
1846
137k
    extra->saxData = saxData;
1847
137k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1848
137k
    extra->uriHashValue = uri->hashValue;
1849
137k
    extra->elementId = ctxt->nsdb->elementId;
1850
137k
    extra->oldIndex = oldIndex;
1851
1852
137k
    ctxt->nsNr++;
1853
1854
137k
    return(1);
1855
84.8k
}
1856
1857
/**
1858
 * xmlParserNsPop:
1859
 * @ctxt: an XML parser context
1860
 * @nr:  the number to pop
1861
 *
1862
 * Pops the top @nr namespaces and restores the hash table.
1863
 *
1864
 * Returns the number of namespaces popped.
1865
 */
1866
static int
1867
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1868
95.5k
{
1869
95.5k
    int i;
1870
1871
    /* assert(nr <= ctxt->nsNr); */
1872
1873
222k
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1874
127k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1875
127k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1876
1877
127k
        if (prefix == NULL) {
1878
23.0k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1879
104k
        } else {
1880
104k
            xmlHashedString hprefix;
1881
104k
            xmlParserNsBucket *bucket = NULL;
1882
1883
104k
            hprefix.name = prefix;
1884
104k
            hprefix.hashValue = extra->prefixHashValue;
1885
104k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1886
            /* assert(bucket && bucket->hashValue); */
1887
104k
            bucket->index = extra->oldIndex;
1888
104k
        }
1889
127k
    }
1890
1891
95.5k
    ctxt->nsNr -= nr;
1892
95.5k
    return(nr);
1893
95.5k
}
1894
1895
static int
1896
53.3k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1897
53.3k
    const xmlChar **atts;
1898
53.3k
    unsigned *attallocs;
1899
53.3k
    int maxatts;
1900
1901
53.3k
    if (nr + 5 > ctxt->maxatts) {
1902
53.3k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1903
53.3k
  atts = (const xmlChar **) xmlMalloc(
1904
53.3k
             maxatts * sizeof(const xmlChar *));
1905
53.3k
  if (atts == NULL) goto mem_error;
1906
53.2k
  attallocs = xmlRealloc(ctxt->attallocs,
1907
53.2k
                               (maxatts / 5) * sizeof(attallocs[0]));
1908
53.2k
  if (attallocs == NULL) {
1909
12
            xmlFree(atts);
1910
12
            goto mem_error;
1911
12
        }
1912
53.2k
        if (ctxt->maxatts > 0)
1913
152
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1914
53.2k
        xmlFree(ctxt->atts);
1915
53.2k
  ctxt->atts = atts;
1916
53.2k
  ctxt->attallocs = attallocs;
1917
53.2k
  ctxt->maxatts = maxatts;
1918
53.2k
    }
1919
53.2k
    return(ctxt->maxatts);
1920
24
mem_error:
1921
24
    xmlErrMemory(ctxt);
1922
24
    return(-1);
1923
53.3k
}
1924
1925
/**
1926
 * inputPush:
1927
 * @ctxt:  an XML parser context
1928
 * @value:  the parser input
1929
 *
1930
 * Pushes a new parser input on top of the input stack
1931
 *
1932
 * Returns -1 in case of error, the index in the stack otherwise
1933
 */
1934
int
1935
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1936
98.4k
{
1937
98.4k
    if ((ctxt == NULL) || (value == NULL))
1938
0
        return(-1);
1939
98.4k
    if (ctxt->inputNr >= ctxt->inputMax) {
1940
0
        size_t newSize = ctxt->inputMax * 2;
1941
0
        xmlParserInputPtr *tmp;
1942
1943
0
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1944
0
                                               newSize * sizeof(*tmp));
1945
0
        if (tmp == NULL) {
1946
0
            xmlErrMemory(ctxt);
1947
0
            return (-1);
1948
0
        }
1949
0
        ctxt->inputTab = tmp;
1950
0
        ctxt->inputMax = newSize;
1951
0
    }
1952
98.4k
    ctxt->inputTab[ctxt->inputNr] = value;
1953
98.4k
    ctxt->input = value;
1954
98.4k
    return (ctxt->inputNr++);
1955
98.4k
}
1956
/**
1957
 * inputPop:
1958
 * @ctxt: an XML parser context
1959
 *
1960
 * Pops the top parser input from the input stack
1961
 *
1962
 * Returns the input just removed
1963
 */
1964
xmlParserInputPtr
1965
inputPop(xmlParserCtxtPtr ctxt)
1966
284k
{
1967
284k
    xmlParserInputPtr ret;
1968
1969
284k
    if (ctxt == NULL)
1970
0
        return(NULL);
1971
284k
    if (ctxt->inputNr <= 0)
1972
188k
        return (NULL);
1973
95.8k
    ctxt->inputNr--;
1974
95.8k
    if (ctxt->inputNr > 0)
1975
6.73k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1976
89.0k
    else
1977
89.0k
        ctxt->input = NULL;
1978
95.8k
    ret = ctxt->inputTab[ctxt->inputNr];
1979
95.8k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1980
95.8k
    return (ret);
1981
284k
}
1982
/**
1983
 * nodePush:
1984
 * @ctxt:  an XML parser context
1985
 * @value:  the element node
1986
 *
1987
 * DEPRECATED: Internal function, do not use.
1988
 *
1989
 * Pushes a new element node on top of the node stack
1990
 *
1991
 * Returns -1 in case of error, the index in the stack otherwise
1992
 */
1993
int
1994
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1995
3.66M
{
1996
3.66M
    int maxDepth;
1997
1998
3.66M
    if (ctxt == NULL)
1999
0
        return(0);
2000
2001
3.66M
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2002
3.66M
    if (ctxt->nodeNr > maxDepth) {
2003
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2004
0
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
2005
0
                ctxt->nodeNr);
2006
0
        xmlHaltParser(ctxt);
2007
0
        return(-1);
2008
0
    }
2009
3.66M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
2010
7.02k
        xmlNodePtr *tmp;
2011
2012
7.02k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
2013
7.02k
                                      ctxt->nodeMax * 2 *
2014
7.02k
                                      sizeof(ctxt->nodeTab[0]));
2015
7.02k
        if (tmp == NULL) {
2016
0
            xmlErrMemory(ctxt);
2017
0
            return (-1);
2018
0
        }
2019
7.02k
        ctxt->nodeTab = tmp;
2020
7.02k
  ctxt->nodeMax *= 2;
2021
7.02k
    }
2022
3.66M
    ctxt->nodeTab[ctxt->nodeNr] = value;
2023
3.66M
    ctxt->node = value;
2024
3.66M
    return (ctxt->nodeNr++);
2025
3.66M
}
2026
2027
/**
2028
 * nodePop:
2029
 * @ctxt: an XML parser context
2030
 *
2031
 * DEPRECATED: Internal function, do not use.
2032
 *
2033
 * Pops the top element node from the node stack
2034
 *
2035
 * Returns the node just removed
2036
 */
2037
xmlNodePtr
2038
nodePop(xmlParserCtxtPtr ctxt)
2039
3.68M
{
2040
3.68M
    xmlNodePtr ret;
2041
2042
3.68M
    if (ctxt == NULL) return(NULL);
2043
3.68M
    if (ctxt->nodeNr <= 0)
2044
31.9k
        return (NULL);
2045
3.65M
    ctxt->nodeNr--;
2046
3.65M
    if (ctxt->nodeNr > 0)
2047
3.60M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2048
49.2k
    else
2049
49.2k
        ctxt->node = NULL;
2050
3.65M
    ret = ctxt->nodeTab[ctxt->nodeNr];
2051
3.65M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2052
3.65M
    return (ret);
2053
3.68M
}
2054
2055
/**
2056
 * nameNsPush:
2057
 * @ctxt:  an XML parser context
2058
 * @value:  the element name
2059
 * @prefix:  the element prefix
2060
 * @URI:  the element namespace name
2061
 * @line:  the current line number for error messages
2062
 * @nsNr:  the number of namespaces pushed on the namespace table
2063
 *
2064
 * Pushes a new element name/prefix/URL on top of the name stack
2065
 *
2066
 * Returns -1 in case of error, the index in the stack otherwise
2067
 */
2068
static int
2069
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2070
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2071
3.81M
{
2072
3.81M
    xmlStartTag *tag;
2073
2074
3.81M
    if (ctxt->nameNr >= ctxt->nameMax) {
2075
9.12k
        const xmlChar * *tmp;
2076
9.12k
        xmlStartTag *tmp2;
2077
9.12k
        ctxt->nameMax *= 2;
2078
9.12k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2079
9.12k
                                    ctxt->nameMax *
2080
9.12k
                                    sizeof(ctxt->nameTab[0]));
2081
9.12k
        if (tmp == NULL) {
2082
0
      ctxt->nameMax /= 2;
2083
0
      goto mem_error;
2084
0
        }
2085
9.12k
  ctxt->nameTab = tmp;
2086
9.12k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2087
9.12k
                                    ctxt->nameMax *
2088
9.12k
                                    sizeof(ctxt->pushTab[0]));
2089
9.12k
        if (tmp2 == NULL) {
2090
1
      ctxt->nameMax /= 2;
2091
1
      goto mem_error;
2092
1
        }
2093
9.12k
  ctxt->pushTab = tmp2;
2094
3.80M
    } else if (ctxt->pushTab == NULL) {
2095
64.9k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2096
64.9k
                                            sizeof(ctxt->pushTab[0]));
2097
64.9k
        if (ctxt->pushTab == NULL)
2098
201
            goto mem_error;
2099
64.9k
    }
2100
3.81M
    ctxt->nameTab[ctxt->nameNr] = value;
2101
3.81M
    ctxt->name = value;
2102
3.81M
    tag = &ctxt->pushTab[ctxt->nameNr];
2103
3.81M
    tag->prefix = prefix;
2104
3.81M
    tag->URI = URI;
2105
3.81M
    tag->line = line;
2106
3.81M
    tag->nsNr = nsNr;
2107
3.81M
    return (ctxt->nameNr++);
2108
202
mem_error:
2109
202
    xmlErrMemory(ctxt);
2110
202
    return (-1);
2111
3.81M
}
2112
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the topmost entry from the name stack. The name below it,
 * if any, becomes the current name; otherwise ctxt->name is set to
 * NULL. The vacated stack slot is cleared.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *top;
    int idx;

    if (ctxt->nameNr <= 0)
        return(NULL);

    idx = --ctxt->nameNr;
    ctxt->name = (idx > 0) ? ctxt->nameTab[idx - 1] : NULL;

    top = ctxt->nameTab[idx];
    ctxt->nameTab[idx] = NULL;
    return(top);
}
#endif /* LIBXML_PUSH_ENABLED */
2138
2139
/**
2140
 * namePush:
2141
 * @ctxt:  an XML parser context
2142
 * @value:  the element name
2143
 *
2144
 * DEPRECATED: Internal function, do not use.
2145
 *
2146
 * Pushes a new element name on top of the name stack
2147
 *
2148
 * Returns -1 in case of error, the index in the stack otherwise
2149
 */
2150
int
2151
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2152
0
{
2153
0
    if (ctxt == NULL) return (-1);
2154
2155
0
    if (ctxt->nameNr >= ctxt->nameMax) {
2156
0
        const xmlChar * *tmp;
2157
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2158
0
                                    ctxt->nameMax * 2 *
2159
0
                                    sizeof(ctxt->nameTab[0]));
2160
0
        if (tmp == NULL) {
2161
0
      goto mem_error;
2162
0
        }
2163
0
  ctxt->nameTab = tmp;
2164
0
        ctxt->nameMax *= 2;
2165
0
    }
2166
0
    ctxt->nameTab[ctxt->nameNr] = value;
2167
0
    ctxt->name = value;
2168
0
    return (ctxt->nameNr++);
2169
0
mem_error:
2170
0
    xmlErrMemory(ctxt);
2171
0
    return (-1);
2172
0
}
2173
2174
/**
2175
 * namePop:
2176
 * @ctxt: an XML parser context
2177
 *
2178
 * DEPRECATED: Internal function, do not use.
2179
 *
2180
 * Pops the top element name from the name stack
2181
 *
2182
 * Returns the name just removed
2183
 */
2184
const xmlChar *
2185
namePop(xmlParserCtxtPtr ctxt)
2186
3.80M
{
2187
3.80M
    const xmlChar *ret;
2188
2189
3.80M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2190
92
        return (NULL);
2191
3.80M
    ctxt->nameNr--;
2192
3.80M
    if (ctxt->nameNr > 0)
2193
3.75M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2194
51.4k
    else
2195
51.4k
        ctxt->name = NULL;
2196
3.80M
    ret = ctxt->nameTab[ctxt->nameNr];
2197
3.80M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2198
3.80M
    return (ret);
2199
3.80M
}
2200
2201
4.55M
/*
 * spacePush:
 *
 * Pushes @val on the xml:space stack and points ctxt->space at the
 * new top entry. The table is doubled when full.
 *
 * Returns -1 if the table could not be enlarged, the index of the new
 * entry otherwise.
 */
static int
spacePush(xmlParserCtxtPtr ctxt, int val)
{
    int *slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            /* Undo the capacity change, the old table is still in use. */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = &ctxt->spaceTab[ctxt->spaceNr];
    *slot = val;
    ctxt->space = slot;
    return(ctxt->spaceNr++);
}
2219
2220
4.54M
/*
 * spacePop:
 *
 * Pops the top entry of the xml:space stack. ctxt->space is moved to
 * the entry below, or to the first table slot if the stack became
 * empty. The vacated slot is overwritten with -1.
 *
 * Returns the value popped, or 0 if the stack was already empty.
 */
static int
spacePop(xmlParserCtxtPtr ctxt)
{
    int idx;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    idx = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(idx > 0) ? idx - 1 : 0];

    popped = ctxt->spaceTab[idx];
    ctxt->spaceTab[idx] = -1;
    return(popped);
}
2232
2233
/*
2234
 * Macros for accessing the content. Those should be used only by the parser,
2235
 * and not exported.
2236
 *
2237
 * Dirty macros, i.e. one often need to make assumption on the context to
2238
 * use them
2239
 *
2240
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2241
 *           To be used with extreme caution since operations consuming
2242
 *           characters may move the input buffer to a different location !
2243
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2244
 *           This should be used internally by the parser
2245
 *           only to compare to ASCII values otherwise it would break when
2246
 *           running with UTF-8 encoding.
2247
 *   RAW     same as CUR but in the input buffer, bypass any token
2248
 *           extraction that may have been done
2249
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
2250
 *           to compare on ASCII based substring.
2251
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2252
 *           strings without newlines within the parser.
2253
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2254
 *           defined char within the parser.
2255
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2256
 *
2257
 *   NEXT    Skip to the next character, this does the proper decoding
2258
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2259
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2260
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2261
 *           to the number of xmlChars used for the encoding [0-5].
2262
 *   CUR_SCHAR  same but operate on a string instead of the context
2263
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2264
 *            the index
2265
 *   GROW, SHRINK  handling of input buffers
2266
 */
2267
2268
31.3M
/* Current byte, n'th following byte and raw pointers of the current input. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)
#define BASE_PTR (ctxt->input->base)

/*
 * CMPn(s, c1, ..., cn) is true if the first n bytes at s are c1 ... cn.
 * The comparison stops at the first mismatch. s is evaluated several
 * times, so it must not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/* Advance by val bytes and columns; refill if a zero byte is reached. */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/* Like SKIP, but keeps line and column right across newlines. */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * NOTE: SHRINK and GROW expand to a complete `if` statement including
 * its terminating semicolon. Do not use them as the unbraced body of
 * an if that has an else branch.
 */
#define SHRINK \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
	(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

/* NOTE: expands to a brace block, not to a single expression. */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);						\
    }

/* Advance over one character that is l bytes long, tracking line/col. */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * NOTE: expands to an if/else statement without a trailing semicolon;
 * i is modified and v is evaluated more than once.
 */
#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2347
2348
/**
2349
 * xmlSkipBlankChars:
2350
 * @ctxt:  the XML parser context
2351
 *
2352
 * DEPRECATED: Internal function, do not use.
2353
 *
2354
 * Skip whitespace in the input stream.
2355
 *
2356
 * Returns the number of space chars skipped
2357
 */
2358
int
2359
9.83M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2360
9.83M
    const xmlChar *cur;
2361
9.83M
    int res = 0;
2362
2363
    /*
2364
     * It's Okay to use CUR/NEXT here since all the blanks are on
2365
     * the ASCII range.
2366
     */
2367
9.83M
    cur = ctxt->input->cur;
2368
9.83M
    while (IS_BLANK_CH(*cur)) {
2369
6.09M
        if (*cur == '\n') {
2370
2.10M
            ctxt->input->line++; ctxt->input->col = 1;
2371
3.98M
        } else {
2372
3.98M
            ctxt->input->col++;
2373
3.98M
        }
2374
6.09M
        cur++;
2375
6.09M
        if (res < INT_MAX)
2376
6.09M
            res++;
2377
6.09M
        if (*cur == 0) {
2378
47.9k
            ctxt->input->cur = cur;
2379
47.9k
            xmlParserGrow(ctxt);
2380
47.9k
            cur = ctxt->input->cur;
2381
47.9k
        }
2382
6.09M
    }
2383
9.83M
    ctxt->input->cur = cur;
2384
2385
9.83M
    return(res);
2386
9.83M
}
2387
2388
static void
2389
4.14k
xmlPopPE(xmlParserCtxtPtr ctxt) {
2390
4.14k
    unsigned long consumed;
2391
4.14k
    xmlEntityPtr ent;
2392
2393
4.14k
    ent = ctxt->input->entity;
2394
2395
4.14k
    ent->flags &= ~XML_ENT_EXPANDING;
2396
2397
4.14k
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2398
464
        int result;
2399
2400
        /*
2401
         * Read the rest of the stream in case of errors. We want
2402
         * to account for the whole entity size.
2403
         */
2404
4.73k
        do {
2405
4.73k
            ctxt->input->cur = ctxt->input->end;
2406
4.73k
            xmlParserShrink(ctxt);
2407
4.73k
            result = xmlParserGrow(ctxt);
2408
4.73k
        } while (result > 0);
2409
2410
464
        consumed = ctxt->input->consumed;
2411
464
        xmlSaturatedAddSizeT(&consumed,
2412
464
                             ctxt->input->end - ctxt->input->base);
2413
2414
464
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2415
2416
        /*
2417
         * Add to sizeentities when parsing an external entity
2418
         * for the first time.
2419
         */
2420
464
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2421
142
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2422
142
        }
2423
2424
464
        ent->flags |= XML_ENT_CHECKED;
2425
464
    }
2426
2427
4.14k
    xmlPopInput(ctxt);
2428
2429
4.14k
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2430
4.14k
}
2431
2432
/**
2433
 * xmlSkipBlankCharsPE:
2434
 * @ctxt:  the XML parser context
2435
 *
2436
 * Skip whitespace in the input stream, also handling parameter
2437
 * entities.
2438
 *
2439
 * Returns the number of space chars skipped
2440
 */
2441
static int
2442
1.39M
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2443
1.39M
    int res = 0;
2444
1.39M
    int inParam;
2445
1.39M
    int expandParam;
2446
2447
1.39M
    inParam = PARSER_IN_PE(ctxt);
2448
1.39M
    expandParam = PARSER_EXTERNAL(ctxt);
2449
2450
1.39M
    if (!inParam && !expandParam)
2451
1.34M
        return(xmlSkipBlankChars(ctxt));
2452
2453
71.6k
    while (PARSER_STOPPED(ctxt) == 0) {
2454
69.7k
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2455
20.7k
            NEXT;
2456
48.9k
        } else if (CUR == '%') {
2457
5.67k
            if ((expandParam == 0) ||
2458
5.67k
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2459
4.25k
                break;
2460
2461
            /*
2462
             * Expand parameter entity. We continue to consume
2463
             * whitespace at the start of the entity and possible
2464
             * even consume the whole entity and pop it. We might
2465
             * even pop multiple PEs in this loop.
2466
             */
2467
1.42k
            xmlParsePEReference(ctxt);
2468
2469
1.42k
            inParam = PARSER_IN_PE(ctxt);
2470
1.42k
            expandParam = PARSER_EXTERNAL(ctxt);
2471
43.3k
        } else if (CUR == 0) {
2472
4.60k
            if (inParam == 0)
2473
621
                break;
2474
2475
3.98k
            xmlPopPE(ctxt);
2476
2477
3.98k
            inParam = PARSER_IN_PE(ctxt);
2478
3.98k
            expandParam = PARSER_EXTERNAL(ctxt);
2479
38.6k
        } else {
2480
38.6k
            break;
2481
38.6k
        }
2482
2483
        /*
2484
         * Also increase the counter when entering or exiting a PERef.
2485
         * The spec says: "When a parameter-entity reference is recognized
2486
         * in the DTD and included, its replacement text MUST be enlarged
2487
         * by the attachment of one leading and one following space (#x20)
2488
         * character."
2489
         */
2490
26.1k
        if (res < INT_MAX)
2491
26.1k
            res++;
2492
26.1k
    }
2493
2494
45.4k
    return(res);
2495
1.39M
}
2496
2497
/************************************************************************
2498
 *                  *
2499
 *    Commodity functions to handle entities      *
2500
 *                  *
2501
 ************************************************************************/
2502
2503
/**
2504
 * xmlPopInput:
2505
 * @ctxt:  an XML parser context
2506
 *
2507
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2508
 *          pop it and return the next char.
2509
 *
2510
 * Returns the current xmlChar in the parser context
2511
 */
2512
xmlChar
2513
4.14k
xmlPopInput(xmlParserCtxtPtr ctxt) {
2514
4.14k
    xmlParserInputPtr input;
2515
2516
4.14k
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2517
4.14k
    input = inputPop(ctxt);
2518
4.14k
    xmlFreeInputStream(input);
2519
4.14k
    if (*ctxt->input->cur == 0)
2520
63
        xmlParserGrow(ctxt);
2521
4.14k
    return(CUR);
2522
4.14k
}
2523
2524
/**
2525
 * xmlPushInput:
2526
 * @ctxt:  an XML parser context
2527
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2528
 *
2529
 * Push an input stream onto the stack.
2530
 *
2531
 * This makes the parser use an input returned from advanced functions
2532
 * like xmlNewInputURL or xmlNewInputMemory.
2533
 *
2534
 * Returns -1 in case of error or the index in the input stack
2535
 */
2536
int
2537
9.44k
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2538
9.44k
    int maxDepth;
2539
9.44k
    int ret;
2540
2541
9.44k
    if ((ctxt == NULL) || (input == NULL))
2542
127
        return(-1);
2543
2544
9.32k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
2545
9.32k
    if (ctxt->inputNr > maxDepth) {
2546
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
2547
0
                       "Maximum entity nesting depth exceeded");
2548
0
        xmlHaltParser(ctxt);
2549
0
  return(-1);
2550
0
    }
2551
9.32k
    ret = inputPush(ctxt, input);
2552
9.32k
    GROW;
2553
9.32k
    return(ret);
2554
9.32k
}
2555
2556
/**
2557
 * xmlParseCharRef:
2558
 * @ctxt:  an XML parser context
2559
 *
2560
 * DEPRECATED: Internal function, don't use.
2561
 *
2562
 * Parse a numeric character reference. Always consumes '&'.
2563
 *
2564
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2565
 *                  '&#x' [0-9a-fA-F]+ ';'
2566
 *
2567
 * [ WFC: Legal Character ]
2568
 * Characters referred to using character references must match the
2569
 * production for Char.
2570
 *
2571
 * Returns the value parsed (as an int), 0 in case of error
2572
 */
2573
int
2574
65.7k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2575
65.7k
    int val = 0;
2576
65.7k
    int count = 0;
2577
2578
    /*
2579
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2580
     */
2581
65.7k
    if ((RAW == '&') && (NXT(1) == '#') &&
2582
65.7k
        (NXT(2) == 'x')) {
2583
28.7k
  SKIP(3);
2584
28.7k
  GROW;
2585
99.9k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2586
77.5k
      if (count++ > 20) {
2587
1.04k
    count = 0;
2588
1.04k
    GROW;
2589
1.04k
      }
2590
77.5k
      if ((RAW >= '0') && (RAW <= '9'))
2591
13.9k
          val = val * 16 + (CUR - '0');
2592
63.6k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2593
32.5k
          val = val * 16 + (CUR - 'a') + 10;
2594
31.0k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2595
24.6k
          val = val * 16 + (CUR - 'A') + 10;
2596
6.41k
      else {
2597
6.41k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2598
6.41k
    val = 0;
2599
6.41k
    break;
2600
6.41k
      }
2601
71.1k
      if (val > 0x110000)
2602
20.7k
          val = 0x110000;
2603
2604
71.1k
      NEXT;
2605
71.1k
      count++;
2606
71.1k
  }
2607
28.7k
  if (RAW == ';') {
2608
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2609
22.3k
      ctxt->input->col++;
2610
22.3k
      ctxt->input->cur++;
2611
22.3k
  }
2612
36.9k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2613
36.9k
  SKIP(2);
2614
36.9k
  GROW;
2615
128k
  while (RAW != ';') { /* loop blocked by count */
2616
99.6k
      if (count++ > 20) {
2617
1.39k
    count = 0;
2618
1.39k
    GROW;
2619
1.39k
      }
2620
99.6k
      if ((RAW >= '0') && (RAW <= '9'))
2621
91.4k
          val = val * 10 + (CUR - '0');
2622
8.24k
      else {
2623
8.24k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2624
8.24k
    val = 0;
2625
8.24k
    break;
2626
8.24k
      }
2627
91.4k
      if (val > 0x110000)
2628
12.6k
          val = 0x110000;
2629
2630
91.4k
      NEXT;
2631
91.4k
      count++;
2632
91.4k
  }
2633
36.9k
  if (RAW == ';') {
2634
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2635
28.7k
      ctxt->input->col++;
2636
28.7k
      ctxt->input->cur++;
2637
28.7k
  }
2638
36.9k
    } else {
2639
0
        if (RAW == '&')
2640
0
            SKIP(1);
2641
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2642
0
    }
2643
2644
    /*
2645
     * [ WFC: Legal Character ]
2646
     * Characters referred to using character references must match the
2647
     * production for Char.
2648
     */
2649
65.7k
    if (val >= 0x110000) {
2650
377
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2651
377
                "xmlParseCharRef: character reference out of bounds\n",
2652
377
          val);
2653
65.3k
    } else if (IS_CHAR(val)) {
2654
43.1k
        return(val);
2655
43.1k
    } else {
2656
22.2k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2657
22.2k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2658
22.2k
                    val);
2659
22.2k
    }
2660
22.6k
    return(0);
2661
65.7k
}
2662
2663
/**
2664
 * xmlParseStringCharRef:
2665
 * @ctxt:  an XML parser context
2666
 * @str:  a pointer to an index in the string
2667
 *
2668
 * parse Reference declarations, variant parsing from a string rather
2669
 * than an an input flow.
2670
 *
2671
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2672
 *                  '&#x' [0-9a-fA-F]+ ';'
2673
 *
2674
 * [ WFC: Legal Character ]
2675
 * Characters referred to using character references must match the
2676
 * production for Char.
2677
 *
2678
 * Returns the value parsed (as an int), 0 in case of error, str will be
2679
 *         updated to the current value of the index
2680
 */
2681
static int
2682
308k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2683
308k
    const xmlChar *ptr;
2684
308k
    xmlChar cur;
2685
308k
    int val = 0;
2686
2687
308k
    if ((str == NULL) || (*str == NULL)) return(0);
2688
308k
    ptr = *str;
2689
308k
    cur = *ptr;
2690
308k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2691
283k
  ptr += 3;
2692
283k
  cur = *ptr;
2693
1.44M
  while (cur != ';') { /* Non input consuming loop */
2694
1.16M
      if ((cur >= '0') && (cur <= '9'))
2695
729k
          val = val * 16 + (cur - '0');
2696
434k
      else if ((cur >= 'a') && (cur <= 'f'))
2697
173k
          val = val * 16 + (cur - 'a') + 10;
2698
261k
      else if ((cur >= 'A') && (cur <= 'F'))
2699
256k
          val = val * 16 + (cur - 'A') + 10;
2700
5.15k
      else {
2701
5.15k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2702
5.15k
    val = 0;
2703
5.15k
    break;
2704
5.15k
      }
2705
1.15M
      if (val > 0x110000)
2706
112k
          val = 0x110000;
2707
2708
1.15M
      ptr++;
2709
1.15M
      cur = *ptr;
2710
1.15M
  }
2711
283k
  if (cur == ';')
2712
278k
      ptr++;
2713
283k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2714
25.1k
  ptr += 2;
2715
25.1k
  cur = *ptr;
2716
160k
  while (cur != ';') { /* Non input consuming loops */
2717
139k
      if ((cur >= '0') && (cur <= '9'))
2718
134k
          val = val * 10 + (cur - '0');
2719
4.95k
      else {
2720
4.95k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2721
4.95k
    val = 0;
2722
4.95k
    break;
2723
4.95k
      }
2724
134k
      if (val > 0x110000)
2725
56.5k
          val = 0x110000;
2726
2727
134k
      ptr++;
2728
134k
      cur = *ptr;
2729
134k
  }
2730
25.1k
  if (cur == ';')
2731
20.2k
      ptr++;
2732
25.1k
    } else {
2733
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2734
0
  return(0);
2735
0
    }
2736
308k
    *str = ptr;
2737
2738
    /*
2739
     * [ WFC: Legal Character ]
2740
     * Characters referred to using character references must match the
2741
     * production for Char.
2742
     */
2743
308k
    if (val >= 0x110000) {
2744
852
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2745
852
                "xmlParseStringCharRef: character reference out of bounds\n",
2746
852
                val);
2747
307k
    } else if (IS_CHAR(val)) {
2748
288k
        return(val);
2749
288k
    } else {
2750
18.7k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2751
18.7k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2752
18.7k
        val);
2753
18.7k
    }
2754
19.5k
    return(0);
2755
308k
}
2756
2757
/**
2758
 * xmlParserHandlePEReference:
2759
 * @ctxt:  the parser context
2760
 *
2761
 * DEPRECATED: Internal function, do not use.
2762
 *
2763
 * [69] PEReference ::= '%' Name ';'
2764
 *
2765
 * [ WFC: No Recursion ]
2766
 * A parsed entity must not contain a recursive
2767
 * reference to itself, either directly or indirectly.
2768
 *
2769
 * [ WFC: Entity Declared ]
2770
 * In a document without any DTD, a document with only an internal DTD
2771
 * subset which contains no parameter entity references, or a document
2772
 * with "standalone='yes'", ...  ... The declaration of a parameter
2773
 * entity must precede any reference to it...
2774
 *
2775
 * [ VC: Entity Declared ]
2776
 * In a document with an external subset or external parameter entities
2777
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2778
 * must precede any reference to it...
2779
 *
2780
 * [ WFC: In DTD ]
2781
 * Parameter-entity references may only appear in the DTD.
2782
 * NOTE: misleading but this is handled.
2783
 *
2784
 * A PEReference may have been detected in the current input stream
2785
 * the handling is done accordingly to
2786
 *      http://www.w3.org/TR/REC-xml#entproc
2787
 * i.e.
2788
 *   - Included in literal in entity values
2789
 *   - Included as Parameter Entity reference within DTDs
2790
 */
2791
void
2792
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2793
0
    xmlParsePEReference(ctxt);
2794
0
}
2795
2796
/**
2797
 * xmlStringLenDecodeEntities:
2798
 * @ctxt:  the parser context
2799
 * @str:  the input string
2800
 * @len: the string length
2801
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2802
 * @end:  an end marker xmlChar, 0 if none
2803
 * @end2:  an end marker xmlChar, 0 if none
2804
 * @end3:  an end marker xmlChar, 0 if none
2805
 *
2806
 * DEPRECATED: Internal function, don't use.
2807
 *
2808
 * Returns A newly allocated string with the substitution done. The caller
2809
 *      must deallocate it !
2810
 */
2811
xmlChar *
2812
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2813
                           int what ATTRIBUTE_UNUSED,
2814
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2815
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2816
0
        return(NULL);
2817
2818
0
    if ((str[len] != 0) ||
2819
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2820
0
        return(NULL);
2821
2822
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2823
0
}
2824
2825
/**
2826
 * xmlStringDecodeEntities:
2827
 * @ctxt:  the parser context
2828
 * @str:  the input string
2829
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2830
 * @end:  an end marker xmlChar, 0 if none
2831
 * @end2:  an end marker xmlChar, 0 if none
2832
 * @end3:  an end marker xmlChar, 0 if none
2833
 *
2834
 * DEPRECATED: Internal function, don't use.
2835
 *
2836
 * Returns A newly allocated string with the substitution done. The caller
2837
 *      must deallocate it !
2838
 */
2839
xmlChar *
2840
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2841
                        int what ATTRIBUTE_UNUSED,
2842
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2843
0
    if ((ctxt == NULL) || (str == NULL))
2844
0
        return(NULL);
2845
2846
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2847
0
        return(NULL);
2848
2849
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2850
0
}
2851
2852
/************************************************************************
2853
 *                  *
2854
 *    Commodity functions, cleanup needed ?     *
2855
 *                  *
2856
 ************************************************************************/
2857
2858
/**
2859
 * areBlanks:
2860
 * @ctxt:  an XML parser context
2861
 * @str:  a xmlChar *
2862
 * @len:  the size of @str
2863
 * @blank_chars: we know the chars are blanks
2864
 *
2865
 * Is this a sequence of blank chars that one can ignore ?
2866
 *
2867
 * Returns 1 if ignorable 0 otherwise.
2868
 */
2869
2870
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2871
1.59M
                     int blank_chars) {
2872
1.59M
    int i;
2873
1.59M
    xmlNodePtr lastChild;
2874
2875
    /*
2876
     * Don't spend time trying to differentiate them, the same callback is
2877
     * used !
2878
     */
2879
1.59M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2880
1.59M
  return(0);
2881
2882
    /*
2883
     * Check for xml:space value.
2884
     */
2885
0
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2886
0
        (*(ctxt->space) == -2))
2887
0
  return(0);
2888
2889
    /*
2890
     * Check that the string is made of blanks
2891
     */
2892
0
    if (blank_chars == 0) {
2893
0
  for (i = 0;i < len;i++)
2894
0
      if (!(IS_BLANK_CH(str[i]))) return(0);
2895
0
    }
2896
2897
    /*
2898
     * Look if the element is mixed content in the DTD if available
2899
     */
2900
0
    if (ctxt->node == NULL) return(0);
2901
0
    if (ctxt->myDoc != NULL) {
2902
0
        xmlElementPtr elemDecl = NULL;
2903
0
        xmlDocPtr doc = ctxt->myDoc;
2904
0
        const xmlChar *prefix = NULL;
2905
2906
0
        if (ctxt->node->ns)
2907
0
            prefix = ctxt->node->ns->prefix;
2908
0
        if (doc->intSubset != NULL)
2909
0
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2910
0
                                      prefix);
2911
0
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2912
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2913
0
                                      prefix);
2914
0
        if (elemDecl != NULL) {
2915
0
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2916
0
                return(1);
2917
0
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2918
0
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2919
0
                return(0);
2920
0
        }
2921
0
    }
2922
2923
    /*
2924
     * Otherwise, heuristic :-\
2925
     */
2926
0
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2927
0
    if ((ctxt->node->children == NULL) &&
2928
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2929
2930
0
    lastChild = xmlGetLastChild(ctxt->node);
2931
0
    if (lastChild == NULL) {
2932
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2933
0
            (ctxt->node->content != NULL)) return(0);
2934
0
    } else if (xmlNodeIsText(lastChild))
2935
0
        return(0);
2936
0
    else if ((ctxt->node->children != NULL) &&
2937
0
             (xmlNodeIsText(ctxt->node->children)))
2938
0
        return(0);
2939
0
    return(1);
2940
0
}
2941
2942
/************************************************************************
2943
 *                  *
2944
 *    Extra stuff for namespace support     *
2945
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2946
 *                  *
2947
 ************************************************************************/
2948
2949
/**
2950
 * xmlSplitQName:
2951
 * @ctxt:  an XML parser context
2952
 * @name:  an XML parser context
2953
 * @prefixOut:  a xmlChar **
2954
 *
2955
 * parse an UTF8 encoded XML qualified name string
2956
 *
2957
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2958
 *
2959
 * [NS 6] Prefix ::= NCName
2960
 *
2961
 * [NS 7] LocalPart ::= NCName
2962
 *
2963
 * Returns the local part, and prefix is updated
2964
 *   to get the Prefix if any.
2965
 */
2966
2967
xmlChar *
2968
21.1k
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
2969
21.1k
    xmlChar buf[XML_MAX_NAMELEN + 5];
2970
21.1k
    xmlChar *buffer = NULL;
2971
21.1k
    int len = 0;
2972
21.1k
    int max = XML_MAX_NAMELEN;
2973
21.1k
    xmlChar *ret = NULL;
2974
21.1k
    xmlChar *prefix;
2975
21.1k
    const xmlChar *cur = name;
2976
21.1k
    int c;
2977
2978
21.1k
    if (prefixOut == NULL) return(NULL);
2979
21.1k
    *prefixOut = NULL;
2980
2981
21.1k
    if (cur == NULL) return(NULL);
2982
2983
#ifndef XML_XML_NAMESPACE
2984
    /* xml: prefix is not really a namespace */
2985
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2986
        (cur[2] == 'l') && (cur[3] == ':'))
2987
  return(xmlStrdup(name));
2988
#endif
2989
2990
    /* nasty but well=formed */
2991
21.1k
    if (cur[0] == ':')
2992
807
  return(xmlStrdup(name));
2993
2994
20.3k
    c = *cur++;
2995
1.08M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2996
1.06M
  buf[len++] = c;
2997
1.06M
  c = *cur++;
2998
1.06M
    }
2999
20.3k
    if (len >= max) {
3000
  /*
3001
   * Okay someone managed to make a huge name, so he's ready to pay
3002
   * for the processing speed.
3003
   */
3004
8.00k
  max = len * 2;
3005
3006
8.00k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3007
8.00k
  if (buffer == NULL) {
3008
0
      xmlErrMemory(ctxt);
3009
0
      return(NULL);
3010
0
  }
3011
8.00k
  memcpy(buffer, buf, len);
3012
991k
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3013
983k
      if (len + 10 > max) {
3014
5.03k
          xmlChar *tmp;
3015
3016
5.03k
    max *= 2;
3017
5.03k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3018
5.03k
    if (tmp == NULL) {
3019
0
        xmlFree(buffer);
3020
0
        xmlErrMemory(ctxt);
3021
0
        return(NULL);
3022
0
    }
3023
5.03k
    buffer = tmp;
3024
5.03k
      }
3025
983k
      buffer[len++] = c;
3026
983k
      c = *cur++;
3027
983k
  }
3028
8.00k
  buffer[len] = 0;
3029
8.00k
    }
3030
3031
20.3k
    if ((c == ':') && (*cur == 0)) {
3032
782
        if (buffer != NULL)
3033
508
      xmlFree(buffer);
3034
782
  return(xmlStrdup(name));
3035
782
    }
3036
3037
19.5k
    if (buffer == NULL) {
3038
12.0k
  ret = xmlStrndup(buf, len);
3039
12.0k
        if (ret == NULL) {
3040
1
      xmlErrMemory(ctxt);
3041
1
      return(NULL);
3042
1
        }
3043
12.0k
    } else {
3044
7.49k
  ret = buffer;
3045
7.49k
  buffer = NULL;
3046
7.49k
  max = XML_MAX_NAMELEN;
3047
7.49k
    }
3048
3049
3050
19.5k
    if (c == ':') {
3051
10.2k
  c = *cur;
3052
10.2k
        prefix = ret;
3053
10.2k
  if (c == 0) {
3054
0
      ret = xmlStrndup(BAD_CAST "", 0);
3055
0
            if (ret == NULL) {
3056
0
                xmlFree(prefix);
3057
0
                return(NULL);
3058
0
            }
3059
0
            *prefixOut = prefix;
3060
0
            return(ret);
3061
0
  }
3062
10.2k
  len = 0;
3063
3064
  /*
3065
   * Check that the first character is proper to start
3066
   * a new name
3067
   */
3068
10.2k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3069
10.2k
        ((c >= 0x41) && (c <= 0x5A)) ||
3070
10.2k
        (c == '_') || (c == ':'))) {
3071
3.20k
      int l;
3072
3.20k
      int first = CUR_SCHAR(cur, l);
3073
3074
3.20k
      if (!IS_LETTER(first) && (first != '_')) {
3075
2.28k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3076
2.28k
          "Name %s is not XML Namespace compliant\n",
3077
2.28k
          name);
3078
2.28k
      }
3079
3.20k
  }
3080
10.2k
  cur++;
3081
3082
642k
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3083
632k
      buf[len++] = c;
3084
632k
      c = *cur++;
3085
632k
  }
3086
10.2k
  if (len >= max) {
3087
      /*
3088
       * Okay someone managed to make a huge name, so he's ready to pay
3089
       * for the processing speed.
3090
       */
3091
5.56k
      max = len * 2;
3092
3093
5.56k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3094
5.56k
      if (buffer == NULL) {
3095
1
          xmlErrMemory(ctxt);
3096
1
                xmlFree(prefix);
3097
1
    return(NULL);
3098
1
      }
3099
5.56k
      memcpy(buffer, buf, len);
3100
1.34M
      while (c != 0) { /* tested bigname2.xml */
3101
1.33M
    if (len + 10 > max) {
3102
5.44k
        xmlChar *tmp;
3103
3104
5.44k
        max *= 2;
3105
5.44k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3106
5.44k
        if (tmp == NULL) {
3107
0
      xmlErrMemory(ctxt);
3108
0
                        xmlFree(prefix);
3109
0
      xmlFree(buffer);
3110
0
      return(NULL);
3111
0
        }
3112
5.44k
        buffer = tmp;
3113
5.44k
    }
3114
1.33M
    buffer[len++] = c;
3115
1.33M
    c = *cur++;
3116
1.33M
      }
3117
5.56k
      buffer[len] = 0;
3118
5.56k
  }
3119
3120
10.2k
  if (buffer == NULL) {
3121
4.65k
      ret = xmlStrndup(buf, len);
3122
4.65k
            if (ret == NULL) {
3123
1
                xmlFree(prefix);
3124
1
                return(NULL);
3125
1
            }
3126
5.56k
  } else {
3127
5.56k
      ret = buffer;
3128
5.56k
  }
3129
3130
10.2k
        *prefixOut = prefix;
3131
10.2k
    }
3132
3133
19.5k
    return(ret);
3134
19.5k
}
3135
3136
/************************************************************************
3137
 *                  *
3138
 *      The parser itself       *
3139
 *  Relates to http://www.w3.org/TR/REC-xml       *
3140
 *                  *
3141
 ************************************************************************/
3142
3143
/************************************************************************
3144
 *                  *
3145
 *  Routines to parse Name, NCName and NmToken      *
3146
 *                  *
3147
 ************************************************************************/
3148
3149
/*
3150
 * The two following functions are related to the change of accepted
3151
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3152
 * They correspond to the modified production [4] and the new production [4a]
3153
 * changes in that revision. Also note that the macros used for the
3154
 * productions Letter, Digit, CombiningChar and Extender are not needed
3155
 * anymore.
3156
 * We still keep compatibility to pre-revision5 parsing semantic if the
3157
 * new XML_PARSE_OLD10 option is given to the parser.
3158
 */
3159
static int
3160
3.24M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3161
3.24M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3162
        /*
3163
   * Use the new checks of production [4] [4a] amd [5] of the
3164
   * Update 5 of XML-1.0
3165
   */
3166
3.24M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3167
3.24M
      (((c >= 'a') && (c <= 'z')) ||
3168
3.24M
       ((c >= 'A') && (c <= 'Z')) ||
3169
3.24M
       (c == '_') || (c == ':') ||
3170
3.24M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3171
3.24M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3172
3.24M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3173
3.24M
       ((c >= 0x370) && (c <= 0x37D)) ||
3174
3.24M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3175
3.24M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3176
3.24M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3177
3.24M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3178
3.24M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3179
3.24M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3180
3.24M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3181
3.24M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3182
402k
      return(1);
3183
3.24M
    } else {
3184
0
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3185
0
      return(1);
3186
0
    }
3187
2.84M
    return(0);
3188
3.24M
}
3189
3190
static int
3191
23.4M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3192
23.4M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3193
        /*
3194
   * Use the new checks of production [4] [4a] amd [5] of the
3195
   * Update 5 of XML-1.0
3196
   */
3197
23.4M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3198
23.4M
      (((c >= 'a') && (c <= 'z')) ||
3199
23.4M
       ((c >= 'A') && (c <= 'Z')) ||
3200
23.4M
       ((c >= '0') && (c <= '9')) || /* !start */
3201
23.4M
       (c == '_') || (c == ':') ||
3202
23.4M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3203
23.4M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3204
23.4M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3205
23.4M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3206
23.4M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3207
23.4M
       ((c >= 0x370) && (c <= 0x37D)) ||
3208
23.4M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3209
23.4M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3210
23.4M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3211
23.4M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3212
23.4M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3213
23.4M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3214
23.4M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3215
23.4M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3216
23.4M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3217
23.0M
       return(1);
3218
23.4M
    } else {
3219
0
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3220
0
            (c == '.') || (c == '-') ||
3221
0
      (c == '_') || (c == ':') ||
3222
0
      (IS_COMBINING(c)) ||
3223
0
      (IS_EXTENDER(c)))
3224
0
      return(1);
3225
0
    }
3226
433k
    return(0);
3227
23.4M
}
3228
3229
static const xmlChar *
3230
470k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3231
470k
    const xmlChar *ret;
3232
470k
    int len = 0, l;
3233
470k
    int c;
3234
470k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3235
0
                    XML_MAX_TEXT_LENGTH :
3236
470k
                    XML_MAX_NAME_LENGTH;
3237
3238
    /*
3239
     * Handler for more complex cases
3240
     */
3241
470k
    c = CUR_CHAR(l);
3242
470k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3243
        /*
3244
   * Use the new checks of production [4] [4a] amd [5] of the
3245
   * Update 5 of XML-1.0
3246
   */
3247
470k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3248
470k
      (!(((c >= 'a') && (c <= 'z')) ||
3249
413k
         ((c >= 'A') && (c <= 'Z')) ||
3250
413k
         (c == '_') || (c == ':') ||
3251
413k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3252
413k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3253
413k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3254
413k
         ((c >= 0x370) && (c <= 0x37D)) ||
3255
413k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3256
413k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3257
413k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3258
413k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3259
413k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3260
413k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3261
413k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3262
413k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3263
251k
      return(NULL);
3264
251k
  }
3265
219k
  len += l;
3266
219k
  NEXTL(l);
3267
219k
  c = CUR_CHAR(l);
3268
10.0M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3269
10.0M
         (((c >= 'a') && (c <= 'z')) ||
3270
10.0M
          ((c >= 'A') && (c <= 'Z')) ||
3271
10.0M
          ((c >= '0') && (c <= '9')) || /* !start */
3272
10.0M
          (c == '_') || (c == ':') ||
3273
10.0M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3274
10.0M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3275
10.0M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3276
10.0M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3277
10.0M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3278
10.0M
          ((c >= 0x370) && (c <= 0x37D)) ||
3279
10.0M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3280
10.0M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3281
10.0M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3282
10.0M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3283
10.0M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3284
10.0M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3285
10.0M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3286
10.0M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3287
10.0M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3288
10.0M
    )) {
3289
9.82M
            if (len <= INT_MAX - l)
3290
9.82M
          len += l;
3291
9.82M
      NEXTL(l);
3292
9.82M
      c = CUR_CHAR(l);
3293
9.82M
  }
3294
219k
    } else {
3295
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3296
0
      (!IS_LETTER(c) && (c != '_') &&
3297
0
       (c != ':'))) {
3298
0
      return(NULL);
3299
0
  }
3300
0
  len += l;
3301
0
  NEXTL(l);
3302
0
  c = CUR_CHAR(l);
3303
3304
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3305
0
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3306
0
    (c == '.') || (c == '-') ||
3307
0
    (c == '_') || (c == ':') ||
3308
0
    (IS_COMBINING(c)) ||
3309
0
    (IS_EXTENDER(c)))) {
3310
0
            if (len <= INT_MAX - l)
3311
0
          len += l;
3312
0
      NEXTL(l);
3313
0
      c = CUR_CHAR(l);
3314
0
  }
3315
0
    }
3316
219k
    if (len > maxLength) {
3317
36
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3318
36
        return(NULL);
3319
36
    }
3320
219k
    if (ctxt->input->cur - ctxt->input->base < len) {
3321
        /*
3322
         * There were a couple of bugs where PERefs lead to to a change
3323
         * of the buffer. Check the buffer size to avoid passing an invalid
3324
         * pointer to xmlDictLookup.
3325
         */
3326
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3327
0
                    "unexpected change of input buffer");
3328
0
        return (NULL);
3329
0
    }
3330
219k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3331
319
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3332
219k
    else
3333
219k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3334
219k
    if (ret == NULL)
3335
0
        xmlErrMemory(ctxt);
3336
219k
    return(ret);
3337
219k
}
3338
3339
/**
3340
 * xmlParseName:
3341
 * @ctxt:  an XML parser context
3342
 *
3343
 * DEPRECATED: Internal function, don't use.
3344
 *
3345
 * parse an XML name.
3346
 *
3347
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3348
 *                  CombiningChar | Extender
3349
 *
3350
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3351
 *
3352
 * [6] Names ::= Name (#x20 Name)*
3353
 *
3354
 * Returns the Name parsed or NULL
3355
 */
3356
3357
const xmlChar *
3358
1.47M
xmlParseName(xmlParserCtxtPtr ctxt) {
3359
1.47M
    const xmlChar *in;
3360
1.47M
    const xmlChar *ret;
3361
1.47M
    size_t count = 0;
3362
1.47M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3363
0
                       XML_MAX_TEXT_LENGTH :
3364
1.47M
                       XML_MAX_NAME_LENGTH;
3365
3366
1.47M
    GROW;
3367
3368
    /*
3369
     * Accelerator for simple ASCII names
3370
     */
3371
1.47M
    in = ctxt->input->cur;
3372
1.47M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3373
1.47M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3374
1.47M
  (*in == '_') || (*in == ':')) {
3375
1.13M
  in++;
3376
3.26M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3377
3.26M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3378
3.26M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3379
3.26M
         (*in == '_') || (*in == '-') ||
3380
3.26M
         (*in == ':') || (*in == '.'))
3381
2.13M
      in++;
3382
1.13M
  if ((*in > 0) && (*in < 0x80)) {
3383
1.00M
      count = in - ctxt->input->cur;
3384
1.00M
            if (count > maxLength) {
3385
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3386
0
                return(NULL);
3387
0
            }
3388
1.00M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3389
1.00M
      ctxt->input->cur = in;
3390
1.00M
      ctxt->input->col += count;
3391
1.00M
      if (ret == NULL)
3392
3
          xmlErrMemory(ctxt);
3393
1.00M
      return(ret);
3394
1.00M
  }
3395
1.13M
    }
3396
    /* accelerator for special cases */
3397
470k
    return(xmlParseNameComplex(ctxt));
3398
1.47M
}
3399
3400
static xmlHashedString
3401
3.18M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3402
3.18M
    xmlHashedString ret;
3403
3.18M
    int len = 0, l;
3404
3.18M
    int c;
3405
3.18M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3406
0
                    XML_MAX_TEXT_LENGTH :
3407
3.18M
                    XML_MAX_NAME_LENGTH;
3408
3.18M
    size_t startPosition = 0;
3409
3410
3.18M
    ret.name = NULL;
3411
3.18M
    ret.hashValue = 0;
3412
3413
    /*
3414
     * Handler for more complex cases
3415
     */
3416
3.18M
    startPosition = CUR_PTR - BASE_PTR;
3417
3.18M
    c = CUR_CHAR(l);
3418
3.18M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3419
3.18M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3420
2.89M
  return(ret);
3421
2.89M
    }
3422
3423
21.1M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3424
21.1M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3425
20.8M
        if (len <= INT_MAX - l)
3426
20.8M
      len += l;
3427
20.8M
  NEXTL(l);
3428
20.8M
  c = CUR_CHAR(l);
3429
20.8M
    }
3430
287k
    if (len > maxLength) {
3431
238
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3432
238
        return(ret);
3433
238
    }
3434
287k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3435
287k
    if (ret.name == NULL)
3436
0
        xmlErrMemory(ctxt);
3437
287k
    return(ret);
3438
287k
}
3439
3440
/**
3441
 * xmlParseNCName:
3442
 * @ctxt:  an XML parser context
3443
 * @len:  length of the string parsed
3444
 *
3445
 * parse an XML name.
3446
 *
3447
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3448
 *                      CombiningChar | Extender
3449
 *
3450
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3451
 *
3452
 * Returns the Name parsed or NULL
3453
 */
3454
3455
static xmlHashedString
3456
9.22M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3457
9.22M
    const xmlChar *in, *e;
3458
9.22M
    xmlHashedString ret;
3459
9.22M
    size_t count = 0;
3460
9.22M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3461
0
                       XML_MAX_TEXT_LENGTH :
3462
9.22M
                       XML_MAX_NAME_LENGTH;
3463
3464
9.22M
    ret.name = NULL;
3465
3466
    /*
3467
     * Accelerator for simple ASCII names
3468
     */
3469
9.22M
    in = ctxt->input->cur;
3470
9.22M
    e = ctxt->input->end;
3471
9.22M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3472
9.22M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3473
9.22M
   (*in == '_')) && (in < e)) {
3474
6.18M
  in++;
3475
22.5M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3476
22.5M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3477
22.5M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3478
22.5M
          (*in == '_') || (*in == '-') ||
3479
22.5M
          (*in == '.')) && (in < e))
3480
16.3M
      in++;
3481
6.18M
  if (in >= e)
3482
1.84k
      goto complex;
3483
6.17M
  if ((*in > 0) && (*in < 0x80)) {
3484
6.04M
      count = in - ctxt->input->cur;
3485
6.04M
            if (count > maxLength) {
3486
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3487
0
                return(ret);
3488
0
            }
3489
6.04M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3490
6.04M
      ctxt->input->cur = in;
3491
6.04M
      ctxt->input->col += count;
3492
6.04M
      if (ret.name == NULL) {
3493
0
          xmlErrMemory(ctxt);
3494
0
      }
3495
6.04M
      return(ret);
3496
6.04M
  }
3497
6.17M
    }
3498
3.18M
complex:
3499
3.18M
    return(xmlParseNCNameComplex(ctxt));
3500
9.22M
}
3501
3502
/**
3503
 * xmlParseNameAndCompare:
3504
 * @ctxt:  an XML parser context
3505
 *
3506
 * parse an XML name and compares for match
3507
 * (specialized for endtag parsing)
3508
 *
3509
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3510
 * and the name for mismatch
3511
 */
3512
3513
static const xmlChar *
3514
315k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3515
315k
    register const xmlChar *cmp = other;
3516
315k
    register const xmlChar *in;
3517
315k
    const xmlChar *ret;
3518
3519
315k
    GROW;
3520
3521
315k
    in = ctxt->input->cur;
3522
1.47M
    while (*in != 0 && *in == *cmp) {
3523
1.16M
  ++in;
3524
1.16M
  ++cmp;
3525
1.16M
    }
3526
315k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3527
  /* success */
3528
227k
  ctxt->input->col += in - ctxt->input->cur;
3529
227k
  ctxt->input->cur = in;
3530
227k
  return (const xmlChar*) 1;
3531
227k
    }
3532
    /* failure (or end of input buffer), check with full function */
3533
87.7k
    ret = xmlParseName (ctxt);
3534
    /* strings coming from the dictionary direct compare possible */
3535
87.7k
    if (ret == other) {
3536
899
  return (const xmlChar*) 1;
3537
899
    }
3538
86.8k
    return ret;
3539
87.7k
}
3540
3541
/**
3542
 * xmlParseStringName:
3543
 * @ctxt:  an XML parser context
3544
 * @str:  a pointer to the string pointer (IN/OUT)
3545
 *
3546
 * parse an XML name.
3547
 *
3548
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3549
 *                  CombiningChar | Extender
3550
 *
3551
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3552
 *
3553
 * [6] Names ::= Name (#x20 Name)*
3554
 *
3555
 * Returns the Name parsed or NULL. The @str pointer
3556
 * is updated to the current location in the string.
3557
 */
3558
3559
static xmlChar *
3560
92.3k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3561
92.3k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3562
92.3k
    xmlChar *ret;
3563
92.3k
    const xmlChar *cur = *str;
3564
92.3k
    int len = 0, l;
3565
92.3k
    int c;
3566
92.3k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3567
0
                    XML_MAX_TEXT_LENGTH :
3568
92.3k
                    XML_MAX_NAME_LENGTH;
3569
3570
92.3k
    c = CUR_SCHAR(cur, l);
3571
92.3k
    if (!xmlIsNameStartChar(ctxt, c)) {
3572
3.83k
  return(NULL);
3573
3.83k
    }
3574
3575
88.4k
    COPY_BUF(buf, len, c);
3576
88.4k
    cur += l;
3577
88.4k
    c = CUR_SCHAR(cur, l);
3578
270k
    while (xmlIsNameChar(ctxt, c)) {
3579
182k
  COPY_BUF(buf, len, c);
3580
182k
  cur += l;
3581
182k
  c = CUR_SCHAR(cur, l);
3582
182k
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3583
      /*
3584
       * Okay someone managed to make a huge name, so he's ready to pay
3585
       * for the processing speed.
3586
       */
3587
1.10k
      xmlChar *buffer;
3588
1.10k
      int max = len * 2;
3589
3590
1.10k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3591
1.10k
      if (buffer == NULL) {
3592
0
          xmlErrMemory(ctxt);
3593
0
    return(NULL);
3594
0
      }
3595
1.10k
      memcpy(buffer, buf, len);
3596
294k
      while (xmlIsNameChar(ctxt, c)) {
3597
293k
    if (len + 10 > max) {
3598
751
        xmlChar *tmp;
3599
3600
751
        max *= 2;
3601
751
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3602
751
        if (tmp == NULL) {
3603
0
      xmlErrMemory(ctxt);
3604
0
      xmlFree(buffer);
3605
0
      return(NULL);
3606
0
        }
3607
751
        buffer = tmp;
3608
751
    }
3609
293k
    COPY_BUF(buffer, len, c);
3610
293k
    cur += l;
3611
293k
    c = CUR_SCHAR(cur, l);
3612
293k
                if (len > maxLength) {
3613
12
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3614
12
                    xmlFree(buffer);
3615
12
                    return(NULL);
3616
12
                }
3617
293k
      }
3618
1.08k
      buffer[len] = 0;
3619
1.08k
      *str = cur;
3620
1.08k
      return(buffer);
3621
1.10k
  }
3622
182k
    }
3623
87.3k
    if (len > maxLength) {
3624
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3625
0
        return(NULL);
3626
0
    }
3627
87.3k
    *str = cur;
3628
87.3k
    ret = xmlStrndup(buf, len);
3629
87.3k
    if (ret == NULL)
3630
1
        xmlErrMemory(ctxt);
3631
87.3k
    return(ret);
3632
87.3k
}
3633
3634
/**
3635
 * xmlParseNmtoken:
3636
 * @ctxt:  an XML parser context
3637
 *
3638
 * DEPRECATED: Internal function, don't use.
3639
 *
3640
 * parse an XML Nmtoken.
3641
 *
3642
 * [7] Nmtoken ::= (NameChar)+
3643
 *
3644
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3645
 *
3646
 * Returns the Nmtoken parsed or NULL
3647
 */
3648
3649
xmlChar *
3650
126k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3651
126k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3652
126k
    xmlChar *ret;
3653
126k
    int len = 0, l;
3654
126k
    int c;
3655
126k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3656
0
                    XML_MAX_TEXT_LENGTH :
3657
126k
                    XML_MAX_NAME_LENGTH;
3658
3659
126k
    c = CUR_CHAR(l);
3660
3661
448k
    while (xmlIsNameChar(ctxt, c)) {
3662
322k
  COPY_BUF(buf, len, c);
3663
322k
  NEXTL(l);
3664
322k
  c = CUR_CHAR(l);
3665
322k
  if (len >= XML_MAX_NAMELEN) {
3666
      /*
3667
       * Okay someone managed to make a huge token, so he's ready to pay
3668
       * for the processing speed.
3669
       */
3670
874
      xmlChar *buffer;
3671
874
      int max = len * 2;
3672
3673
874
      buffer = (xmlChar *) xmlMallocAtomic(max);
3674
874
      if (buffer == NULL) {
3675
0
          xmlErrMemory(ctxt);
3676
0
    return(NULL);
3677
0
      }
3678
874
      memcpy(buffer, buf, len);
3679
1.34M
      while (xmlIsNameChar(ctxt, c)) {
3680
1.34M
    if (len + 10 > max) {
3681
1.45k
        xmlChar *tmp;
3682
3683
1.45k
        max *= 2;
3684
1.45k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3685
1.45k
        if (tmp == NULL) {
3686
0
      xmlErrMemory(ctxt);
3687
0
      xmlFree(buffer);
3688
0
      return(NULL);
3689
0
        }
3690
1.45k
        buffer = tmp;
3691
1.45k
    }
3692
1.34M
    COPY_BUF(buffer, len, c);
3693
1.34M
                if (len > maxLength) {
3694
61
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3695
61
                    xmlFree(buffer);
3696
61
                    return(NULL);
3697
61
                }
3698
1.34M
    NEXTL(l);
3699
1.34M
    c = CUR_CHAR(l);
3700
1.34M
      }
3701
813
      buffer[len] = 0;
3702
813
      return(buffer);
3703
874
  }
3704
322k
    }
3705
125k
    if (len == 0)
3706
71.2k
        return(NULL);
3707
54.3k
    if (len > maxLength) {
3708
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3709
0
        return(NULL);
3710
0
    }
3711
54.3k
    ret = xmlStrndup(buf, len);
3712
54.3k
    if (ret == NULL)
3713
0
        xmlErrMemory(ctxt);
3714
54.3k
    return(ret);
3715
54.3k
}
3716
3717
/**
3718
 * xmlExpandPEsInEntityValue:
3719
 * @ctxt:  parser context
3720
 * @buf:  string buffer
3721
 * @str:  entity value
3722
 * @length:  size of entity value
3723
 * @depth:  nesting depth
3724
 *
3725
 * Validate an entity value and expand parameter entities.
3726
 */
3727
static void
3728
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3729
49.0k
                          const xmlChar *str, int length, int depth) {
3730
49.0k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3731
49.0k
    const xmlChar *end, *chunk;
3732
49.0k
    int c, l;
3733
3734
49.0k
    if (str == NULL)
3735
0
        return;
3736
3737
49.0k
    depth += 1;
3738
49.0k
    if (depth > maxDepth) {
3739
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3740
0
                       "Maximum entity nesting depth exceeded");
3741
0
  return;
3742
0
    }
3743
3744
49.0k
    end = str + length;
3745
49.0k
    chunk = str;
3746
3747
73.6M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3748
73.5M
        c = *str;
3749
3750
73.5M
        if (c >= 0x80) {
3751
65.9M
            l = xmlUTF8MultibyteLen(ctxt, str,
3752
65.9M
                    "invalid character in entity value\n");
3753
65.9M
            if (l == 0) {
3754
5.25M
                if (chunk < str)
3755
91.6k
                    xmlSBufAddString(buf, chunk, str - chunk);
3756
5.25M
                xmlSBufAddReplChar(buf);
3757
5.25M
                str += 1;
3758
5.25M
                chunk = str;
3759
60.7M
            } else {
3760
60.7M
                str += l;
3761
60.7M
            }
3762
65.9M
        } else if (c == '&') {
3763
82.1k
            if (str[1] == '#') {
3764
59.4k
                if (chunk < str)
3765
56.1k
                    xmlSBufAddString(buf, chunk, str - chunk);
3766
3767
59.4k
                c = xmlParseStringCharRef(ctxt, &str);
3768
59.4k
                if (c == 0)
3769
19.5k
                    return;
3770
3771
39.9k
                xmlSBufAddChar(buf, c);
3772
3773
39.9k
                chunk = str;
3774
39.9k
            } else {
3775
22.6k
                xmlChar *name;
3776
3777
                /*
3778
                 * General entity references are checked for
3779
                 * syntactic validity.
3780
                 */
3781
22.6k
                str++;
3782
22.6k
                name = xmlParseStringName(ctxt, &str);
3783
3784
22.6k
                if ((name == NULL) || (*str++ != ';')) {
3785
5.94k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3786
5.94k
                            "EntityValue: '&' forbidden except for entities "
3787
5.94k
                            "references\n");
3788
5.94k
                    xmlFree(name);
3789
5.94k
                    return;
3790
5.94k
                }
3791
3792
16.7k
                xmlFree(name);
3793
16.7k
            }
3794
7.52M
        } else if (c == '%') {
3795
6.15k
            xmlEntityPtr ent;
3796
3797
6.15k
            if (chunk < str)
3798
5.97k
                xmlSBufAddString(buf, chunk, str - chunk);
3799
3800
6.15k
            ent = xmlParseStringPEReference(ctxt, &str);
3801
6.15k
            if (ent == NULL)
3802
5.96k
                return;
3803
3804
189
            if (!PARSER_EXTERNAL(ctxt)) {
3805
114
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3806
114
                return;
3807
114
            }
3808
3809
75
            if (ent->content == NULL) {
3810
                /*
3811
                 * Note: external parsed entities will not be loaded,
3812
                 * it is not required for a non-validating parser to
3813
                 * complete external PEReferences coming from the
3814
                 * internal subset
3815
                 */
3816
21
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3817
21
                    ((ctxt->replaceEntities) ||
3818
21
                     (ctxt->validate))) {
3819
21
                    xmlLoadEntityContent(ctxt, ent);
3820
21
                } else {
3821
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3822
0
                                  "not validating will not read content for "
3823
0
                                  "PE entity %s\n", ent->name, NULL);
3824
0
                }
3825
21
            }
3826
3827
            /*
3828
             * TODO: Skip if ent->content is still NULL.
3829
             */
3830
3831
75
            if (xmlParserEntityCheck(ctxt, ent->length))
3832
0
                return;
3833
3834
75
            if (ent->flags & XML_ENT_EXPANDING) {
3835
21
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3836
21
                xmlHaltParser(ctxt);
3837
21
                return;
3838
21
            }
3839
3840
54
            ent->flags |= XML_ENT_EXPANDING;
3841
54
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3842
54
                                      depth);
3843
54
            ent->flags &= ~XML_ENT_EXPANDING;
3844
3845
54
            chunk = str;
3846
7.51M
        } else {
3847
            /* Normal ASCII char */
3848
7.51M
            if (!IS_BYTE_CHAR(c)) {
3849
682k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3850
682k
                        "invalid character in entity value\n");
3851
682k
                if (chunk < str)
3852
19.3k
                    xmlSBufAddString(buf, chunk, str - chunk);
3853
682k
                xmlSBufAddReplChar(buf);
3854
682k
                str += 1;
3855
682k
                chunk = str;
3856
6.83M
            } else {
3857
6.83M
                str += 1;
3858
6.83M
            }
3859
7.51M
        }
3860
73.5M
    }
3861
3862
17.4k
    if (chunk < str)
3863
11.7k
        xmlSBufAddString(buf, chunk, str - chunk);
3864
3865
17.4k
    return;
3866
49.0k
}
3867
3868
/**
3869
 * xmlParseEntityValue:
3870
 * @ctxt:  an XML parser context
3871
 * @orig:  if non-NULL store a copy of the original entity value
3872
 *
3873
 * DEPRECATED: Internal function, don't use.
3874
 *
3875
 * parse a value for ENTITY declarations
3876
 *
3877
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3878
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3879
 *
3880
 * Returns the EntityValue parsed with reference substituted or NULL
3881
 */
3882
xmlChar *
3883
49.5k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3884
49.5k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3885
0
                         XML_MAX_HUGE_LENGTH :
3886
49.5k
                         XML_MAX_TEXT_LENGTH;
3887
49.5k
    xmlSBuf buf;
3888
49.5k
    const xmlChar *start;
3889
49.5k
    int quote, length;
3890
3891
49.5k
    xmlSBufInit(&buf, maxLength);
3892
3893
49.5k
    GROW;
3894
3895
49.5k
    quote = CUR;
3896
49.5k
    if ((quote != '"') && (quote != '\'')) {
3897
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3898
0
  return(NULL);
3899
0
    }
3900
49.5k
    CUR_PTR++;
3901
3902
49.5k
    length = 0;
3903
3904
    /*
3905
     * Copy raw content of the entity into a buffer
3906
     */
3907
225M
    while (1) {
3908
225M
        int c;
3909
3910
225M
        if (PARSER_STOPPED(ctxt))
3911
2
            goto error;
3912
3913
225M
        if (CUR_PTR >= ctxt->input->end) {
3914
240
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3915
240
            goto error;
3916
240
        }
3917
3918
225M
        c = CUR;
3919
3920
225M
        if (c == 0) {
3921
380
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3922
380
                    "invalid character in entity value\n");
3923
380
            goto error;
3924
380
        }
3925
225M
        if (c == quote)
3926
48.9k
            break;
3927
225M
        NEXTL(1);
3928
225M
        length += 1;
3929
3930
        /*
3931
         * TODO: Check growth threshold
3932
         */
3933
225M
        if (ctxt->input->end - CUR_PTR < 10)
3934
12.2k
            GROW;
3935
225M
    }
3936
3937
48.9k
    start = CUR_PTR - length;
3938
3939
48.9k
    if (orig != NULL) {
3940
48.9k
        *orig = xmlStrndup(start, length);
3941
48.9k
        if (*orig == NULL)
3942
2
            xmlErrMemory(ctxt);
3943
48.9k
    }
3944
3945
48.9k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3946
3947
48.9k
    NEXTL(1);
3948
3949
48.9k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3950
3951
622
error:
3952
622
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3953
622
    return(NULL);
3954
49.5k
}
3955
3956
/**
3957
 * xmlCheckEntityInAttValue:
3958
 * @ctxt:  parser context
3959
 * @pent:  entity
3960
 * @depth:  nesting depth
3961
 *
3962
 * Check an entity reference in an attribute value for validity
3963
 * without expanding it.
3964
 */
3965
static void
3966
426
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3967
426
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3968
426
    const xmlChar *str;
3969
426
    unsigned long expandedSize = pent->length;
3970
426
    int c, flags;
3971
3972
426
    depth += 1;
3973
426
    if (depth > maxDepth) {
3974
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3975
0
                       "Maximum entity nesting depth exceeded");
3976
0
  return;
3977
0
    }
3978
3979
426
    if (pent->flags & XML_ENT_EXPANDING) {
3980
11
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3981
11
        xmlHaltParser(ctxt);
3982
11
        return;
3983
11
    }
3984
3985
    /*
3986
     * If we're parsing a default attribute value in DTD content,
3987
     * the entity might reference other entities which weren't
3988
     * defined yet, so the check isn't reliable.
3989
     */
3990
415
    if (ctxt->inSubset == 0)
3991
405
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3992
10
    else
3993
10
        flags = XML_ENT_VALIDATED;
3994
3995
415
    str = pent->content;
3996
415
    if (str == NULL)
3997
0
        goto done;
3998
3999
    /*
4000
     * Note that entity values are already validated. We only check
4001
     * for illegal less-than signs and compute the expanded size
4002
     * of the entity. No special handling for multi-byte characters
4003
     * is needed.
4004
     */
4005
10.7M
    while (!PARSER_STOPPED(ctxt)) {
4006
10.7M
        c = *str;
4007
4008
10.7M
  if (c != '&') {
4009
10.7M
            if (c == 0)
4010
379
                break;
4011
4012
10.7M
            if (c == '<')
4013
32.6k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4014
32.6k
                        "'<' in entity '%s' is not allowed in attributes "
4015
32.6k
                        "values\n", pent->name);
4016
4017
10.7M
            str += 1;
4018
10.7M
        } else if (str[1] == '#') {
4019
208
            int val;
4020
4021
208
      val = xmlParseStringCharRef(ctxt, &str);
4022
208
      if (val == 0) {
4023
0
                pent->content[0] = 0;
4024
0
                break;
4025
0
            }
4026
8.23k
  } else {
4027
8.23k
            xmlChar *name;
4028
8.23k
            xmlEntityPtr ent;
4029
4030
8.23k
      name = xmlParseStringEntityRef(ctxt, &str);
4031
8.23k
      if (name == NULL) {
4032
25
                pent->content[0] = 0;
4033
25
                break;
4034
25
            }
4035
4036
8.20k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4037
8.20k
            xmlFree(name);
4038
4039
8.20k
            if ((ent != NULL) &&
4040
8.20k
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4041
4.35k
                if ((ent->flags & flags) != flags) {
4042
64
                    pent->flags |= XML_ENT_EXPANDING;
4043
64
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
4044
64
                    pent->flags &= ~XML_ENT_EXPANDING;
4045
64
                }
4046
4047
4.35k
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4048
4.35k
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4049
4.35k
            }
4050
8.20k
        }
4051
10.7M
    }
4052
4053
415
done:
4054
415
    if (ctxt->inSubset == 0)
4055
405
        pent->expandedSize = expandedSize;
4056
4057
415
    pent->flags |= flags;
4058
415
}
4059
4060
/**
4061
 * xmlExpandEntityInAttValue:
4062
 * @ctxt:  parser context
4063
 * @buf:  string buffer
4064
 * @str:  entity or attribute value
4065
 * @pent:  entity for entity value, NULL for attribute values
4066
 * @normalize:  whether to collapse whitespace
4067
 * @inSpace:  whitespace state
4068
 * @depth:  nesting depth
4069
 * @check:  whether to check for amplification
4070
 *
4071
 * Expand general entity references in an entity or attribute value.
4072
 * Perform attribute value normalization.
4073
 */
4074
static void
4075
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4076
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
4077
266k
                          int *inSpace, int depth, int check) {
4078
266k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4079
266k
    int c, chunkSize;
4080
4081
266k
    if (str == NULL)
4082
0
        return;
4083
4084
266k
    depth += 1;
4085
266k
    if (depth > maxDepth) {
4086
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4087
0
                       "Maximum entity nesting depth exceeded");
4088
0
  return;
4089
0
    }
4090
4091
266k
    if (pent != NULL) {
4092
266k
        if (pent->flags & XML_ENT_EXPANDING) {
4093
161
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4094
161
            xmlHaltParser(ctxt);
4095
161
            return;
4096
161
        }
4097
4098
266k
        if (check) {
4099
266k
            if (xmlParserEntityCheck(ctxt, pent->length))
4100
52
                return;
4101
266k
        }
4102
266k
    }
4103
4104
266k
    chunkSize = 0;
4105
4106
    /*
4107
     * Note that entity values are already validated. No special
4108
     * handling for multi-byte characters is needed.
4109
     */
4110
310M
    while (!PARSER_STOPPED(ctxt)) {
4111
310M
        c = *str;
4112
4113
310M
  if (c != '&') {
4114
309M
            if (c == 0)
4115
257k
                break;
4116
4117
            /*
4118
             * If this function is called without an entity, it is used to
4119
             * expand entities in an attribute content where less-than was
4120
             * already unscaped and is allowed.
4121
             */
4122
309M
            if ((pent != NULL) && (c == '<')) {
4123
8.16k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4124
8.16k
                        "'<' in entity '%s' is not allowed in attributes "
4125
8.16k
                        "values\n", pent->name);
4126
8.16k
                break;
4127
8.16k
            }
4128
4129
309M
            if (c <= 0x20) {
4130
7.69M
                if ((normalize) && (*inSpace)) {
4131
                    /* Skip char */
4132
0
                    if (chunkSize > 0) {
4133
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4134
0
                        chunkSize = 0;
4135
0
                    }
4136
7.69M
                } else if (c < 0x20) {
4137
7.45M
                    if (chunkSize > 0) {
4138
493k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4139
493k
                        chunkSize = 0;
4140
493k
                    }
4141
4142
7.45M
                    xmlSBufAddCString(buf, " ", 1);
4143
7.45M
                } else {
4144
244k
                    chunkSize += 1;
4145
244k
                }
4146
4147
7.69M
                *inSpace = 1;
4148
301M
            } else {
4149
301M
                chunkSize += 1;
4150
301M
                *inSpace = 0;
4151
301M
            }
4152
4153
309M
            str += 1;
4154
309M
        } else if (str[1] == '#') {
4155
248k
            int val;
4156
4157
248k
            if (chunkSize > 0) {
4158
14.3k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4159
14.3k
                chunkSize = 0;
4160
14.3k
            }
4161
4162
248k
      val = xmlParseStringCharRef(ctxt, &str);
4163
248k
      if (val == 0) {
4164
60
                if (pent != NULL)
4165
60
                    pent->content[0] = 0;
4166
60
                break;
4167
60
            }
4168
4169
248k
            if (val == ' ') {
4170
3.60k
                if ((!normalize) || (!*inSpace))
4171
3.60k
                    xmlSBufAddCString(buf, " ", 1);
4172
3.60k
                *inSpace = 1;
4173
245k
            } else {
4174
245k
                xmlSBufAddChar(buf, val);
4175
245k
                *inSpace = 0;
4176
245k
            }
4177
248k
  } else {
4178
55.2k
            xmlChar *name;
4179
55.2k
            xmlEntityPtr ent;
4180
4181
55.2k
            if (chunkSize > 0) {
4182
43.5k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4183
43.5k
                chunkSize = 0;
4184
43.5k
            }
4185
4186
55.2k
      name = xmlParseStringEntityRef(ctxt, &str);
4187
55.2k
            if (name == NULL) {
4188
196
                if (pent != NULL)
4189
196
                    pent->content[0] = 0;
4190
196
                break;
4191
196
            }
4192
4193
55.0k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4194
55.0k
            xmlFree(name);
4195
4196
55.0k
      if ((ent != NULL) &&
4197
55.0k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4198
17.9k
    if (ent->content == NULL) {
4199
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4200
0
          "predefined entity has no content\n");
4201
0
                    break;
4202
0
                }
4203
4204
17.9k
                xmlSBufAddString(buf, ent->content, ent->length);
4205
4206
17.9k
                *inSpace = 0;
4207
37.1k
      } else if ((ent != NULL) && (ent->content != NULL)) {
4208
23.1k
                if (pent != NULL)
4209
23.1k
                    pent->flags |= XML_ENT_EXPANDING;
4210
23.1k
    xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4211
23.1k
                                          normalize, inSpace, depth, check);
4212
23.1k
                if (pent != NULL)
4213
23.1k
                    pent->flags &= ~XML_ENT_EXPANDING;
4214
23.1k
      }
4215
55.0k
        }
4216
310M
    }
4217
4218
266k
    if (chunkSize > 0)
4219
39.1k
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4220
4221
266k
    return;
4222
266k
}
4223
4224
/**
4225
 * xmlExpandEntitiesInAttValue:
4226
 * @ctxt:  parser context
4227
 * @str:  entity or attribute value
4228
 * @normalize:  whether to collapse whitespace
4229
 *
4230
 * Expand general entity references in an entity or attribute value.
4231
 * Perform attribute value normalization.
4232
 *
4233
 * Returns the expanded attribtue value.
4234
 */
4235
xmlChar *
4236
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4237
0
                            int normalize) {
4238
0
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4239
0
                         XML_MAX_HUGE_LENGTH :
4240
0
                         XML_MAX_TEXT_LENGTH;
4241
0
    xmlSBuf buf;
4242
0
    int inSpace = 1;
4243
4244
0
    xmlSBufInit(&buf, maxLength);
4245
4246
0
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4247
0
                              ctxt->inputNr, /* check */ 0);
4248
4249
0
    if ((normalize) && (inSpace) && (buf.size > 0))
4250
0
        buf.size--;
4251
4252
0
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4253
0
}
4254
4255
/**
4256
 * xmlParseAttValueInternal:
4257
 * @ctxt:  an XML parser context
4258
 * @len:  attribute len result
4259
 * @alloc:  whether the attribute was reallocated as a new string
4260
 * @normalize:  if 1 then further non-CDATA normalization must be done
4261
 *
4262
 * parse a value for an attribute.
4263
 * NOTE: if no normalization is needed, the routine will return pointers
4264
 *       directly from the data buffer.
4265
 *
4266
 * 3.3.3 Attribute-Value Normalization:
4267
 * Before the value of an attribute is passed to the application or
4268
 * checked for validity, the XML processor must normalize it as follows:
4269
 * - a character reference is processed by appending the referenced
4270
 *   character to the attribute value
4271
 * - an entity reference is processed by recursively processing the
4272
 *   replacement text of the entity
4273
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4274
 *   appending #x20 to the normalized value, except that only a single
4275
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4276
 *   parsed entity or the literal entity value of an internal parsed entity
4277
 * - other characters are processed by appending them to the normalized value
4278
 * If the declared value is not CDATA, then the XML processor must further
4279
 * process the normalized attribute value by discarding any leading and
4280
 * trailing space (#x20) characters, and by replacing sequences of space
4281
 * (#x20) characters by a single space (#x20) character.
4282
 * All attributes for which no declaration has been read should be treated
4283
 * by a non-validating parser as if declared CDATA.
4284
 *
4285
 * Returns the AttValue parsed or NULL. The value has to be freed by the
4286
 *     caller if it was copied, this can be detected by val[*len] == 0.
4287
 */
4288
static xmlChar *
4289
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4290
1.21M
                         int normalize) {
4291
1.21M
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4292
0
                         XML_MAX_HUGE_LENGTH :
4293
1.21M
                         XML_MAX_TEXT_LENGTH;
4294
1.21M
    xmlSBuf buf;
4295
1.21M
    xmlChar *ret;
4296
1.21M
    int c, l, quote, flags, chunkSize;
4297
1.21M
    int inSpace = 1;
4298
4299
1.21M
    xmlSBufInit(&buf, maxLength);
4300
4301
1.21M
    GROW;
4302
4303
1.21M
    quote = CUR;
4304
1.21M
    if ((quote != '"') && (quote != '\'')) {
4305
51.4k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4306
51.4k
  return(NULL);
4307
51.4k
    }
4308
1.16M
    NEXTL(1);
4309
4310
1.16M
    if (ctxt->inSubset == 0)
4311
1.14M
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4312
25.4k
    else
4313
25.4k
        flags = XML_ENT_VALIDATED;
4314
4315
1.16M
    inSpace = 1;
4316
1.16M
    chunkSize = 0;
4317
4318
225M
    while (1) {
4319
225M
        if (PARSER_STOPPED(ctxt))
4320
250
            goto error;
4321
4322
225M
        if (CUR_PTR >= ctxt->input->end) {
4323
1.77k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4324
1.77k
                           "AttValue: ' expected\n");
4325
1.77k
            goto error;
4326
1.77k
        }
4327
4328
        /*
4329
         * TODO: Check growth threshold
4330
         */
4331
225M
        if (ctxt->input->end - CUR_PTR < 10)
4332
56.6k
            GROW;
4333
4334
225M
        c = CUR;
4335
4336
225M
        if (c >= 0x80) {
4337
153M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4338
153M
                    "invalid character in attribute value\n");
4339
153M
            if (l == 0) {
4340
26.2M
                if (chunkSize > 0) {
4341
362k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4342
362k
                    chunkSize = 0;
4343
362k
                }
4344
26.2M
                xmlSBufAddReplChar(&buf);
4345
26.2M
                NEXTL(1);
4346
127M
            } else {
4347
127M
                chunkSize += l;
4348
127M
                NEXTL(l);
4349
127M
            }
4350
4351
153M
            inSpace = 0;
4352
153M
        } else if (c != '&') {
4353
71.4M
            if (c > 0x20) {
4354
20.2M
                if (c == quote)
4355
1.16M
                    break;
4356
4357
19.1M
                if (c == '<')
4358
462k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4359
4360
19.1M
                chunkSize += 1;
4361
19.1M
                inSpace = 0;
4362
51.1M
            } else if (!IS_BYTE_CHAR(c)) {
4363
29.5M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4364
29.5M
                        "invalid character in attribute value\n");
4365
29.5M
                if (chunkSize > 0) {
4366
40.8k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4367
40.8k
                    chunkSize = 0;
4368
40.8k
                }
4369
29.5M
                xmlSBufAddReplChar(&buf);
4370
29.5M
                inSpace = 0;
4371
29.5M
            } else {
4372
                /* Whitespace */
4373
21.5M
                if ((normalize) && (inSpace)) {
4374
                    /* Skip char */
4375
1.70M
                    if (chunkSize > 0) {
4376
136k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4377
136k
                        chunkSize = 0;
4378
136k
                    }
4379
19.8M
                } else if (c < 0x20) {
4380
                    /* Convert to space */
4381
18.6M
                    if (chunkSize > 0) {
4382
101k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4383
101k
                        chunkSize = 0;
4384
101k
                    }
4385
4386
18.6M
                    xmlSBufAddCString(&buf, " ", 1);
4387
18.6M
                } else {
4388
1.21M
                    chunkSize += 1;
4389
1.21M
                }
4390
4391
21.5M
                inSpace = 1;
4392
4393
21.5M
                if ((c == 0xD) && (NXT(1) == 0xA))
4394
2.06k
                    CUR_PTR++;
4395
21.5M
            }
4396
4397
70.3M
            NEXTL(1);
4398
70.3M
        } else if (NXT(1) == '#') {
4399
27.5k
            int val;
4400
4401
27.5k
            if (chunkSize > 0) {
4402
12.9k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4403
12.9k
                chunkSize = 0;
4404
12.9k
            }
4405
4406
27.5k
            val = xmlParseCharRef(ctxt);
4407
27.5k
            if (val == 0)
4408
2.76k
                goto error;
4409
4410
24.7k
            if ((val == '&') && (!ctxt->replaceEntities)) {
4411
                /*
4412
                 * The reparsing will be done in xmlStringGetNodeList()
4413
                 * called by the attribute() function in SAX.c
4414
                 */
4415
2.99k
                xmlSBufAddCString(&buf, "&#38;", 5);
4416
2.99k
                inSpace = 0;
4417
21.7k
            } else if (val == ' ') {
4418
4.25k
                if ((!normalize) || (!inSpace))
4419
4.19k
                    xmlSBufAddCString(&buf, " ", 1);
4420
4.25k
                inSpace = 1;
4421
17.5k
            } else {
4422
17.5k
                xmlSBufAddChar(&buf, val);
4423
17.5k
                inSpace = 0;
4424
17.5k
            }
4425
620k
        } else {
4426
620k
            const xmlChar *name;
4427
620k
            xmlEntityPtr ent;
4428
4429
620k
            if (chunkSize > 0) {
4430
178k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4431
178k
                chunkSize = 0;
4432
178k
            }
4433
4434
620k
            name = xmlParseEntityRefInternal(ctxt);
4435
620k
            if (name == NULL) {
4436
                /*
4437
                 * Probably a literal '&' which wasn't escaped.
4438
                 * TODO: Handle gracefully in recovery mode.
4439
                 */
4440
247k
                continue;
4441
247k
            }
4442
4443
373k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4444
373k
            if (ent == NULL)
4445
77.7k
                continue;
4446
4447
295k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4448
21.5k
                if ((ent->content[0] == '&') && (!ctxt->replaceEntities))
4449
1.12k
                    xmlSBufAddCString(&buf, "&#38;", 5);
4450
20.3k
                else
4451
20.3k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4452
21.5k
                inSpace = 0;
4453
273k
            } else if (ctxt->replaceEntities) {
4454
243k
                xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4455
243k
                                          normalize, &inSpace, ctxt->inputNr,
4456
243k
                                          /* check */ 1);
4457
243k
            } else {
4458
30.8k
                if ((ent->flags & flags) != flags)
4459
362
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4460
4461
30.8k
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4462
42
                    ent->content[0] = 0;
4463
42
                    goto error;
4464
42
                }
4465
4466
                /*
4467
                 * Just output the reference
4468
                 */
4469
30.7k
                xmlSBufAddCString(&buf, "&", 1);
4470
30.7k
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4471
30.7k
                xmlSBufAddCString(&buf, ";", 1);
4472
4473
30.7k
                inSpace = 0;
4474
30.7k
            }
4475
295k
  }
4476
225M
    }
4477
4478
1.16M
    if ((buf.mem == NULL) && (alloc != NULL)) {
4479
1.04M
        ret = (xmlChar *) CUR_PTR - chunkSize;
4480
4481
1.04M
        if (attlen != NULL)
4482
1.04M
            *attlen = chunkSize;
4483
1.04M
        if ((normalize) && (inSpace) && (chunkSize > 0))
4484
318
            *attlen -= 1;
4485
1.04M
        *alloc = 0;
4486
4487
        /* Report potential error */
4488
1.04M
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4489
1.04M
    } else {
4490
115k
        if (chunkSize > 0)
4491
74.6k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4492
4493
115k
        if ((normalize) && (inSpace) && (buf.size > 0))
4494
678
            buf.size--;
4495
4496
115k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4497
4498
115k
        if (ret != NULL) {
4499
115k
            if (attlen != NULL)
4500
90.1k
                *attlen = buf.size;
4501
115k
            if (alloc != NULL)
4502
90.1k
                *alloc = 1;
4503
115k
        }
4504
115k
    }
4505
4506
1.16M
    NEXTL(1);
4507
4508
1.16M
    return(ret);
4509
4510
4.83k
error:
4511
4.83k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4512
4.83k
    return(NULL);
4513
1.16M
}
4514
4515
/**
4516
 * xmlParseAttValue:
4517
 * @ctxt:  an XML parser context
4518
 *
4519
 * DEPRECATED: Internal function, don't use.
4520
 *
4521
 * parse a value for an attribute
4522
 * Note: the parser won't do substitution of entities here, this
4523
 * will be handled later in xmlStringGetNodeList
4524
 *
4525
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4526
 *                   "'" ([^<&'] | Reference)* "'"
4527
 *
4528
 * 3.3.3 Attribute-Value Normalization:
4529
 * Before the value of an attribute is passed to the application or
4530
 * checked for validity, the XML processor must normalize it as follows:
4531
 * - a character reference is processed by appending the referenced
4532
 *   character to the attribute value
4533
 * - an entity reference is processed by recursively processing the
4534
 *   replacement text of the entity
4535
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4536
 *   appending #x20 to the normalized value, except that only a single
4537
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4538
 *   parsed entity or the literal entity value of an internal parsed entity
4539
 * - other characters are processed by appending them to the normalized value
4540
 * If the declared value is not CDATA, then the XML processor must further
4541
 * process the normalized attribute value by discarding any leading and
4542
 * trailing space (#x20) characters, and by replacing sequences of space
4543
 * (#x20) characters by a single space (#x20) character.
4544
 * All attributes for which no declaration has been read should be treated
4545
 * by a non-validating parser as if declared CDATA.
4546
 *
4547
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4548
 */
4549
4550
4551
xmlChar *
4552
26.8k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4553
26.8k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4554
26.8k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4555
26.8k
}
4556
4557
/**
4558
 * xmlParseSystemLiteral:
4559
 * @ctxt:  an XML parser context
4560
 *
4561
 * DEPRECATED: Internal function, don't use.
4562
 *
4563
 * parse an XML Literal
4564
 *
4565
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4566
 *
4567
 * Returns the SystemLiteral parsed or NULL
4568
 */
4569
4570
xmlChar *
4571
10.9k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4572
10.9k
    xmlChar *buf = NULL;
4573
10.9k
    int len = 0;
4574
10.9k
    int size = XML_PARSER_BUFFER_SIZE;
4575
10.9k
    int cur, l;
4576
10.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4577
0
                    XML_MAX_TEXT_LENGTH :
4578
10.9k
                    XML_MAX_NAME_LENGTH;
4579
10.9k
    xmlChar stop;
4580
4581
10.9k
    if (RAW == '"') {
4582
7.79k
        NEXT;
4583
7.79k
  stop = '"';
4584
7.79k
    } else if (RAW == '\'') {
4585
1.70k
        NEXT;
4586
1.70k
  stop = '\'';
4587
1.70k
    } else {
4588
1.45k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4589
1.45k
  return(NULL);
4590
1.45k
    }
4591
4592
9.49k
    buf = (xmlChar *) xmlMallocAtomic(size);
4593
9.49k
    if (buf == NULL) {
4594
2
        xmlErrMemory(ctxt);
4595
2
  return(NULL);
4596
2
    }
4597
9.49k
    cur = CUR_CHAR(l);
4598
1.64M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4599
1.63M
  if (len + 5 >= size) {
4600
5.35k
      xmlChar *tmp;
4601
4602
5.35k
      size *= 2;
4603
5.35k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4604
5.35k
      if (tmp == NULL) {
4605
1
          xmlFree(buf);
4606
1
    xmlErrMemory(ctxt);
4607
1
    return(NULL);
4608
1
      }
4609
5.35k
      buf = tmp;
4610
5.35k
  }
4611
1.63M
  COPY_BUF(buf, len, cur);
4612
1.63M
        if (len > maxLength) {
4613
8
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4614
8
            xmlFree(buf);
4615
8
            return(NULL);
4616
8
        }
4617
1.63M
  NEXTL(l);
4618
1.63M
  cur = CUR_CHAR(l);
4619
1.63M
    }
4620
9.48k
    buf[len] = 0;
4621
9.48k
    if (!IS_CHAR(cur)) {
4622
263
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4623
9.21k
    } else {
4624
9.21k
  NEXT;
4625
9.21k
    }
4626
9.48k
    return(buf);
4627
9.49k
}
4628
4629
/**
4630
 * xmlParsePubidLiteral:
4631
 * @ctxt:  an XML parser context
4632
 *
4633
 * DEPRECATED: Internal function, don't use.
4634
 *
4635
 * parse an XML public literal
4636
 *
4637
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4638
 *
4639
 * Returns the PubidLiteral parsed or NULL.
4640
 */
4641
4642
xmlChar *
4643
6.57k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4644
6.57k
    xmlChar *buf = NULL;
4645
6.57k
    int len = 0;
4646
6.57k
    int size = XML_PARSER_BUFFER_SIZE;
4647
6.57k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4648
0
                    XML_MAX_TEXT_LENGTH :
4649
6.57k
                    XML_MAX_NAME_LENGTH;
4650
6.57k
    xmlChar cur;
4651
6.57k
    xmlChar stop;
4652
4653
6.57k
    if (RAW == '"') {
4654
5.62k
        NEXT;
4655
5.62k
  stop = '"';
4656
5.62k
    } else if (RAW == '\'') {
4657
882
        NEXT;
4658
882
  stop = '\'';
4659
882
    } else {
4660
76
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4661
76
  return(NULL);
4662
76
    }
4663
6.50k
    buf = (xmlChar *) xmlMallocAtomic(size);
4664
6.50k
    if (buf == NULL) {
4665
2
  xmlErrMemory(ctxt);
4666
2
  return(NULL);
4667
2
    }
4668
6.50k
    cur = CUR;
4669
670k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4670
670k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4671
664k
  if (len + 1 >= size) {
4672
2.01k
      xmlChar *tmp;
4673
4674
2.01k
      size *= 2;
4675
2.01k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4676
2.01k
      if (tmp == NULL) {
4677
0
    xmlErrMemory(ctxt);
4678
0
    xmlFree(buf);
4679
0
    return(NULL);
4680
0
      }
4681
2.01k
      buf = tmp;
4682
2.01k
  }
4683
664k
  buf[len++] = cur;
4684
664k
        if (len > maxLength) {
4685
7
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4686
7
            xmlFree(buf);
4687
7
            return(NULL);
4688
7
        }
4689
664k
  NEXT;
4690
664k
  cur = CUR;
4691
664k
    }
4692
6.49k
    buf[len] = 0;
4693
6.49k
    if (cur != stop) {
4694
395
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4695
6.09k
    } else {
4696
6.09k
  NEXTL(1);
4697
6.09k
    }
4698
6.49k
    return(buf);
4699
6.50k
}
4700
4701
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4702
4703
/*
4704
 * used for the test in the inner loop of the char data testing
4705
 */
4706
static const unsigned char test_char_data[256] = {
    /*
     * Lookup table indexed by input byte. A non-zero entry (equal to
     * the byte itself) lets the scanning loop keep going; a zero entry
     * stops it. One row per 16 byte values.
     */
    /* 0x00-0x0F: only TAB (0x09) passes; LF and CR are zero */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10-0x1F: control characters, all stop */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20-0x2F: '&' (0x26) stops */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30-0x3F: '<' (0x3C) stops */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40-0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50-0x5F: ']' (0x5D) stops */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60-0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70-0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /*
     * 0x80-0xFF (non-ascii): not listed, so the remaining 128 entries
     * are implicitly initialized to 0 and always stop the scan.
     */
};
4740
4741
/**
4742
 * xmlParseCharDataInternal:
4743
 * @ctxt:  an XML parser context
4744
 * @partial:  buffer may contain partial UTF-8 sequences
4745
 *
4746
 * Parse character data. Always makes progress if the first char isn't
4747
 * '<' or '&'.
4748
 *
4749
 * The right angle bracket (>) may be represented using the string "&gt;",
4750
 * and must, for compatibility, be escaped using "&gt;" or a character
4751
 * reference when it appears in the string "]]>" in content, when that
4752
 * string is not marking the end of a CDATA section.
4753
 *
4754
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4755
 */
4756
static void
4757
10.0M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4758
10.0M
    const xmlChar *in;
4759
10.0M
    int nbchar = 0;
4760
10.0M
    int line = ctxt->input->line;
4761
10.0M
    int col = ctxt->input->col;
4762
10.0M
    int ccol;
4763
4764
10.0M
    GROW;
4765
    /*
4766
     * Accelerated common case where input don't need to be
4767
     * modified before passing it to the handler.
4768
     */
4769
10.0M
    in = ctxt->input->cur;
4770
10.3M
    do {
4771
10.6M
get_more_space:
4772
12.2M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4773
10.6M
        if (*in == 0xA) {
4774
5.77M
            do {
4775
5.77M
                ctxt->input->line++; ctxt->input->col = 1;
4776
5.77M
                in++;
4777
5.77M
            } while (*in == 0xA);
4778
355k
            goto get_more_space;
4779
355k
        }
4780
10.3M
        if (*in == '<') {
4781
203k
            nbchar = in - ctxt->input->cur;
4782
203k
            if (nbchar > 0) {
4783
203k
                const xmlChar *tmp = ctxt->input->cur;
4784
203k
                ctxt->input->cur = in;
4785
4786
203k
                if ((ctxt->sax != NULL) &&
4787
203k
                    (ctxt->disableSAX == 0) &&
4788
203k
                    (ctxt->sax->ignorableWhitespace !=
4789
187k
                     ctxt->sax->characters)) {
4790
0
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4791
0
                        if (ctxt->sax->ignorableWhitespace != NULL)
4792
0
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4793
0
                                                   tmp, nbchar);
4794
0
                    } else {
4795
0
                        if (ctxt->sax->characters != NULL)
4796
0
                            ctxt->sax->characters(ctxt->userData,
4797
0
                                                  tmp, nbchar);
4798
0
                        if (*ctxt->space == -1)
4799
0
                            *ctxt->space = -2;
4800
0
                    }
4801
203k
                } else if ((ctxt->sax != NULL) &&
4802
203k
                           (ctxt->disableSAX == 0) &&
4803
203k
                           (ctxt->sax->characters != NULL)) {
4804
187k
                    ctxt->sax->characters(ctxt->userData,
4805
187k
                                          tmp, nbchar);
4806
187k
                }
4807
203k
            }
4808
203k
            return;
4809
203k
        }
4810
4811
11.3M
get_more:
4812
11.3M
        ccol = ctxt->input->col;
4813
50.8M
        while (test_char_data[*in]) {
4814
39.4M
            in++;
4815
39.4M
            ccol++;
4816
39.4M
        }
4817
11.3M
        ctxt->input->col = ccol;
4818
11.3M
        if (*in == 0xA) {
4819
4.57M
            do {
4820
4.57M
                ctxt->input->line++; ctxt->input->col = 1;
4821
4.57M
                in++;
4822
4.57M
            } while (*in == 0xA);
4823
212k
            goto get_more;
4824
212k
        }
4825
11.1M
        if (*in == ']') {
4826
1.04M
            if ((in[1] == ']') && (in[2] == '>')) {
4827
2.97k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4828
2.97k
                ctxt->input->cur = in + 1;
4829
2.97k
                return;
4830
2.97k
            }
4831
1.04M
            in++;
4832
1.04M
            ctxt->input->col++;
4833
1.04M
            goto get_more;
4834
1.04M
        }
4835
10.0M
        nbchar = in - ctxt->input->cur;
4836
10.0M
        if (nbchar > 0) {
4837
2.62M
            if ((ctxt->sax != NULL) &&
4838
2.62M
                (ctxt->disableSAX == 0) &&
4839
2.62M
                (ctxt->sax->ignorableWhitespace !=
4840
2.46M
                 ctxt->sax->characters) &&
4841
2.62M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4842
0
                const xmlChar *tmp = ctxt->input->cur;
4843
0
                ctxt->input->cur = in;
4844
4845
0
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4846
0
                    if (ctxt->sax->ignorableWhitespace != NULL)
4847
0
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4848
0
                                                       tmp, nbchar);
4849
0
                } else {
4850
0
                    if (ctxt->sax->characters != NULL)
4851
0
                        ctxt->sax->characters(ctxt->userData,
4852
0
                                              tmp, nbchar);
4853
0
                    if (*ctxt->space == -1)
4854
0
                        *ctxt->space = -2;
4855
0
                }
4856
0
                line = ctxt->input->line;
4857
0
                col = ctxt->input->col;
4858
2.62M
            } else if ((ctxt->sax != NULL) &&
4859
2.62M
                       (ctxt->disableSAX == 0)) {
4860
2.46M
                if (ctxt->sax->characters != NULL)
4861
2.46M
                    ctxt->sax->characters(ctxt->userData,
4862
2.46M
                                          ctxt->input->cur, nbchar);
4863
2.46M
                line = ctxt->input->line;
4864
2.46M
                col = ctxt->input->col;
4865
2.46M
            }
4866
2.62M
        }
4867
10.0M
        ctxt->input->cur = in;
4868
10.0M
        if (*in == 0xD) {
4869
515k
            in++;
4870
515k
            if (*in == 0xA) {
4871
254k
                ctxt->input->cur = in;
4872
254k
                in++;
4873
254k
                ctxt->input->line++; ctxt->input->col = 1;
4874
254k
                continue; /* while */
4875
254k
            }
4876
261k
            in--;
4877
261k
        }
4878
9.84M
        if (*in == '<') {
4879
1.11M
            return;
4880
1.11M
        }
4881
8.72M
        if (*in == '&') {
4882
302k
            return;
4883
302k
        }
4884
8.41M
        SHRINK;
4885
8.41M
        GROW;
4886
8.41M
        in = ctxt->input->cur;
4887
8.67M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4888
8.67M
             (*in == 0x09) || (*in == 0x0a));
4889
8.41M
    ctxt->input->line = line;
4890
8.41M
    ctxt->input->col = col;
4891
8.41M
    xmlParseCharDataComplex(ctxt, partial);
4892
8.41M
}
4893
4894
/**
4895
 * xmlParseCharDataComplex:
4896
 * @ctxt:  an XML parser context
4897
 * @partial:  buffer may contain partial UTF-8 sequences
4898
 *
4899
 * Always makes progress if the first char isn't '<' or '&'.
4900
 *
4901
 * Parse a CharData section. This is the fallback function
4902
 * of xmlParseCharData() when the parsing requires handling
4903
 * of non-ASCII characters.
4904
 */
4905
static void
4906
8.41M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4907
8.41M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4908
8.41M
    int nbchar = 0;
4909
8.41M
    int cur, l;
4910
4911
8.41M
    cur = CUR_CHAR(l);
4912
109M
    while ((cur != '<') && /* checked */
4913
109M
           (cur != '&') &&
4914
109M
     (IS_CHAR(cur))) {
4915
100M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4916
4.31k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4917
4.31k
  }
4918
100M
  COPY_BUF(buf, nbchar, cur);
4919
  /* move current position before possible calling of ctxt->sax->characters */
4920
100M
  NEXTL(l);
4921
100M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4922
505k
      buf[nbchar] = 0;
4923
4924
      /*
4925
       * OK the segment is to be consumed as chars.
4926
       */
4927
505k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4928
329k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4929
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4930
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4931
0
                                     buf, nbchar);
4932
329k
    } else {
4933
329k
        if (ctxt->sax->characters != NULL)
4934
329k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4935
329k
        if ((ctxt->sax->characters !=
4936
329k
             ctxt->sax->ignorableWhitespace) &&
4937
329k
      (*ctxt->space == -1))
4938
0
      *ctxt->space = -2;
4939
329k
    }
4940
329k
      }
4941
505k
      nbchar = 0;
4942
505k
            SHRINK;
4943
505k
  }
4944
100M
  cur = CUR_CHAR(l);
4945
100M
    }
4946
8.41M
    if (nbchar != 0) {
4947
1.35M
        buf[nbchar] = 0;
4948
  /*
4949
   * OK the segment is to be consumed as chars.
4950
   */
4951
1.35M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4952
1.26M
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4953
0
    if (ctxt->sax->ignorableWhitespace != NULL)
4954
0
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4955
1.26M
      } else {
4956
1.26M
    if (ctxt->sax->characters != NULL)
4957
1.26M
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4958
1.26M
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4959
1.26M
        (*ctxt->space == -1))
4960
0
        *ctxt->space = -2;
4961
1.26M
      }
4962
1.26M
  }
4963
1.35M
    }
4964
    /*
4965
     * cur == 0 can mean
4966
     *
4967
     * - End of buffer.
4968
     * - An actual 0 character.
4969
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4970
     */
4971
8.41M
    if (ctxt->input->cur < ctxt->input->end) {
4972
8.41M
        if ((cur == 0) && (CUR != 0)) {
4973
1.91k
            if (partial == 0) {
4974
1.91k
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4975
1.91k
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4976
1.91k
                NEXTL(1);
4977
1.91k
            }
4978
8.40M
        } else if ((cur != '<') && (cur != '&')) {
4979
            /* Generate the error and skip the offending character */
4980
7.73M
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4981
7.73M
                              "PCDATA invalid Char value %d\n", cur);
4982
7.73M
            NEXTL(l);
4983
7.73M
        }
4984
8.41M
    }
4985
8.41M
}
4986
4987
/**
4988
 * xmlParseCharData:
4989
 * @ctxt:  an XML parser context
4990
 * @cdata:  unused
4991
 *
4992
 * DEPRECATED: Internal function, don't use.
4993
 */
4994
void
4995
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4996
0
    xmlParseCharDataInternal(ctxt, 0);
4997
0
}
4998
4999
/**
5000
 * xmlParseExternalID:
5001
 * @ctxt:  an XML parser context
5002
 * @publicID:  a xmlChar** receiving PubidLiteral
5003
 * @strict: indicate whether we should restrict parsing to only
5004
 *          production [75], see NOTE below
5005
 *
5006
 * DEPRECATED: Internal function, don't use.
5007
 *
5008
 * Parse an External ID or a Public ID
5009
 *
5010
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
5011
 *       'PUBLIC' S PubidLiteral S SystemLiteral
5012
 *
5013
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5014
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
5015
 *
5016
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5017
 *
5018
 * Returns the SystemLiteral; in the second
5019
 *                case publicID receives PubidLiteral, if strict is off
5020
 *                it is possible to return NULL and have publicID set.
5021
 */
5022
5023
xmlChar *
5024
30.0k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5025
30.0k
    xmlChar *URI = NULL;
5026
5027
30.0k
    *publicID = NULL;
5028
30.0k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5029
4.39k
        SKIP(6);
5030
4.39k
  if (SKIP_BLANKS == 0) {
5031
49
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5032
49
                     "Space required after 'SYSTEM'\n");
5033
49
  }
5034
4.39k
  URI = xmlParseSystemLiteral(ctxt);
5035
4.39k
  if (URI == NULL) {
5036
48
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5037
48
        }
5038
25.6k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5039
6.57k
        SKIP(6);
5040
6.57k
  if (SKIP_BLANKS == 0) {
5041
325
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5042
325
        "Space required after 'PUBLIC'\n");
5043
325
  }
5044
6.57k
  *publicID = xmlParsePubidLiteral(ctxt);
5045
6.57k
  if (*publicID == NULL) {
5046
85
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5047
85
  }
5048
6.57k
  if (strict) {
5049
      /*
5050
       * We don't handle [83] so "S SystemLiteral" is required.
5051
       */
5052
6.52k
      if (SKIP_BLANKS == 0) {
5053
1.31k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5054
1.31k
      "Space required after the Public Identifier\n");
5055
1.31k
      }
5056
6.52k
  } else {
5057
      /*
5058
       * We handle [83] so we return immediately, if
5059
       * "S SystemLiteral" is not detected. We skip blanks if no
5060
             * system literal was found, but this is harmless since we must
5061
             * be at the end of a NotationDecl.
5062
       */
5063
54
      if (SKIP_BLANKS == 0) return(NULL);
5064
31
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
5065
31
  }
5066
6.55k
  URI = xmlParseSystemLiteral(ctxt);
5067
6.55k
  if (URI == NULL) {
5068
1.41k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5069
1.41k
        }
5070
6.55k
    }
5071
29.9k
    return(URI);
5072
30.0k
}
5073
5074
/**
5075
 * xmlParseCommentComplex:
5076
 * @ctxt:  an XML parser context
5077
 * @buf:  the already parsed part of the buffer
5078
 * @len:  number of bytes in the buffer
5079
 * @size:  allocated size of the buffer
5080
 *
5081
 * Skip an XML (SGML) comment <!-- .... -->
5082
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5083
 *  must not occur within comments. "
5084
 * This is the slow routine in case the accelerator for ascii didn't work
5085
 *
5086
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5087
 */
5088
static void
5089
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5090
49.2k
                       size_t len, size_t size) {
5091
49.2k
    int q, ql;
5092
49.2k
    int r, rl;
5093
49.2k
    int cur, l;
5094
49.2k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5095
0
                       XML_MAX_HUGE_LENGTH :
5096
49.2k
                       XML_MAX_TEXT_LENGTH;
5097
5098
49.2k
    if (buf == NULL) {
5099
30.5k
        len = 0;
5100
30.5k
  size = XML_PARSER_BUFFER_SIZE;
5101
30.5k
  buf = (xmlChar *) xmlMallocAtomic(size);
5102
30.5k
  if (buf == NULL) {
5103
0
      xmlErrMemory(ctxt);
5104
0
      return;
5105
0
  }
5106
30.5k
    }
5107
49.2k
    q = CUR_CHAR(ql);
5108
49.2k
    if (q == 0)
5109
17.4k
        goto not_terminated;
5110
31.8k
    if (!IS_CHAR(q)) {
5111
9.61k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5112
9.61k
                          "xmlParseComment: invalid xmlChar value %d\n",
5113
9.61k
                    q);
5114
9.61k
  xmlFree (buf);
5115
9.61k
  return;
5116
9.61k
    }
5117
22.1k
    NEXTL(ql);
5118
22.1k
    r = CUR_CHAR(rl);
5119
22.1k
    if (r == 0)
5120
2.56k
        goto not_terminated;
5121
19.6k
    if (!IS_CHAR(r)) {
5122
1.77k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5123
1.77k
                          "xmlParseComment: invalid xmlChar value %d\n",
5124
1.77k
                    r);
5125
1.77k
  xmlFree (buf);
5126
1.77k
  return;
5127
1.77k
    }
5128
17.8k
    NEXTL(rl);
5129
17.8k
    cur = CUR_CHAR(l);
5130
17.8k
    if (cur == 0)
5131
3.45k
        goto not_terminated;
5132
33.2M
    while (IS_CHAR(cur) && /* checked */
5133
33.2M
           ((cur != '>') ||
5134
33.2M
      (r != '-') || (q != '-'))) {
5135
33.2M
  if ((r == '-') && (q == '-')) {
5136
455k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5137
455k
  }
5138
33.2M
  if (len + 5 >= size) {
5139
7.46k
      xmlChar *new_buf;
5140
7.46k
            size_t new_size;
5141
5142
7.46k
      new_size = size * 2;
5143
7.46k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
5144
7.46k
      if (new_buf == NULL) {
5145
2
    xmlFree (buf);
5146
2
    xmlErrMemory(ctxt);
5147
2
    return;
5148
2
      }
5149
7.46k
      buf = new_buf;
5150
7.46k
            size = new_size;
5151
7.46k
  }
5152
33.2M
  COPY_BUF(buf, len, q);
5153
33.2M
        if (len > maxLength) {
5154
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5155
0
                         "Comment too big found", NULL);
5156
0
            xmlFree (buf);
5157
0
            return;
5158
0
        }
5159
5160
33.2M
  q = r;
5161
33.2M
  ql = rl;
5162
33.2M
  r = cur;
5163
33.2M
  rl = l;
5164
5165
33.2M
  NEXTL(l);
5166
33.2M
  cur = CUR_CHAR(l);
5167
5168
33.2M
    }
5169
14.3k
    buf[len] = 0;
5170
14.3k
    if (cur == 0) {
5171
4.43k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5172
4.43k
                       "Comment not terminated \n<!--%.50s\n", buf);
5173
9.96k
    } else if (!IS_CHAR(cur)) {
5174
3.59k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5175
3.59k
                          "xmlParseComment: invalid xmlChar value %d\n",
5176
3.59k
                    cur);
5177
6.37k
    } else {
5178
6.37k
        NEXT;
5179
6.37k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5180
6.37k
      (!ctxt->disableSAX))
5181
5.96k
      ctxt->sax->comment(ctxt->userData, buf);
5182
6.37k
    }
5183
14.3k
    xmlFree(buf);
5184
14.3k
    return;
5185
23.5k
not_terminated:
5186
23.5k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5187
23.5k
       "Comment not terminated\n", NULL);
5188
23.5k
    xmlFree(buf);
5189
23.5k
    return;
5190
14.3k
}
5191
5192
/**
5193
 * xmlParseComment:
5194
 * @ctxt:  an XML parser context
5195
 *
5196
 * DEPRECATED: Internal function, don't use.
5197
 *
5198
 * Parse an XML (SGML) comment. Always consumes '<!'.
5199
 *
5200
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5201
 *  must not occur within comments. "
5202
 *
5203
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5204
 */
5205
void
5206
85.4k
xmlParseComment(xmlParserCtxtPtr ctxt) {
5207
85.4k
    xmlChar *buf = NULL;
5208
85.4k
    size_t size = XML_PARSER_BUFFER_SIZE;
5209
85.4k
    size_t len = 0;
5210
85.4k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5211
0
                       XML_MAX_HUGE_LENGTH :
5212
85.4k
                       XML_MAX_TEXT_LENGTH;
5213
85.4k
    const xmlChar *in;
5214
85.4k
    size_t nbchar = 0;
5215
85.4k
    int ccol;
5216
5217
    /*
5218
     * Check that there is a comment right here.
5219
     */
5220
85.4k
    if ((RAW != '<') || (NXT(1) != '!'))
5221
0
        return;
5222
85.4k
    SKIP(2);
5223
85.4k
    if ((RAW != '-') || (NXT(1) != '-'))
5224
5
        return;
5225
85.4k
    SKIP(2);
5226
85.4k
    GROW;
5227
5228
    /*
5229
     * Accelerated common case where input don't need to be
5230
     * modified before passing it to the handler.
5231
     */
5232
85.4k
    in = ctxt->input->cur;
5233
85.8k
    do {
5234
85.8k
  if (*in == 0xA) {
5235
12.7k
      do {
5236
12.7k
    ctxt->input->line++; ctxt->input->col = 1;
5237
12.7k
    in++;
5238
12.7k
      } while (*in == 0xA);
5239
7.05k
  }
5240
1.04M
get_more:
5241
1.04M
        ccol = ctxt->input->col;
5242
3.84M
  while (((*in > '-') && (*in <= 0x7F)) ||
5243
3.84M
         ((*in >= 0x20) && (*in < '-')) ||
5244
3.84M
         (*in == 0x09)) {
5245
2.80M
        in++;
5246
2.80M
        ccol++;
5247
2.80M
  }
5248
1.04M
  ctxt->input->col = ccol;
5249
1.04M
  if (*in == 0xA) {
5250
164k
      do {
5251
164k
    ctxt->input->line++; ctxt->input->col = 1;
5252
164k
    in++;
5253
164k
      } while (*in == 0xA);
5254
16.1k
      goto get_more;
5255
16.1k
  }
5256
1.02M
  nbchar = in - ctxt->input->cur;
5257
  /*
5258
   * save current set of data
5259
   */
5260
1.02M
  if (nbchar > 0) {
5261
986k
            if (buf == NULL) {
5262
53.4k
                if ((*in == '-') && (in[1] == '-'))
5263
23.5k
                    size = nbchar + 1;
5264
29.8k
                else
5265
29.8k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5266
53.4k
                buf = (xmlChar *) xmlMallocAtomic(size);
5267
53.4k
                if (buf == NULL) {
5268
3
                    xmlErrMemory(ctxt);
5269
3
                    return;
5270
3
                }
5271
53.4k
                len = 0;
5272
933k
            } else if (len + nbchar + 1 >= size) {
5273
3.94k
                xmlChar *new_buf;
5274
3.94k
                size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5275
3.94k
                new_buf = (xmlChar *) xmlRealloc(buf, size);
5276
3.94k
                if (new_buf == NULL) {
5277
0
                    xmlFree (buf);
5278
0
                    xmlErrMemory(ctxt);
5279
0
                    return;
5280
0
                }
5281
3.94k
                buf = new_buf;
5282
3.94k
            }
5283
986k
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5284
986k
            len += nbchar;
5285
986k
            buf[len] = 0;
5286
986k
  }
5287
1.02M
        if (len > maxLength) {
5288
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5289
0
                         "Comment too big found", NULL);
5290
0
            xmlFree (buf);
5291
0
            return;
5292
0
        }
5293
1.02M
  ctxt->input->cur = in;
5294
1.02M
  if (*in == 0xA) {
5295
0
      in++;
5296
0
      ctxt->input->line++; ctxt->input->col = 1;
5297
0
  }
5298
1.02M
  if (*in == 0xD) {
5299
3.55k
      in++;
5300
3.55k
      if (*in == 0xA) {
5301
1.99k
    ctxt->input->cur = in;
5302
1.99k
    in++;
5303
1.99k
    ctxt->input->line++; ctxt->input->col = 1;
5304
1.99k
    goto get_more;
5305
1.99k
      }
5306
1.56k
      in--;
5307
1.56k
  }
5308
1.02M
  SHRINK;
5309
1.02M
  GROW;
5310
1.02M
  in = ctxt->input->cur;
5311
1.02M
  if (*in == '-') {
5312
972k
      if (in[1] == '-') {
5313
921k
          if (in[2] == '>') {
5314
36.1k
        SKIP(3);
5315
36.1k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5316
36.1k
            (!ctxt->disableSAX)) {
5317
31.8k
      if (buf != NULL)
5318
30.3k
          ctxt->sax->comment(ctxt->userData, buf);
5319
1.45k
      else
5320
1.45k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5321
31.8k
        }
5322
36.1k
        if (buf != NULL)
5323
34.6k
            xmlFree(buf);
5324
36.1k
        return;
5325
36.1k
    }
5326
885k
    if (buf != NULL) {
5327
883k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5328
883k
                          "Double hyphen within comment: "
5329
883k
                                      "<!--%.50s\n",
5330
883k
              buf);
5331
883k
    } else
5332
1.82k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5333
1.82k
                          "Double hyphen within comment\n", NULL);
5334
885k
    in++;
5335
885k
    ctxt->input->col++;
5336
885k
      }
5337
936k
      in++;
5338
936k
      ctxt->input->col++;
5339
936k
      goto get_more;
5340
972k
  }
5341
1.02M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5342
49.2k
    xmlParseCommentComplex(ctxt, buf, len, size);
5343
49.2k
    return;
5344
85.4k
}
5345
5346
5347
/**
5348
 * xmlParsePITarget:
5349
 * @ctxt:  an XML parser context
5350
 *
5351
 * DEPRECATED: Internal function, don't use.
5352
 *
5353
 * parse the name of a PI
5354
 *
5355
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5356
 *
5357
 * Returns the PITarget name or NULL
5358
 */
5359
5360
const xmlChar *
5361
55.3k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5362
55.3k
    const xmlChar *name;
5363
5364
55.3k
    name = xmlParseName(ctxt);
5365
55.3k
    if ((name != NULL) &&
5366
55.3k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5367
55.3k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5368
55.3k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5369
5.72k
  int i;
5370
5.72k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5371
5.72k
      (name[2] == 'l') && (name[3] == 0)) {
5372
988
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5373
988
     "XML declaration allowed only at the start of the document\n");
5374
988
      return(name);
5375
4.73k
  } else if (name[3] == 0) {
5376
762
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5377
762
      return(name);
5378
762
  }
5379
11.4k
  for (i = 0;;i++) {
5380
11.4k
      if (xmlW3CPIs[i] == NULL) break;
5381
7.91k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5382
395
          return(name);
5383
7.91k
  }
5384
3.58k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5385
3.58k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5386
3.58k
          NULL, NULL);
5387
3.58k
    }
5388
53.2k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5389
3.10k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5390
3.10k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5391
3.10k
    }
5392
53.2k
    return(name);
5393
55.3k
}
5394
5395
#ifdef LIBXML_CATALOG_ENABLED
5396
/**
5397
 * xmlParseCatalogPI:
5398
 * @ctxt:  an XML parser context
5399
 * @catalog:  the PI value string
5400
 *
5401
 * parse an XML Catalog Processing Instruction.
5402
 *
5403
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5404
 *
5405
 * Occurs only if allowed by the user and if happening in the Misc
5406
 * part of the document before any doctype information
5407
 * This will add the given catalog to the parsing context in order
5408
 * to be used if there is a resolution need further down in the document
5409
 */
5410
5411
static void
5412
6.12k
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5413
6.12k
    xmlChar *URL = NULL;
5414
6.12k
    const xmlChar *tmp, *base;
5415
6.12k
    xmlChar marker;
5416
5417
6.12k
    tmp = catalog;
5418
6.12k
    while (IS_BLANK_CH(*tmp)) tmp++;
5419
6.12k
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5420
1.16k
  goto error;
5421
4.95k
    tmp += 7;
5422
9.23k
    while (IS_BLANK_CH(*tmp)) tmp++;
5423
4.95k
    if (*tmp != '=') {
5424
1.70k
  return;
5425
1.70k
    }
5426
3.24k
    tmp++;
5427
3.24k
    while (IS_BLANK_CH(*tmp)) tmp++;
5428
3.24k
    marker = *tmp;
5429
3.24k
    if ((marker != '\'') && (marker != '"'))
5430
915
  goto error;
5431
2.33k
    tmp++;
5432
2.33k
    base = tmp;
5433
25.1k
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5434
2.33k
    if (*tmp == 0)
5435
823
  goto error;
5436
1.51k
    URL = xmlStrndup(base, tmp - base);
5437
1.51k
    tmp++;
5438
1.51k
    while (IS_BLANK_CH(*tmp)) tmp++;
5439
1.51k
    if (*tmp != 0)
5440
813
  goto error;
5441
5442
697
    if (URL != NULL) {
5443
        /*
5444
         * Unfortunately, the catalog API doesn't report OOM errors.
5445
         * xmlGetLastError isn't very helpful since we don't know
5446
         * where the last error came from. We'd have to reset it
5447
         * before this call and restore it afterwards.
5448
         */
5449
697
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5450
697
  xmlFree(URL);
5451
697
    }
5452
697
    return;
5453
5454
3.71k
error:
5455
3.71k
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5456
3.71k
            "Catalog PI syntax error: %s\n",
5457
3.71k
      catalog, NULL);
5458
3.71k
    if (URL != NULL)
5459
813
  xmlFree(URL);
5460
3.71k
}
5461
#endif
5462
5463
/**
5464
 * xmlParsePI:
5465
 * @ctxt:  an XML parser context
5466
 *
5467
 * DEPRECATED: Internal function, don't use.
5468
 *
5469
 * parse an XML Processing Instruction.
5470
 *
5471
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5472
 *
5473
 * The processing is transferred to SAX once parsed.
5474
 */
5475
5476
void
5477
55.3k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5478
55.3k
    xmlChar *buf = NULL;
5479
55.3k
    size_t len = 0;
5480
55.3k
    size_t size = XML_PARSER_BUFFER_SIZE;
5481
55.3k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5482
0
                       XML_MAX_HUGE_LENGTH :
5483
55.3k
                       XML_MAX_TEXT_LENGTH;
5484
55.3k
    int cur, l;
5485
55.3k
    const xmlChar *target;
5486
5487
55.3k
    if ((RAW == '<') && (NXT(1) == '?')) {
5488
  /*
5489
   * this is a Processing Instruction.
5490
   */
5491
55.3k
  SKIP(2);
5492
5493
  /*
5494
   * Parse the target name and check for special support like
5495
   * namespace.
5496
   */
5497
55.3k
        target = xmlParsePITarget(ctxt);
5498
55.3k
  if (target != NULL) {
5499
49.6k
      if ((RAW == '?') && (NXT(1) == '>')) {
5500
12.6k
    SKIP(2);
5501
5502
    /*
5503
     * SAX: PI detected.
5504
     */
5505
12.6k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5506
12.6k
        (ctxt->sax->processingInstruction != NULL))
5507
12.3k
        ctxt->sax->processingInstruction(ctxt->userData,
5508
12.3k
                                         target, NULL);
5509
12.6k
    return;
5510
12.6k
      }
5511
37.0k
      buf = (xmlChar *) xmlMallocAtomic(size);
5512
37.0k
      if (buf == NULL) {
5513
2
    xmlErrMemory(ctxt);
5514
2
    return;
5515
2
      }
5516
37.0k
      if (SKIP_BLANKS == 0) {
5517
15.3k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5518
15.3k
        "ParsePI: PI %s space expected\n", target);
5519
15.3k
      }
5520
37.0k
      cur = CUR_CHAR(l);
5521
48.2M
      while (IS_CHAR(cur) && /* checked */
5522
48.2M
       ((cur != '?') || (NXT(1) != '>'))) {
5523
48.1M
    if (len + 5 >= size) {
5524
10.8k
        xmlChar *tmp;
5525
10.8k
                    size_t new_size = size * 2;
5526
10.8k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5527
10.8k
        if (tmp == NULL) {
5528
1
      xmlErrMemory(ctxt);
5529
1
      xmlFree(buf);
5530
1
      return;
5531
1
        }
5532
10.8k
        buf = tmp;
5533
10.8k
                    size = new_size;
5534
10.8k
    }
5535
48.1M
    COPY_BUF(buf, len, cur);
5536
48.1M
                if (len > maxLength) {
5537
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5538
0
                                      "PI %s too big found", target);
5539
0
                    xmlFree(buf);
5540
0
                    return;
5541
0
                }
5542
48.1M
    NEXTL(l);
5543
48.1M
    cur = CUR_CHAR(l);
5544
48.1M
      }
5545
37.0k
      buf[len] = 0;
5546
37.0k
      if (cur != '?') {
5547
11.2k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5548
11.2k
          "ParsePI: PI %s never end ...\n", target);
5549
25.7k
      } else {
5550
25.7k
    SKIP(2);
5551
5552
25.7k
#ifdef LIBXML_CATALOG_ENABLED
5553
25.7k
    if ((ctxt->inSubset == 0) &&
5554
25.7k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5555
6.12k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5556
6.12k
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5557
6.12k
      (allow == XML_CATA_ALLOW_ALL))
5558
6.12k
      xmlParseCatalogPI(ctxt, buf);
5559
6.12k
    }
5560
25.7k
#endif
5561
5562
5563
    /*
5564
     * SAX: PI detected.
5565
     */
5566
25.7k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5567
25.7k
        (ctxt->sax->processingInstruction != NULL))
5568
21.4k
        ctxt->sax->processingInstruction(ctxt->userData,
5569
21.4k
                                         target, buf);
5570
25.7k
      }
5571
37.0k
      xmlFree(buf);
5572
37.0k
  } else {
5573
5.68k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5574
5.68k
  }
5575
55.3k
    }
5576
55.3k
}
5577
5578
/**
5579
 * xmlParseNotationDecl:
5580
 * @ctxt:  an XML parser context
5581
 *
5582
 * DEPRECATED: Internal function, don't use.
5583
 *
5584
 * Parse a notation declaration. Always consumes '<!'.
5585
 *
5586
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5587
 *
5588
 * Hence there is actually 3 choices:
5589
 *     'PUBLIC' S PubidLiteral
5590
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5591
 * and 'SYSTEM' S SystemLiteral
5592
 *
5593
 * See the NOTE on xmlParseExternalID().
5594
 */
5595
5596
void
5597
1.21k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5598
1.21k
    const xmlChar *name;
5599
1.21k
    xmlChar *Pubid;
5600
1.21k
    xmlChar *Systemid;
5601
5602
1.21k
    if ((CUR != '<') || (NXT(1) != '!'))
5603
0
        return;
5604
1.21k
    SKIP(2);
5605
5606
1.21k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5607
1.08k
  int inputid = ctxt->input->id;
5608
1.08k
  SKIP(8);
5609
1.08k
  if (SKIP_BLANKS_PE == 0) {
5610
128
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5611
128
         "Space required after '<!NOTATION'\n");
5612
128
      return;
5613
128
  }
5614
5615
959
        name = xmlParseName(ctxt);
5616
959
  if (name == NULL) {
5617
23
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5618
23
      return;
5619
23
  }
5620
936
  if (xmlStrchr(name, ':') != NULL) {
5621
183
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5622
183
         "colons are forbidden from notation names '%s'\n",
5623
183
         name, NULL, NULL);
5624
183
  }
5625
936
  if (SKIP_BLANKS_PE == 0) {
5626
283
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5627
283
         "Space required after the NOTATION name'\n");
5628
283
      return;
5629
283
  }
5630
5631
  /*
5632
   * Parse the IDs.
5633
   */
5634
653
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5635
653
  SKIP_BLANKS_PE;
5636
5637
653
  if (RAW == '>') {
5638
635
      if (inputid != ctxt->input->id) {
5639
4
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5640
4
                         "Notation declaration doesn't start and stop"
5641
4
                               " in the same entity\n");
5642
4
      }
5643
635
      NEXT;
5644
635
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5645
635
    (ctxt->sax->notationDecl != NULL))
5646
254
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5647
635
  } else {
5648
18
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5649
18
  }
5650
653
  if (Systemid != NULL) xmlFree(Systemid);
5651
653
  if (Pubid != NULL) xmlFree(Pubid);
5652
653
    }
5653
1.21k
}
5654
5655
/**
5656
 * xmlParseEntityDecl:
5657
 * @ctxt:  an XML parser context
5658
 *
5659
 * DEPRECATED: Internal function, don't use.
5660
 *
5661
 * Parse an entity declaration. Always consumes '<!'.
5662
 *
5663
 * [70] EntityDecl ::= GEDecl | PEDecl
5664
 *
5665
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5666
 *
5667
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5668
 *
5669
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5670
 *
5671
 * [74] PEDef ::= EntityValue | ExternalID
5672
 *
5673
 * [76] NDataDecl ::= S 'NDATA' S Name
5674
 *
5675
 * [ VC: Notation Declared ]
5676
 * The Name must match the declared name of a notation.
5677
 */
5678
5679
void
5680
58.3k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5681
58.3k
    const xmlChar *name = NULL;
5682
58.3k
    xmlChar *value = NULL;
5683
58.3k
    xmlChar *URI = NULL, *literal = NULL;
5684
58.3k
    const xmlChar *ndata = NULL;
5685
58.3k
    int isParameter = 0;
5686
58.3k
    xmlChar *orig = NULL;
5687
5688
58.3k
    if ((CUR != '<') || (NXT(1) != '!'))
5689
0
        return;
5690
58.3k
    SKIP(2);
5691
5692
    /* GROW; done in the caller */
5693
58.3k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5694
57.8k
  int inputid = ctxt->input->id;
5695
57.8k
  SKIP(6);
5696
57.8k
  if (SKIP_BLANKS_PE == 0) {
5697
11.8k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5698
11.8k
         "Space required after '<!ENTITY'\n");
5699
11.8k
  }
5700
5701
57.8k
  if (RAW == '%') {
5702
11.6k
      NEXT;
5703
11.6k
      if (SKIP_BLANKS_PE == 0) {
5704
8.35k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5705
8.35k
             "Space required after '%%'\n");
5706
8.35k
      }
5707
11.6k
      isParameter = 1;
5708
11.6k
  }
5709
5710
57.8k
        name = xmlParseName(ctxt);
5711
57.8k
  if (name == NULL) {
5712
172
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5713
172
                     "xmlParseEntityDecl: no name\n");
5714
172
            return;
5715
172
  }
5716
57.6k
  if (xmlStrchr(name, ':') != NULL) {
5717
630
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5718
630
         "colons are forbidden from entities names '%s'\n",
5719
630
         name, NULL, NULL);
5720
630
  }
5721
57.6k
  if (SKIP_BLANKS_PE == 0) {
5722
23.8k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5723
23.8k
         "Space required after the entity name\n");
5724
23.8k
  }
5725
5726
  /*
5727
   * handle the various case of definitions...
5728
   */
5729
57.6k
  if (isParameter) {
5730
11.4k
      if ((RAW == '"') || (RAW == '\'')) {
5731
6.46k
          value = xmlParseEntityValue(ctxt, &orig);
5732
6.46k
    if (value) {
5733
6.10k
        if ((ctxt->sax != NULL) &&
5734
6.10k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5735
4.50k
      ctxt->sax->entityDecl(ctxt->userData, name,
5736
4.50k
                        XML_INTERNAL_PARAMETER_ENTITY,
5737
4.50k
            NULL, NULL, value);
5738
6.10k
    }
5739
6.46k
      } else {
5740
5.02k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5741
5.02k
    if ((URI == NULL) && (literal == NULL)) {
5742
84
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5743
84
    }
5744
5.02k
    if (URI) {
5745
3.94k
        xmlURIPtr uri;
5746
5747
3.94k
                    if (xmlParseURISafe((const char *) URI, &uri) < 0) {
5748
1
                        xmlErrMemory(ctxt);
5749
3.94k
                    } else if (uri == NULL) {
5750
                        /*
5751
                         * This really ought to be a well formedness error
5752
                         * but the XML Core WG decided otherwise c.f. issue
5753
                         * E26 of the XML erratas.
5754
                         */
5755
1.62k
                        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5756
1.62k
                                     "Invalid URI: %s\n", URI);
5757
2.32k
                    } else if (uri->fragment != NULL) {
5758
                        /*
5759
                         * Okay this is foolish to block those but not
5760
                         * invalid URIs.
5761
                         */
5762
39
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5763
2.28k
                    } else {
5764
2.28k
                        if ((ctxt->sax != NULL) &&
5765
2.28k
                            (!ctxt->disableSAX) &&
5766
2.28k
                            (ctxt->sax->entityDecl != NULL))
5767
2.28k
                            ctxt->sax->entityDecl(ctxt->userData, name,
5768
2.28k
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5769
2.28k
                                        literal, URI, NULL);
5770
2.28k
                    }
5771
3.94k
        xmlFreeURI(uri);
5772
3.94k
    }
5773
5.02k
      }
5774
46.1k
  } else {
5775
46.1k
      if ((RAW == '"') || (RAW == '\'')) {
5776
43.1k
          value = xmlParseEntityValue(ctxt, &orig);
5777
43.1k
    if ((ctxt->sax != NULL) &&
5778
43.1k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5779
38.0k
        ctxt->sax->entityDecl(ctxt->userData, name,
5780
38.0k
        XML_INTERNAL_GENERAL_ENTITY,
5781
38.0k
        NULL, NULL, value);
5782
    /*
5783
     * For expat compatibility in SAX mode.
5784
     */
5785
43.1k
    if ((ctxt->myDoc == NULL) ||
5786
43.1k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5787
4.22k
        if (ctxt->myDoc == NULL) {
5788
1.61k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5789
1.61k
      if (ctxt->myDoc == NULL) {
5790
3
          xmlErrMemory(ctxt);
5791
3
          goto done;
5792
3
      }
5793
1.61k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5794
1.61k
        }
5795
4.21k
        if (ctxt->myDoc->intSubset == NULL) {
5796
1.61k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5797
1.61k
              BAD_CAST "fake", NULL, NULL);
5798
1.61k
                        if (ctxt->myDoc->intSubset == NULL) {
5799
2
                            xmlErrMemory(ctxt);
5800
2
                            goto done;
5801
2
                        }
5802
1.61k
                    }
5803
5804
4.21k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5805
4.21k
                    NULL, NULL, value);
5806
4.21k
    }
5807
43.1k
      } else {
5808
3.04k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5809
3.04k
    if ((URI == NULL) && (literal == NULL)) {
5810
1.51k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5811
1.51k
    }
5812
3.04k
    if (URI) {
5813
1.39k
        xmlURIPtr uri;
5814
5815
1.39k
                    if (xmlParseURISafe((const char *) URI, &uri) < 0) {
5816
1
                        xmlErrMemory(ctxt);
5817
1.39k
                    } else if (uri == NULL) {
5818
                        /*
5819
                         * This really ought to be a well formedness error
5820
                         * but the XML Core WG decided otherwise c.f. issue
5821
                         * E26 of the XML erratas.
5822
                         */
5823
133
                        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5824
133
                                     "Invalid URI: %s\n", URI);
5825
1.26k
                    } else if (uri->fragment != NULL) {
5826
                        /*
5827
                         * Okay this is foolish to block those but not
5828
                         * invalid URIs.
5829
                         */
5830
8
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5831
8
                    }
5832
1.39k
                    xmlFreeURI(uri);
5833
1.39k
    }
5834
3.04k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5835
754
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5836
754
           "Space required before 'NDATA'\n");
5837
754
    }
5838
3.04k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5839
67
        SKIP(5);
5840
67
        if (SKIP_BLANKS_PE == 0) {
5841
21
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5842
21
               "Space required after 'NDATA'\n");
5843
21
        }
5844
67
        ndata = xmlParseName(ctxt);
5845
67
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5846
67
            (ctxt->sax->unparsedEntityDecl != NULL))
5847
25
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5848
25
            literal, URI, ndata);
5849
2.97k
    } else {
5850
2.97k
        if ((ctxt->sax != NULL) &&
5851
2.97k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5852
2.16k
      ctxt->sax->entityDecl(ctxt->userData, name,
5853
2.16k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5854
2.16k
            literal, URI, NULL);
5855
        /*
5856
         * For expat compatibility in SAX mode.
5857
         * assuming the entity replacement was asked for
5858
         */
5859
2.97k
        if ((ctxt->replaceEntities != 0) &&
5860
2.97k
      ((ctxt->myDoc == NULL) ||
5861
1.39k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5862
442
      if (ctxt->myDoc == NULL) {
5863
419
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5864
419
          if (ctxt->myDoc == NULL) {
5865
1
              xmlErrMemory(ctxt);
5866
1
        goto done;
5867
1
          }
5868
418
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5869
418
      }
5870
5871
441
      if (ctxt->myDoc->intSubset == NULL) {
5872
418
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5873
418
            BAD_CAST "fake", NULL, NULL);
5874
418
                            if (ctxt->myDoc->intSubset == NULL) {
5875
0
                                xmlErrMemory(ctxt);
5876
0
                                goto done;
5877
0
                            }
5878
418
                        }
5879
441
      xmlSAX2EntityDecl(ctxt, name,
5880
441
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5881
441
                  literal, URI, NULL);
5882
441
        }
5883
2.97k
    }
5884
3.04k
      }
5885
46.1k
  }
5886
57.6k
  SKIP_BLANKS_PE;
5887
57.6k
  if (RAW != '>') {
5888
2.78k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5889
2.78k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5890
2.78k
      xmlHaltParser(ctxt);
5891
54.8k
  } else {
5892
54.8k
      if (inputid != ctxt->input->id) {
5893
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5894
0
                         "Entity declaration doesn't start and stop in"
5895
0
                               " the same entity\n");
5896
0
      }
5897
54.8k
      NEXT;
5898
54.8k
  }
5899
57.6k
  if (orig != NULL) {
5900
      /*
5901
       * Ugly mechanism to save the raw entity value.
5902
       */
5903
48.9k
      xmlEntityPtr cur = NULL;
5904
5905
48.9k
      if (isParameter) {
5906
6.10k
          if ((ctxt->sax != NULL) &&
5907
6.10k
        (ctxt->sax->getParameterEntity != NULL))
5908
6.10k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5909
42.8k
      } else {
5910
42.8k
          if ((ctxt->sax != NULL) &&
5911
42.8k
        (ctxt->sax->getEntity != NULL))
5912
42.8k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5913
42.8k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5914
804
        cur = xmlSAX2GetEntity(ctxt, name);
5915
804
    }
5916
42.8k
      }
5917
48.9k
            if ((cur != NULL) && (cur->orig == NULL)) {
5918
6.80k
    cur->orig = orig;
5919
6.80k
                orig = NULL;
5920
6.80k
      }
5921
48.9k
  }
5922
5923
57.6k
done:
5924
57.6k
  if (value != NULL) xmlFree(value);
5925
57.6k
  if (URI != NULL) xmlFree(URI);
5926
57.6k
  if (literal != NULL) xmlFree(literal);
5927
57.6k
        if (orig != NULL) xmlFree(orig);
5928
57.6k
    }
5929
58.3k
}
5930
5931
/**
5932
 * xmlParseDefaultDecl:
5933
 * @ctxt:  an XML parser context
5934
 * @value:  Receive a possible fixed default value for the attribute
5935
 *
5936
 * DEPRECATED: Internal function, don't use.
5937
 *
5938
 * Parse an attribute default declaration
5939
 *
5940
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5941
 *
5942
 * [ VC: Required Attribute ]
5943
 * if the default declaration is the keyword #REQUIRED, then the
5944
 * attribute must be specified for all elements of the type in the
5945
 * attribute-list declaration.
5946
 *
5947
 * [ VC: Attribute Default Legal ]
5948
 * The declared default value must meet the lexical constraints of
5949
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5950
 *
5951
 * [ VC: Fixed Attribute Default ]
5952
 * if an attribute has a default value declared with the #FIXED
5953
 * keyword, instances of that attribute must match the default value.
5954
 *
5955
 * [ WFC: No < in Attribute Values ]
5956
 * handled in xmlParseAttValue()
5957
 *
5958
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5959
 *          or XML_ATTRIBUTE_FIXED.
5960
 */
5961
5962
int
5963
27.9k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5964
27.9k
    int val;
5965
27.9k
    xmlChar *ret;
5966
5967
27.9k
    *value = NULL;
5968
27.9k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5969
771
  SKIP(9);
5970
771
  return(XML_ATTRIBUTE_REQUIRED);
5971
771
    }
5972
27.1k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5973
282
  SKIP(8);
5974
282
  return(XML_ATTRIBUTE_IMPLIED);
5975
282
    }
5976
26.8k
    val = XML_ATTRIBUTE_NONE;
5977
26.8k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5978
283
  SKIP(6);
5979
283
  val = XML_ATTRIBUTE_FIXED;
5980
283
  if (SKIP_BLANKS_PE == 0) {
5981
128
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5982
128
         "Space required after '#FIXED'\n");
5983
128
  }
5984
283
    }
5985
26.8k
    ret = xmlParseAttValue(ctxt);
5986
26.8k
    if (ret == NULL) {
5987
1.75k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5988
1.75k
           "Attribute default value declaration error\n");
5989
1.75k
    } else
5990
25.1k
        *value = ret;
5991
26.8k
    return(val);
5992
27.1k
}
5993
5994
/**
5995
 * xmlParseNotationType:
5996
 * @ctxt:  an XML parser context
5997
 *
5998
 * DEPRECATED: Internal function, don't use.
5999
 *
6000
 * parse an Notation attribute type.
6001
 *
6002
 * Note: the leading 'NOTATION' S part has already being parsed...
6003
 *
6004
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6005
 *
6006
 * [ VC: Notation Attributes ]
6007
 * Values of this type must match one of the notation names included
6008
 * in the declaration; all notation names in the declaration must be declared.
6009
 *
6010
 * Returns: the notation attribute tree built while parsing
6011
 */
6012
6013
xmlEnumerationPtr
6014
33
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
6015
33
    const xmlChar *name;
6016
33
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6017
6018
33
    if (RAW != '(') {
6019
17
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
6020
17
  return(NULL);
6021
17
    }
6022
283
    do {
6023
283
        NEXT;
6024
283
  SKIP_BLANKS_PE;
6025
283
        name = xmlParseName(ctxt);
6026
283
  if (name == NULL) {
6027
5
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6028
5
         "Name expected in NOTATION declaration\n");
6029
5
            xmlFreeEnumeration(ret);
6030
5
      return(NULL);
6031
5
  }
6032
278
  tmp = ret;
6033
1.77k
  while (tmp != NULL) {
6034
1.69k
      if (xmlStrEqual(name, tmp->name)) {
6035
193
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6036
193
    "standalone: attribute notation value token %s duplicated\n",
6037
193
         name, NULL);
6038
193
    if (!xmlDictOwns(ctxt->dict, name))
6039
0
        xmlFree((xmlChar *) name);
6040
193
    break;
6041
193
      }
6042
1.50k
      tmp = tmp->next;
6043
1.50k
  }
6044
278
  if (tmp == NULL) {
6045
85
      cur = xmlCreateEnumeration(name);
6046
85
      if (cur == NULL) {
6047
0
                xmlErrMemory(ctxt);
6048
0
                xmlFreeEnumeration(ret);
6049
0
                return(NULL);
6050
0
            }
6051
85
      if (last == NULL) ret = last = cur;
6052
74
      else {
6053
74
    last->next = cur;
6054
74
    last = cur;
6055
74
      }
6056
85
  }
6057
278
  SKIP_BLANKS_PE;
6058
278
    } while (RAW == '|');
6059
11
    if (RAW != ')') {
6060
11
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
6061
11
        xmlFreeEnumeration(ret);
6062
11
  return(NULL);
6063
11
    }
6064
0
    NEXT;
6065
0
    return(ret);
6066
11
}
6067
6068
/**
6069
 * xmlParseEnumerationType:
6070
 * @ctxt:  an XML parser context
6071
 *
6072
 * DEPRECATED: Internal function, don't use.
6073
 *
6074
 * parse an Enumeration attribute type.
6075
 *
6076
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
6077
 *
6078
 * [ VC: Enumeration ]
6079
 * Values of this type must match one of the Nmtoken tokens in
6080
 * the declaration
6081
 *
6082
 * Returns: the enumeration attribute tree built while parsing
6083
 */
6084
6085
xmlEnumerationPtr
6086
15.0k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
6087
15.0k
    xmlChar *name;
6088
15.0k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6089
6090
15.0k
    if (RAW != '(') {
6091
2.44k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
6092
2.44k
  return(NULL);
6093
2.44k
    }
6094
19.0k
    do {
6095
19.0k
        NEXT;
6096
19.0k
  SKIP_BLANKS_PE;
6097
19.0k
        name = xmlParseNmtoken(ctxt);
6098
19.0k
  if (name == NULL) {
6099
357
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
6100
357
      return(ret);
6101
357
  }
6102
18.7k
  tmp = ret;
6103
26.6k
  while (tmp != NULL) {
6104
8.36k
      if (xmlStrEqual(name, tmp->name)) {
6105
372
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6106
372
    "standalone: attribute enumeration value token %s duplicated\n",
6107
372
         name, NULL);
6108
372
    if (!xmlDictOwns(ctxt->dict, name))
6109
372
        xmlFree(name);
6110
372
    break;
6111
372
      }
6112
7.99k
      tmp = tmp->next;
6113
7.99k
  }
6114
18.7k
  if (tmp == NULL) {
6115
18.3k
      cur = xmlCreateEnumeration(name);
6116
18.3k
      if (!xmlDictOwns(ctxt->dict, name))
6117
18.3k
    xmlFree(name);
6118
18.3k
      if (cur == NULL) {
6119
0
                xmlErrMemory(ctxt);
6120
0
                xmlFreeEnumeration(ret);
6121
0
                return(NULL);
6122
0
            }
6123
18.3k
      if (last == NULL) ret = last = cur;
6124
6.08k
      else {
6125
6.08k
    last->next = cur;
6126
6.08k
    last = cur;
6127
6.08k
      }
6128
18.3k
  }
6129
18.7k
  SKIP_BLANKS_PE;
6130
18.7k
    } while (RAW == '|');
6131
12.2k
    if (RAW != ')') {
6132
209
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
6133
209
  return(ret);
6134
209
    }
6135
12.0k
    NEXT;
6136
12.0k
    return(ret);
6137
12.2k
}
6138
6139
/**
6140
 * xmlParseEnumeratedType:
6141
 * @ctxt:  an XML parser context
6142
 * @tree:  the enumeration tree built while parsing
6143
 *
6144
 * DEPRECATED: Internal function, don't use.
6145
 *
6146
 * parse an Enumerated attribute type.
6147
 *
6148
 * [57] EnumeratedType ::= NotationType | Enumeration
6149
 *
6150
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6151
 *
6152
 *
6153
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6154
 */
6155
6156
int
6157
15.0k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6158
15.0k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6159
40
  SKIP(8);
6160
40
  if (SKIP_BLANKS_PE == 0) {
6161
7
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6162
7
         "Space required after 'NOTATION'\n");
6163
7
      return(0);
6164
7
  }
6165
33
  *tree = xmlParseNotationType(ctxt);
6166
33
  if (*tree == NULL) return(0);
6167
0
  return(XML_ATTRIBUTE_NOTATION);
6168
33
    }
6169
15.0k
    *tree = xmlParseEnumerationType(ctxt);
6170
15.0k
    if (*tree == NULL) return(0);
6171
12.2k
    return(XML_ATTRIBUTE_ENUMERATION);
6172
15.0k
}
6173
6174
/**
6175
 * xmlParseAttributeType:
6176
 * @ctxt:  an XML parser context
6177
 * @tree:  the enumeration tree built while parsing
6178
 *
6179
 * DEPRECATED: Internal function, don't use.
6180
 *
6181
 * parse the Attribute list def for an element
6182
 *
6183
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6184
 *
6185
 * [55] StringType ::= 'CDATA'
6186
 *
6187
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6188
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6189
 *
6190
 * Validity constraints for attribute values syntax are checked in
6191
 * xmlValidateAttributeValue()
6192
 *
6193
 * [ VC: ID ]
6194
 * Values of type ID must match the Name production. A name must not
6195
 * appear more than once in an XML document as a value of this type;
6196
 * i.e., ID values must uniquely identify the elements which bear them.
6197
 *
6198
 * [ VC: One ID per Element Type ]
6199
 * No element type may have more than one ID attribute specified.
6200
 *
6201
 * [ VC: ID Attribute Default ]
6202
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6203
 *
6204
 * [ VC: IDREF ]
6205
 * Values of type IDREF must match the Name production, and values
6206
 * of type IDREFS must match Names; each IDREF Name must match the value
6207
 * of an ID attribute on some element in the XML document; i.e. IDREF
6208
 * values must match the value of some ID attribute.
6209
 *
6210
 * [ VC: Entity Name ]
6211
 * Values of type ENTITY must match the Name production, values
6212
 * of type ENTITIES must match Names; each Entity Name must match the
6213
 * name of an unparsed entity declared in the DTD.
6214
 *
6215
 * [ VC: Name Token ]
6216
 * Values of type NMTOKEN must match the Nmtoken production; values
6217
 * of type NMTOKENS must match Nmtokens.
6218
 *
6219
 * Returns the attribute type
6220
 */
6221
int
6222
31.6k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6223
31.6k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6224
1.74k
  SKIP(5);
6225
1.74k
  return(XML_ATTRIBUTE_CDATA);
6226
29.9k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6227
102
  SKIP(6);
6228
102
  return(XML_ATTRIBUTE_IDREFS);
6229
29.8k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6230
336
  SKIP(5);
6231
336
  return(XML_ATTRIBUTE_IDREF);
6232
29.5k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6233
13.7k
        SKIP(2);
6234
13.7k
  return(XML_ATTRIBUTE_ID);
6235
15.7k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6236
80
  SKIP(6);
6237
80
  return(XML_ATTRIBUTE_ENTITY);
6238
15.6k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6239
205
  SKIP(8);
6240
205
  return(XML_ATTRIBUTE_ENTITIES);
6241
15.4k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6242
159
  SKIP(8);
6243
159
  return(XML_ATTRIBUTE_NMTOKENS);
6244
15.2k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6245
218
  SKIP(7);
6246
218
  return(XML_ATTRIBUTE_NMTOKEN);
6247
218
     }
6248
15.0k
     return(xmlParseEnumeratedType(ctxt, tree));
6249
31.6k
}
6250
6251
/**
6252
 * xmlParseAttributeListDecl:
6253
 * @ctxt:  an XML parser context
6254
 *
6255
 * DEPRECATED: Internal function, don't use.
6256
 *
6257
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6258
 *
6259
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6260
 *
6261
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6262
 *
6263
 */
6264
void
6265
13.8k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6266
13.8k
    const xmlChar *elemName;
6267
13.8k
    const xmlChar *attrName;
6268
13.8k
    xmlEnumerationPtr tree;
6269
6270
13.8k
    if ((CUR != '<') || (NXT(1) != '!'))
6271
0
        return;
6272
13.8k
    SKIP(2);
6273
6274
13.8k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6275
13.3k
  int inputid = ctxt->input->id;
6276
6277
13.3k
  SKIP(7);
6278
13.3k
  if (SKIP_BLANKS_PE == 0) {
6279
2.05k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6280
2.05k
                     "Space required after '<!ATTLIST'\n");
6281
2.05k
  }
6282
13.3k
        elemName = xmlParseName(ctxt);
6283
13.3k
  if (elemName == NULL) {
6284
864
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6285
864
         "ATTLIST: no name for Element\n");
6286
864
      return;
6287
864
  }
6288
12.4k
  SKIP_BLANKS_PE;
6289
12.4k
  GROW;
6290
38.2k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6291
34.7k
      int type;
6292
34.7k
      int def;
6293
34.7k
      xmlChar *defaultValue = NULL;
6294
6295
34.7k
      GROW;
6296
34.7k
            tree = NULL;
6297
34.7k
      attrName = xmlParseName(ctxt);
6298
34.7k
      if (attrName == NULL) {
6299
1.41k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6300
1.41k
             "ATTLIST: no name for Attribute\n");
6301
1.41k
    break;
6302
1.41k
      }
6303
33.3k
      GROW;
6304
33.3k
      if (SKIP_BLANKS_PE == 0) {
6305
1.61k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6306
1.61k
            "Space required after the attribute name\n");
6307
1.61k
    break;
6308
1.61k
      }
6309
6310
31.6k
      type = xmlParseAttributeType(ctxt, &tree);
6311
31.6k
      if (type <= 0) {
6312
2.82k
          break;
6313
2.82k
      }
6314
6315
28.8k
      GROW;
6316
28.8k
      if (SKIP_BLANKS_PE == 0) {
6317
933
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6318
933
             "Space required after the attribute type\n");
6319
933
          if (tree != NULL)
6320
223
        xmlFreeEnumeration(tree);
6321
933
    break;
6322
933
      }
6323
6324
27.9k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6325
27.9k
      if (def <= 0) {
6326
0
                if (defaultValue != NULL)
6327
0
        xmlFree(defaultValue);
6328
0
          if (tree != NULL)
6329
0
        xmlFreeEnumeration(tree);
6330
0
          break;
6331
0
      }
6332
27.9k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6333
24.0k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6334
6335
27.9k
      GROW;
6336
27.9k
            if (RAW != '>') {
6337
24.5k
    if (SKIP_BLANKS_PE == 0) {
6338
2.13k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6339
2.13k
      "Space required after the attribute default value\n");
6340
2.13k
        if (defaultValue != NULL)
6341
342
      xmlFree(defaultValue);
6342
2.13k
        if (tree != NULL)
6343
35
      xmlFreeEnumeration(tree);
6344
2.13k
        break;
6345
2.13k
    }
6346
24.5k
      }
6347
25.8k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6348
25.8k
    (ctxt->sax->attributeDecl != NULL))
6349
21.1k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6350
21.1k
                          type, def, defaultValue, tree);
6351
4.68k
      else if (tree != NULL)
6352
65
    xmlFreeEnumeration(tree);
6353
6354
25.8k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6355
25.8k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6356
25.8k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6357
24.7k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6358
24.7k
      }
6359
25.8k
      if (ctxt->sax2) {
6360
25.8k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6361
25.8k
      }
6362
25.8k
      if (defaultValue != NULL)
6363
24.7k
          xmlFree(defaultValue);
6364
25.8k
      GROW;
6365
25.8k
  }
6366
12.4k
  if (RAW == '>') {
6367
3.93k
      if (inputid != ctxt->input->id) {
6368
63
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6369
63
                               "Attribute list declaration doesn't start and"
6370
63
                               " stop in the same entity\n");
6371
63
      }
6372
3.93k
      NEXT;
6373
3.93k
  }
6374
12.4k
    }
6375
13.8k
}
6376
6377
/**
6378
 * xmlParseElementMixedContentDecl:
6379
 * @ctxt:  an XML parser context
6380
 * @inputchk:  the input used for the current entity, needed for boundary checks
6381
 *
6382
 * DEPRECATED: Internal function, don't use.
6383
 *
6384
 * parse the declaration for a Mixed Element content
6385
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6386
 *
6387
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6388
 *                '(' S? '#PCDATA' S? ')'
6389
 *
6390
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6391
 *
6392
 * [ VC: No Duplicate Types ]
6393
 * The same name must not appear more than once in a single
6394
 * mixed-content declaration.
6395
 *
6396
 * returns: the list of the xmlElementContentPtr describing the element choices
6397
 */
6398
xmlElementContentPtr
6399
3.32k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6400
3.32k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6401
3.32k
    const xmlChar *elem = NULL;
6402
6403
3.32k
    GROW;
6404
3.32k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6405
3.32k
  SKIP(7);
6406
3.32k
  SKIP_BLANKS_PE;
6407
3.32k
  if (RAW == ')') {
6408
1.79k
      if (ctxt->input->id != inputchk) {
6409
147
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6410
147
                               "Element content declaration doesn't start and"
6411
147
                               " stop in the same entity\n");
6412
147
      }
6413
1.79k
      NEXT;
6414
1.79k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6415
1.79k
      if (ret == NULL)
6416
0
                goto mem_error;
6417
1.79k
      if (RAW == '*') {
6418
18
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6419
18
    NEXT;
6420
18
      }
6421
1.79k
      return(ret);
6422
1.79k
  }
6423
1.52k
  if ((RAW == '(') || (RAW == '|')) {
6424
361
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6425
361
      if (ret == NULL)
6426
1
                goto mem_error;
6427
361
  }
6428
2.67k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6429
1.19k
      NEXT;
6430
1.19k
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6431
1.19k
            if (n == NULL)
6432
1
                goto mem_error;
6433
1.19k
      if (elem == NULL) {
6434
347
    n->c1 = cur;
6435
347
    if (cur != NULL)
6436
347
        cur->parent = n;
6437
347
    ret = cur = n;
6438
847
      } else {
6439
847
          cur->c2 = n;
6440
847
    n->parent = cur;
6441
847
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6442
847
                if (n->c1 == NULL)
6443
1
                    goto mem_error;
6444
846
    n->c1->parent = n;
6445
846
    cur = n;
6446
846
      }
6447
1.19k
      SKIP_BLANKS_PE;
6448
1.19k
      elem = xmlParseName(ctxt);
6449
1.19k
      if (elem == NULL) {
6450
43
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6451
43
      "xmlParseElementMixedContentDecl : Name expected\n");
6452
43
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6453
43
    return(NULL);
6454
43
      }
6455
1.15k
      SKIP_BLANKS_PE;
6456
1.15k
      GROW;
6457
1.15k
  }
6458
1.48k
  if ((RAW == ')') && (NXT(1) == '*')) {
6459
19
      if (elem != NULL) {
6460
19
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6461
19
                                   XML_ELEMENT_CONTENT_ELEMENT);
6462
19
    if (cur->c2 == NULL)
6463
0
                    goto mem_error;
6464
19
    cur->c2->parent = cur;
6465
19
            }
6466
19
            if (ret != NULL)
6467
19
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6468
19
      if (ctxt->input->id != inputchk) {
6469
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6470
0
                               "Element content declaration doesn't start and"
6471
0
                               " stop in the same entity\n");
6472
0
      }
6473
19
      SKIP(2);
6474
1.46k
  } else {
6475
1.46k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6476
1.46k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6477
1.46k
      return(NULL);
6478
1.46k
  }
6479
6480
1.48k
    } else {
6481
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6482
0
    }
6483
19
    return(ret);
6484
6485
3
mem_error:
6486
3
    xmlErrMemory(ctxt);
6487
3
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6488
3
    return(NULL);
6489
3.32k
}
6490
6491
/**
6492
 * xmlParseElementChildrenContentDeclPriv:
6493
 * @ctxt:  an XML parser context
6494
 * @inputchk:  the input used for the current entity, needed for boundary checks
6495
 * @depth: the level of recursion
6496
 *
6497
 * parse the declaration for a Mixed Element content
6498
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6499
 *
6500
 *
6501
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6502
 *
6503
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6504
 *
6505
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6506
 *
6507
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6508
 *
6509
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6510
 * TODO Parameter-entity replacement text must be properly nested
6511
 *  with parenthesized groups. That is to say, if either of the
6512
 *  opening or closing parentheses in a choice, seq, or Mixed
6513
 *  construct is contained in the replacement text for a parameter
6514
 *  entity, both must be contained in the same replacement text. For
6515
 *  interoperability, if a parameter-entity reference appears in a
6516
 *  choice, seq, or Mixed construct, its replacement text should not
6517
 *  be empty, and neither the first nor last non-blank character of
6518
 *  the replacement text should be a connector (| or ,).
6519
 *
6520
 * Returns the tree of xmlElementContentPtr describing the element
6521
 *          hierarchy.
6522
 */
6523
static xmlElementContentPtr
6524
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6525
169k
                                       int depth) {
6526
169k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6527
169k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6528
169k
    const xmlChar *elem;
6529
169k
    xmlChar type = 0;
6530
6531
169k
    if (depth > maxDepth) {
6532
24
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6533
24
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6534
24
                "use XML_PARSE_HUGE\n", depth);
6535
24
  return(NULL);
6536
24
    }
6537
169k
    SKIP_BLANKS_PE;
6538
169k
    GROW;
6539
169k
    if (RAW == '(') {
6540
141k
  int inputid = ctxt->input->id;
6541
6542
        /* Recurse on first child */
6543
141k
  NEXT;
6544
141k
  SKIP_BLANKS_PE;
6545
141k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6546
141k
                                                           depth + 1);
6547
141k
        if (cur == NULL)
6548
11.2k
            return(NULL);
6549
130k
  SKIP_BLANKS_PE;
6550
130k
  GROW;
6551
130k
    } else {
6552
28.1k
  elem = xmlParseName(ctxt);
6553
28.1k
  if (elem == NULL) {
6554
667
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6555
667
      return(NULL);
6556
667
  }
6557
27.4k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6558
27.4k
  if (cur == NULL) {
6559
2
      xmlErrMemory(ctxt);
6560
2
      return(NULL);
6561
2
  }
6562
27.4k
  GROW;
6563
27.4k
  if (RAW == '?') {
6564
928
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6565
928
      NEXT;
6566
26.5k
  } else if (RAW == '*') {
6567
740
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6568
740
      NEXT;
6569
25.7k
  } else if (RAW == '+') {
6570
1.80k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6571
1.80k
      NEXT;
6572
23.9k
  } else {
6573
23.9k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6574
23.9k
  }
6575
27.4k
  GROW;
6576
27.4k
    }
6577
158k
    SKIP_BLANKS_PE;
6578
280k
    while ((RAW != ')') && (PARSER_STOPPED(ctxt) == 0)) {
6579
        /*
6580
   * Each loop we parse one separator and one element.
6581
   */
6582
123k
        if (RAW == ',') {
6583
13.6k
      if (type == 0) type = CUR;
6584
6585
      /*
6586
       * Detect "Name | Name , Name" error
6587
       */
6588
1.20k
      else if (type != CUR) {
6589
163
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6590
163
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6591
163
                      type);
6592
163
    if ((last != NULL) && (last != ret))
6593
163
        xmlFreeDocElementContent(ctxt->myDoc, last);
6594
163
    if (ret != NULL)
6595
163
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6596
163
    return(NULL);
6597
163
      }
6598
13.4k
      NEXT;
6599
6600
13.4k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6601
13.4k
      if (op == NULL) {
6602
1
                xmlErrMemory(ctxt);
6603
1
    if ((last != NULL) && (last != ret))
6604
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6605
1
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6606
1
    return(NULL);
6607
1
      }
6608
13.4k
      if (last == NULL) {
6609
12.4k
    op->c1 = ret;
6610
12.4k
    if (ret != NULL)
6611
12.4k
        ret->parent = op;
6612
12.4k
    ret = cur = op;
6613
12.4k
      } else {
6614
1.03k
          cur->c2 = op;
6615
1.03k
    if (op != NULL)
6616
1.03k
        op->parent = cur;
6617
1.03k
    op->c1 = last;
6618
1.03k
    if (last != NULL)
6619
1.03k
        last->parent = op;
6620
1.03k
    cur =op;
6621
1.03k
    last = NULL;
6622
1.03k
      }
6623
110k
  } else if (RAW == '|') {
6624
109k
      if (type == 0) type = CUR;
6625
6626
      /*
6627
       * Detect "Name , Name | Name" error
6628
       */
6629
75.4k
      else if (type != CUR) {
6630
4
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6631
4
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6632
4
          type);
6633
4
    if ((last != NULL) && (last != ret))
6634
4
        xmlFreeDocElementContent(ctxt->myDoc, last);
6635
4
    if (ret != NULL)
6636
4
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6637
4
    return(NULL);
6638
4
      }
6639
109k
      NEXT;
6640
6641
109k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6642
109k
      if (op == NULL) {
6643
0
                xmlErrMemory(ctxt);
6644
0
    if ((last != NULL) && (last != ret))
6645
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6646
0
    if (ret != NULL)
6647
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6648
0
    return(NULL);
6649
0
      }
6650
109k
      if (last == NULL) {
6651
34.4k
    op->c1 = ret;
6652
34.4k
    if (ret != NULL)
6653
34.4k
        ret->parent = op;
6654
34.4k
    ret = cur = op;
6655
75.4k
      } else {
6656
75.4k
          cur->c2 = op;
6657
75.4k
    if (op != NULL)
6658
75.4k
        op->parent = cur;
6659
75.4k
    op->c1 = last;
6660
75.4k
    if (last != NULL)
6661
75.4k
        last->parent = op;
6662
75.4k
    cur =op;
6663
75.4k
    last = NULL;
6664
75.4k
      }
6665
109k
  } else {
6666
351
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6667
351
      if ((last != NULL) && (last != ret))
6668
77
          xmlFreeDocElementContent(ctxt->myDoc, last);
6669
351
      if (ret != NULL)
6670
351
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6671
351
      return(NULL);
6672
351
  }
6673
123k
  GROW;
6674
123k
  SKIP_BLANKS_PE;
6675
123k
  GROW;
6676
123k
  if (RAW == '(') {
6677
25.9k
      int inputid = ctxt->input->id;
6678
      /* Recurse on second child */
6679
25.9k
      NEXT;
6680
25.9k
      SKIP_BLANKS_PE;
6681
25.9k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6682
25.9k
                                                          depth + 1);
6683
25.9k
            if (last == NULL) {
6684
1.38k
    if (ret != NULL)
6685
1.38k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6686
1.38k
    return(NULL);
6687
1.38k
            }
6688
24.5k
      SKIP_BLANKS_PE;
6689
97.4k
  } else {
6690
97.4k
      elem = xmlParseName(ctxt);
6691
97.4k
      if (elem == NULL) {
6692
15
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6693
15
    if (ret != NULL)
6694
15
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6695
15
    return(NULL);
6696
15
      }
6697
97.4k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6698
97.4k
      if (last == NULL) {
6699
0
                xmlErrMemory(ctxt);
6700
0
    if (ret != NULL)
6701
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6702
0
    return(NULL);
6703
0
      }
6704
97.4k
      if (RAW == '?') {
6705
1.45k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6706
1.45k
    NEXT;
6707
96.0k
      } else if (RAW == '*') {
6708
4.41k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6709
4.41k
    NEXT;
6710
91.6k
      } else if (RAW == '+') {
6711
3.43k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6712
3.43k
    NEXT;
6713
88.1k
      } else {
6714
88.1k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6715
88.1k
      }
6716
97.4k
  }
6717
122k
  SKIP_BLANKS_PE;
6718
122k
  GROW;
6719
122k
    }
6720
156k
    if ((cur != NULL) && (last != NULL)) {
6721
45.2k
        cur->c2 = last;
6722
45.2k
  if (last != NULL)
6723
45.2k
      last->parent = cur;
6724
45.2k
    }
6725
156k
    if (ctxt->input->id != inputchk) {
6726
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6727
0
                       "Element content declaration doesn't start and stop in"
6728
0
                       " the same entity\n");
6729
0
    }
6730
156k
    NEXT;
6731
156k
    if (RAW == '?') {
6732
22.3k
  if (ret != NULL) {
6733
22.3k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6734
22.3k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6735
14.7k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6736
7.57k
      else
6737
7.57k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6738
22.3k
  }
6739
22.3k
  NEXT;
6740
133k
    } else if (RAW == '*') {
6741
9.78k
  if (ret != NULL) {
6742
9.78k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6743
9.78k
      cur = ret;
6744
      /*
6745
       * Some normalization:
6746
       * (a | b* | c?)* == (a | b | c)*
6747
       */
6748
48.7k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6749
38.9k
    if ((cur->c1 != NULL) &&
6750
38.9k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6751
38.9k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6752
2.20k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6753
38.9k
    if ((cur->c2 != NULL) &&
6754
38.9k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6755
38.9k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6756
235
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6757
38.9k
    cur = cur->c2;
6758
38.9k
      }
6759
9.78k
  }
6760
9.78k
  NEXT;
6761
124k
    } else if (RAW == '+') {
6762
20.5k
  if (ret != NULL) {
6763
20.5k
      int found = 0;
6764
6765
20.5k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6766
20.5k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6767
6.51k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6768
14.0k
      else
6769
14.0k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6770
      /*
6771
       * Some normalization:
6772
       * (a | b*)+ == (a | b)*
6773
       * (a | b?)+ == (a | b)*
6774
       */
6775
60.8k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6776
40.2k
    if ((cur->c1 != NULL) &&
6777
40.2k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6778
40.2k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6779
3.11k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6780
3.11k
        found = 1;
6781
3.11k
    }
6782
40.2k
    if ((cur->c2 != NULL) &&
6783
40.2k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6784
40.2k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6785
9.12k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6786
9.12k
        found = 1;
6787
9.12k
    }
6788
40.2k
    cur = cur->c2;
6789
40.2k
      }
6790
20.5k
      if (found)
6791
9.71k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6792
20.5k
  }
6793
20.5k
  NEXT;
6794
20.5k
    }
6795
156k
    return(ret);
6796
158k
}
6797
6798
/**
6799
 * xmlParseElementChildrenContentDecl:
6800
 * @ctxt:  an XML parser context
6801
 * @inputchk:  the input used for the current entity, needed for boundary checks
6802
 *
6803
 * DEPRECATED: Internal function, don't use.
6804
 *
6805
 * parse the declaration for a Mixed Element content
6806
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6807
 *
6808
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6809
 *
6810
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6811
 *
6812
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6813
 *
6814
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6815
 *
6816
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6817
 * TODO Parameter-entity replacement text must be properly nested
6818
 *  with parenthesized groups. That is to say, if either of the
6819
 *  opening or closing parentheses in a choice, seq, or Mixed
6820
 *  construct is contained in the replacement text for a parameter
6821
 *  entity, both must be contained in the same replacement text. For
6822
 *  interoperability, if a parameter-entity reference appears in a
6823
 *  choice, seq, or Mixed construct, its replacement text should not
6824
 *  be empty, and neither the first nor last non-blank character of
6825
 *  the replacement text should be a connector (| or ,).
6826
 *
6827
 * Returns the tree of xmlElementContentPtr describing the element
6828
 *          hierarchy.
6829
 */
6830
xmlElementContentPtr
6831
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6832
    /* stub left for API/ABI compat */
6833
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6834
0
}
6835
6836
/**
6837
 * xmlParseElementContentDecl:
6838
 * @ctxt:  an XML parser context
6839
 * @name:  the name of the element being defined.
6840
 * @result:  the Element Content pointer will be stored here if any
6841
 *
6842
 * DEPRECATED: Internal function, don't use.
6843
 *
6844
 * parse the declaration for an Element content either Mixed or Children,
6845
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6846
 *
6847
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6848
 *
6849
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6850
 */
6851
6852
int
6853
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6854
5.54k
                           xmlElementContentPtr *result) {
6855
6856
5.54k
    xmlElementContentPtr tree = NULL;
6857
5.54k
    int inputid = ctxt->input->id;
6858
5.54k
    int res;
6859
6860
5.54k
    *result = NULL;
6861
6862
5.54k
    if (RAW != '(') {
6863
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6864
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6865
0
  return(-1);
6866
0
    }
6867
5.54k
    NEXT;
6868
5.54k
    GROW;
6869
5.54k
    SKIP_BLANKS_PE;
6870
5.54k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6871
3.32k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6872
3.32k
  res = XML_ELEMENT_TYPE_MIXED;
6873
3.32k
    } else {
6874
2.21k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6875
2.21k
  res = XML_ELEMENT_TYPE_ELEMENT;
6876
2.21k
    }
6877
5.54k
    SKIP_BLANKS_PE;
6878
5.54k
    *result = tree;
6879
5.54k
    return(res);
6880
5.54k
}
6881
6882
/**
6883
 * xmlParseElementDecl:
6884
 * @ctxt:  an XML parser context
6885
 *
6886
 * DEPRECATED: Internal function, don't use.
6887
 *
6888
 * Parse an element declaration. Always consumes '<!'.
6889
 *
6890
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6891
 *
6892
 * [ VC: Unique Element Type Declaration ]
6893
 * No element type may be declared more than once
6894
 *
6895
 * Returns the type of the element, or -1 in case of error
6896
 */
6897
int
6898
7.21k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6899
7.21k
    const xmlChar *name;
6900
7.21k
    int ret = -1;
6901
7.21k
    xmlElementContentPtr content  = NULL;
6902
6903
7.21k
    if ((CUR != '<') || (NXT(1) != '!'))
6904
0
        return(ret);
6905
7.21k
    SKIP(2);
6906
6907
    /* GROW; done in the caller */
6908
7.21k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6909
7.00k
  int inputid = ctxt->input->id;
6910
6911
7.00k
  SKIP(7);
6912
7.00k
  if (SKIP_BLANKS_PE == 0) {
6913
318
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6914
318
               "Space required after 'ELEMENT'\n");
6915
318
      return(-1);
6916
318
  }
6917
6.68k
        name = xmlParseName(ctxt);
6918
6.68k
  if (name == NULL) {
6919
248
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6920
248
         "xmlParseElementDecl: no name for Element\n");
6921
248
      return(-1);
6922
248
  }
6923
6.43k
  if (SKIP_BLANKS_PE == 0) {
6924
644
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6925
644
         "Space required after the element name\n");
6926
644
  }
6927
6.43k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6928
51
      SKIP(5);
6929
      /*
6930
       * Element must always be empty.
6931
       */
6932
51
      ret = XML_ELEMENT_TYPE_EMPTY;
6933
6.38k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6934
6.38k
             (NXT(2) == 'Y')) {
6935
210
      SKIP(3);
6936
      /*
6937
       * Element is a generic container.
6938
       */
6939
210
      ret = XML_ELEMENT_TYPE_ANY;
6940
6.17k
  } else if (RAW == '(') {
6941
5.54k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6942
5.54k
  } else {
6943
      /*
6944
       * [ WFC: PEs in Internal Subset ] error handling.
6945
       */
6946
635
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6947
635
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6948
635
      return(-1);
6949
635
  }
6950
6951
5.80k
  SKIP_BLANKS_PE;
6952
6953
5.80k
  if (RAW != '>') {
6954
2.46k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6955
2.46k
      if (content != NULL) {
6956
266
    xmlFreeDocElementContent(ctxt->myDoc, content);
6957
266
      }
6958
3.33k
  } else {
6959
3.33k
      if (inputid != ctxt->input->id) {
6960
507
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6961
507
                               "Element declaration doesn't start and stop in"
6962
507
                               " the same entity\n");
6963
507
      }
6964
6965
3.33k
      NEXT;
6966
3.33k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6967
3.33k
    (ctxt->sax->elementDecl != NULL)) {
6968
2.28k
    if (content != NULL)
6969
2.02k
        content->parent = NULL;
6970
2.28k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6971
2.28k
                           content);
6972
2.28k
    if ((content != NULL) && (content->parent == NULL)) {
6973
        /*
6974
         * this is a trick: if xmlAddElementDecl is called,
6975
         * instead of copying the full tree it is plugged directly
6976
         * if called from the parser. Avoid duplicating the
6977
         * interfaces or change the API/ABI
6978
         */
6979
1.02k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6980
1.02k
    }
6981
2.28k
      } else if (content != NULL) {
6982
516
    xmlFreeDocElementContent(ctxt->myDoc, content);
6983
516
      }
6984
3.33k
  }
6985
5.80k
    }
6986
6.01k
    return(ret);
6987
7.21k
}
6988
6989
/**
6990
 * xmlParseConditionalSections
6991
 * @ctxt:  an XML parser context
6992
 *
6993
 * Parse a conditional section. Always consumes '<!['.
6994
 *
6995
 * [61] conditionalSect ::= includeSect | ignoreSect
6996
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6997
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6998
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6999
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
7000
 */
7001
7002
static void
7003
1.75k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
7004
1.75k
    int *inputIds = NULL;
7005
1.75k
    size_t inputIdsSize = 0;
7006
1.75k
    size_t depth = 0;
7007
7008
6.70k
    while (PARSER_STOPPED(ctxt) == 0) {
7009
6.53k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7010
3.62k
            int id = ctxt->input->id;
7011
7012
3.62k
            SKIP(3);
7013
3.62k
            SKIP_BLANKS_PE;
7014
7015
3.62k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
7016
2.46k
                SKIP(7);
7017
2.46k
                SKIP_BLANKS_PE;
7018
2.46k
                if (RAW != '[') {
7019
27
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7020
27
                    xmlHaltParser(ctxt);
7021
27
                    goto error;
7022
27
                }
7023
2.44k
                if (ctxt->input->id != id) {
7024
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7025
0
                                   "All markup of the conditional section is"
7026
0
                                   " not in the same entity\n");
7027
0
                }
7028
2.44k
                NEXT;
7029
7030
2.44k
                if (inputIdsSize <= depth) {
7031
920
                    int *tmp;
7032
7033
920
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
7034
920
                    tmp = (int *) xmlRealloc(inputIds,
7035
920
                            inputIdsSize * sizeof(int));
7036
920
                    if (tmp == NULL) {
7037
1
                        xmlErrMemory(ctxt);
7038
1
                        goto error;
7039
1
                    }
7040
919
                    inputIds = tmp;
7041
919
                }
7042
2.44k
                inputIds[depth] = id;
7043
2.44k
                depth++;
7044
2.44k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
7045
85
                size_t ignoreDepth = 0;
7046
7047
85
                SKIP(6);
7048
85
                SKIP_BLANKS_PE;
7049
85
                if (RAW != '[') {
7050
7
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7051
7
                    xmlHaltParser(ctxt);
7052
7
                    goto error;
7053
7
                }
7054
78
                if (ctxt->input->id != id) {
7055
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7056
0
                                   "All markup of the conditional section is"
7057
0
                                   " not in the same entity\n");
7058
0
                }
7059
78
                NEXT;
7060
7061
7.74k
                while (PARSER_STOPPED(ctxt) == 0) {
7062
7.74k
                    if (RAW == 0) {
7063
49
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
7064
49
                        goto error;
7065
49
                    }
7066
7.70k
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7067
180
                        SKIP(3);
7068
180
                        ignoreDepth++;
7069
                        /* Check for integer overflow */
7070
180
                        if (ignoreDepth == 0) {
7071
0
                            xmlErrMemory(ctxt);
7072
0
                            goto error;
7073
0
                        }
7074
7.52k
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
7075
7.52k
                               (NXT(2) == '>')) {
7076
57
                        SKIP(3);
7077
57
                        if (ignoreDepth == 0)
7078
29
                            break;
7079
28
                        ignoreDepth--;
7080
7.46k
                    } else {
7081
7.46k
                        NEXT;
7082
7.46k
                    }
7083
7.70k
                }
7084
7085
29
                if (ctxt->input->id != id) {
7086
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7087
0
                                   "All markup of the conditional section is"
7088
0
                                   " not in the same entity\n");
7089
0
                }
7090
1.07k
            } else {
7091
1.07k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
7092
1.07k
                xmlHaltParser(ctxt);
7093
1.07k
                goto error;
7094
1.07k
            }
7095
3.62k
        } else if ((depth > 0) &&
7096
2.90k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
7097
3
            depth--;
7098
3
            if (ctxt->input->id != inputIds[depth]) {
7099
0
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7100
0
                               "All markup of the conditional section is not"
7101
0
                               " in the same entity\n");
7102
0
            }
7103
3
            SKIP(3);
7104
2.89k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7105
2.50k
            xmlParseMarkupDecl(ctxt);
7106
2.50k
        } else {
7107
390
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7108
390
            xmlHaltParser(ctxt);
7109
390
            goto error;
7110
390
        }
7111
7112
4.98k
        if (depth == 0)
7113
32
            break;
7114
7115
4.94k
        SKIP_BLANKS_PE;
7116
4.94k
        SHRINK;
7117
4.94k
        GROW;
7118
4.94k
    }
7119
7120
1.75k
error:
7121
1.75k
    xmlFree(inputIds);
7122
1.75k
}
7123
7124
/**
7125
 * xmlParseMarkupDecl:
7126
 * @ctxt:  an XML parser context
7127
 *
7128
 * DEPRECATED: Internal function, don't use.
7129
 *
7130
 * Parse markup declarations. Always consumes '<!' or '<?'.
7131
 *
7132
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7133
 *                     NotationDecl | PI | Comment
7134
 *
7135
 * [ VC: Proper Declaration/PE Nesting ]
7136
 * Parameter-entity replacement text must be properly nested with
7137
 * markup declarations. That is to say, if either the first character
7138
 * or the last character of a markup declaration (markupdecl above) is
7139
 * contained in the replacement text for a parameter-entity reference,
7140
 * both must be contained in the same replacement text.
7141
 *
7142
 * [ WFC: PEs in Internal Subset ]
7143
 * In the internal DTD subset, parameter-entity references can occur
7144
 * only where markup declarations can occur, not within markup declarations.
7145
 * (This does not apply to references that occur in external parameter
7146
 * entities or to the external subset.)
7147
 */
7148
void
7149
93.2k
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7150
93.2k
    GROW;
7151
93.2k
    if (CUR == '<') {
7152
93.2k
        if (NXT(1) == '!') {
7153
86.5k
      switch (NXT(2)) {
7154
65.8k
          case 'E':
7155
65.8k
        if (NXT(3) == 'L')
7156
7.21k
      xmlParseElementDecl(ctxt);
7157
58.6k
        else if (NXT(3) == 'N')
7158
58.3k
      xmlParseEntityDecl(ctxt);
7159
346
                    else
7160
346
                        SKIP(2);
7161
65.8k
        break;
7162
13.8k
          case 'A':
7163
13.8k
        xmlParseAttributeListDecl(ctxt);
7164
13.8k
        break;
7165
1.21k
          case 'N':
7166
1.21k
        xmlParseNotationDecl(ctxt);
7167
1.21k
        break;
7168
4.68k
          case '-':
7169
4.68k
        xmlParseComment(ctxt);
7170
4.68k
        break;
7171
982
    default:
7172
        /* there is an error but it will be detected later */
7173
982
                    SKIP(2);
7174
982
        break;
7175
86.5k
      }
7176
86.5k
  } else if (NXT(1) == '?') {
7177
6.69k
      xmlParsePI(ctxt);
7178
6.69k
  }
7179
93.2k
    }
7180
93.2k
}
7181
7182
/**
7183
 * xmlParseTextDecl:
7184
 * @ctxt:  an XML parser context
7185
 *
7186
 * DEPRECATED: Internal function, don't use.
7187
 *
7188
 * parse an XML declaration header for external entities
7189
 *
7190
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7191
 */
7192
7193
void
7194
1.32k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7195
1.32k
    xmlChar *version;
7196
7197
    /*
7198
     * We know that '<?xml' is here.
7199
     */
7200
1.32k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7201
1.32k
  SKIP(5);
7202
1.32k
    } else {
7203
1
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7204
1
  return;
7205
1
    }
7206
7207
1.32k
    if (SKIP_BLANKS == 0) {
7208
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7209
0
           "Space needed after '<?xml'\n");
7210
0
    }
7211
7212
    /*
7213
     * We may have the VersionInfo here.
7214
     */
7215
1.32k
    version = xmlParseVersionInfo(ctxt);
7216
1.32k
    if (version == NULL) {
7217
631
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7218
631
        if (version == NULL) {
7219
0
            xmlErrMemory(ctxt);
7220
0
            return;
7221
0
        }
7222
693
    } else {
7223
693
  if (SKIP_BLANKS == 0) {
7224
77
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7225
77
               "Space needed here\n");
7226
77
  }
7227
693
    }
7228
1.32k
    ctxt->input->version = version;
7229
7230
    /*
7231
     * We must have the encoding declaration
7232
     */
7233
1.32k
    xmlParseEncodingDecl(ctxt);
7234
7235
1.32k
    SKIP_BLANKS;
7236
1.32k
    if ((RAW == '?') && (NXT(1) == '>')) {
7237
404
        SKIP(2);
7238
920
    } else if (RAW == '>') {
7239
        /* Deprecated old WD ... */
7240
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7241
0
  NEXT;
7242
920
    } else {
7243
920
        int c;
7244
7245
920
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7246
407k
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7247
407k
            NEXT;
7248
407k
            if (c == '>')
7249
131
                break;
7250
407k
        }
7251
920
    }
7252
1.32k
}
7253
7254
/**
7255
 * xmlParseExternalSubset:
7256
 * @ctxt:  an XML parser context
7257
 * @ExternalID: the external identifier
7258
 * @SystemID: the system identifier (or URL)
7259
 *
7260
 * parse Markup declarations from an external subset
7261
 *
7262
 * [30] extSubset ::= textDecl? extSubsetDecl
7263
 *
7264
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7265
 */
7266
void
7267
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7268
2.58k
                       const xmlChar *SystemID) {
7269
2.58k
    int oldInputNr;
7270
7271
2.58k
    xmlCtxtInitializeLate(ctxt);
7272
7273
2.58k
    xmlDetectEncoding(ctxt);
7274
7275
2.58k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7276
316
  xmlParseTextDecl(ctxt);
7277
316
    }
7278
2.58k
    if (ctxt->myDoc == NULL) {
7279
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7280
0
  if (ctxt->myDoc == NULL) {
7281
0
      xmlErrMemory(ctxt);
7282
0
      return;
7283
0
  }
7284
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7285
0
    }
7286
2.58k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) &&
7287
2.58k
        (xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID) == NULL)) {
7288
0
        xmlErrMemory(ctxt);
7289
0
    }
7290
7291
2.58k
    ctxt->inSubset = 2;
7292
2.58k
    oldInputNr = ctxt->inputNr;
7293
7294
2.58k
    SKIP_BLANKS_PE;
7295
5.40k
    while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) &&
7296
5.40k
           (!PARSER_STOPPED(ctxt))) {
7297
3.35k
  GROW;
7298
3.35k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7299
1.72k
            xmlParseConditionalSections(ctxt);
7300
1.72k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7301
1.09k
            xmlParseMarkupDecl(ctxt);
7302
1.09k
        } else {
7303
535
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7304
535
            xmlHaltParser(ctxt);
7305
535
            return;
7306
535
        }
7307
2.81k
        SKIP_BLANKS_PE;
7308
2.81k
        SHRINK;
7309
2.81k
    }
7310
7311
2.11k
    while (ctxt->inputNr > oldInputNr)
7312
58
        xmlPopPE(ctxt);
7313
7314
2.05k
    if (RAW != 0) {
7315
1.46k
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7316
1.46k
    }
7317
2.05k
}
7318
7319
/**
7320
 * xmlParseReference:
7321
 * @ctxt:  an XML parser context
7322
 *
7323
 * DEPRECATED: Internal function, don't use.
7324
 *
7325
 * parse and handle entity references in content, depending on the SAX
7326
 * interface, this may end-up in a call to character() if this is a
7327
 * CharRef, a predefined entity, if there is no reference() callback.
7328
 * or if the parser was asked to switch to that mode.
7329
 *
7330
 * Always consumes '&'.
7331
 *
7332
 * [67] Reference ::= EntityRef | CharRef
7333
 */
7334
void
7335
471k
xmlParseReference(xmlParserCtxtPtr ctxt) {
7336
471k
    xmlEntityPtr ent = NULL;
7337
471k
    const xmlChar *name;
7338
471k
    xmlChar *val;
7339
7340
471k
    if (RAW != '&')
7341
0
        return;
7342
7343
    /*
7344
     * Simple case of a CharRef
7345
     */
7346
471k
    if (NXT(1) == '#') {
7347
38.2k
  int i = 0;
7348
38.2k
  xmlChar out[16];
7349
38.2k
  int value = xmlParseCharRef(ctxt);
7350
7351
38.2k
  if (value == 0)
7352
19.8k
      return;
7353
7354
        /*
7355
         * Just encode the value in UTF-8
7356
         */
7357
18.3k
        COPY_BUF(out, i, value);
7358
18.3k
        out[i] = 0;
7359
18.3k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7360
18.3k
            (!ctxt->disableSAX))
7361
13.5k
            ctxt->sax->characters(ctxt->userData, out, i);
7362
18.3k
  return;
7363
38.2k
    }
7364
7365
    /*
7366
     * We are seeing an entity reference
7367
     */
7368
433k
    name = xmlParseEntityRefInternal(ctxt);
7369
433k
    if (name != NULL)
7370
326k
        ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7371
433k
    if (ent == NULL) return;
7372
300k
    if (!ctxt->wellFormed)
7373
35.8k
  return;
7374
7375
    /* special case of predefined entities */
7376
264k
    if ((ent->name == NULL) ||
7377
264k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7378
261k
  val = ent->content;
7379
261k
  if (val == NULL) return;
7380
  /*
7381
   * inline the entity.
7382
   */
7383
261k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7384
261k
      (!ctxt->disableSAX))
7385
261k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7386
261k
  return;
7387
261k
    }
7388
7389
    /*
7390
     * The first reference to the entity trigger a parsing phase
7391
     * where the ent->children is filled with the result from
7392
     * the parsing.
7393
     * Note: external parsed entities will not be loaded, it is not
7394
     * required for a non-validating parser, unless the parsing option
7395
     * of validating, or substituting entities were given. Doing so is
7396
     * far more secure as the parser will only process data coming from
7397
     * the document entity by default.
7398
     *
7399
     * FIXME: This doesn't work correctly since entities can be
7400
     * expanded with different namespace declarations in scope.
7401
     * For example:
7402
     *
7403
     * <!DOCTYPE doc [
7404
     *   <!ENTITY ent "<ns:elem/>">
7405
     * ]>
7406
     * <doc>
7407
     *   <decl1 xmlns:ns="urn:ns1">
7408
     *     &ent;
7409
     *   </decl1>
7410
     *   <decl2 xmlns:ns="urn:ns2">
7411
     *     &ent;
7412
     *   </decl2>
7413
     * </doc>
7414
     *
7415
     * Proposed fix:
7416
     *
7417
     * - Ignore current namespace declarations when parsing the
7418
     *   entity. If a prefix can't be resolved, don't report an error
7419
     *   but mark it as unresolved.
7420
     * - Try to resolve these prefixes when expanding the entity.
7421
     *   This will require a specialized version of xmlStaticCopyNode
7422
     *   which can also make use of the namespace hash table to avoid
7423
     *   quadratic behavior.
7424
     *
7425
     * Alternatively, we could simply reparse the entity on each
7426
     * expansion like we already do with custom SAX callbacks.
7427
     * External entity content should be cached in this case.
7428
     */
7429
3.21k
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7430
3.21k
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7431
394
         ((ctxt->replaceEntities) ||
7432
3.21k
          (ctxt->validate)))) {
7433
3.21k
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7434
1.14k
            xmlCtxtParseEntity(ctxt, ent);
7435
2.07k
        } else if (ent->children == NULL) {
7436
            /*
7437
             * Probably running in SAX mode and the callbacks don't
7438
             * build the entity content. Parse the entity again.
7439
             *
7440
             * This will also be triggered in normal tree builder mode
7441
             * if an entity happens to be empty, causing unnecessary
7442
             * reloads. It's hard to come up with a reliable check in
7443
             * which mode we're running.
7444
             */
7445
1.49k
            xmlCtxtParseEntity(ctxt, ent);
7446
1.49k
        }
7447
3.21k
    }
7448
7449
    /*
7450
     * We also check for amplification if entities aren't substituted.
7451
     * They might be expanded later.
7452
     */
7453
3.21k
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7454
6
        return;
7455
7456
3.20k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7457
805
        return;
7458
7459
2.40k
    if (ctxt->replaceEntities == 0) {
7460
  /*
7461
   * Create a reference
7462
   */
7463
1.32k
        if (ctxt->sax->reference != NULL)
7464
1.32k
      ctxt->sax->reference(ctxt->userData, ent->name);
7465
1.32k
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7466
668
        xmlNodePtr copy, cur;
7467
7468
        /*
7469
         * Seems we are generating the DOM content, copy the tree
7470
   */
7471
668
        cur = ent->children;
7472
7473
        /*
7474
         * Handle first text node with SAX to coalesce text efficiently
7475
         */
7476
668
        if ((cur->type == XML_TEXT_NODE) ||
7477
668
            (cur->type == XML_CDATA_SECTION_NODE)) {
7478
652
            int len = xmlStrlen(cur->content);
7479
7480
652
            if ((cur->type == XML_TEXT_NODE) ||
7481
652
                (ctxt->sax->cdataBlock == NULL)) {
7482
652
                if (ctxt->sax->characters != NULL)
7483
652
                    ctxt->sax->characters(ctxt, cur->content, len);
7484
652
            } else {
7485
0
                if (ctxt->sax->cdataBlock != NULL)
7486
0
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7487
0
            }
7488
7489
652
            cur = cur->next;
7490
652
        }
7491
7492
1.78k
        while (cur != NULL) {
7493
1.41k
            xmlNodePtr last;
7494
7495
            /*
7496
             * Handle last text node with SAX to coalesce text efficiently
7497
             */
7498
1.41k
            if ((cur->next == NULL) &&
7499
1.41k
                ((cur->type == XML_TEXT_NODE) ||
7500
298
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7501
287
                int len = xmlStrlen(cur->content);
7502
7503
287
                if ((cur->type == XML_TEXT_NODE) ||
7504
287
                    (ctxt->sax->cdataBlock == NULL)) {
7505
287
                    if (ctxt->sax->characters != NULL)
7506
287
                        ctxt->sax->characters(ctxt, cur->content, len);
7507
287
                } else {
7508
0
                    if (ctxt->sax->cdataBlock != NULL)
7509
0
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7510
0
                }
7511
7512
287
                break;
7513
287
            }
7514
7515
            /*
7516
             * Reset coalesce buffer stats only for non-text nodes.
7517
             */
7518
1.12k
            ctxt->nodemem = 0;
7519
1.12k
            ctxt->nodelen = 0;
7520
7521
1.12k
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7522
7523
1.12k
            if (copy == NULL) {
7524
5
                xmlErrMemory(ctxt);
7525
5
                break;
7526
5
            }
7527
7528
1.12k
            if (ctxt->parseMode == XML_PARSE_READER) {
7529
                /* Needed for reader */
7530
0
                copy->extra = cur->extra;
7531
                /* Maybe needed for reader */
7532
0
                copy->_private = cur->_private;
7533
0
            }
7534
7535
1.12k
            copy->parent = ctxt->node;
7536
1.12k
            last = ctxt->node->last;
7537
1.12k
            if (last == NULL) {
7538
1
                ctxt->node->children = copy;
7539
1.11k
            } else {
7540
1.11k
                last->next = copy;
7541
1.11k
                copy->prev = last;
7542
1.11k
            }
7543
1.12k
            ctxt->node->last = copy;
7544
7545
1.12k
            cur = cur->next;
7546
1.12k
        }
7547
668
    }
7548
2.40k
}
7549
7550
static xmlEntityPtr
7551
763k
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7552
763k
    xmlEntityPtr ent;
7553
7554
    /*
7555
     * Predefined entities override any extra definition
7556
     */
7557
763k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7558
763k
        ent = xmlGetPredefinedEntity(name);
7559
763k
        if (ent != NULL)
7560
322k
            return(ent);
7561
763k
    }
7562
7563
    /*
7564
     * Ask first SAX for entity resolution, otherwise try the
7565
     * entities which may have stored in the parser context.
7566
     */
7567
441k
    if (ctxt->sax != NULL) {
7568
441k
  if (ctxt->sax->getEntity != NULL)
7569
441k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7570
441k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7571
441k
      (ctxt->options & XML_PARSE_OLDSAX))
7572
0
      ent = xmlGetPredefinedEntity(name);
7573
441k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7574
441k
      (ctxt->userData==ctxt)) {
7575
18.4k
      ent = xmlSAX2GetEntity(ctxt, name);
7576
18.4k
  }
7577
441k
    }
7578
    /*
7579
     * [ WFC: Entity Declared ]
7580
     * In a document without any DTD, a document with only an
7581
     * internal DTD subset which contains no parameter entity
7582
     * references, or a document with "standalone='yes'", the
7583
     * Name given in the entity reference must match that in an
7584
     * entity declaration, except that well-formed documents
7585
     * need not declare any of the following entities: amp, lt,
7586
     * gt, apos, quot.
7587
     * The declaration of a parameter entity must precede any
7588
     * reference to it.
7589
     * Similarly, the declaration of a general entity must
7590
     * precede any reference to it which appears in a default
7591
     * value in an attribute-list declaration. Note that if
7592
     * entities are declared in the external subset or in
7593
     * external parameter entities, a non-validating processor
7594
     * is not obligated to read and process their declarations;
7595
     * for such documents, the rule that an entity must be
7596
     * declared is a well-formedness constraint only if
7597
     * standalone='yes'.
7598
     */
7599
441k
    if (ent == NULL) {
7600
111k
  if ((ctxt->standalone == 1) ||
7601
111k
      ((ctxt->hasExternalSubset == 0) &&
7602
111k
       (ctxt->hasPErefs == 0))) {
7603
106k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7604
106k
         "Entity '%s' not defined\n", name);
7605
106k
  } else {
7606
5.27k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7607
5.27k
         "Entity '%s' not defined\n", name);
7608
5.27k
      if ((ctxt->inSubset == 0) &&
7609
5.27k
    (ctxt->sax != NULL) &&
7610
5.27k
                (ctxt->disableSAX == 0) &&
7611
5.27k
    (ctxt->sax->reference != NULL)) {
7612
2.97k
    ctxt->sax->reference(ctxt->userData, name);
7613
2.97k
      }
7614
5.27k
  }
7615
111k
  ctxt->valid = 0;
7616
111k
    }
7617
7618
    /*
7619
     * [ WFC: Parsed Entity ]
7620
     * An entity reference must not contain the name of an
7621
     * unparsed entity
7622
     */
7623
329k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7624
3
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7625
3
     "Entity reference to unparsed entity %s\n", name);
7626
3
        ent = NULL;
7627
3
    }
7628
7629
    /*
7630
     * [ WFC: No External Entity References ]
7631
     * Attribute values cannot contain direct or indirect
7632
     * entity references to external entities.
7633
     */
7634
329k
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7635
15.4k
        if (inAttr) {
7636
7.37k
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7637
7.37k
                 "Attribute references external entity '%s'\n", name);
7638
7.37k
            ent = NULL;
7639
7.37k
        }
7640
15.4k
    }
7641
7642
441k
    return(ent);
7643
763k
}
7644
7645
/**
7646
 * xmlParseEntityRefInternal:
7647
 * @ctxt:  an XML parser context
7648
 * @inAttr:  whether we are in an attribute value
7649
 *
7650
 * Parse an entity reference. Always consumes '&'.
7651
 *
7652
 * [68] EntityRef ::= '&' Name ';'
7653
 *
7654
 * Returns the name, or NULL in case of error.
7655
 */
7656
static const xmlChar *
7657
1.05M
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7658
1.05M
    const xmlChar *name;
7659
7660
1.05M
    GROW;
7661
7662
1.05M
    if (RAW != '&')
7663
0
        return(NULL);
7664
1.05M
    NEXT;
7665
1.05M
    name = xmlParseName(ctxt);
7666
1.05M
    if (name == NULL) {
7667
182k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7668
182k
           "xmlParseEntityRef: no name\n");
7669
182k
        return(NULL);
7670
182k
    }
7671
871k
    if (RAW != ';') {
7672
171k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7673
171k
  return(NULL);
7674
171k
    }
7675
699k
    NEXT;
7676
7677
699k
    return(name);
7678
871k
}
7679
7680
/**
7681
 * xmlParseEntityRef:
7682
 * @ctxt:  an XML parser context
7683
 *
7684
 * DEPRECATED: Internal function, don't use.
7685
 *
7686
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7687
 */
7688
xmlEntityPtr
7689
0
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7690
0
    const xmlChar *name;
7691
7692
0
    if (ctxt == NULL)
7693
0
        return(NULL);
7694
7695
0
    name = xmlParseEntityRefInternal(ctxt);
7696
0
    if (name == NULL)
7697
0
        return(NULL);
7698
7699
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7700
0
}
7701
7702
/**
7703
 * xmlParseStringEntityRef:
7704
 * @ctxt:  an XML parser context
7705
 * @str:  a pointer to an index in the string
7706
 *
7707
 * parse ENTITY references declarations, but this version parses it from
7708
 * a string value.
7709
 *
7710
 * [68] EntityRef ::= '&' Name ';'
7711
 *
7712
 * [ WFC: Entity Declared ]
7713
 * In a document without any DTD, a document with only an internal DTD
7714
 * subset which contains no parameter entity references, or a document
7715
 * with "standalone='yes'", the Name given in the entity reference
7716
 * must match that in an entity declaration, except that well-formed
7717
 * documents need not declare any of the following entities: amp, lt,
7718
 * gt, apos, quot.  The declaration of a parameter entity must precede
7719
 * any reference to it.  Similarly, the declaration of a general entity
7720
 * must precede any reference to it which appears in a default value in an
7721
 * attribute-list declaration. Note that if entities are declared in the
7722
 * external subset or in external parameter entities, a non-validating
7723
 * processor is not obligated to read and process their declarations;
7724
 * for such documents, the rule that an entity must be declared is a
7725
 * well-formedness constraint only if standalone='yes'.
7726
 *
7727
 * [ WFC: Parsed Entity ]
7728
 * An entity reference must not contain the name of an unparsed entity
7729
 *
7730
 * Returns the parsed entity name, or NULL in case of error. The str pointer
7731
 * is updated to the current location in the string.
7732
 */
7733
static xmlChar *
7734
63.4k
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7735
63.4k
    xmlChar *name;
7736
63.4k
    const xmlChar *ptr;
7737
63.4k
    xmlChar cur;
7738
7739
63.4k
    if ((str == NULL) || (*str == NULL))
7740
0
        return(NULL);
7741
63.4k
    ptr = *str;
7742
63.4k
    cur = *ptr;
7743
63.4k
    if (cur != '&')
7744
0
  return(NULL);
7745
7746
63.4k
    ptr++;
7747
63.4k
    name = xmlParseStringName(ctxt, &ptr);
7748
63.4k
    if (name == NULL) {
7749
150
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7750
150
           "xmlParseStringEntityRef: no name\n");
7751
150
  *str = ptr;
7752
150
  return(NULL);
7753
150
    }
7754
63.3k
    if (*ptr != ';') {
7755
71
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7756
71
        xmlFree(name);
7757
71
  *str = ptr;
7758
71
  return(NULL);
7759
71
    }
7760
63.2k
    ptr++;
7761
7762
63.2k
    *str = ptr;
7763
63.2k
    return(name);
7764
63.3k
}
7765
7766
/**
7767
 * xmlParsePEReference:
7768
 * @ctxt:  an XML parser context
7769
 *
7770
 * DEPRECATED: Internal function, don't use.
7771
 *
7772
 * Parse a parameter entity reference. Always consumes '%'.
7773
 *
7774
 * The entity content is handled directly by pushing it's content as
7775
 * a new input stream.
7776
 *
7777
 * [69] PEReference ::= '%' Name ';'
7778
 *
7779
 * [ WFC: No Recursion ]
7780
 * A parsed entity must not contain a recursive
7781
 * reference to itself, either directly or indirectly.
7782
 *
7783
 * [ WFC: Entity Declared ]
7784
 * In a document without any DTD, a document with only an internal DTD
7785
 * subset which contains no parameter entity references, or a document
7786
 * with "standalone='yes'", ...  ... The declaration of a parameter
7787
 * entity must precede any reference to it...
7788
 *
7789
 * [ VC: Entity Declared ]
7790
 * In a document with an external subset or external parameter entities
7791
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7792
 * must precede any reference to it...
7793
 *
7794
 * [ WFC: In DTD ]
7795
 * Parameter-entity references may only appear in the DTD.
7796
 * NOTE: misleading but this is handled.
7797
 */
7798
void
7799
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7800
14.2k
{
7801
14.2k
    const xmlChar *name;
7802
14.2k
    xmlEntityPtr entity = NULL;
7803
14.2k
    xmlParserInputPtr input;
7804
7805
14.2k
    if (RAW != '%')
7806
0
        return;
7807
14.2k
    NEXT;
7808
14.2k
    name = xmlParseName(ctxt);
7809
14.2k
    if (name == NULL) {
7810
4.65k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7811
4.65k
  return;
7812
4.65k
    }
7813
9.60k
    if (RAW != ';') {
7814
2.80k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7815
2.80k
        return;
7816
2.80k
    }
7817
7818
6.80k
    NEXT;
7819
7820
    /*
7821
     * Request the entity from SAX
7822
     */
7823
6.80k
    if ((ctxt->sax != NULL) &&
7824
6.80k
  (ctxt->sax->getParameterEntity != NULL))
7825
6.80k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7826
6.80k
    if (entity == NULL) {
7827
  /*
7828
   * [ WFC: Entity Declared ]
7829
   * In a document without any DTD, a document with only an
7830
   * internal DTD subset which contains no parameter entity
7831
   * references, or a document with "standalone='yes'", ...
7832
   * ... The declaration of a parameter entity must precede
7833
   * any reference to it...
7834
   */
7835
2.37k
  if ((ctxt->standalone == 1) ||
7836
2.37k
      ((ctxt->hasExternalSubset == 0) &&
7837
2.31k
       (ctxt->hasPErefs == 0))) {
7838
188
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7839
188
            "PEReference: %%%s; not found\n",
7840
188
            name);
7841
2.18k
  } else {
7842
      /*
7843
       * [ VC: Entity Declared ]
7844
       * In a document with an external subset or external
7845
       * parameter entities with "standalone='no'", ...
7846
       * ... The declaration of a parameter entity must
7847
       * precede any reference to it...
7848
       */
7849
2.18k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7850
0
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7851
0
                                 "PEReference: %%%s; not found\n",
7852
0
                                 name, NULL);
7853
0
            } else
7854
2.18k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7855
2.18k
                              "PEReference: %%%s; not found\n",
7856
2.18k
                              name, NULL);
7857
2.18k
            ctxt->valid = 0;
7858
2.18k
  }
7859
4.43k
    } else {
7860
  /*
7861
   * Internal checking in case the entity quest barfed
7862
   */
7863
4.43k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7864
4.43k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7865
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7866
0
      "Internal: %%%s; is not a parameter entity\n",
7867
0
        name, NULL);
7868
4.43k
  } else {
7869
4.43k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7870
4.43k
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7871
2.02k
     ((ctxt->loadsubset == 0) &&
7872
2.02k
      (ctxt->replaceEntities == 0) &&
7873
2.02k
      (ctxt->validate == 0))))
7874
161
    return;
7875
7876
4.27k
            if (entity->flags & XML_ENT_EXPANDING) {
7877
1
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7878
1
                xmlHaltParser(ctxt);
7879
1
                return;
7880
1
            }
7881
7882
4.27k
      input = xmlNewEntityInputStream(ctxt, entity);
7883
4.27k
      if (xmlPushInput(ctxt, input) < 0) {
7884
127
                xmlFreeInputStream(input);
7885
127
    return;
7886
127
            }
7887
7888
4.14k
            entity->flags |= XML_ENT_EXPANDING;
7889
7890
4.14k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7891
1.73k
                xmlDetectEncoding(ctxt);
7892
7893
1.73k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7894
1.73k
                    (IS_BLANK_CH(NXT(5)))) {
7895
851
                    xmlParseTextDecl(ctxt);
7896
851
                }
7897
1.73k
            }
7898
4.14k
  }
7899
4.43k
    }
7900
6.51k
    ctxt->hasPErefs = 1;
7901
6.51k
}
7902
7903
/**
7904
 * xmlLoadEntityContent:
7905
 * @ctxt:  an XML parser context
7906
 * @entity: an unloaded system entity
7907
 *
7908
 * Load the original content of the given system entity from the
7909
 * ExternalID/SystemID given. This is to be used for Included in Literal
7910
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7911
 *
7912
 * Returns 0 in case of success and -1 in case of failure
7913
 */
7914
static int
7915
21
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7916
21
    xmlParserInputPtr oldinput, input = NULL;
7917
21
    xmlParserInputPtr *oldinputTab;
7918
21
    const xmlChar *oldencoding;
7919
21
    xmlChar *content = NULL;
7920
21
    size_t length, i;
7921
21
    int oldinputNr, oldinputMax;
7922
21
    int ret = -1;
7923
21
    int res;
7924
7925
21
    if ((ctxt == NULL) || (entity == NULL) ||
7926
21
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7927
21
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7928
21
  (entity->content != NULL)) {
7929
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7930
0
              "xmlLoadEntityContent parameter error");
7931
0
        return(-1);
7932
0
    }
7933
7934
21
    input = xmlLoadExternalEntity((char *) entity->URI,
7935
21
           (char *) entity->ExternalID, ctxt);
7936
21
    if (input == NULL)
7937
0
        return(-1);
7938
7939
21
    oldinput = ctxt->input;
7940
21
    oldinputNr = ctxt->inputNr;
7941
21
    oldinputMax = ctxt->inputMax;
7942
21
    oldinputTab = ctxt->inputTab;
7943
21
    oldencoding = ctxt->encoding;
7944
7945
21
    ctxt->input = NULL;
7946
21
    ctxt->inputNr = 0;
7947
21
    ctxt->inputMax = 1;
7948
21
    ctxt->encoding = NULL;
7949
21
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7950
21
    if (ctxt->inputTab == NULL) {
7951
0
        xmlErrMemory(ctxt);
7952
0
        xmlFreeInputStream(input);
7953
0
        goto error;
7954
0
    }
7955
7956
21
    xmlBufResetInput(input->buf->buffer, input);
7957
7958
21
    inputPush(ctxt, input);
7959
7960
21
    xmlDetectEncoding(ctxt);
7961
7962
    /*
7963
     * Parse a possible text declaration first
7964
     */
7965
21
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7966
13
  xmlParseTextDecl(ctxt);
7967
        /*
7968
         * An XML-1.0 document can't reference an entity not XML-1.0
7969
         */
7970
13
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7971
13
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7972
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7973
0
                           "Version mismatch between document and entity\n");
7974
0
        }
7975
13
    }
7976
7977
21
    length = input->cur - input->base;
7978
21
    xmlBufShrink(input->buf->buffer, length);
7979
21
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7980
7981
327
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7982
306
        ;
7983
7984
21
    xmlBufResetInput(input->buf->buffer, input);
7985
7986
21
    if (res < 0) {
7987
0
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7988
0
        goto error;
7989
0
    }
7990
7991
21
    length = xmlBufUse(input->buf->buffer);
7992
21
    content = xmlBufDetach(input->buf->buffer);
7993
7994
21
    if (length > INT_MAX) {
7995
0
        xmlErrMemory(ctxt);
7996
0
        goto error;
7997
0
    }
7998
7999
23.7k
    for (i = 0; i < length; ) {
8000
23.7k
        int clen = length - i;
8001
23.7k
        int c = xmlGetUTF8Char(content + i, &clen);
8002
8003
23.7k
        if ((c < 0) || (!IS_CHAR(c))) {
8004
17
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8005
17
                              "xmlLoadEntityContent: invalid char value %d\n",
8006
17
                              content[i]);
8007
17
            goto error;
8008
17
        }
8009
23.7k
        i += clen;
8010
23.7k
    }
8011
8012
4
    xmlSaturatedAdd(&ctxt->sizeentities, length);
8013
4
    entity->content = content;
8014
4
    entity->length = length;
8015
4
    content = NULL;
8016
4
    ret = 0;
8017
8018
21
error:
8019
42
    while (ctxt->inputNr > 0)
8020
21
        xmlFreeInputStream(inputPop(ctxt));
8021
21
    xmlFree(ctxt->inputTab);
8022
21
    xmlFree((xmlChar *) ctxt->encoding);
8023
8024
21
    ctxt->input = oldinput;
8025
21
    ctxt->inputNr = oldinputNr;
8026
21
    ctxt->inputMax = oldinputMax;
8027
21
    ctxt->inputTab = oldinputTab;
8028
21
    ctxt->encoding = oldencoding;
8029
8030
21
    xmlFree(content);
8031
8032
21
    return(ret);
8033
4
}
8034
8035
/**
8036
 * xmlParseStringPEReference:
8037
 * @ctxt:  an XML parser context
8038
 * @str:  a pointer to an index in the string
8039
 *
8040
 * parse PEReference declarations
8041
 *
8042
 * [69] PEReference ::= '%' Name ';'
8043
 *
8044
 * [ WFC: No Recursion ]
8045
 * A parsed entity must not contain a recursive
8046
 * reference to itself, either directly or indirectly.
8047
 *
8048
 * [ WFC: Entity Declared ]
8049
 * In a document without any DTD, a document with only an internal DTD
8050
 * subset which contains no parameter entity references, or a document
8051
 * with "standalone='yes'", ...  ... The declaration of a parameter
8052
 * entity must precede any reference to it...
8053
 *
8054
 * [ VC: Entity Declared ]
8055
 * In a document with an external subset or external parameter entities
8056
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8057
 * must precede any reference to it...
8058
 *
8059
 * [ WFC: In DTD ]
8060
 * Parameter-entity references may only appear in the DTD.
8061
 * NOTE: misleading but this is handled.
8062
 *
8063
 * Returns the string of the entity content.
8064
 *         str is updated to the current value of the index
8065
 */
8066
static xmlEntityPtr
8067
6.15k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8068
6.15k
    const xmlChar *ptr;
8069
6.15k
    xmlChar cur;
8070
6.15k
    xmlChar *name;
8071
6.15k
    xmlEntityPtr entity = NULL;
8072
8073
6.15k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8074
6.15k
    ptr = *str;
8075
6.15k
    cur = *ptr;
8076
6.15k
    if (cur != '%')
8077
0
        return(NULL);
8078
6.15k
    ptr++;
8079
6.15k
    name = xmlParseStringName(ctxt, &ptr);
8080
6.15k
    if (name == NULL) {
8081
2.22k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8082
2.22k
           "xmlParseStringPEReference: no name\n");
8083
2.22k
  *str = ptr;
8084
2.22k
  return(NULL);
8085
2.22k
    }
8086
3.92k
    cur = *ptr;
8087
3.92k
    if (cur != ';') {
8088
574
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8089
574
  xmlFree(name);
8090
574
  *str = ptr;
8091
574
  return(NULL);
8092
574
    }
8093
3.35k
    ptr++;
8094
8095
    /*
8096
     * Request the entity from SAX
8097
     */
8098
3.35k
    if ((ctxt->sax != NULL) &&
8099
3.35k
  (ctxt->sax->getParameterEntity != NULL))
8100
3.35k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8101
3.35k
    if (entity == NULL) {
8102
  /*
8103
   * [ WFC: Entity Declared ]
8104
   * In a document without any DTD, a document with only an
8105
   * internal DTD subset which contains no parameter entity
8106
   * references, or a document with "standalone='yes'", ...
8107
   * ... The declaration of a parameter entity must precede
8108
   * any reference to it...
8109
   */
8110
3.16k
  if ((ctxt->standalone == 1) ||
8111
3.16k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8112
265
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8113
265
     "PEReference: %%%s; not found\n", name);
8114
2.89k
  } else {
8115
      /*
8116
       * [ VC: Entity Declared ]
8117
       * In a document with an external subset or external
8118
       * parameter entities with "standalone='no'", ...
8119
       * ... The declaration of a parameter entity must
8120
       * precede any reference to it...
8121
       */
8122
2.89k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8123
2.89k
        "PEReference: %%%s; not found\n",
8124
2.89k
        name, NULL);
8125
2.89k
      ctxt->valid = 0;
8126
2.89k
  }
8127
3.16k
    } else {
8128
  /*
8129
   * Internal checking in case the entity quest barfed
8130
   */
8131
189
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8132
189
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8133
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8134
0
        "%%%s; is not a parameter entity\n",
8135
0
        name, NULL);
8136
0
  }
8137
189
    }
8138
3.35k
    ctxt->hasPErefs = 1;
8139
3.35k
    xmlFree(name);
8140
3.35k
    *str = ptr;
8141
3.35k
    return(entity);
8142
3.92k
}
8143
8144
/**
8145
 * xmlParseDocTypeDecl:
8146
 * @ctxt:  an XML parser context
8147
 *
8148
 * DEPRECATED: Internal function, don't use.
8149
 *
8150
 * parse a DOCTYPE declaration
8151
 *
8152
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8153
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8154
 *
8155
 * [ VC: Root Element Type ]
8156
 * The Name in the document type declaration must match the element
8157
 * type of the root element.
8158
 */
8159
8160
void
8161
21.2k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8162
21.2k
    const xmlChar *name = NULL;
8163
21.2k
    xmlChar *ExternalID = NULL;
8164
21.2k
    xmlChar *URI = NULL;
8165
8166
    /*
8167
     * We know that '<!DOCTYPE' has been detected.
8168
     */
8169
21.2k
    SKIP(9);
8170
8171
21.2k
    SKIP_BLANKS;
8172
8173
    /*
8174
     * Parse the DOCTYPE name.
8175
     */
8176
21.2k
    name = xmlParseName(ctxt);
8177
21.2k
    if (name == NULL) {
8178
1.02k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8179
1.02k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8180
1.02k
    }
8181
21.2k
    ctxt->intSubName = name;
8182
8183
21.2k
    SKIP_BLANKS;
8184
8185
    /*
8186
     * Check for SystemID and ExternalID
8187
     */
8188
21.2k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8189
8190
21.2k
    if ((URI != NULL) || (ExternalID != NULL)) {
8191
3.75k
        ctxt->hasExternalSubset = 1;
8192
3.75k
    }
8193
21.2k
    ctxt->extSubURI = URI;
8194
21.2k
    ctxt->extSubSystem = ExternalID;
8195
8196
21.2k
    SKIP_BLANKS;
8197
8198
    /*
8199
     * Create and update the internal subset.
8200
     */
8201
21.2k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8202
21.2k
  (!ctxt->disableSAX))
8203
16.1k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8204
8205
    /*
8206
     * Is there any internal subset declarations ?
8207
     * they are handled separately in xmlParseInternalSubset()
8208
     */
8209
21.2k
    if (RAW == '[')
8210
16.6k
  return;
8211
8212
    /*
8213
     * We should be at the end of the DOCTYPE declaration.
8214
     */
8215
4.62k
    if (RAW != '>') {
8216
1.47k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8217
1.47k
    }
8218
4.62k
    NEXT;
8219
4.62k
}
8220
8221
/**
8222
 * xmlParseInternalSubset:
8223
 * @ctxt:  an XML parser context
8224
 *
8225
 * parse the internal subset declaration
8226
 *
8227
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8228
 */
8229
8230
static void
8231
17.2k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8232
    /*
8233
     * Is there any DTD definition ?
8234
     */
8235
17.2k
    if (RAW == '[') {
8236
17.2k
        int oldInputNr = ctxt->inputNr;
8237
8238
17.2k
        NEXT;
8239
  /*
8240
   * Parse the succession of Markup declarations and
8241
   * PEReferences.
8242
   * Subsequence (markupdecl | PEReference | S)*
8243
   */
8244
17.2k
  SKIP_BLANKS;
8245
119k
  while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) &&
8246
119k
               (PARSER_STOPPED(ctxt) == 0)) {
8247
8248
            /*
8249
             * Conditional sections are allowed from external entities included
8250
             * by PE References in the internal subset.
8251
             */
8252
113k
            if ((PARSER_EXTERNAL(ctxt)) &&
8253
113k
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8254
29
                xmlParseConditionalSections(ctxt);
8255
113k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8256
89.6k
          xmlParseMarkupDecl(ctxt);
8257
89.6k
            } else if (RAW == '%') {
8258
12.8k
          xmlParsePEReference(ctxt);
8259
12.8k
            } else {
8260
10.6k
    xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8261
10.6k
                break;
8262
10.6k
            }
8263
102k
      SKIP_BLANKS_PE;
8264
102k
            SHRINK;
8265
102k
            GROW;
8266
102k
  }
8267
8268
17.3k
        while (ctxt->inputNr > oldInputNr)
8269
105
            xmlPopPE(ctxt);
8270
8271
17.2k
  if (RAW == ']') {
8272
4.01k
      NEXT;
8273
4.01k
      SKIP_BLANKS;
8274
4.01k
  }
8275
17.2k
    }
8276
8277
    /*
8278
     * We should be at the end of the DOCTYPE declaration.
8279
     */
8280
17.2k
    if ((ctxt->wellFormed) && (RAW != '>')) {
8281
48
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8282
48
  return;
8283
48
    }
8284
17.2k
    NEXT;
8285
17.2k
}
8286
8287
#ifdef LIBXML_SAX1_ENABLED
8288
/**
8289
 * xmlParseAttribute:
8290
 * @ctxt:  an XML parser context
8291
 * @value:  a xmlChar ** used to store the value of the attribute
8292
 *
8293
 * DEPRECATED: Internal function, don't use.
8294
 *
8295
 * parse an attribute
8296
 *
8297
 * [41] Attribute ::= Name Eq AttValue
8298
 *
8299
 * [ WFC: No External Entity References ]
8300
 * Attribute values cannot contain direct or indirect entity references
8301
 * to external entities.
8302
 *
8303
 * [ WFC: No < in Attribute Values ]
8304
 * The replacement text of any entity referred to directly or indirectly in
8305
 * an attribute value (other than "&lt;") must not contain a <.
8306
 *
8307
 * [ VC: Attribute Value Type ]
8308
 * The attribute must have been declared; the value must be of the type
8309
 * declared for it.
8310
 *
8311
 * [25] Eq ::= S? '=' S?
8312
 *
8313
 * With namespace:
8314
 *
8315
 * [NS 11] Attribute ::= QName Eq AttValue
8316
 *
8317
 * Also the case QName == xmlns:??? is handled independently as a namespace
8318
 * definition.
8319
 *
8320
 * Returns the attribute name, and the value in *value.
8321
 */
8322
8323
const xmlChar *
8324
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8325
    const xmlChar *name;
8326
    xmlChar *val;
8327
8328
    *value = NULL;
8329
    GROW;
8330
    name = xmlParseName(ctxt);
8331
    if (name == NULL) {
8332
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8333
                 "error parsing attribute name\n");
8334
        return(NULL);
8335
    }
8336
8337
    /*
8338
     * read the value
8339
     */
8340
    SKIP_BLANKS;
8341
    if (RAW == '=') {
8342
        NEXT;
8343
  SKIP_BLANKS;
8344
  val = xmlParseAttValue(ctxt);
8345
    } else {
8346
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8347
         "Specification mandates value for attribute %s\n", name);
8348
  return(name);
8349
    }
8350
8351
    /*
8352
     * Check that xml:lang conforms to the specification
8353
     * No more registered as an error, just generate a warning now
8354
     * since this was deprecated in XML second edition
8355
     */
8356
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8357
  if (!xmlCheckLanguageID(val)) {
8358
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8359
              "Malformed value for xml:lang : %s\n",
8360
        val, NULL);
8361
  }
8362
    }
8363
8364
    /*
8365
     * Check that xml:space conforms to the specification
8366
     */
8367
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8368
  if (xmlStrEqual(val, BAD_CAST "default"))
8369
      *(ctxt->space) = 0;
8370
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8371
      *(ctxt->space) = 1;
8372
  else {
8373
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8374
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8375
                                 val, NULL);
8376
  }
8377
    }
8378
8379
    *value = val;
8380
    return(name);
8381
}
8382
8383
/**
8384
 * xmlParseStartTag:
8385
 * @ctxt:  an XML parser context
8386
 *
8387
 * DEPRECATED: Internal function, don't use.
8388
 *
8389
 * Parse a start tag. Always consumes '<'.
8390
 *
8391
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8392
 *
8393
 * [ WFC: Unique Att Spec ]
8394
 * No attribute name may appear more than once in the same start-tag or
8395
 * empty-element tag.
8396
 *
8397
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8398
 *
8399
 * [ WFC: Unique Att Spec ]
8400
 * No attribute name may appear more than once in the same start-tag or
8401
 * empty-element tag.
8402
 *
8403
 * With namespace:
8404
 *
8405
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8406
 *
8407
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8408
 *
8409
 * Returns the element name parsed
8410
 */
8411
8412
const xmlChar *
8413
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8414
    const xmlChar *name;
8415
    const xmlChar *attname;
8416
    xmlChar *attvalue;
8417
    const xmlChar **atts = ctxt->atts;
8418
    int nbatts = 0;
8419
    int maxatts = ctxt->maxatts;
8420
    int i;
8421
8422
    if (RAW != '<') return(NULL);
8423
    NEXT1;
8424
8425
    name = xmlParseName(ctxt);
8426
    if (name == NULL) {
8427
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8428
       "xmlParseStartTag: invalid element name\n");
8429
        return(NULL);
8430
    }
8431
8432
    /*
8433
     * Now parse the attributes, it ends up with the ending
8434
     *
8435
     * (S Attribute)* S?
8436
     */
8437
    SKIP_BLANKS;
8438
    GROW;
8439
8440
    while (((RAW != '>') &&
8441
     ((RAW != '/') || (NXT(1) != '>')) &&
8442
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8443
  attname = xmlParseAttribute(ctxt, &attvalue);
8444
        if (attname == NULL)
8445
      break;
8446
        if (attvalue != NULL) {
8447
      /*
8448
       * [ WFC: Unique Att Spec ]
8449
       * No attribute name may appear more than once in the same
8450
       * start-tag or empty-element tag.
8451
       */
8452
      for (i = 0; i < nbatts;i += 2) {
8453
          if (xmlStrEqual(atts[i], attname)) {
8454
        xmlErrAttributeDup(ctxt, NULL, attname);
8455
        xmlFree(attvalue);
8456
        goto failed;
8457
    }
8458
      }
8459
      /*
8460
       * Add the pair to atts
8461
       */
8462
      if (atts == NULL) {
8463
          maxatts = 22; /* allow for 10 attrs by default */
8464
          atts = (const xmlChar **)
8465
           xmlMalloc(maxatts * sizeof(xmlChar *));
8466
    if (atts == NULL) {
8467
        xmlErrMemory(ctxt);
8468
        if (attvalue != NULL)
8469
      xmlFree(attvalue);
8470
        goto failed;
8471
    }
8472
    ctxt->atts = atts;
8473
    ctxt->maxatts = maxatts;
8474
      } else if (nbatts + 4 > maxatts) {
8475
          const xmlChar **n;
8476
8477
          maxatts *= 2;
8478
          n = (const xmlChar **) xmlRealloc((void *) atts,
8479
               maxatts * sizeof(const xmlChar *));
8480
    if (n == NULL) {
8481
        xmlErrMemory(ctxt);
8482
        if (attvalue != NULL)
8483
      xmlFree(attvalue);
8484
        goto failed;
8485
    }
8486
    atts = n;
8487
    ctxt->atts = atts;
8488
    ctxt->maxatts = maxatts;
8489
      }
8490
      atts[nbatts++] = attname;
8491
      atts[nbatts++] = attvalue;
8492
      atts[nbatts] = NULL;
8493
      atts[nbatts + 1] = NULL;
8494
  } else {
8495
      if (attvalue != NULL)
8496
    xmlFree(attvalue);
8497
  }
8498
8499
failed:
8500
8501
  GROW
8502
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8503
      break;
8504
  if (SKIP_BLANKS == 0) {
8505
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8506
         "attributes construct error\n");
8507
  }
8508
  SHRINK;
8509
        GROW;
8510
    }
8511
8512
    /*
8513
     * SAX: Start of Element !
8514
     */
8515
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8516
  (!ctxt->disableSAX)) {
8517
  if (nbatts > 0)
8518
      ctxt->sax->startElement(ctxt->userData, name, atts);
8519
  else
8520
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8521
    }
8522
8523
    if (atts != NULL) {
8524
        /* Free only the content strings */
8525
        for (i = 1;i < nbatts;i+=2)
8526
      if (atts[i] != NULL)
8527
         xmlFree((xmlChar *) atts[i]);
8528
    }
8529
    return(name);
8530
}
8531
8532
/**
8533
 * xmlParseEndTag1:
8534
 * @ctxt:  an XML parser context
8535
 * @line:  line of the start tag
8536
 * @nsNr:  number of namespaces on the start tag
8537
 *
8538
 * Parse an end tag. Always consumes '</'.
8539
 *
8540
 * [42] ETag ::= '</' Name S? '>'
8541
 *
8542
 * With namespace
8543
 *
8544
 * [NS 9] ETag ::= '</' QName S? '>'
8545
 */
8546
8547
static void
8548
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8549
    const xmlChar *name;
8550
8551
    GROW;
8552
    if ((RAW != '<') || (NXT(1) != '/')) {
8553
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8554
           "xmlParseEndTag: '</' not found\n");
8555
  return;
8556
    }
8557
    SKIP(2);
8558
8559
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8560
8561
    /*
8562
     * We should definitely be at the ending "S? '>'" part
8563
     */
8564
    GROW;
8565
    SKIP_BLANKS;
8566
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8567
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8568
    } else
8569
  NEXT1;
8570
8571
    /*
8572
     * [ WFC: Element Type Match ]
8573
     * The Name in an element's end-tag must match the element type in the
8574
     * start-tag.
8575
     *
8576
     */
8577
    if (name != (xmlChar*)1) {
8578
        if (name == NULL) name = BAD_CAST "unparsable";
8579
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8580
         "Opening and ending tag mismatch: %s line %d and %s\n",
8581
                    ctxt->name, line, name);
8582
    }
8583
8584
    /*
8585
     * SAX: End of Tag
8586
     */
8587
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8588
  (!ctxt->disableSAX))
8589
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8590
8591
    namePop(ctxt);
8592
    spacePop(ctxt);
8593
    return;
8594
}
8595
8596
/**
8597
 * xmlParseEndTag:
8598
 * @ctxt:  an XML parser context
8599
 *
8600
 * DEPRECATED: Internal function, don't use.
8601
 *
8602
 * parse an end of tag
8603
 *
8604
 * [42] ETag ::= '</' Name S? '>'
8605
 *
8606
 * With namespace
8607
 *
8608
 * [NS 9] ETag ::= '</' QName S? '>'
8609
 */
8610
8611
void
8612
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8613
    xmlParseEndTag1(ctxt, 0);
8614
}
8615
#endif /* LIBXML_SAX1_ENABLED */
8616
8617
/************************************************************************
8618
 *                  *
8619
 *          SAX 2 specific operations       *
8620
 *                  *
8621
 ************************************************************************/
8622
8623
/**
8624
 * xmlParseQNameHashed:
8625
 * @ctxt:  an XML parser context
8626
 * @prefix:  pointer to store the prefix part
8627
 *
8628
 * parse an XML Namespace QName
8629
 *
8630
 * [6]  QName  ::= (Prefix ':')? LocalPart
8631
 * [7]  Prefix  ::= NCName
8632
 * [8]  LocalPart  ::= NCName
8633
 *
8634
 * Returns the Name parsed or NULL
8635
 */
8636
8637
static xmlHashedString
8638
7.90M
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8639
7.90M
    xmlHashedString l, p;
8640
7.90M
    int start, isNCName = 0;
8641
8642
7.90M
    l.name = NULL;
8643
7.90M
    p.name = NULL;
8644
8645
7.90M
    GROW;
8646
7.90M
    start = CUR_PTR - BASE_PTR;
8647
8648
7.90M
    l = xmlParseNCName(ctxt);
8649
7.90M
    if (l.name != NULL) {
8650
5.10M
        isNCName = 1;
8651
5.10M
        if (CUR == ':') {
8652
1.31M
            NEXT;
8653
1.31M
            p = l;
8654
1.31M
            l = xmlParseNCName(ctxt);
8655
1.31M
        }
8656
5.10M
    }
8657
7.90M
    if ((l.name == NULL) || (CUR == ':')) {
8658
2.90M
        xmlChar *tmp;
8659
8660
2.90M
        l.name = NULL;
8661
2.90M
        p.name = NULL;
8662
2.90M
        if ((isNCName == 0) && (CUR != ':'))
8663
2.79M
            return(l);
8664
107k
        tmp = xmlParseNmtoken(ctxt);
8665
107k
        if (tmp != NULL)
8666
36.4k
            xmlFree(tmp);
8667
107k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8668
107k
                                CUR_PTR - (BASE_PTR + start));
8669
107k
        if (l.name == NULL) {
8670
0
            xmlErrMemory(ctxt);
8671
0
            return(l);
8672
0
        }
8673
107k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8674
107k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8675
107k
    }
8676
8677
5.11M
    *prefix = p;
8678
5.11M
    return(l);
8679
7.90M
}
8680
8681
/**
8682
 * xmlParseQName:
8683
 * @ctxt:  an XML parser context
8684
 * @prefix:  pointer to store the prefix part
8685
 *
8686
 * parse an XML Namespace QName
8687
 *
8688
 * [6]  QName  ::= (Prefix ':')? LocalPart
8689
 * [7]  Prefix  ::= NCName
8690
 * [8]  LocalPart  ::= NCName
8691
 *
8692
 * Returns the Name parsed or NULL
8693
 */
8694
8695
static const xmlChar *
8696
100k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8697
100k
    xmlHashedString n, p;
8698
8699
100k
    n = xmlParseQNameHashed(ctxt, &p);
8700
100k
    if (n.name == NULL)
8701
64.2k
        return(NULL);
8702
35.9k
    *prefix = p.name;
8703
35.9k
    return(n.name);
8704
100k
}
8705
8706
/**
8707
 * xmlParseQNameAndCompare:
8708
 * @ctxt:  an XML parser context
8709
 * @name:  the localname
8710
 * @prefix:  the prefix, if any.
8711
 *
8712
 * parse an XML name and compares for match
8713
 * (specialized for endtag parsing)
8714
 *
8715
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8716
 * and the name for mismatch
8717
 */
8718
8719
static const xmlChar *
8720
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8721
507k
                        xmlChar const *prefix) {
8722
507k
    const xmlChar *cmp;
8723
507k
    const xmlChar *in;
8724
507k
    const xmlChar *ret;
8725
507k
    const xmlChar *prefix2;
8726
8727
507k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8728
8729
507k
    GROW;
8730
507k
    in = ctxt->input->cur;
8731
8732
507k
    cmp = prefix;
8733
1.04M
    while (*in != 0 && *in == *cmp) {
8734
536k
  ++in;
8735
536k
  ++cmp;
8736
536k
    }
8737
507k
    if ((*cmp == 0) && (*in == ':')) {
8738
418k
        in++;
8739
418k
  cmp = name;
8740
2.91M
  while (*in != 0 && *in == *cmp) {
8741
2.49M
      ++in;
8742
2.49M
      ++cmp;
8743
2.49M
  }
8744
418k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8745
      /* success */
8746
407k
            ctxt->input->col += in - ctxt->input->cur;
8747
407k
      ctxt->input->cur = in;
8748
407k
      return((const xmlChar*) 1);
8749
407k
  }
8750
418k
    }
8751
    /*
8752
     * all strings coms from the dictionary, equality can be done directly
8753
     */
8754
100k
    ret = xmlParseQName (ctxt, &prefix2);
8755
100k
    if (ret == NULL)
8756
64.2k
        return(NULL);
8757
35.9k
    if ((ret == name) && (prefix == prefix2))
8758
3.19k
  return((const xmlChar*) 1);
8759
32.7k
    return ret;
8760
35.9k
}
8761
8762
/**
8763
 * xmlParseAttribute2:
8764
 * @ctxt:  an XML parser context
8765
 * @pref:  the element prefix
8766
 * @elem:  the element name
8767
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
8768
 * @value:  a xmlChar ** used to store the value of the attribute
8769
 * @len:  an int * to save the length of the attribute
8770
 * @alloc:  an int * to indicate if the attribute was allocated
8771
 *
8772
 * parse an attribute in the new SAX2 framework.
8773
 *
8774
 * Returns the attribute name, and the value in *value, .
8775
 */
8776
8777
static xmlHashedString
8778
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8779
                   const xmlChar * pref, const xmlChar * elem,
8780
                   xmlHashedString * hprefix, xmlChar ** value,
8781
                   int *len, int *alloc)
8782
3.25M
{
8783
3.25M
    xmlHashedString hname;
8784
3.25M
    const xmlChar *prefix, *name;
8785
3.25M
    xmlChar *val = NULL, *internal_val = NULL;
8786
3.25M
    int normalize = 0;
8787
8788
3.25M
    *value = NULL;
8789
3.25M
    GROW;
8790
3.25M
    hname = xmlParseQNameHashed(ctxt, hprefix);
8791
3.25M
    if (hname.name == NULL) {
8792
1.98M
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8793
1.98M
                       "error parsing attribute name\n");
8794
1.98M
        return(hname);
8795
1.98M
    }
8796
1.26M
    name = hname.name;
8797
1.26M
    if (hprefix->name != NULL)
8798
284k
        prefix = hprefix->name;
8799
976k
    else
8800
976k
        prefix = NULL;
8801
8802
    /*
8803
     * get the type if needed
8804
     */
8805
1.26M
    if (ctxt->attsSpecial != NULL) {
8806
135k
        int type;
8807
8808
135k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
8809
135k
                                                 pref, elem,
8810
135k
                                                 prefix, name);
8811
135k
        if (type != 0)
8812
61.8k
            normalize = 1;
8813
135k
    }
8814
8815
    /*
8816
     * read the value
8817
     */
8818
1.26M
    SKIP_BLANKS;
8819
1.26M
    if (RAW == '=') {
8820
1.19M
        NEXT;
8821
1.19M
        SKIP_BLANKS;
8822
1.19M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8823
1.19M
        if (val == NULL)
8824
54.5k
            goto error;
8825
1.19M
    } else {
8826
70.1k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8827
70.1k
                          "Specification mandates value for attribute %s\n",
8828
70.1k
                          name);
8829
70.1k
        goto error;
8830
70.1k
    }
8831
8832
1.13M
    if (prefix == ctxt->str_xml) {
8833
        /*
8834
         * Check that xml:lang conforms to the specification
8835
         * No more registered as an error, just generate a warning now
8836
         * since this was deprecated in XML second edition
8837
         */
8838
82.1k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8839
0
            internal_val = xmlStrndup(val, *len);
8840
0
            if (internal_val == NULL)
8841
0
                goto mem_error;
8842
0
            if (!xmlCheckLanguageID(internal_val)) {
8843
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8844
0
                              "Malformed value for xml:lang : %s\n",
8845
0
                              internal_val, NULL);
8846
0
            }
8847
0
        }
8848
8849
        /*
8850
         * Check that xml:space conforms to the specification
8851
         */
8852
82.1k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8853
613
            internal_val = xmlStrndup(val, *len);
8854
613
            if (internal_val == NULL)
8855
1
                goto mem_error;
8856
612
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8857
59
                *(ctxt->space) = 0;
8858
553
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8859
199
                *(ctxt->space) = 1;
8860
354
            else {
8861
354
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8862
354
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8863
354
                              internal_val, NULL);
8864
354
            }
8865
612
        }
8866
82.1k
        if (internal_val) {
8867
612
            xmlFree(internal_val);
8868
612
        }
8869
82.1k
    }
8870
8871
1.13M
    *value = val;
8872
1.13M
    return (hname);
8873
8874
1
mem_error:
8875
1
    xmlErrMemory(ctxt);
8876
124k
error:
8877
124k
    if ((val != NULL) && (*alloc != 0))
8878
1
        xmlFree(val);
8879
124k
    return(hname);
8880
1
}
8881
8882
/**
8883
 * xmlAttrHashInsert:
8884
 * @ctxt: parser context
8885
 * @size: size of the hash table
8886
 * @name: attribute name
8887
 * @uri: namespace uri
8888
 * @hashValue: combined hash value of name and uri
8889
 * @aindex: attribute index (this is a multiple of 5)
8890
 *
8891
 * Inserts a new attribute into the hash table.
8892
 *
8893
 * Returns INT_MAX if no existing attribute was found, the attribute
8894
 * index if an attribute was found, -1 if a memory allocation failed.
8895
 */
8896
static int
8897
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8898
859k
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8899
859k
    xmlAttrHashBucket *table = ctxt->attrHash;
8900
859k
    xmlAttrHashBucket *bucket;
8901
859k
    unsigned hindex;
8902
8903
859k
    hindex = hashValue & (size - 1);
8904
859k
    bucket = &table[hindex];
8905
8906
931k
    while (bucket->index >= 0) {
8907
269k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8908
8909
269k
        if (name == atts[0]) {
8910
210k
            int nsIndex = (int) (ptrdiff_t) atts[2];
8911
8912
210k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8913
210k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml) :
8914
22.4k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8915
197k
                return(bucket->index);
8916
210k
        }
8917
8918
71.8k
        hindex++;
8919
71.8k
        bucket++;
8920
71.8k
        if (hindex >= size) {
8921
7.73k
            hindex = 0;
8922
7.73k
            bucket = table;
8923
7.73k
        }
8924
71.8k
    }
8925
8926
662k
    bucket->index = aindex;
8927
8928
662k
    return(INT_MAX);
8929
859k
}
8930
8931
/**
8932
 * xmlParseStartTag2:
8933
 * @ctxt:  an XML parser context
8934
 *
8935
 * Parse a start tag. Always consumes '<'.
8936
 *
8937
 * This routine is called when running SAX2 parsing
8938
 *
8939
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8940
 *
8941
 * [ WFC: Unique Att Spec ]
8942
 * No attribute name may appear more than once in the same start-tag or
8943
 * empty-element tag.
8944
 *
8945
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8946
 *
8947
 * [ WFC: Unique Att Spec ]
8948
 * No attribute name may appear more than once in the same start-tag or
8949
 * empty-element tag.
8950
 *
8951
 * With namespace:
8952
 *
8953
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8954
 *
8955
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8956
 *
8957
 * Returns the element name parsed
8958
 */
8959
8960
static const xmlChar *
8961
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8962
4.55M
                  const xmlChar **URI, int *nbNsPtr) {
8963
4.55M
    xmlHashedString hlocalname;
8964
4.55M
    xmlHashedString hprefix;
8965
4.55M
    xmlHashedString hattname;
8966
4.55M
    xmlHashedString haprefix;
8967
4.55M
    const xmlChar *localname;
8968
4.55M
    const xmlChar *prefix;
8969
4.55M
    const xmlChar *attname;
8970
4.55M
    const xmlChar *aprefix;
8971
4.55M
    const xmlChar *uri;
8972
4.55M
    xmlChar *attvalue = NULL;
8973
4.55M
    const xmlChar **atts = ctxt->atts;
8974
4.55M
    unsigned attrHashSize = 0;
8975
4.55M
    int maxatts = ctxt->maxatts;
8976
4.55M
    int nratts, nbatts, nbdef;
8977
4.55M
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8978
4.55M
    int alloc = 0;
8979
8980
4.55M
    if (RAW != '<') return(NULL);
8981
4.55M
    NEXT1;
8982
8983
4.55M
    nbatts = 0;
8984
4.55M
    nratts = 0;
8985
4.55M
    nbdef = 0;
8986
4.55M
    nbNs = 0;
8987
4.55M
    nbTotalDef = 0;
8988
4.55M
    attval = 0;
8989
8990
4.55M
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8991
0
        xmlErrMemory(ctxt);
8992
0
        return(NULL);
8993
0
    }
8994
8995
4.55M
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8996
4.55M
    if (hlocalname.name == NULL) {
8997
740k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8998
740k
           "StartTag: invalid element name\n");
8999
740k
        return(NULL);
9000
740k
    }
9001
3.81M
    localname = hlocalname.name;
9002
3.81M
    prefix = hprefix.name;
9003
9004
    /*
9005
     * Now parse the attributes, it ends up with the ending
9006
     *
9007
     * (S Attribute)* S?
9008
     */
9009
3.81M
    SKIP_BLANKS;
9010
3.81M
    GROW;
9011
9012
    /*
9013
     * The ctxt->atts array will be ultimately passed to the SAX callback
9014
     * containing five xmlChar pointers for each attribute:
9015
     *
9016
     * [0] attribute name
9017
     * [1] attribute prefix
9018
     * [2] namespace URI
9019
     * [3] attribute value
9020
     * [4] end of attribute value
9021
     *
9022
     * To save memory, we reuse this array temporarily and store integers
9023
     * in these pointer variables.
9024
     *
9025
     * [0] attribute name
9026
     * [1] attribute prefix
9027
     * [2] hash value of attribute prefix, and later namespace index
9028
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
9029
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
9030
     *
9031
     * The ctxt->attallocs array contains an additional unsigned int for
9032
     * each attribute, containing the hash value of the attribute name
9033
     * and the alloc flag in bit 31.
9034
     */
9035
9036
4.36M
    while (((RAW != '>') &&
9037
4.36M
     ((RAW != '/') || (NXT(1) != '>')) &&
9038
4.36M
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
9039
3.25M
  int len = -1;
9040
9041
3.25M
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
9042
3.25M
                                          &haprefix, &attvalue, &len,
9043
3.25M
                                          &alloc);
9044
3.25M
        if (hattname.name == NULL)
9045
1.98M
      break;
9046
1.26M
        if (attvalue == NULL)
9047
124k
            goto next_attr;
9048
1.13M
        attname = hattname.name;
9049
1.13M
        aprefix = haprefix.name;
9050
1.13M
  if (len < 0) len = xmlStrlen(attvalue);
9051
9052
1.13M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9053
24.2k
            xmlHashedString huri;
9054
24.2k
            xmlURIPtr parsedUri;
9055
9056
24.2k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9057
24.2k
            uri = huri.name;
9058
24.2k
            if (uri == NULL) {
9059
0
                xmlErrMemory(ctxt);
9060
0
                goto next_attr;
9061
0
            }
9062
24.2k
            if (*uri != 0) {
9063
20.9k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9064
0
                    xmlErrMemory(ctxt);
9065
0
                    goto next_attr;
9066
0
                }
9067
20.9k
                if (parsedUri == NULL) {
9068
5.25k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9069
5.25k
                             "xmlns: '%s' is not a valid URI\n",
9070
5.25k
                                       uri, NULL, NULL);
9071
15.6k
                } else {
9072
15.6k
                    if (parsedUri->scheme == NULL) {
9073
14.8k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9074
14.8k
                                  "xmlns: URI %s is not absolute\n",
9075
14.8k
                                  uri, NULL, NULL);
9076
14.8k
                    }
9077
15.6k
                    xmlFreeURI(parsedUri);
9078
15.6k
                }
9079
20.9k
                if (uri == ctxt->str_xml_ns) {
9080
18
                    if (attname != ctxt->str_xml) {
9081
18
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9082
18
                     "xml namespace URI cannot be the default namespace\n",
9083
18
                                 NULL, NULL, NULL);
9084
18
                    }
9085
18
                    goto next_attr;
9086
18
                }
9087
20.8k
                if ((len == 29) &&
9088
20.8k
                    (xmlStrEqual(uri,
9089
735
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9090
331
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9091
331
                         "reuse of the xmlns namespace name is forbidden\n",
9092
331
                             NULL, NULL, NULL);
9093
331
                    goto next_attr;
9094
331
                }
9095
20.8k
            }
9096
9097
23.9k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9098
23.5k
                nbNs++;
9099
1.11M
        } else if (aprefix == ctxt->str_xmlns) {
9100
102k
            xmlHashedString huri;
9101
102k
            xmlURIPtr parsedUri;
9102
9103
102k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9104
102k
            uri = huri.name;
9105
102k
            if (uri == NULL) {
9106
2
                xmlErrMemory(ctxt);
9107
2
                goto next_attr;
9108
2
            }
9109
9110
102k
            if (attname == ctxt->str_xml) {
9111
293
                if (uri != ctxt->str_xml_ns) {
9112
293
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9113
293
                             "xml namespace prefix mapped to wrong URI\n",
9114
293
                             NULL, NULL, NULL);
9115
293
                }
9116
                /*
9117
                 * Do not keep a namespace definition node
9118
                 */
9119
293
                goto next_attr;
9120
293
            }
9121
102k
            if (uri == ctxt->str_xml_ns) {
9122
3
                if (attname != ctxt->str_xml) {
9123
3
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9124
3
                             "xml namespace URI mapped to wrong prefix\n",
9125
3
                             NULL, NULL, NULL);
9126
3
                }
9127
3
                goto next_attr;
9128
3
            }
9129
102k
            if (attname == ctxt->str_xmlns) {
9130
291
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9131
291
                         "redefinition of the xmlns prefix is forbidden\n",
9132
291
                         NULL, NULL, NULL);
9133
291
                goto next_attr;
9134
291
            }
9135
102k
            if ((len == 29) &&
9136
102k
                (xmlStrEqual(uri,
9137
1.12k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9138
761
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9139
761
                         "reuse of the xmlns namespace name is forbidden\n",
9140
761
                         NULL, NULL, NULL);
9141
761
                goto next_attr;
9142
761
            }
9143
101k
            if ((uri == NULL) || (uri[0] == 0)) {
9144
269
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9145
269
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9146
269
                              attname, NULL, NULL);
9147
269
                goto next_attr;
9148
101k
            } else {
9149
101k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9150
40
                    xmlErrMemory(ctxt);
9151
40
                    goto next_attr;
9152
40
                }
9153
101k
                if (parsedUri == NULL) {
9154
15.2k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9155
15.2k
                         "xmlns:%s: '%s' is not a valid URI\n",
9156
15.2k
                                       attname, uri, NULL);
9157
86.0k
                } else {
9158
86.0k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9159
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9160
0
                                  "xmlns:%s: URI %s is not absolute\n",
9161
0
                                  attname, uri, NULL);
9162
0
                    }
9163
86.0k
                    xmlFreeURI(parsedUri);
9164
86.0k
                }
9165
101k
            }
9166
9167
101k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9168
92.3k
                nbNs++;
9169
1.00M
        } else {
9170
            /*
9171
             * Populate attributes array, see above for repurposing
9172
             * of xmlChar pointers.
9173
             */
9174
1.00M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9175
52.4k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9176
24
                    goto next_attr;
9177
24
                }
9178
52.4k
                maxatts = ctxt->maxatts;
9179
52.4k
                atts = ctxt->atts;
9180
52.4k
            }
9181
1.00M
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9182
1.00M
                                        ((unsigned) alloc << 31);
9183
1.00M
            atts[nbatts++] = attname;
9184
1.00M
            atts[nbatts++] = aprefix;
9185
1.00M
            atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9186
1.00M
            if (alloc) {
9187
73.6k
                atts[nbatts++] = attvalue;
9188
73.6k
                attvalue += len;
9189
73.6k
                atts[nbatts++] = attvalue;
9190
935k
            } else {
9191
                /*
9192
                 * attvalue points into the input buffer which can be
9193
                 * reallocated. Store differences to input->base instead.
9194
                 * The pointers will be reconstructed later.
9195
                 */
9196
935k
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9197
935k
                attvalue += len;
9198
935k
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9199
935k
            }
9200
            /*
9201
             * tag if some deallocation is needed
9202
             */
9203
1.00M
            if (alloc != 0) attval = 1;
9204
1.00M
            attvalue = NULL; /* moved into atts */
9205
1.00M
        }
9206
9207
1.26M
next_attr:
9208
1.26M
        if ((attvalue != NULL) && (alloc != 0)) {
9209
16.4k
            xmlFree(attvalue);
9210
16.4k
            attvalue = NULL;
9211
16.4k
        }
9212
9213
1.26M
  GROW
9214
1.26M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9215
533k
      break;
9216
727k
  if (SKIP_BLANKS == 0) {
9217
178k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9218
178k
         "attributes construct error\n");
9219
178k
      break;
9220
178k
  }
9221
549k
        GROW;
9222
549k
    }
9223
9224
    /*
9225
     * Namespaces from default attributes
9226
     */
9227
3.81M
    if (ctxt->attsDefault != NULL) {
9228
317k
        xmlDefAttrsPtr defaults;
9229
9230
317k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9231
317k
  if (defaults != NULL) {
9232
556k
      for (i = 0; i < defaults->nbAttrs; i++) {
9233
455k
                xmlDefAttr *attr = &defaults->attrs[i];
9234
9235
455k
          attname = attr->name.name;
9236
455k
    aprefix = attr->prefix.name;
9237
9238
455k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9239
2.40k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9240
9241
2.40k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9242
1.58k
                        nbNs++;
9243
452k
    } else if (aprefix == ctxt->str_xmlns) {
9244
19.9k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9245
9246
19.9k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9247
19.9k
                                      NULL, 1) > 0)
9248
19.8k
                        nbNs++;
9249
432k
    } else {
9250
432k
                    nbTotalDef += 1;
9251
432k
                }
9252
455k
      }
9253
101k
  }
9254
317k
    }
9255
9256
    /*
9257
     * Resolve attribute namespaces
9258
     */
9259
4.82M
    for (i = 0; i < nbatts; i += 5) {
9260
1.00M
        attname = atts[i];
9261
1.00M
        aprefix = atts[i+1];
9262
9263
        /*
9264
  * The default namespace does not apply to attribute names.
9265
  */
9266
1.00M
  if (aprefix == NULL) {
9267
878k
            nsIndex = NS_INDEX_EMPTY;
9268
878k
        } else if (aprefix == ctxt->str_xml) {
9269
82.1k
            nsIndex = NS_INDEX_XML;
9270
82.1k
        } else {
9271
48.4k
            haprefix.name = aprefix;
9272
48.4k
            haprefix.hashValue = (size_t) atts[i+2];
9273
48.4k
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9274
9275
48.4k
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9276
34.9k
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9277
34.9k
        "Namespace prefix %s for %s on %s is not defined\n",
9278
34.9k
        aprefix, attname, localname);
9279
34.9k
                nsIndex = NS_INDEX_EMPTY;
9280
34.9k
            }
9281
48.4k
        }
9282
9283
1.00M
        atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex;
9284
1.00M
    }
9285
9286
    /*
9287
     * Maximum number of attributes including default attributes.
9288
     */
9289
3.81M
    maxAtts = nratts + nbTotalDef;
9290
9291
    /*
9292
     * Verify that attribute names are unique.
9293
     */
9294
3.81M
    if (maxAtts > 1) {
9295
230k
        attrHashSize = 4;
9296
434k
        while (attrHashSize / 2 < (unsigned) maxAtts)
9297
203k
            attrHashSize *= 2;
9298
9299
230k
        if (attrHashSize > ctxt->attrHashMax) {
9300
51.8k
            xmlAttrHashBucket *tmp;
9301
9302
51.8k
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9303
51.8k
            if (tmp == NULL) {
9304
14
                xmlErrMemory(ctxt);
9305
14
                goto done;
9306
14
            }
9307
9308
51.8k
            ctxt->attrHash = tmp;
9309
51.8k
            ctxt->attrHashMax = attrHashSize;
9310
51.8k
        }
9311
9312
230k
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9313
9314
703k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9315
473k
            const xmlChar *nsuri;
9316
473k
            unsigned hashValue, nameHashValue, uriHashValue;
9317
473k
            int res;
9318
9319
473k
            attname = atts[i];
9320
473k
            aprefix = atts[i+1];
9321
473k
            nsIndex = (ptrdiff_t) atts[i+2];
9322
            /* Hash values always have bit 31 set, see dict.c */
9323
473k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9324
9325
473k
            if (nsIndex == NS_INDEX_EMPTY) {
9326
                /*
9327
                 * Prefix with empty namespace means an undeclared
9328
                 * prefix which was already reported above.
9329
                 */
9330
399k
                if (aprefix != NULL)
9331
20.4k
                    continue;
9332
379k
                nsuri = NULL;
9333
379k
                uriHashValue = URI_HASH_EMPTY;
9334
379k
            } else if (nsIndex == NS_INDEX_XML) {
9335
60.5k
                nsuri = ctxt->str_xml_ns;
9336
60.5k
                uriHashValue = URI_HASH_XML;
9337
60.5k
            } else {
9338
12.7k
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9339
12.7k
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9340
12.7k
            }
9341
9342
452k
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9343
452k
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9344
452k
                                    hashValue, i);
9345
452k
            if (res < 0)
9346
0
                continue;
9347
9348
            /*
9349
             * [ WFC: Unique Att Spec ]
9350
             * No attribute name may appear more than once in the same
9351
             * start-tag or empty-element tag.
9352
             * As extended by the Namespace in XML REC.
9353
             */
9354
452k
            if (res < INT_MAX) {
9355
28.6k
                if (aprefix == atts[res+1]) {
9356
28.1k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9357
28.1k
                } else {
9358
466
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9359
466
                             "Namespaced Attribute %s in '%s' redefined\n",
9360
466
                             attname, nsuri, NULL);
9361
466
                }
9362
28.6k
            }
9363
452k
        }
9364
230k
    }
9365
9366
    /*
9367
     * Default attributes
9368
     */
9369
3.81M
    if (ctxt->attsDefault != NULL) {
9370
317k
        xmlDefAttrsPtr defaults;
9371
9372
317k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9373
317k
  if (defaults != NULL) {
9374
556k
      for (i = 0; i < defaults->nbAttrs; i++) {
9375
455k
                xmlDefAttr *attr = &defaults->attrs[i];
9376
455k
                const xmlChar *nsuri;
9377
455k
                unsigned hashValue, uriHashValue;
9378
455k
                int res;
9379
9380
455k
          attname = attr->name.name;
9381
455k
    aprefix = attr->prefix.name;
9382
9383
455k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9384
2.40k
                    continue;
9385
452k
    if (aprefix == ctxt->str_xmlns)
9386
19.9k
                    continue;
9387
9388
432k
                if (aprefix == NULL) {
9389
79.2k
                    nsIndex = NS_INDEX_EMPTY;
9390
79.2k
                    nsuri = NULL;
9391
79.2k
                    uriHashValue = URI_HASH_EMPTY;
9392
432k
                } if (aprefix == ctxt->str_xml) {
9393
21.3k
                    nsIndex = NS_INDEX_XML;
9394
21.3k
                    nsuri = ctxt->str_xml_ns;
9395
21.3k
                    uriHashValue = URI_HASH_XML;
9396
411k
                } else if (aprefix != NULL) {
9397
332k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9398
332k
                    if ((nsIndex == INT_MAX) ||
9399
332k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9400
329k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9401
329k
                                 "Namespace prefix %s for %s on %s is not "
9402
329k
                                 "defined\n",
9403
329k
                                 aprefix, attname, localname);
9404
329k
                        nsIndex = NS_INDEX_EMPTY;
9405
329k
                        nsuri = NULL;
9406
329k
                        uriHashValue = URI_HASH_EMPTY;
9407
329k
                    } else {
9408
3.14k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9409
3.14k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9410
3.14k
                    }
9411
332k
                }
9412
9413
                /*
9414
                 * Check whether the attribute exists
9415
                 */
9416
432k
                if (maxAtts > 1) {
9417
407k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9418
407k
                                                   uriHashValue);
9419
407k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9420
407k
                                            hashValue, nbatts);
9421
407k
                    if (res < 0)
9422
0
                        continue;
9423
407k
                    if (res < INT_MAX) {
9424
168k
                        if (aprefix == atts[res+1])
9425
41.9k
                            continue;
9426
126k
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9427
126k
                                 "Namespaced Attribute %s in '%s' redefined\n",
9428
126k
                                 attname, nsuri, NULL);
9429
126k
                    }
9430
407k
                }
9431
9432
390k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9433
9434
390k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9435
811
                    if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9436
0
                        localname = NULL;
9437
0
                        goto done;
9438
0
                    }
9439
811
                    maxatts = ctxt->maxatts;
9440
811
                    atts = ctxt->atts;
9441
811
                }
9442
9443
390k
                atts[nbatts++] = attname;
9444
390k
                atts[nbatts++] = aprefix;
9445
390k
                atts[nbatts++] = (const xmlChar *) (ptrdiff_t) nsIndex;
9446
390k
                atts[nbatts++] = attr->value.name;
9447
390k
                atts[nbatts++] = attr->valueEnd;
9448
390k
                if ((ctxt->standalone == 1) && (attr->external != 0)) {
9449
0
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9450
0
                            "standalone: attribute %s on %s defaulted "
9451
0
                            "from external subset\n",
9452
0
                            attname, localname);
9453
0
                }
9454
390k
                nbdef++;
9455
390k
      }
9456
101k
  }
9457
317k
    }
9458
9459
    /*
9460
     * Reconstruct attribute pointers
9461
     */
9462
5.21M
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9463
        /* namespace URI */
9464
1.40M
        nsIndex = (ptrdiff_t) atts[i+2];
9465
1.40M
        if (nsIndex == INT_MAX)
9466
1.28M
            atts[i+2] = NULL;
9467
119k
        else if (nsIndex == INT_MAX - 1)
9468
103k
            atts[i+2] = ctxt->str_xml_ns;
9469
15.9k
        else
9470
15.9k
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9471
9472
1.40M
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9473
935k
            atts[i+3] = BASE_PTR + (ptrdiff_t) atts[i+3];  /* value */
9474
935k
            atts[i+4] = BASE_PTR + (ptrdiff_t) atts[i+4];  /* valuend */
9475
935k
        }
9476
1.40M
    }
9477
9478
3.81M
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9479
3.81M
    if ((prefix != NULL) && (uri == NULL)) {
9480
525k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9481
525k
           "Namespace prefix %s on %s is not defined\n",
9482
525k
     prefix, localname, NULL);
9483
525k
    }
9484
3.81M
    *pref = prefix;
9485
3.81M
    *URI = uri;
9486
9487
    /*
9488
     * SAX callback
9489
     */
9490
3.81M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9491
3.81M
  (!ctxt->disableSAX)) {
9492
3.65M
  if (nbNs > 0)
9493
82.7k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9494
82.7k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9495
82.7k
        nbatts / 5, nbdef, atts);
9496
3.57M
  else
9497
3.57M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9498
3.57M
                          0, NULL, nbatts / 5, nbdef, atts);
9499
3.65M
    }
9500
9501
3.81M
done:
9502
    /*
9503
     * Free allocated attribute values
9504
     */
9505
3.81M
    if (attval != 0) {
9506
159k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9507
89.2k
      if (ctxt->attallocs[j] & 0x80000000)
9508
73.6k
          xmlFree((xmlChar *) atts[i+3]);
9509
70.0k
    }
9510
9511
3.81M
    *nbNsPtr = nbNs;
9512
3.81M
    return(localname);
9513
3.81M
}
9514
9515
/**
9516
 * xmlParseEndTag2:
9517
 * @ctxt:  an XML parser context
9518
 * @line:  line of the start tag
9519
 * @nsNr:  number of namespaces on the start tag
9520
 *
9521
 * Parse an end tag. Always consumes '</'.
9522
 *
9523
 * [42] ETag ::= '</' Name S? '>'
9524
 *
9525
 * With namespace
9526
 *
9527
 * [NS 9] ETag ::= '</' QName S? '>'
9528
 */
9529
9530
static void
9531
823k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9532
823k
    const xmlChar *name;
9533
9534
823k
    GROW;
9535
823k
    if ((RAW != '<') || (NXT(1) != '/')) {
9536
489
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9537
489
  return;
9538
489
    }
9539
822k
    SKIP(2);
9540
9541
822k
    if (tag->prefix == NULL)
9542
315k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9543
507k
    else
9544
507k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9545
9546
    /*
9547
     * We should definitely be at the ending "S? '>'" part
9548
     */
9549
822k
    GROW;
9550
822k
    SKIP_BLANKS;
9551
822k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9552
138k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9553
138k
    } else
9554
683k
  NEXT1;
9555
9556
    /*
9557
     * [ WFC: Element Type Match ]
9558
     * The Name in an element's end-tag must match the element type in the
9559
     * start-tag.
9560
     *
9561
     */
9562
822k
    if (name != (xmlChar*)1) {
9563
183k
        if (name == NULL) name = BAD_CAST "unparsable";
9564
183k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9565
183k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9566
183k
                    ctxt->name, tag->line, name);
9567
183k
    }
9568
9569
    /*
9570
     * SAX: End of Tag
9571
     */
9572
822k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9573
822k
  (!ctxt->disableSAX))
9574
788k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9575
788k
                                tag->URI);
9576
9577
822k
    spacePop(ctxt);
9578
822k
    if (tag->nsNr != 0)
9579
49.6k
  xmlParserNsPop(ctxt, tag->nsNr);
9580
822k
}
9581
9582
/**
9583
 * xmlParseCDSect:
9584
 * @ctxt:  an XML parser context
9585
 *
9586
 * DEPRECATED: Internal function, don't use.
9587
 *
9588
 * Parse escaped pure raw content. Always consumes '<!['.
9589
 *
9590
 * [18] CDSect ::= CDStart CData CDEnd
9591
 *
9592
 * [19] CDStart ::= '<![CDATA['
9593
 *
9594
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9595
 *
9596
 * [21] CDEnd ::= ']]>'
9597
 */
9598
void
9599
30.5k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9600
30.5k
    xmlChar *buf = NULL;
9601
30.5k
    int len = 0;
9602
30.5k
    int size = XML_PARSER_BUFFER_SIZE;
9603
30.5k
    int r, rl;
9604
30.5k
    int s, sl;
9605
30.5k
    int cur, l;
9606
30.5k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9607
0
                    XML_MAX_HUGE_LENGTH :
9608
30.5k
                    XML_MAX_TEXT_LENGTH;
9609
9610
30.5k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9611
0
        return;
9612
30.5k
    SKIP(3);
9613
9614
30.5k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9615
0
        return;
9616
30.5k
    SKIP(6);
9617
9618
30.5k
    r = CUR_CHAR(rl);
9619
30.5k
    if (!IS_CHAR(r)) {
9620
1.28k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9621
1.28k
        goto out;
9622
1.28k
    }
9623
29.2k
    NEXTL(rl);
9624
29.2k
    s = CUR_CHAR(sl);
9625
29.2k
    if (!IS_CHAR(s)) {
9626
1.28k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9627
1.28k
        goto out;
9628
1.28k
    }
9629
27.9k
    NEXTL(sl);
9630
27.9k
    cur = CUR_CHAR(l);
9631
27.9k
    buf = (xmlChar *) xmlMallocAtomic(size);
9632
27.9k
    if (buf == NULL) {
9633
1
  xmlErrMemory(ctxt);
9634
1
        goto out;
9635
1
    }
9636
983k
    while (IS_CHAR(cur) &&
9637
983k
           ((r != ']') || (s != ']') || (cur != '>'))) {
9638
955k
  if (len + 5 >= size) {
9639
1.56k
      xmlChar *tmp;
9640
9641
1.56k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9642
1.56k
      if (tmp == NULL) {
9643
0
    xmlErrMemory(ctxt);
9644
0
                goto out;
9645
0
      }
9646
1.56k
      buf = tmp;
9647
1.56k
      size *= 2;
9648
1.56k
  }
9649
955k
  COPY_BUF(buf, len, r);
9650
955k
        if (len > maxLength) {
9651
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9652
0
                           "CData section too big found\n");
9653
0
            goto out;
9654
0
        }
9655
955k
  r = s;
9656
955k
  rl = sl;
9657
955k
  s = cur;
9658
955k
  sl = l;
9659
955k
  NEXTL(l);
9660
955k
  cur = CUR_CHAR(l);
9661
955k
    }
9662
27.9k
    buf[len] = 0;
9663
27.9k
    if (cur != '>') {
9664
6.42k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9665
6.42k
                       "CData section not finished\n%.50s\n", buf);
9666
6.42k
        goto out;
9667
6.42k
    }
9668
21.5k
    NEXTL(l);
9669
9670
    /*
9671
     * OK the buffer is to be consumed as cdata.
9672
     */
9673
21.5k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9674
20.2k
  if (ctxt->sax->cdataBlock != NULL)
9675
7.08k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9676
13.1k
  else if (ctxt->sax->characters != NULL)
9677
13.1k
      ctxt->sax->characters(ctxt->userData, buf, len);
9678
20.2k
    }
9679
9680
30.5k
out:
9681
30.5k
    xmlFree(buf);
9682
30.5k
}
9683
9684
/**
9685
 * xmlParseContentInternal:
9686
 * @ctxt:  an XML parser context
9687
 *
9688
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9689
 * unexpected EOF to the caller.
9690
 */
9691
9692
static void
9693
61.7k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9694
61.7k
    int oldNameNr = ctxt->nameNr;
9695
61.7k
    int oldSpaceNr = ctxt->spaceNr;
9696
61.7k
    int oldNodeNr = ctxt->nodeNr;
9697
9698
61.7k
    GROW;
9699
15.9M
    while ((ctxt->input->cur < ctxt->input->end) &&
9700
15.9M
     (PARSER_STOPPED(ctxt) == 0)) {
9701
15.9M
  const xmlChar *cur = ctxt->input->cur;
9702
9703
  /*
9704
   * First case : a Processing Instruction.
9705
   */
9706
15.9M
  if ((*cur == '<') && (cur[1] == '?')) {
9707
35.0k
      xmlParsePI(ctxt);
9708
35.0k
  }
9709
9710
  /*
9711
   * Second case : a CDSection
9712
   */
9713
  /* 2.6.0 test was *cur not RAW */
9714
15.9M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9715
30.5k
      xmlParseCDSect(ctxt);
9716
30.5k
  }
9717
9718
  /*
9719
   * Third case :  a comment
9720
   */
9721
15.9M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9722
15.9M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9723
79.8k
      xmlParseComment(ctxt);
9724
79.8k
  }
9725
9726
  /*
9727
   * Fourth case :  a sub-element.
9728
   */
9729
15.8M
  else if (*cur == '<') {
9730
5.31M
            if (NXT(1) == '/') {
9731
822k
                if (ctxt->nameNr <= oldNameNr)
9732
45.4k
                    break;
9733
777k
          xmlParseElementEnd(ctxt);
9734
4.49M
            } else {
9735
4.49M
          xmlParseElementStart(ctxt);
9736
4.49M
            }
9737
5.31M
  }
9738
9739
  /*
9740
   * Fifth case : a reference. If if has not been resolved,
9741
   *    parsing returns it's Name, create the node
9742
   */
9743
9744
10.5M
  else if (*cur == '&') {
9745
471k
      xmlParseReference(ctxt);
9746
471k
  }
9747
9748
  /*
9749
   * Last case, text. Note that References are handled directly.
9750
   */
9751
10.0M
  else {
9752
10.0M
      xmlParseCharDataInternal(ctxt, 0);
9753
10.0M
  }
9754
9755
15.9M
  SHRINK;
9756
15.9M
  GROW;
9757
15.9M
    }
9758
9759
61.7k
    if ((ctxt->nameNr > oldNameNr) &&
9760
61.7k
        (ctxt->input->cur >= ctxt->input->end) &&
9761
61.7k
        (ctxt->wellFormed)) {
9762
2.05k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9763
2.05k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9764
2.05k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9765
2.05k
                "Premature end of data in tag %s line %d\n",
9766
2.05k
                name, line, NULL);
9767
2.05k
    }
9768
9769
    /*
9770
     * Clean up in error case
9771
     */
9772
9773
222k
    while (ctxt->nodeNr > oldNodeNr)
9774
160k
        nodePop(ctxt);
9775
9776
274k
    while (ctxt->nameNr > oldNameNr) {
9777
212k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9778
9779
212k
        if (tag->nsNr != 0)
9780
16.8k
            xmlParserNsPop(ctxt, tag->nsNr);
9781
9782
212k
        namePop(ctxt);
9783
212k
    }
9784
9785
274k
    while (ctxt->spaceNr > oldSpaceNr)
9786
212k
        spacePop(ctxt);
9787
61.7k
}
9788
9789
/**
9790
 * xmlParseContent:
9791
 * @ctxt:  an XML parser context
9792
 *
9793
 * Parse XML element content. This is useful if you're only interested
9794
 * in custom SAX callbacks. If you want a node list, use
9795
 * xmlParseInNodeContext.
9796
 */
9797
void
9798
0
xmlParseContent(xmlParserCtxtPtr ctxt) {
9799
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9800
0
        return;
9801
9802
0
    xmlCtxtInitializeLate(ctxt);
9803
9804
0
    xmlParseContentInternal(ctxt);
9805
9806
0
    if (ctxt->input->cur < ctxt->input->end)
9807
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9808
0
}
9809
9810
/**
9811
 * xmlParseElement:
9812
 * @ctxt:  an XML parser context
9813
 *
9814
 * DEPRECATED: Internal function, don't use.
9815
 *
9816
 * parse an XML element
9817
 *
9818
 * [39] element ::= EmptyElemTag | STag content ETag
9819
 *
9820
 * [ WFC: Element Type Match ]
9821
 * The Name in an element's end-tag must match the element type in the
9822
 * start-tag.
9823
 *
9824
 */
9825
9826
void
9827
65.9k
xmlParseElement(xmlParserCtxtPtr ctxt) {
9828
65.9k
    if (xmlParseElementStart(ctxt) != 0)
9829
6.77k
        return;
9830
9831
59.1k
    xmlParseContentInternal(ctxt);
9832
9833
59.1k
    if (ctxt->input->cur >= ctxt->input->end) {
9834
13.2k
        if (ctxt->wellFormed) {
9835
2.17k
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9836
2.17k
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9837
2.17k
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9838
2.17k
                    "Premature end of data in tag %s line %d\n",
9839
2.17k
                    name, line, NULL);
9840
2.17k
        }
9841
13.2k
        return;
9842
13.2k
    }
9843
9844
45.9k
    xmlParseElementEnd(ctxt);
9845
45.9k
}
9846
9847
/**
9848
 * xmlParseElementStart:
9849
 * @ctxt:  an XML parser context
9850
 *
9851
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9852
 * opening tag was parsed, 1 if an empty element was parsed.
9853
 *
9854
 * Always consumes '<'.
9855
 */
9856
static int
9857
4.55M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9858
4.55M
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9859
4.55M
    const xmlChar *name;
9860
4.55M
    const xmlChar *prefix = NULL;
9861
4.55M
    const xmlChar *URI = NULL;
9862
4.55M
    xmlParserNodeInfo node_info;
9863
4.55M
    int line;
9864
4.55M
    xmlNodePtr cur;
9865
4.55M
    int nbNs = 0;
9866
9867
4.55M
    if (ctxt->nameNr > maxDepth) {
9868
107
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9869
107
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9870
107
                ctxt->nameNr);
9871
107
  xmlHaltParser(ctxt);
9872
107
  return(-1);
9873
107
    }
9874
9875
    /* Capture start position */
9876
4.55M
    if (ctxt->record_info) {
9877
0
        node_info.begin_pos = ctxt->input->consumed +
9878
0
                          (CUR_PTR - ctxt->input->base);
9879
0
  node_info.begin_line = ctxt->input->line;
9880
0
    }
9881
9882
4.55M
    if (ctxt->spaceNr == 0)
9883
0
  spacePush(ctxt, -1);
9884
4.55M
    else if (*ctxt->space == -2)
9885
0
  spacePush(ctxt, -1);
9886
4.55M
    else
9887
4.55M
  spacePush(ctxt, *ctxt->space);
9888
9889
4.55M
    line = ctxt->input->line;
9890
#ifdef LIBXML_SAX1_ENABLED
9891
    if (ctxt->sax2)
9892
#endif /* LIBXML_SAX1_ENABLED */
9893
4.55M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9894
#ifdef LIBXML_SAX1_ENABLED
9895
    else
9896
  name = xmlParseStartTag(ctxt);
9897
#endif /* LIBXML_SAX1_ENABLED */
9898
4.55M
    if (name == NULL) {
9899
740k
  spacePop(ctxt);
9900
740k
        return(-1);
9901
740k
    }
9902
3.81M
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9903
3.81M
    cur = ctxt->node;
9904
9905
#ifdef LIBXML_VALID_ENABLED
9906
    /*
9907
     * [ VC: Root Element Type ]
9908
     * The Name in the document type declaration must match the element
9909
     * type of the root element.
9910
     */
9911
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9912
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9913
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9914
#endif /* LIBXML_VALID_ENABLED */
9915
9916
    /*
9917
     * Check for an Empty Element.
9918
     */
9919
3.81M
    if ((RAW == '/') && (NXT(1) == '>')) {
9920
559k
        SKIP(2);
9921
559k
  if (ctxt->sax2) {
9922
559k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9923
559k
    (!ctxt->disableSAX))
9924
527k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9925
#ifdef LIBXML_SAX1_ENABLED
9926
  } else {
9927
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9928
    (!ctxt->disableSAX))
9929
    ctxt->sax->endElement(ctxt->userData, name);
9930
#endif /* LIBXML_SAX1_ENABLED */
9931
559k
  }
9932
559k
  namePop(ctxt);
9933
559k
  spacePop(ctxt);
9934
559k
  if (nbNs > 0)
9935
7.93k
      xmlParserNsPop(ctxt, nbNs);
9936
559k
  if (cur != NULL && ctxt->record_info) {
9937
0
            node_info.node = cur;
9938
0
            node_info.end_pos = ctxt->input->consumed +
9939
0
                                (CUR_PTR - ctxt->input->base);
9940
0
            node_info.end_line = ctxt->input->line;
9941
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9942
0
  }
9943
559k
  return(1);
9944
559k
    }
9945
3.25M
    if (RAW == '>') {
9946
1.04M
        NEXT1;
9947
1.04M
        if (cur != NULL && ctxt->record_info) {
9948
0
            node_info.node = cur;
9949
0
            node_info.end_pos = 0;
9950
0
            node_info.end_line = 0;
9951
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9952
0
        }
9953
2.20M
    } else {
9954
2.20M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9955
2.20M
         "Couldn't find end of Start Tag %s line %d\n",
9956
2.20M
                    name, line, NULL);
9957
9958
  /*
9959
   * end of parsing of this node.
9960
   */
9961
2.20M
  nodePop(ctxt);
9962
2.20M
  namePop(ctxt);
9963
2.20M
  spacePop(ctxt);
9964
2.20M
  if (nbNs > 0)
9965
21.0k
      xmlParserNsPop(ctxt, nbNs);
9966
2.20M
  return(-1);
9967
2.20M
    }
9968
9969
1.04M
    return(0);
9970
3.25M
}
9971
9972
/**
9973
 * xmlParseElementEnd:
9974
 * @ctxt:  an XML parser context
9975
 *
9976
 * Parse the end of an XML element. Always consumes '</'.
9977
 */
9978
static void
9979
823k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9980
823k
    xmlNodePtr cur = ctxt->node;
9981
9982
823k
    if (ctxt->nameNr <= 0) {
9983
109
        if ((RAW == '<') && (NXT(1) == '/'))
9984
0
            SKIP(2);
9985
109
        return;
9986
109
    }
9987
9988
    /*
9989
     * parse the end of tag: '</' should be here.
9990
     */
9991
823k
    if (ctxt->sax2) {
9992
823k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9993
823k
  namePop(ctxt);
9994
823k
    }
9995
#ifdef LIBXML_SAX1_ENABLED
9996
    else
9997
  xmlParseEndTag1(ctxt, 0);
9998
#endif /* LIBXML_SAX1_ENABLED */
9999
10000
    /*
10001
     * Capture end position
10002
     */
10003
823k
    if (cur != NULL && ctxt->record_info) {
10004
0
        xmlParserNodeInfoPtr node_info;
10005
10006
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10007
0
        if (node_info != NULL) {
10008
0
            node_info->end_pos = ctxt->input->consumed +
10009
0
                                 (CUR_PTR - ctxt->input->base);
10010
0
            node_info->end_line = ctxt->input->line;
10011
0
        }
10012
0
    }
10013
823k
}
10014
10015
/**
10016
 * xmlParseVersionNum:
10017
 * @ctxt:  an XML parser context
10018
 *
10019
 * DEPRECATED: Internal function, don't use.
10020
 *
10021
 * parse the XML version value.
10022
 *
10023
 * [26] VersionNum ::= '1.' [0-9]+
10024
 *
10025
 * In practice allow [0-9].[0-9]+ at that level
10026
 *
10027
 * Returns the string giving the XML version number, or NULL
10028
 */
10029
xmlChar *
10030
46.3k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10031
46.3k
    xmlChar *buf = NULL;
10032
46.3k
    int len = 0;
10033
46.3k
    int size = 10;
10034
46.3k
    xmlChar cur;
10035
10036
46.3k
    buf = (xmlChar *) xmlMallocAtomic(size);
10037
46.3k
    if (buf == NULL) {
10038
9
  xmlErrMemory(ctxt);
10039
9
  return(NULL);
10040
9
    }
10041
46.3k
    cur = CUR;
10042
46.3k
    if (!((cur >= '0') && (cur <= '9'))) {
10043
1.03k
  xmlFree(buf);
10044
1.03k
  return(NULL);
10045
1.03k
    }
10046
45.3k
    buf[len++] = cur;
10047
45.3k
    NEXT;
10048
45.3k
    cur=CUR;
10049
45.3k
    if (cur != '.') {
10050
482
  xmlFree(buf);
10051
482
  return(NULL);
10052
482
    }
10053
44.8k
    buf[len++] = cur;
10054
44.8k
    NEXT;
10055
44.8k
    cur=CUR;
10056
67.0k
    while ((cur >= '0') && (cur <= '9')) {
10057
22.1k
  if (len + 1 >= size) {
10058
290
      xmlChar *tmp;
10059
10060
290
      size *= 2;
10061
290
      tmp = (xmlChar *) xmlRealloc(buf, size);
10062
290
      if (tmp == NULL) {
10063
0
          xmlFree(buf);
10064
0
    xmlErrMemory(ctxt);
10065
0
    return(NULL);
10066
0
      }
10067
290
      buf = tmp;
10068
290
  }
10069
22.1k
  buf[len++] = cur;
10070
22.1k
  NEXT;
10071
22.1k
  cur=CUR;
10072
22.1k
    }
10073
44.8k
    buf[len] = 0;
10074
44.8k
    return(buf);
10075
44.8k
}
10076
10077
/**
10078
 * xmlParseVersionInfo:
10079
 * @ctxt:  an XML parser context
10080
 *
10081
 * DEPRECATED: Internal function, don't use.
10082
 *
10083
 * parse the XML version.
10084
 *
10085
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10086
 *
10087
 * [25] Eq ::= S? '=' S?
10088
 *
10089
 * Returns the version string, e.g. "1.0"
10090
 */
10091
10092
xmlChar *
10093
49.2k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10094
49.2k
    xmlChar *version = NULL;
10095
10096
49.2k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10097
46.5k
  SKIP(7);
10098
46.5k
  SKIP_BLANKS;
10099
46.5k
  if (RAW != '=') {
10100
63
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10101
63
      return(NULL);
10102
63
        }
10103
46.4k
  NEXT;
10104
46.4k
  SKIP_BLANKS;
10105
46.4k
  if (RAW == '"') {
10106
45.5k
      NEXT;
10107
45.5k
      version = xmlParseVersionNum(ctxt);
10108
45.5k
      if (RAW != '"') {
10109
1.69k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10110
1.69k
      } else
10111
43.8k
          NEXT;
10112
45.5k
  } else if (RAW == '\''){
10113
848
      NEXT;
10114
848
      version = xmlParseVersionNum(ctxt);
10115
848
      if (RAW != '\'') {
10116
40
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10117
40
      } else
10118
808
          NEXT;
10119
848
  } else {
10120
78
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10121
78
  }
10122
46.4k
    }
10123
49.2k
    return(version);
10124
49.2k
}
10125
10126
/**
10127
 * xmlParseEncName:
10128
 * @ctxt:  an XML parser context
10129
 *
10130
 * DEPRECATED: Internal function, don't use.
10131
 *
10132
 * parse the XML encoding name
10133
 *
10134
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10135
 *
10136
 * Returns the encoding name value or NULL
10137
 */
10138
xmlChar *
10139
37.6k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10140
37.6k
    xmlChar *buf = NULL;
10141
37.6k
    int len = 0;
10142
37.6k
    int size = 10;
10143
37.6k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10144
0
                    XML_MAX_TEXT_LENGTH :
10145
37.6k
                    XML_MAX_NAME_LENGTH;
10146
37.6k
    xmlChar cur;
10147
10148
37.6k
    cur = CUR;
10149
37.6k
    if (((cur >= 'a') && (cur <= 'z')) ||
10150
37.6k
        ((cur >= 'A') && (cur <= 'Z'))) {
10151
37.5k
  buf = (xmlChar *) xmlMallocAtomic(size);
10152
37.5k
  if (buf == NULL) {
10153
48
      xmlErrMemory(ctxt);
10154
48
      return(NULL);
10155
48
  }
10156
10157
37.5k
  buf[len++] = cur;
10158
37.5k
  NEXT;
10159
37.5k
  cur = CUR;
10160
3.68M
  while (((cur >= 'a') && (cur <= 'z')) ||
10161
3.68M
         ((cur >= 'A') && (cur <= 'Z')) ||
10162
3.68M
         ((cur >= '0') && (cur <= '9')) ||
10163
3.68M
         (cur == '.') || (cur == '_') ||
10164
3.68M
         (cur == '-')) {
10165
3.64M
      if (len + 1 >= size) {
10166
5.77k
          xmlChar *tmp;
10167
10168
5.77k
    size *= 2;
10169
5.77k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10170
5.77k
    if (tmp == NULL) {
10171
0
        xmlErrMemory(ctxt);
10172
0
        xmlFree(buf);
10173
0
        return(NULL);
10174
0
    }
10175
5.77k
    buf = tmp;
10176
5.77k
      }
10177
3.64M
      buf[len++] = cur;
10178
3.64M
            if (len > maxLength) {
10179
69
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10180
69
                xmlFree(buf);
10181
69
                return(NULL);
10182
69
            }
10183
3.64M
      NEXT;
10184
3.64M
      cur = CUR;
10185
3.64M
        }
10186
37.4k
  buf[len] = 0;
10187
37.4k
    } else {
10188
46
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10189
46
    }
10190
37.4k
    return(buf);
10191
37.6k
}
10192
10193
/**
10194
 * xmlParseEncodingDecl:
10195
 * @ctxt:  an XML parser context
10196
 *
10197
 * DEPRECATED: Internal function, don't use.
10198
 *
10199
 * parse the XML encoding declaration
10200
 *
10201
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10202
 *
10203
 * this setups the conversion filters.
10204
 *
10205
 * Returns the encoding value or NULL
10206
 */
10207
10208
const xmlChar *
10209
45.5k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10210
45.5k
    xmlChar *encoding = NULL;
10211
10212
45.5k
    SKIP_BLANKS;
10213
45.5k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10214
7.50k
        return(NULL);
10215
10216
38.0k
    SKIP(8);
10217
38.0k
    SKIP_BLANKS;
10218
38.0k
    if (RAW != '=') {
10219
214
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10220
214
        return(NULL);
10221
214
    }
10222
37.8k
    NEXT;
10223
37.8k
    SKIP_BLANKS;
10224
37.8k
    if (RAW == '"') {
10225
36.6k
        NEXT;
10226
36.6k
        encoding = xmlParseEncName(ctxt);
10227
36.6k
        if (RAW != '"') {
10228
524
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10229
524
            xmlFree((xmlChar *) encoding);
10230
524
            return(NULL);
10231
524
        } else
10232
36.1k
            NEXT;
10233
36.6k
    } else if (RAW == '\''){
10234
960
        NEXT;
10235
960
        encoding = xmlParseEncName(ctxt);
10236
960
        if (RAW != '\'') {
10237
243
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10238
243
            xmlFree((xmlChar *) encoding);
10239
243
            return(NULL);
10240
243
        } else
10241
717
            NEXT;
10242
960
    } else {
10243
253
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10244
253
    }
10245
10246
37.0k
    if (encoding == NULL)
10247
253
        return(NULL);
10248
10249
36.8k
    xmlSetDeclaredEncoding(ctxt, encoding);
10250
10251
36.8k
    return(ctxt->encoding);
10252
37.0k
}
10253
10254
/**
10255
 * xmlParseSDDecl:
10256
 * @ctxt:  an XML parser context
10257
 *
10258
 * DEPRECATED: Internal function, don't use.
10259
 *
10260
 * parse the XML standalone declaration
10261
 *
10262
 * [32] SDDecl ::= S 'standalone' Eq
10263
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10264
 *
10265
 * [ VC: Standalone Document Declaration ]
10266
 * TODO The standalone document declaration must have the value "no"
10267
 * if any external markup declarations contain declarations of:
10268
 *  - attributes with default values, if elements to which these
10269
 *    attributes apply appear in the document without specifications
10270
 *    of values for these attributes, or
10271
 *  - entities (other than amp, lt, gt, apos, quot), if references
10272
 *    to those entities appear in the document, or
10273
 *  - attributes with values subject to normalization, where the
10274
 *    attribute appears in the document with a value which will change
10275
 *    as a result of normalization, or
10276
 *  - element types with element content, if white space occurs directly
10277
 *    within any instance of those types.
10278
 *
10279
 * Returns:
10280
 *   1 if standalone="yes"
10281
 *   0 if standalone="no"
10282
 *  -2 if standalone attribute is missing or invalid
10283
 *    (A standalone value of -2 means that the XML declaration was found,
10284
 *     but no value was specified for the standalone attribute).
10285
 */
10286
10287
int
10288
11.3k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10289
11.3k
    int standalone = -2;
10290
10291
11.3k
    SKIP_BLANKS;
10292
11.3k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10293
1.80k
  SKIP(10);
10294
1.80k
        SKIP_BLANKS;
10295
1.80k
  if (RAW != '=') {
10296
166
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10297
166
      return(standalone);
10298
166
        }
10299
1.63k
  NEXT;
10300
1.63k
  SKIP_BLANKS;
10301
1.63k
        if (RAW == '\''){
10302
554
      NEXT;
10303
554
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10304
287
          standalone = 0;
10305
287
                SKIP(2);
10306
287
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10307
267
                 (NXT(2) == 's')) {
10308
35
          standalone = 1;
10309
35
    SKIP(3);
10310
232
            } else {
10311
232
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10312
232
      }
10313
554
      if (RAW != '\'') {
10314
278
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10315
278
      } else
10316
276
          NEXT;
10317
1.08k
  } else if (RAW == '"'){
10318
1.08k
      NEXT;
10319
1.08k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10320
538
          standalone = 0;
10321
538
    SKIP(2);
10322
542
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10323
542
                 (NXT(2) == 's')) {
10324
355
          standalone = 1;
10325
355
                SKIP(3);
10326
355
            } else {
10327
187
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10328
187
      }
10329
1.08k
      if (RAW != '"') {
10330
491
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10331
491
      } else
10332
589
          NEXT;
10333
1.08k
  } else {
10334
1
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10335
1
        }
10336
1.63k
    }
10337
11.1k
    return(standalone);
10338
11.3k
}
10339
10340
/**
10341
 * xmlParseXMLDecl:
10342
 * @ctxt:  an XML parser context
10343
 *
10344
 * DEPRECATED: Internal function, don't use.
10345
 *
10346
 * parse an XML declaration header
10347
 *
10348
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10349
 */
10350
10351
void
10352
47.9k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10353
47.9k
    xmlChar *version;
10354
10355
    /*
10356
     * This value for standalone indicates that the document has an
10357
     * XML declaration but it does not have a standalone attribute.
10358
     * It will be overwritten later if a standalone attribute is found.
10359
     */
10360
10361
47.9k
    ctxt->standalone = -2;
10362
10363
    /*
10364
     * We know that '<?xml' is here.
10365
     */
10366
47.9k
    SKIP(5);
10367
10368
47.9k
    if (!IS_BLANK_CH(RAW)) {
10369
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10370
0
                 "Blank needed after '<?xml'\n");
10371
0
    }
10372
47.9k
    SKIP_BLANKS;
10373
10374
    /*
10375
     * We must have the VersionInfo here.
10376
     */
10377
47.9k
    version = xmlParseVersionInfo(ctxt);
10378
47.9k
    if (version == NULL) {
10379
3.78k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10380
44.1k
    } else {
10381
44.1k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10382
      /*
10383
       * Changed here for XML-1.0 5th edition
10384
       */
10385
35.4k
      if (ctxt->options & XML_PARSE_OLD10) {
10386
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10387
0
                "Unsupported version '%s'\n",
10388
0
                version);
10389
35.4k
      } else {
10390
35.4k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10391
33.0k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10392
33.0k
                      "Unsupported version '%s'\n",
10393
33.0k
          version, NULL);
10394
33.0k
    } else {
10395
2.37k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10396
2.37k
              "Unsupported version '%s'\n",
10397
2.37k
              version);
10398
2.37k
    }
10399
35.4k
      }
10400
35.4k
  }
10401
44.1k
  if (ctxt->version != NULL)
10402
0
      xmlFree((void *) ctxt->version);
10403
44.1k
  ctxt->version = version;
10404
44.1k
    }
10405
10406
    /*
10407
     * We may have the encoding declaration
10408
     */
10409
47.9k
    if (!IS_BLANK_CH(RAW)) {
10410
7.92k
        if ((RAW == '?') && (NXT(1) == '>')) {
10411
3.70k
      SKIP(2);
10412
3.70k
      return;
10413
3.70k
  }
10414
4.22k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10415
4.22k
    }
10416
44.2k
    xmlParseEncodingDecl(ctxt);
10417
10418
    /*
10419
     * We may have the standalone status.
10420
     */
10421
44.2k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10422
34.4k
        if ((RAW == '?') && (NXT(1) == '>')) {
10423
32.9k
      SKIP(2);
10424
32.9k
      return;
10425
32.9k
  }
10426
1.58k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10427
1.58k
    }
10428
10429
    /*
10430
     * We can grow the input buffer freely at that point
10431
     */
10432
11.3k
    GROW;
10433
10434
11.3k
    SKIP_BLANKS;
10435
11.3k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10436
10437
11.3k
    SKIP_BLANKS;
10438
11.3k
    if ((RAW == '?') && (NXT(1) == '>')) {
10439
825
        SKIP(2);
10440
10.5k
    } else if (RAW == '>') {
10441
        /* Deprecated old WD ... */
10442
329
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10443
329
  NEXT;
10444
10.1k
    } else {
10445
10.1k
        int c;
10446
10447
10.1k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10448
1.89M
        while ((PARSER_STOPPED(ctxt) == 0) &&
10449
1.89M
               ((c = CUR) != 0)) {
10450
1.88M
            NEXT;
10451
1.88M
            if (c == '>')
10452
7.57k
                break;
10453
1.88M
        }
10454
10.1k
    }
10455
11.3k
}
10456
10457
/**
10458
 * xmlParseMisc:
10459
 * @ctxt:  an XML parser context
10460
 *
10461
 * DEPRECATED: Internal function, don't use.
10462
 *
10463
 * parse an XML Misc* optional field.
10464
 *
10465
 * [27] Misc ::= Comment | PI |  S
10466
 */
10467
10468
void
10469
176k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10470
190k
    while (PARSER_STOPPED(ctxt) == 0) {
10471
184k
        SKIP_BLANKS;
10472
184k
        GROW;
10473
184k
        if ((RAW == '<') && (NXT(1) == '?')) {
10474
13.6k
      xmlParsePI(ctxt);
10475
170k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10476
892
      xmlParseComment(ctxt);
10477
170k
        } else {
10478
170k
            break;
10479
170k
        }
10480
184k
    }
10481
176k
}
10482
10483
static void
10484
88.8k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10485
88.8k
    xmlDocPtr doc;
10486
10487
    /*
10488
     * SAX: end of the document processing.
10489
     */
10490
88.8k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10491
88.8k
        ctxt->sax->endDocument(ctxt->userData);
10492
10493
88.8k
    doc = ctxt->myDoc;
10494
88.8k
    if (doc != NULL) {
10495
79.7k
        if (ctxt->wellFormed) {
10496
48.4k
            doc->properties |= XML_DOC_WELLFORMED;
10497
48.4k
            if (ctxt->valid)
10498
46.6k
                doc->properties |= XML_DOC_DTDVALID;
10499
48.4k
            if (ctxt->nsWellFormed)
10500
30.0k
                doc->properties |= XML_DOC_NSVALID;
10501
48.4k
        }
10502
10503
79.7k
        if (ctxt->options & XML_PARSE_OLD10)
10504
0
            doc->properties |= XML_DOC_OLD10;
10505
10506
        /*
10507
         * Remove locally kept entity definitions if the tree was not built
10508
         */
10509
79.7k
  if (xmlStrEqual(doc->version, SAX_COMPAT_MODE)) {
10510
2.03k
            xmlFreeDoc(doc);
10511
2.03k
            ctxt->myDoc = NULL;
10512
2.03k
        }
10513
79.7k
    }
10514
88.8k
}
10515
10516
/**
10517
 * xmlParseDocument:
10518
 * @ctxt:  an XML parser context
10519
 *
10520
 * Parse an XML document and invoke the SAX handlers. This is useful
10521
 * if you're only interested in custom SAX callbacks. If you want a
10522
 * document tree, use xmlCtxtParseDocument.
10523
 *
10524
 * Returns 0, -1 in case of error.
10525
 */
10526
10527
int
10528
89.0k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10529
89.0k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10530
0
        return(-1);
10531
10532
89.0k
    GROW;
10533
10534
    /*
10535
     * SAX: detecting the level.
10536
     */
10537
89.0k
    xmlCtxtInitializeLate(ctxt);
10538
10539
    /*
10540
     * Document locator is unused. Only for backward compatibility.
10541
     */
10542
89.0k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10543
89.0k
        xmlSAXLocator copy = xmlDefaultSAXLocator;
10544
89.0k
        ctxt->sax->setDocumentLocator(ctxt->userData, &copy);
10545
89.0k
    }
10546
10547
89.0k
    xmlDetectEncoding(ctxt);
10548
10549
89.0k
    if (CUR == 0) {
10550
207
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10551
207
  return(-1);
10552
207
    }
10553
10554
88.8k
    GROW;
10555
88.8k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10556
10557
  /*
10558
   * Note that we will switch encoding on the fly.
10559
   */
10560
47.9k
  xmlParseXMLDecl(ctxt);
10561
47.9k
  SKIP_BLANKS;
10562
47.9k
    } else {
10563
40.9k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10564
40.9k
        if (ctxt->version == NULL) {
10565
3
            xmlErrMemory(ctxt);
10566
3
            return(-1);
10567
3
        }
10568
40.9k
    }
10569
88.8k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10570
77.6k
        ctxt->sax->startDocument(ctxt->userData);
10571
88.8k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10572
88.8k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10573
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10574
0
    }
10575
10576
    /*
10577
     * The Misc part of the Prolog
10578
     */
10579
88.8k
    xmlParseMisc(ctxt);
10580
10581
    /*
10582
     * Then possibly doc type declaration(s) and more Misc
10583
     * (doctypedecl Misc*)?
10584
     */
10585
88.8k
    GROW;
10586
88.8k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10587
10588
21.2k
  ctxt->inSubset = 1;
10589
21.2k
  xmlParseDocTypeDecl(ctxt);
10590
21.2k
  if (RAW == '[') {
10591
17.2k
      xmlParseInternalSubset(ctxt);
10592
17.2k
  }
10593
10594
  /*
10595
   * Create and update the external subset.
10596
   */
10597
21.2k
  ctxt->inSubset = 2;
10598
21.2k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10599
21.2k
      (!ctxt->disableSAX))
10600
5.74k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10601
5.74k
                                ctxt->extSubSystem, ctxt->extSubURI);
10602
21.2k
  ctxt->inSubset = 0;
10603
10604
21.2k
        xmlCleanSpecialAttr(ctxt);
10605
10606
21.2k
  xmlParseMisc(ctxt);
10607
21.2k
    }
10608
10609
    /*
10610
     * Time to start parsing the tree itself
10611
     */
10612
88.8k
    GROW;
10613
88.8k
    if (RAW != '<') {
10614
22.9k
        if (ctxt->wellFormed)
10615
1.12k
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10616
1.12k
                           "Start tag expected, '<' not found\n");
10617
65.9k
    } else {
10618
65.9k
  xmlParseElement(ctxt);
10619
10620
  /*
10621
   * The Misc part at the end
10622
   */
10623
65.9k
  xmlParseMisc(ctxt);
10624
10625
65.9k
        if (ctxt->input->cur < ctxt->input->end) {
10626
5.21k
            if (ctxt->wellFormed)
10627
108
          xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10628
60.7k
        } else if ((ctxt->input->buf != NULL) &&
10629
60.7k
                   (ctxt->input->buf->encoder != NULL) &&
10630
60.7k
                   (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
10631
249
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
10632
249
                           "Truncated multi-byte sequence at EOF\n");
10633
249
        }
10634
65.9k
    }
10635
10636
88.8k
    ctxt->instate = XML_PARSER_EOF;
10637
88.8k
    xmlFinishDocument(ctxt);
10638
10639
88.8k
    if (! ctxt->wellFormed) {
10640
40.4k
  ctxt->valid = 0;
10641
40.4k
  return(-1);
10642
40.4k
    }
10643
10644
48.4k
    return(0);
10645
88.8k
}
10646
10647
/**
10648
 * xmlParseExtParsedEnt:
10649
 * @ctxt:  an XML parser context
10650
 *
10651
 * parse a general parsed entity
10652
 * An external general parsed entity is well-formed if it matches the
10653
 * production labeled extParsedEnt.
10654
 *
10655
 * [78] extParsedEnt ::= TextDecl? content
10656
 *
10657
 * Returns 0, -1 in case of error. the parser context is augmented
10658
 *                as a result of the parsing.
10659
 */
10660
10661
int
10662
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10663
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10664
0
        return(-1);
10665
10666
0
    xmlCtxtInitializeLate(ctxt);
10667
10668
    /*
10669
     * Document locator is unused. Only for backward compatibility.
10670
     */
10671
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10672
0
        xmlSAXLocator copy = xmlDefaultSAXLocator;
10673
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &copy);
10674
0
    }
10675
10676
0
    xmlDetectEncoding(ctxt);
10677
10678
0
    if (CUR == 0) {
10679
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10680
0
    }
10681
10682
    /*
10683
     * Check for the XMLDecl in the Prolog.
10684
     */
10685
0
    GROW;
10686
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10687
10688
  /*
10689
   * Note that we will switch encoding on the fly.
10690
   */
10691
0
  xmlParseXMLDecl(ctxt);
10692
0
  SKIP_BLANKS;
10693
0
    } else {
10694
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10695
0
    }
10696
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10697
0
        ctxt->sax->startDocument(ctxt->userData);
10698
10699
    /*
10700
     * Doing validity checking on chunk doesn't make sense
10701
     */
10702
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10703
0
    ctxt->validate = 0;
10704
0
    ctxt->depth = 0;
10705
10706
0
    xmlParseContentInternal(ctxt);
10707
10708
0
    if (ctxt->input->cur < ctxt->input->end)
10709
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10710
10711
    /*
10712
     * SAX: end of the document processing.
10713
     */
10714
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10715
0
        ctxt->sax->endDocument(ctxt->userData);
10716
10717
0
    if (! ctxt->wellFormed) return(-1);
10718
0
    return(0);
10719
0
}
10720
10721
#ifdef LIBXML_PUSH_ENABLED
10722
/************************************************************************
10723
 *                  *
10724
 *    Progressive parsing interfaces        *
10725
 *                  *
10726
 ************************************************************************/
10727
10728
/**
10729
 * xmlParseLookupChar:
10730
 * @ctxt:  an XML parser context
10731
 * @c:  character
10732
 *
10733
 * Check whether the input buffer contains a character.
10734
 */
10735
static int
10736
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10737
    const xmlChar *cur;
10738
10739
    if (ctxt->checkIndex == 0) {
10740
        cur = ctxt->input->cur + 1;
10741
    } else {
10742
        cur = ctxt->input->cur + ctxt->checkIndex;
10743
    }
10744
10745
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10746
        size_t index = ctxt->input->end - ctxt->input->cur;
10747
10748
        if (index > LONG_MAX) {
10749
            ctxt->checkIndex = 0;
10750
            return(1);
10751
        }
10752
        ctxt->checkIndex = index;
10753
        return(0);
10754
    } else {
10755
        ctxt->checkIndex = 0;
10756
        return(1);
10757
    }
10758
}
10759
10760
/**
10761
 * xmlParseLookupString:
10762
 * @ctxt:  an XML parser context
10763
 * @startDelta: delta to apply at the start
10764
 * @str:  string
10765
 * @strLen:  length of string
10766
 *
10767
 * Check whether the input buffer contains a string.
10768
 */
10769
static const xmlChar *
10770
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10771
                     const char *str, size_t strLen) {
10772
    const xmlChar *cur, *term;
10773
10774
    if (ctxt->checkIndex == 0) {
10775
        cur = ctxt->input->cur + startDelta;
10776
    } else {
10777
        cur = ctxt->input->cur + ctxt->checkIndex;
10778
    }
10779
10780
    term = BAD_CAST strstr((const char *) cur, str);
10781
    if (term == NULL) {
10782
        const xmlChar *end = ctxt->input->end;
10783
        size_t index;
10784
10785
        /* Rescan (strLen - 1) characters. */
10786
        if ((size_t) (end - cur) < strLen)
10787
            end = cur;
10788
        else
10789
            end -= strLen - 1;
10790
        index = end - ctxt->input->cur;
10791
        if (index > LONG_MAX) {
10792
            ctxt->checkIndex = 0;
10793
            return(ctxt->input->end - strLen);
10794
        }
10795
        ctxt->checkIndex = index;
10796
    } else {
10797
        ctxt->checkIndex = 0;
10798
    }
10799
10800
    return(term);
10801
}
10802
10803
/**
10804
 * xmlParseLookupCharData:
10805
 * @ctxt:  an XML parser context
10806
 *
10807
 * Check whether the input buffer contains terminated char data.
10808
 */
10809
static int
10810
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10811
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10812
    const xmlChar *end = ctxt->input->end;
10813
    size_t index;
10814
10815
    while (cur < end) {
10816
        if ((*cur == '<') || (*cur == '&')) {
10817
            ctxt->checkIndex = 0;
10818
            return(1);
10819
        }
10820
        cur++;
10821
    }
10822
10823
    index = cur - ctxt->input->cur;
10824
    if (index > LONG_MAX) {
10825
        ctxt->checkIndex = 0;
10826
        return(1);
10827
    }
10828
    ctxt->checkIndex = index;
10829
    return(0);
10830
}
10831
10832
/**
10833
 * xmlParseLookupGt:
10834
 * @ctxt:  an XML parser context
10835
 *
10836
 * Check whether there's enough data in the input buffer to finish parsing
10837
 * a start tag. This has to take quotes into account.
10838
 */
10839
static int
10840
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10841
    const xmlChar *cur;
10842
    const xmlChar *end = ctxt->input->end;
10843
    int state = ctxt->endCheckState;
10844
    size_t index;
10845
10846
    if (ctxt->checkIndex == 0)
10847
        cur = ctxt->input->cur + 1;
10848
    else
10849
        cur = ctxt->input->cur + ctxt->checkIndex;
10850
10851
    while (cur < end) {
10852
        if (state) {
10853
            if (*cur == state)
10854
                state = 0;
10855
        } else if (*cur == '\'' || *cur == '"') {
10856
            state = *cur;
10857
        } else if (*cur == '>') {
10858
            ctxt->checkIndex = 0;
10859
            ctxt->endCheckState = 0;
10860
            return(1);
10861
        }
10862
        cur++;
10863
    }
10864
10865
    index = cur - ctxt->input->cur;
10866
    if (index > LONG_MAX) {
10867
        ctxt->checkIndex = 0;
10868
        ctxt->endCheckState = 0;
10869
        return(1);
10870
    }
10871
    ctxt->checkIndex = index;
10872
    ctxt->endCheckState = state;
10873
    return(0);
10874
}
10875
10876
/**
10877
 * xmlParseLookupInternalSubset:
10878
 * @ctxt:  an XML parser context
10879
 *
10880
 * Check whether there's enough data in the input buffer to finish parsing
10881
 * the internal subset.
10882
 */
10883
static int
10884
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10885
    /*
10886
     * Sorry, but progressive parsing of the internal subset is not
10887
     * supported. We first check that the full content of the internal
10888
     * subset is available and parsing is launched only at that point.
10889
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10890
     * not in a ']]>' sequence which are conditional sections.
10891
     */
10892
    const xmlChar *cur, *start;
10893
    const xmlChar *end = ctxt->input->end;
10894
    int state = ctxt->endCheckState;
10895
    size_t index;
10896
10897
    if (ctxt->checkIndex == 0) {
10898
        cur = ctxt->input->cur + 1;
10899
    } else {
10900
        cur = ctxt->input->cur + ctxt->checkIndex;
10901
    }
10902
    start = cur;
10903
10904
    while (cur < end) {
10905
        if (state == '-') {
10906
            if ((*cur == '-') &&
10907
                (cur[1] == '-') &&
10908
                (cur[2] == '>')) {
10909
                state = 0;
10910
                cur += 3;
10911
                start = cur;
10912
                continue;
10913
            }
10914
        }
10915
        else if (state == ']') {
10916
            if (*cur == '>') {
10917
                ctxt->checkIndex = 0;
10918
                ctxt->endCheckState = 0;
10919
                return(1);
10920
            }
10921
            if (IS_BLANK_CH(*cur)) {
10922
                state = ' ';
10923
            } else if (*cur != ']') {
10924
                state = 0;
10925
                start = cur;
10926
                continue;
10927
            }
10928
        }
10929
        else if (state == ' ') {
10930
            if (*cur == '>') {
10931
                ctxt->checkIndex = 0;
10932
                ctxt->endCheckState = 0;
10933
                return(1);
10934
            }
10935
            if (!IS_BLANK_CH(*cur)) {
10936
                state = 0;
10937
                start = cur;
10938
                continue;
10939
            }
10940
        }
10941
        else if (state != 0) {
10942
            if (*cur == state) {
10943
                state = 0;
10944
                start = cur + 1;
10945
            }
10946
        }
10947
        else if (*cur == '<') {
10948
            if ((cur[1] == '!') &&
10949
                (cur[2] == '-') &&
10950
                (cur[3] == '-')) {
10951
                state = '-';
10952
                cur += 4;
10953
                /* Don't treat <!--> as comment */
10954
                start = cur;
10955
                continue;
10956
            }
10957
        }
10958
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10959
            state = *cur;
10960
        }
10961
10962
        cur++;
10963
    }
10964
10965
    /*
10966
     * Rescan the three last characters to detect "<!--" and "-->"
10967
     * split across chunks.
10968
     */
10969
    if ((state == 0) || (state == '-')) {
10970
        if (cur - start < 3)
10971
            cur = start;
10972
        else
10973
            cur -= 3;
10974
    }
10975
    index = cur - ctxt->input->cur;
10976
    if (index > LONG_MAX) {
10977
        ctxt->checkIndex = 0;
10978
        ctxt->endCheckState = 0;
10979
        return(1);
10980
    }
10981
    ctxt->checkIndex = index;
10982
    ctxt->endCheckState = state;
10983
    return(0);
10984
}
10985
10986
/**
10987
 * xmlCheckCdataPush:
10988
 * @cur: pointer to the block of characters
10989
 * @len: length of the block in bytes
10990
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
10991
 *
10992
 * Check that the block of characters is okay as SCdata content [20]
10993
 *
10994
 * Returns the number of bytes to pass if okay, a negative index where an
10995
 *         UTF-8 error occurred otherwise
10996
 */
10997
static int
10998
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
10999
    int ix;
11000
    unsigned char c;
11001
    int codepoint;
11002
11003
    if ((utf == NULL) || (len <= 0))
11004
        return(0);
11005
11006
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11007
        c = utf[ix];
11008
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11009
      if (c >= 0x20)
11010
    ix++;
11011
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11012
          ix++;
11013
      else
11014
          return(-ix);
11015
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11016
      if (ix + 2 > len) return(complete ? -ix : ix);
11017
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11018
          return(-ix);
11019
      codepoint = (utf[ix] & 0x1f) << 6;
11020
      codepoint |= utf[ix+1] & 0x3f;
11021
      if (!xmlIsCharQ(codepoint))
11022
          return(-ix);
11023
      ix += 2;
11024
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11025
      if (ix + 3 > len) return(complete ? -ix : ix);
11026
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11027
          ((utf[ix+2] & 0xc0) != 0x80))
11028
        return(-ix);
11029
      codepoint = (utf[ix] & 0xf) << 12;
11030
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11031
      codepoint |= utf[ix+2] & 0x3f;
11032
      if (!xmlIsCharQ(codepoint))
11033
          return(-ix);
11034
      ix += 3;
11035
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11036
      if (ix + 4 > len) return(complete ? -ix : ix);
11037
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11038
          ((utf[ix+2] & 0xc0) != 0x80) ||
11039
    ((utf[ix+3] & 0xc0) != 0x80))
11040
        return(-ix);
11041
      codepoint = (utf[ix] & 0x7) << 18;
11042
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11043
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11044
      codepoint |= utf[ix+3] & 0x3f;
11045
      if (!xmlIsCharQ(codepoint))
11046
          return(-ix);
11047
      ix += 4;
11048
  } else        /* unknown encoding */
11049
      return(-ix);
11050
      }
11051
      return(ix);
11052
}
11053
11054
/**
11055
 * xmlParseTryOrFinish:
11056
 * @ctxt:  an XML parser context
11057
 * @terminate:  last chunk indicator
11058
 *
11059
 * Try to progress on parsing
11060
 *
11061
 * Returns zero if no parsing was possible
11062
 */
11063
static int
11064
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11065
    int ret = 0;
11066
    size_t avail;
11067
    xmlChar cur, next;
11068
11069
    if (ctxt->input == NULL)
11070
        return(0);
11071
11072
    if ((ctxt->input != NULL) &&
11073
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11074
        xmlParserShrink(ctxt);
11075
    }
11076
11077
    while (ctxt->disableSAX == 0) {
11078
        avail = ctxt->input->end - ctxt->input->cur;
11079
        if (avail < 1)
11080
      goto done;
11081
        switch (ctxt->instate) {
11082
            case XML_PARSER_EOF:
11083
          /*
11084
     * Document parsing is done !
11085
     */
11086
          goto done;
11087
            case XML_PARSER_START:
11088
                /*
11089
                 * Very first chars read from the document flow.
11090
                 */
11091
                if ((!terminate) && (avail < 4))
11092
                    goto done;
11093
11094
                /*
11095
                 * We need more bytes to detect EBCDIC code pages.
11096
                 * See xmlDetectEBCDIC.
11097
                 */
11098
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
11099
                    (!terminate) && (avail < 200))
11100
                    goto done;
11101
11102
                xmlDetectEncoding(ctxt);
11103
                ctxt->instate = XML_PARSER_XML_DECL;
11104
    break;
11105
11106
            case XML_PARSER_XML_DECL:
11107
    if ((!terminate) && (avail < 2))
11108
        goto done;
11109
    cur = ctxt->input->cur[0];
11110
    next = ctxt->input->cur[1];
11111
          if ((cur == '<') && (next == '?')) {
11112
        /* PI or XML decl */
11113
        if ((!terminate) &&
11114
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11115
      goto done;
11116
        if ((ctxt->input->cur[2] == 'x') &&
11117
      (ctxt->input->cur[3] == 'm') &&
11118
      (ctxt->input->cur[4] == 'l') &&
11119
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11120
      ret += 5;
11121
      xmlParseXMLDecl(ctxt);
11122
        } else {
11123
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11124
                        if (ctxt->version == NULL) {
11125
                            xmlErrMemory(ctxt);
11126
                            break;
11127
                        }
11128
        }
11129
    } else {
11130
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11131
        if (ctxt->version == NULL) {
11132
            xmlErrMemory(ctxt);
11133
      break;
11134
        }
11135
    }
11136
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
11137
                    xmlSAXLocator copy = xmlDefaultSAXLocator;
11138
                    ctxt->sax->setDocumentLocator(ctxt->userData, &copy);
11139
                }
11140
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11141
                    (!ctxt->disableSAX))
11142
                    ctxt->sax->startDocument(ctxt->userData);
11143
                ctxt->instate = XML_PARSER_MISC;
11144
    break;
11145
            case XML_PARSER_START_TAG: {
11146
          const xmlChar *name;
11147
    const xmlChar *prefix = NULL;
11148
    const xmlChar *URI = NULL;
11149
                int line = ctxt->input->line;
11150
    int nbNs = 0;
11151
11152
    if ((!terminate) && (avail < 2))
11153
        goto done;
11154
    cur = ctxt->input->cur[0];
11155
          if (cur != '<') {
11156
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11157
                                   "Start tag expected, '<' not found");
11158
                    ctxt->instate = XML_PARSER_EOF;
11159
                    xmlFinishDocument(ctxt);
11160
        goto done;
11161
    }
11162
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11163
                    goto done;
11164
    if (ctxt->spaceNr == 0)
11165
        spacePush(ctxt, -1);
11166
    else if (*ctxt->space == -2)
11167
        spacePush(ctxt, -1);
11168
    else
11169
        spacePush(ctxt, *ctxt->space);
11170
#ifdef LIBXML_SAX1_ENABLED
11171
    if (ctxt->sax2)
11172
#endif /* LIBXML_SAX1_ENABLED */
11173
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
11174
#ifdef LIBXML_SAX1_ENABLED
11175
    else
11176
        name = xmlParseStartTag(ctxt);
11177
#endif /* LIBXML_SAX1_ENABLED */
11178
    if (name == NULL) {
11179
        spacePop(ctxt);
11180
                    ctxt->instate = XML_PARSER_EOF;
11181
                    xmlFinishDocument(ctxt);
11182
        goto done;
11183
    }
11184
#ifdef LIBXML_VALID_ENABLED
11185
    /*
11186
     * [ VC: Root Element Type ]
11187
     * The Name in the document type declaration must match
11188
     * the element type of the root element.
11189
     */
11190
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11191
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11192
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11193
#endif /* LIBXML_VALID_ENABLED */
11194
11195
    /*
11196
     * Check for an Empty Element.
11197
     */
11198
    if ((RAW == '/') && (NXT(1) == '>')) {
11199
        SKIP(2);
11200
11201
        if (ctxt->sax2) {
11202
      if ((ctxt->sax != NULL) &&
11203
          (ctxt->sax->endElementNs != NULL) &&
11204
          (!ctxt->disableSAX))
11205
          ctxt->sax->endElementNs(ctxt->userData, name,
11206
                                  prefix, URI);
11207
      if (nbNs > 0)
11208
          xmlParserNsPop(ctxt, nbNs);
11209
#ifdef LIBXML_SAX1_ENABLED
11210
        } else {
11211
      if ((ctxt->sax != NULL) &&
11212
          (ctxt->sax->endElement != NULL) &&
11213
          (!ctxt->disableSAX))
11214
          ctxt->sax->endElement(ctxt->userData, name);
11215
#endif /* LIBXML_SAX1_ENABLED */
11216
        }
11217
        spacePop(ctxt);
11218
    } else if (RAW == '>') {
11219
        NEXT;
11220
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11221
    } else {
11222
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11223
           "Couldn't find end of Start Tag %s\n",
11224
           name);
11225
        nodePop(ctxt);
11226
        spacePop(ctxt);
11227
                    if (nbNs > 0)
11228
                        xmlParserNsPop(ctxt, nbNs);
11229
    }
11230
11231
                if (ctxt->nameNr == 0)
11232
                    ctxt->instate = XML_PARSER_EPILOG;
11233
                else
11234
                    ctxt->instate = XML_PARSER_CONTENT;
11235
                break;
11236
      }
11237
            case XML_PARSER_CONTENT: {
11238
    cur = ctxt->input->cur[0];
11239
11240
    if (cur == '<') {
11241
                    if ((!terminate) && (avail < 2))
11242
                        goto done;
11243
        next = ctxt->input->cur[1];
11244
11245
                    if (next == '/') {
11246
                        ctxt->instate = XML_PARSER_END_TAG;
11247
                        break;
11248
                    } else if (next == '?') {
11249
                        if ((!terminate) &&
11250
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11251
                            goto done;
11252
                        xmlParsePI(ctxt);
11253
                        ctxt->instate = XML_PARSER_CONTENT;
11254
                        break;
11255
                    } else if (next == '!') {
11256
                        if ((!terminate) && (avail < 3))
11257
                            goto done;
11258
                        next = ctxt->input->cur[2];
11259
11260
                        if (next == '-') {
11261
                            if ((!terminate) && (avail < 4))
11262
                                goto done;
11263
                            if (ctxt->input->cur[3] == '-') {
11264
                                if ((!terminate) &&
11265
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11266
                                    goto done;
11267
                                xmlParseComment(ctxt);
11268
                                ctxt->instate = XML_PARSER_CONTENT;
11269
                                break;
11270
                            }
11271
                        } else if (next == '[') {
11272
                            if ((!terminate) && (avail < 9))
11273
                                goto done;
11274
                            if ((ctxt->input->cur[2] == '[') &&
11275
                                (ctxt->input->cur[3] == 'C') &&
11276
                                (ctxt->input->cur[4] == 'D') &&
11277
                                (ctxt->input->cur[5] == 'A') &&
11278
                                (ctxt->input->cur[6] == 'T') &&
11279
                                (ctxt->input->cur[7] == 'A') &&
11280
                                (ctxt->input->cur[8] == '[')) {
11281
                                SKIP(9);
11282
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11283
                                break;
11284
                            }
11285
                        }
11286
                    }
11287
    } else if (cur == '&') {
11288
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11289
      goto done;
11290
        xmlParseReference(ctxt);
11291
                    break;
11292
    } else {
11293
        /* TODO Avoid the extra copy, handle directly !!! */
11294
        /*
11295
         * Goal of the following test is:
11296
         *  - minimize calls to the SAX 'character' callback
11297
         *    when they are mergeable
11298
         *  - handle an problem for isBlank when we only parse
11299
         *    a sequence of blank chars and the next one is
11300
         *    not available to check against '<' presence.
11301
         *  - tries to homogenize the differences in SAX
11302
         *    callbacks between the push and pull versions
11303
         *    of the parser.
11304
         */
11305
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11306
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11307
          goto done;
11308
                    }
11309
                    ctxt->checkIndex = 0;
11310
        xmlParseCharDataInternal(ctxt, !terminate);
11311
                    break;
11312
    }
11313
11314
                ctxt->instate = XML_PARSER_START_TAG;
11315
    break;
11316
      }
11317
            case XML_PARSER_END_TAG:
11318
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11319
        goto done;
11320
    if (ctxt->sax2) {
11321
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11322
        nameNsPop(ctxt);
11323
    }
11324
#ifdef LIBXML_SAX1_ENABLED
11325
      else
11326
        xmlParseEndTag1(ctxt, 0);
11327
#endif /* LIBXML_SAX1_ENABLED */
11328
    if (ctxt->nameNr == 0) {
11329
        ctxt->instate = XML_PARSER_EPILOG;
11330
    } else {
11331
        ctxt->instate = XML_PARSER_CONTENT;
11332
    }
11333
    break;
11334
            case XML_PARSER_CDATA_SECTION: {
11335
          /*
11336
     * The Push mode need to have the SAX callback for
11337
     * cdataBlock merge back contiguous callbacks.
11338
     */
11339
    const xmlChar *term;
11340
11341
                if (terminate) {
11342
                    /*
11343
                     * Don't call xmlParseLookupString. If 'terminate'
11344
                     * is set, checkIndex is invalid.
11345
                     */
11346
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11347
                                           "]]>");
11348
                } else {
11349
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11350
                }
11351
11352
    if (term == NULL) {
11353
        int tmp, size;
11354
11355
                    if (terminate) {
11356
                        /* Unfinished CDATA section */
11357
                        size = ctxt->input->end - ctxt->input->cur;
11358
                    } else {
11359
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11360
                            goto done;
11361
                        ctxt->checkIndex = 0;
11362
                        /* XXX: Why don't we pass the full buffer? */
11363
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11364
                    }
11365
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11366
                    if (tmp <= 0) {
11367
                        tmp = -tmp;
11368
                        ctxt->input->cur += tmp;
11369
                        goto encoding_error;
11370
                    }
11371
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11372
                        if (ctxt->sax->cdataBlock != NULL)
11373
                            ctxt->sax->cdataBlock(ctxt->userData,
11374
                                                  ctxt->input->cur, tmp);
11375
                        else if (ctxt->sax->characters != NULL)
11376
                            ctxt->sax->characters(ctxt->userData,
11377
                                                  ctxt->input->cur, tmp);
11378
                    }
11379
                    SKIPL(tmp);
11380
    } else {
11381
                    int base = term - CUR_PTR;
11382
        int tmp;
11383
11384
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11385
        if ((tmp < 0) || (tmp != base)) {
11386
      tmp = -tmp;
11387
      ctxt->input->cur += tmp;
11388
      goto encoding_error;
11389
        }
11390
        if ((ctxt->sax != NULL) && (base == 0) &&
11391
            (ctxt->sax->cdataBlock != NULL) &&
11392
            (!ctxt->disableSAX)) {
11393
      /*
11394
       * Special case to provide identical behaviour
11395
       * between pull and push parsers on enpty CDATA
11396
       * sections
11397
       */
11398
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11399
           (!strncmp((const char *)&ctxt->input->cur[-9],
11400
                     "<![CDATA[", 9)))
11401
           ctxt->sax->cdataBlock(ctxt->userData,
11402
                                 BAD_CAST "", 0);
11403
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11404
      (!ctxt->disableSAX)) {
11405
      if (ctxt->sax->cdataBlock != NULL)
11406
          ctxt->sax->cdataBlock(ctxt->userData,
11407
              ctxt->input->cur, base);
11408
      else if (ctxt->sax->characters != NULL)
11409
          ctxt->sax->characters(ctxt->userData,
11410
              ctxt->input->cur, base);
11411
        }
11412
        SKIPL(base + 3);
11413
        ctxt->instate = XML_PARSER_CONTENT;
11414
    }
11415
    break;
11416
      }
11417
            case XML_PARSER_MISC:
11418
            case XML_PARSER_PROLOG:
11419
            case XML_PARSER_EPILOG:
11420
    SKIP_BLANKS;
11421
                avail = ctxt->input->end - ctxt->input->cur;
11422
    if (avail < 1)
11423
        goto done;
11424
    if (ctxt->input->cur[0] == '<') {
11425
                    if ((!terminate) && (avail < 2))
11426
                        goto done;
11427
                    next = ctxt->input->cur[1];
11428
                    if (next == '?') {
11429
                        if ((!terminate) &&
11430
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11431
                            goto done;
11432
                        xmlParsePI(ctxt);
11433
                        break;
11434
                    } else if (next == '!') {
11435
                        if ((!terminate) && (avail < 3))
11436
                            goto done;
11437
11438
                        if (ctxt->input->cur[2] == '-') {
11439
                            if ((!terminate) && (avail < 4))
11440
                                goto done;
11441
                            if (ctxt->input->cur[3] == '-') {
11442
                                if ((!terminate) &&
11443
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11444
                                    goto done;
11445
                                xmlParseComment(ctxt);
11446
                                break;
11447
                            }
11448
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11449
                            if ((!terminate) && (avail < 9))
11450
                                goto done;
11451
                            if ((ctxt->input->cur[2] == 'D') &&
11452
                                (ctxt->input->cur[3] == 'O') &&
11453
                                (ctxt->input->cur[4] == 'C') &&
11454
                                (ctxt->input->cur[5] == 'T') &&
11455
                                (ctxt->input->cur[6] == 'Y') &&
11456
                                (ctxt->input->cur[7] == 'P') &&
11457
                                (ctxt->input->cur[8] == 'E')) {
11458
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11459
                                    goto done;
11460
                                ctxt->inSubset = 1;
11461
                                xmlParseDocTypeDecl(ctxt);
11462
                                if (RAW == '[') {
11463
                                    ctxt->instate = XML_PARSER_DTD;
11464
                                } else {
11465
                                    /*
11466
                                     * Create and update the external subset.
11467
                                     */
11468
                                    ctxt->inSubset = 2;
11469
                                    if ((ctxt->sax != NULL) &&
11470
                                        (!ctxt->disableSAX) &&
11471
                                        (ctxt->sax->externalSubset != NULL))
11472
                                        ctxt->sax->externalSubset(
11473
                                                ctxt->userData,
11474
                                                ctxt->intSubName,
11475
                                                ctxt->extSubSystem,
11476
                                                ctxt->extSubURI);
11477
                                    ctxt->inSubset = 0;
11478
                                    xmlCleanSpecialAttr(ctxt);
11479
                                    ctxt->instate = XML_PARSER_PROLOG;
11480
                                }
11481
                                break;
11482
                            }
11483
                        }
11484
                    }
11485
                }
11486
11487
                if (ctxt->instate == XML_PARSER_EPILOG) {
11488
                    if (ctxt->errNo == XML_ERR_OK)
11489
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11490
        ctxt->instate = XML_PARSER_EOF;
11491
                    xmlFinishDocument(ctxt);
11492
                } else {
11493
        ctxt->instate = XML_PARSER_START_TAG;
11494
    }
11495
    break;
11496
            case XML_PARSER_DTD: {
11497
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11498
                    goto done;
11499
    xmlParseInternalSubset(ctxt);
11500
    ctxt->inSubset = 2;
11501
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11502
        (ctxt->sax->externalSubset != NULL))
11503
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11504
          ctxt->extSubSystem, ctxt->extSubURI);
11505
    ctxt->inSubset = 0;
11506
    xmlCleanSpecialAttr(ctxt);
11507
    ctxt->instate = XML_PARSER_PROLOG;
11508
                break;
11509
      }
11510
            default:
11511
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11512
      "PP: internal error\n");
11513
    ctxt->instate = XML_PARSER_EOF;
11514
    break;
11515
  }
11516
    }
11517
done:
11518
    return(ret);
11519
encoding_error:
11520
    /* Only report the first error */
11521
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
11522
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
11523
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
11524
    }
11525
    return(0);
11526
}
11527
11528
/**
11529
 * xmlParseChunk:
11530
 * @ctxt:  an XML parser context
11531
 * @chunk:  chunk of memory
11532
 * @size:  size of chunk in bytes
11533
 * @terminate:  last chunk indicator
11534
 *
11535
 * Parse a chunk of memory in push parser mode.
11536
 *
11537
 * Assumes that the parser context was initialized with
11538
 * xmlCreatePushParserCtxt.
11539
 *
11540
 * The last chunk, which will often be empty, must be marked with
11541
 * the @terminate flag. With the default SAX callbacks, the resulting
11542
 * document will be available in ctxt->myDoc. This pointer will not
11543
 * be freed by the library.
11544
 *
11545
 * If the document isn't well-formed, ctxt->myDoc is set to NULL.
11546
 * The push parser doesn't support recovery mode.
11547
 *
11548
 * Returns an xmlParserErrors code (0 on success).
11549
 */
11550
int
11551
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11552
              int terminate) {
11553
    size_t curBase;
11554
    size_t maxLength;
11555
    int end_in_lf = 0;
11556
11557
    if ((ctxt == NULL) || (size < 0))
11558
        return(XML_ERR_ARGUMENT);
11559
    if (ctxt->disableSAX != 0)
11560
        return(ctxt->errNo);
11561
    if (ctxt->input == NULL)
11562
        return(XML_ERR_INTERNAL_ERROR);
11563
11564
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11565
    if (ctxt->instate == XML_PARSER_START)
11566
        xmlCtxtInitializeLate(ctxt);
11567
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11568
        (chunk[size - 1] == '\r')) {
11569
  end_in_lf = 1;
11570
  size--;
11571
    }
11572
11573
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11574
        (ctxt->input->buf != NULL))  {
11575
  size_t pos = ctxt->input->cur - ctxt->input->base;
11576
  int res;
11577
11578
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11579
        xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11580
  if (res < 0) {
11581
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11582
      xmlHaltParser(ctxt);
11583
      return(ctxt->errNo);
11584
  }
11585
    }
11586
11587
    xmlParseTryOrFinish(ctxt, terminate);
11588
11589
    curBase = ctxt->input->cur - ctxt->input->base;
11590
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11591
                XML_MAX_HUGE_LENGTH :
11592
                XML_MAX_LOOKUP_LIMIT;
11593
    if (curBase > maxLength) {
11594
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11595
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11596
        xmlHaltParser(ctxt);
11597
    }
11598
11599
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11600
        return(ctxt->errNo);
11601
11602
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11603
        (ctxt->input->buf != NULL)) {
11604
  size_t pos = ctxt->input->cur - ctxt->input->base;
11605
        int res;
11606
11607
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11608
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11609
        if (res < 0) {
11610
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11611
            xmlHaltParser(ctxt);
11612
            return(ctxt->errNo);
11613
        }
11614
    }
11615
    if (terminate) {
11616
  /*
11617
   * Check for termination
11618
   */
11619
        if ((ctxt->instate != XML_PARSER_EOF) &&
11620
            (ctxt->instate != XML_PARSER_EPILOG)) {
11621
            if (ctxt->nameNr > 0) {
11622
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11623
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11624
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11625
                        "Premature end of data in tag %s line %d\n",
11626
                        name, line, NULL);
11627
            } else if (ctxt->instate == XML_PARSER_START) {
11628
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11629
            } else {
11630
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11631
                               "Start tag expected, '<' not found\n");
11632
            }
11633
        } else if ((ctxt->input->buf != NULL) &&
11634
                   (ctxt->input->buf->encoder != NULL) &&
11635
                   (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
11636
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
11637
                           "Truncated multi-byte sequence at EOF\n");
11638
        }
11639
  if (ctxt->instate != XML_PARSER_EOF) {
11640
            ctxt->instate = XML_PARSER_EOF;
11641
            xmlFinishDocument(ctxt);
11642
  }
11643
    }
11644
    if (ctxt->wellFormed == 0)
11645
  return((xmlParserErrors) ctxt->errNo);
11646
    else
11647
        return(0);
11648
}
11649
11650
/************************************************************************
11651
 *                  *
11652
 *    I/O front end functions to the parser     *
11653
 *                  *
11654
 ************************************************************************/
11655
11656
/**
11657
 * xmlCreatePushParserCtxt:
11658
 * @sax:  a SAX handler (optional)
11659
 * @user_data:  user data for SAX callbacks (optional)
11660
 * @chunk:  initial chunk (optional, deprecated)
11661
 * @size:  size of initial chunk in bytes
11662
 * @filename:  file name or URI (optional)
11663
 *
11664
 * Create a parser context for using the XML parser in push mode.
11665
 * See xmlParseChunk.
11666
 *
11667
 * Passing an initial chunk is useless and deprecated.
11668
 *
11669
 * @filename is used as base URI to fetch external entities and for
11670
 * error reports.
11671
 *
11672
 * Returns the new parser context or NULL in case of error.
11673
 */
11674
11675
xmlParserCtxtPtr
11676
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11677
                        const char *chunk, int size, const char *filename) {
11678
    xmlParserCtxtPtr ctxt;
11679
    xmlParserInputPtr input;
11680
11681
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11682
    if (ctxt == NULL)
11683
  return(NULL);
11684
11685
    ctxt->options &= ~XML_PARSE_NODICT;
11686
    ctxt->dictNames = 1;
11687
11688
    input = xmlNewInputPush(ctxt, filename, chunk, size, NULL);
11689
    if (input == NULL) {
11690
  xmlFreeParserCtxt(ctxt);
11691
  return(NULL);
11692
    }
11693
    inputPush(ctxt, input);
11694
11695
    return(ctxt);
11696
}
11697
#endif /* LIBXML_PUSH_ENABLED */
11698
11699
/**
11700
 * xmlStopParser:
11701
 * @ctxt:  an XML parser context
11702
 *
11703
 * Blocks further parser processing
11704
 */
11705
void
11706
7.07M
xmlStopParser(xmlParserCtxtPtr ctxt) {
11707
7.07M
    if (ctxt == NULL)
11708
7.07M
        return;
11709
0
    xmlHaltParser(ctxt);
11710
0
    if (ctxt->errNo != XML_ERR_NO_MEMORY)
11711
0
        ctxt->errNo = XML_ERR_USER_STOP;
11712
0
}
11713
11714
/**
11715
 * xmlCreateIOParserCtxt:
11716
 * @sax:  a SAX handler (optional)
11717
 * @user_data:  user data for SAX callbacks (optional)
11718
 * @ioread:  an I/O read function
11719
 * @ioclose:  an I/O close function (optional)
11720
 * @ioctx:  an I/O handler
11721
 * @enc:  the charset encoding if known (deprecated)
11722
 *
11723
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadIO.
11724
 *
11725
 * Create a parser context for using the XML parser with an existing
11726
 * I/O stream
11727
 *
11728
 * Returns the new parser context or NULL
11729
 */
11730
xmlParserCtxtPtr
11731
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11732
                      xmlInputReadCallback ioread,
11733
                      xmlInputCloseCallback ioclose,
11734
0
                      void *ioctx, xmlCharEncoding enc) {
11735
0
    xmlParserCtxtPtr ctxt;
11736
0
    xmlParserInputPtr input;
11737
0
    const char *encoding;
11738
11739
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11740
0
    if (ctxt == NULL)
11741
0
  return(NULL);
11742
11743
0
    encoding = xmlGetCharEncodingName(enc);
11744
0
    input = xmlNewInputIO(ctxt, NULL, ioread, ioclose, ioctx, encoding, 0);
11745
0
    if (input == NULL) {
11746
0
  xmlFreeParserCtxt(ctxt);
11747
0
        return (NULL);
11748
0
    }
11749
0
    inputPush(ctxt, input);
11750
11751
0
    return(ctxt);
11752
0
}
11753
11754
#ifdef LIBXML_VALID_ENABLED
11755
/************************************************************************
11756
 *                  *
11757
 *    Front ends when parsing a DTD       *
11758
 *                  *
11759
 ************************************************************************/
11760
11761
/**
11762
 * xmlIOParseDTD:
11763
 * @sax:  the SAX handler block or NULL
11764
 * @input:  an Input Buffer
11765
 * @enc:  the charset encoding if known
11766
 *
11767
 * Load and parse a DTD
11768
 *
11769
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11770
 * @input will be freed by the function in any case.
11771
 */
11772
11773
xmlDtdPtr
11774
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11775
        xmlCharEncoding enc) {
11776
    xmlDtdPtr ret = NULL;
11777
    xmlParserCtxtPtr ctxt;
11778
    xmlParserInputPtr pinput = NULL;
11779
11780
    if (input == NULL)
11781
  return(NULL);
11782
11783
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11784
    if (ctxt == NULL) {
11785
        xmlFreeParserInputBuffer(input);
11786
  return(NULL);
11787
    }
11788
11789
    /*
11790
     * generate a parser input from the I/O handler
11791
     */
11792
11793
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11794
    if (pinput == NULL) {
11795
        xmlFreeParserInputBuffer(input);
11796
  xmlFreeParserCtxt(ctxt);
11797
  return(NULL);
11798
    }
11799
11800
    /*
11801
     * plug some encoding conversion routines here.
11802
     */
11803
    if (xmlPushInput(ctxt, pinput) < 0) {
11804
  xmlFreeParserCtxt(ctxt);
11805
  return(NULL);
11806
    }
11807
    if (enc != XML_CHAR_ENCODING_NONE) {
11808
        xmlSwitchEncoding(ctxt, enc);
11809
    }
11810
11811
    /*
11812
     * let's parse that entity knowing it's an external subset.
11813
     */
11814
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11815
    if (ctxt->myDoc == NULL) {
11816
  xmlErrMemory(ctxt);
11817
  return(NULL);
11818
    }
11819
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11820
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11821
                                 BAD_CAST "none", BAD_CAST "none");
11822
11823
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11824
11825
    if (ctxt->myDoc != NULL) {
11826
  if (ctxt->wellFormed) {
11827
      ret = ctxt->myDoc->extSubset;
11828
      ctxt->myDoc->extSubset = NULL;
11829
      if (ret != NULL) {
11830
    xmlNodePtr tmp;
11831
11832
    ret->doc = NULL;
11833
    tmp = ret->children;
11834
    while (tmp != NULL) {
11835
        tmp->doc = NULL;
11836
        tmp = tmp->next;
11837
    }
11838
      }
11839
  } else {
11840
      ret = NULL;
11841
  }
11842
        xmlFreeDoc(ctxt->myDoc);
11843
        ctxt->myDoc = NULL;
11844
    }
11845
    xmlFreeParserCtxt(ctxt);
11846
11847
    return(ret);
11848
}
11849
11850
/**
11851
 * xmlSAXParseDTD:
11852
 * @sax:  the SAX handler block
11853
 * @ExternalID:  a NAME* containing the External ID of the DTD
11854
 * @SystemID:  a NAME* containing the URL to the DTD
11855
 *
11856
 * DEPRECATED: Don't use.
11857
 *
11858
 * Load and parse an external subset.
11859
 *
11860
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11861
 */
11862
11863
xmlDtdPtr
11864
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11865
                          const xmlChar *SystemID) {
11866
    xmlDtdPtr ret = NULL;
11867
    xmlParserCtxtPtr ctxt;
11868
    xmlParserInputPtr input = NULL;
11869
    xmlChar* systemIdCanonic;
11870
11871
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11872
11873
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11874
    if (ctxt == NULL) {
11875
  return(NULL);
11876
    }
11877
11878
    /*
11879
     * Canonicalise the system ID
11880
     */
11881
    systemIdCanonic = xmlCanonicPath(SystemID);
11882
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11883
  xmlFreeParserCtxt(ctxt);
11884
  return(NULL);
11885
    }
11886
11887
    /*
11888
     * Ask the Entity resolver to load the damn thing
11889
     */
11890
11891
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11892
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11893
                                   systemIdCanonic);
11894
    if (input == NULL) {
11895
  xmlFreeParserCtxt(ctxt);
11896
  if (systemIdCanonic != NULL)
11897
      xmlFree(systemIdCanonic);
11898
  return(NULL);
11899
    }
11900
11901
    /*
11902
     * plug some encoding conversion routines here.
11903
     */
11904
    if (xmlPushInput(ctxt, input) < 0) {
11905
  xmlFreeParserCtxt(ctxt);
11906
  if (systemIdCanonic != NULL)
11907
      xmlFree(systemIdCanonic);
11908
  return(NULL);
11909
    }
11910
11911
    xmlDetectEncoding(ctxt);
11912
11913
    if (input->filename == NULL)
11914
  input->filename = (char *) systemIdCanonic;
11915
    else
11916
  xmlFree(systemIdCanonic);
11917
11918
    /*
11919
     * let's parse that entity knowing it's an external subset.
11920
     */
11921
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11922
    if (ctxt->myDoc == NULL) {
11923
  xmlErrMemory(ctxt);
11924
  xmlFreeParserCtxt(ctxt);
11925
  return(NULL);
11926
    }
11927
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11928
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11929
                                 ExternalID, SystemID);
11930
    if (ctxt->myDoc->extSubset == NULL) {
11931
        xmlFreeDoc(ctxt->myDoc);
11932
        xmlFreeParserCtxt(ctxt);
11933
        return(NULL);
11934
    }
11935
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11936
11937
    if (ctxt->myDoc != NULL) {
11938
  if (ctxt->wellFormed) {
11939
      ret = ctxt->myDoc->extSubset;
11940
      ctxt->myDoc->extSubset = NULL;
11941
      if (ret != NULL) {
11942
    xmlNodePtr tmp;
11943
11944
    ret->doc = NULL;
11945
    tmp = ret->children;
11946
    while (tmp != NULL) {
11947
        tmp->doc = NULL;
11948
        tmp = tmp->next;
11949
    }
11950
      }
11951
  } else {
11952
      ret = NULL;
11953
  }
11954
        xmlFreeDoc(ctxt->myDoc);
11955
        ctxt->myDoc = NULL;
11956
    }
11957
    xmlFreeParserCtxt(ctxt);
11958
11959
    return(ret);
11960
}
11961
11962
11963
/**
11964
 * xmlParseDTD:
11965
 * @ExternalID:  a NAME* containing the External ID of the DTD
11966
 * @SystemID:  a NAME* containing the URL to the DTD
11967
 *
11968
 * Load and parse an external subset.
11969
 *
11970
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11971
 */
11972
11973
xmlDtdPtr
11974
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11975
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11976
}
11977
#endif /* LIBXML_VALID_ENABLED */
11978
11979
/************************************************************************
11980
 *                  *
11981
 *    Front ends when parsing an Entity     *
11982
 *                  *
11983
 ************************************************************************/
11984
11985
static xmlNodePtr
11986
xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11987
2.58k
                    int hasTextDecl, int buildTree) {
11988
2.58k
    xmlNodePtr root = NULL;
11989
2.58k
    xmlNodePtr list = NULL;
11990
2.58k
    xmlChar *rootName = BAD_CAST "#root";
11991
2.58k
    int result;
11992
11993
2.58k
    if (buildTree) {
11994
2.58k
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11995
2.58k
        if (root == NULL) {
11996
1
            xmlErrMemory(ctxt);
11997
1
            goto error;
11998
1
        }
11999
2.58k
    }
12000
12001
2.58k
    if (xmlPushInput(ctxt, input) < 0)
12002
0
        goto error;
12003
12004
2.58k
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
12005
2.58k
    spacePush(ctxt, -1);
12006
12007
2.58k
    if (buildTree)
12008
2.58k
        nodePush(ctxt, root);
12009
12010
2.58k
    if (hasTextDecl) {
12011
352
        xmlDetectEncoding(ctxt);
12012
12013
        /*
12014
         * Parse a possible text declaration first
12015
         */
12016
352
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
12017
352
            (IS_BLANK_CH(NXT(5)))) {
12018
145
            xmlParseTextDecl(ctxt);
12019
            /*
12020
             * An XML-1.0 document can't reference an entity not XML-1.0
12021
             */
12022
145
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
12023
145
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12024
5
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12025
5
                               "Version mismatch between document and "
12026
5
                               "entity\n");
12027
5
            }
12028
145
        }
12029
352
    }
12030
12031
2.58k
    xmlParseContentInternal(ctxt);
12032
12033
2.58k
    if (ctxt->input->cur < ctxt->input->end)
12034
226
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12035
12036
2.58k
    if ((ctxt->wellFormed) ||
12037
2.58k
        ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
12038
1.79k
        if (root != NULL) {
12039
1.79k
            xmlNodePtr cur;
12040
12041
            /*
12042
             * Return the newly created nodeset after unlinking it from
12043
             * its pseudo parent.
12044
             */
12045
1.79k
            cur = root->children;
12046
1.79k
            list = cur;
12047
2.74k
            while (cur != NULL) {
12048
950
                cur->parent = NULL;
12049
950
                cur = cur->next;
12050
950
            }
12051
1.79k
            root->children = NULL;
12052
1.79k
            root->last = NULL;
12053
1.79k
        }
12054
1.79k
    }
12055
12056
    /*
12057
     * Read the rest of the stream in case of errors. We want
12058
     * to account for the whole entity size.
12059
     */
12060
2.65k
    do {
12061
2.65k
        ctxt->input->cur = ctxt->input->end;
12062
2.65k
        xmlParserShrink(ctxt);
12063
2.65k
        result = xmlParserGrow(ctxt);
12064
2.65k
    } while (result > 0);
12065
12066
2.58k
    if (buildTree)
12067
2.58k
        nodePop(ctxt);
12068
12069
2.58k
    namePop(ctxt);
12070
2.58k
    spacePop(ctxt);
12071
12072
    /* xmlPopInput would free the stream */
12073
2.58k
    inputPop(ctxt);
12074
12075
2.58k
error:
12076
2.58k
    xmlFreeNode(root);
12077
12078
2.58k
    return(list);
12079
2.58k
}
12080
12081
static void
12082
2.64k
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
12083
2.64k
    xmlParserInputPtr input;
12084
2.64k
    xmlNodePtr list;
12085
2.64k
    unsigned long consumed;
12086
2.64k
    int isExternal;
12087
2.64k
    int buildTree;
12088
2.64k
    int oldMinNsIndex;
12089
2.64k
    int oldNodelen, oldNodemem;
12090
12091
2.64k
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
12092
2.64k
    buildTree = (ctxt->node != NULL);
12093
12094
    /*
12095
     * Recursion check
12096
     */
12097
2.64k
    if (ent->flags & XML_ENT_EXPANDING) {
12098
11
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
12099
11
        xmlHaltParser(ctxt);
12100
11
        goto error;
12101
11
    }
12102
12103
    /*
12104
     * Load entity
12105
     */
12106
2.62k
    input = xmlNewEntityInputStream(ctxt, ent);
12107
2.62k
    if (input == NULL)
12108
42
        goto error;
12109
12110
    /*
12111
     * When building a tree, we need to limit the scope of namespace
12112
     * declarations, so that entities don't reference xmlNs structs
12113
     * from the parent of a reference.
12114
     */
12115
2.58k
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
12116
2.58k
    if (buildTree)
12117
2.58k
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
12118
12119
2.58k
    oldNodelen = ctxt->nodelen;
12120
2.58k
    oldNodemem = ctxt->nodemem;
12121
2.58k
    ctxt->nodelen = 0;
12122
2.58k
    ctxt->nodemem = 0;
12123
12124
    /*
12125
     * Parse content
12126
     *
12127
     * This initiates a recursive call chain:
12128
     *
12129
     * - xmlCtxtParseContent
12130
     * - xmlParseContentInternal
12131
     * - xmlParseReference
12132
     * - xmlCtxtParseEntity
12133
     *
12134
     * The nesting depth is limited by the maximum number of inputs,
12135
     * see xmlPushInput.
12136
     *
12137
     * It's possible to make this non-recursive (minNsIndex must be
12138
     * stored in the input struct) at the expense of code readability.
12139
     */
12140
12141
2.58k
    ent->flags |= XML_ENT_EXPANDING;
12142
12143
2.58k
    list = xmlCtxtParseContent(ctxt, input, isExternal, buildTree);
12144
12145
2.58k
    ent->flags &= ~XML_ENT_EXPANDING;
12146
12147
2.58k
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
12148
2.58k
    ctxt->nodelen = oldNodelen;
12149
2.58k
    ctxt->nodemem = oldNodemem;
12150
12151
    /*
12152
     * Entity size accounting
12153
     */
12154
2.58k
    consumed = input->consumed;
12155
2.58k
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
12156
12157
2.58k
    if ((ent->flags & XML_ENT_CHECKED) == 0)
12158
1.11k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
12159
12160
2.58k
    if ((ent->flags & XML_ENT_PARSED) == 0) {
12161
1.11k
        if (isExternal)
12162
320
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
12163
12164
1.11k
        ent->children = list;
12165
12166
2.06k
        while (list != NULL) {
12167
950
            list->parent = (xmlNodePtr) ent;
12168
950
            if (list->next == NULL)
12169
239
                ent->last = list;
12170
950
            list = list->next;
12171
950
        }
12172
1.47k
    } else {
12173
1.47k
        xmlFreeNodeList(list);
12174
1.47k
    }
12175
12176
2.58k
    xmlFreeInputStream(input);
12177
12178
2.64k
error:
12179
2.64k
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
12180
2.64k
}
12181
12182
/**
12183
 * xmlParseCtxtExternalEntity:
12184
 * @ctx:  the existing parsing context
12185
 * @URL:  the URL for the entity to load
12186
 * @ID:  the System ID for the entity to load
12187
 * @lst:  the return value for the set of parsed nodes
12188
 *
12189
 * Parse an external general entity within an existing parsing context
12190
 * An external general parsed entity is well-formed if it matches the
12191
 * production labeled extParsedEnt.
12192
 *
12193
 * [78] extParsedEnt ::= TextDecl? content
12194
 *
12195
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12196
 *    the parser error code otherwise
12197
 */
12198
12199
int
12200
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt, const xmlChar *URL,
12201
0
                           const xmlChar *ID, xmlNodePtr *listOut) {
12202
0
    xmlParserInputPtr input;
12203
0
    xmlNodePtr list;
12204
12205
0
    if (listOut != NULL)
12206
0
        *listOut = NULL;
12207
12208
0
    if (ctxt == NULL)
12209
0
        return(XML_ERR_ARGUMENT);
12210
12211
0
    input = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12212
0
    if (input == NULL)
12213
0
        return(ctxt->errNo);
12214
12215
0
    xmlCtxtInitializeLate(ctxt);
12216
12217
0
    list = xmlCtxtParseContent(ctxt, input, /* hasTextDecl */ 1, 1);
12218
0
    if (*listOut != NULL)
12219
0
        *listOut = list;
12220
0
    else
12221
0
        xmlFreeNodeList(list);
12222
12223
0
    xmlFreeInputStream(input);
12224
0
    return(ctxt->errNo);
12225
0
}
12226
12227
#ifdef LIBXML_SAX1_ENABLED
12228
/**
12229
 * xmlParseExternalEntity:
12230
 * @doc:  the document the chunk pertains to
12231
 * @sax:  the SAX handler block (possibly NULL)
12232
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12233
 * @depth:  Used for loop detection, use 0
12234
 * @URL:  the URL for the entity to load
12235
 * @ID:  the System ID for the entity to load
12236
 * @lst:  the return value for the set of parsed nodes
12237
 *
12238
 * Parse an external general entity
12239
 * An external general parsed entity is well-formed if it matches the
12240
 * production labeled extParsedEnt.
12241
 *
12242
 * [78] extParsedEnt ::= TextDecl? content
12243
 *
12244
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12245
 *    the parser error code otherwise
12246
 */
12247
12248
int
12249
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12250
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
12251
    xmlParserCtxtPtr ctxt;
12252
    int ret;
12253
12254
    if (list != NULL)
12255
        *list = NULL;
12256
12257
    if (doc == NULL)
12258
        return(XML_ERR_ARGUMENT);
12259
12260
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12261
    if (ctxt == NULL)
12262
        return(XML_ERR_NO_MEMORY);
12263
12264
    ctxt->depth = depth;
12265
    ctxt->myDoc = doc;
12266
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
12267
12268
    xmlFreeParserCtxt(ctxt);
12269
    return(ret);
12270
}
12271
12272
/**
12273
 * xmlParseBalancedChunkMemory:
12274
 * @doc:  the document the chunk pertains to (must not be NULL)
12275
 * @sax:  the SAX handler block (possibly NULL)
12276
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12277
 * @depth:  Used for loop detection, use 0
12278
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12279
 * @lst:  the return value for the set of parsed nodes
12280
 *
12281
 * Parse a well-balanced chunk of an XML document
12282
 * called by the parser
12283
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12284
 * the content production in the XML grammar:
12285
 *
12286
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12287
 *
12288
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12289
 *    the parser error code otherwise
12290
 */
12291
12292
int
12293
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12294
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12295
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12296
                                                depth, string, lst, 0 );
12297
}
12298
#endif /* LIBXML_SAX1_ENABLED */
12299
12300
/**
12301
 * xmlParseInNodeContext:
12302
 * @node:  the context node
12303
 * @data:  the input string
12304
 * @datalen:  the input string length in bytes
12305
 * @options:  a combination of xmlParserOption
12306
 * @lst:  the return value for the set of parsed nodes
12307
 *
12308
 * Parse a well-balanced chunk of an XML document
12309
 * within the context (DTD, namespaces, etc ...) of the given node.
12310
 *
12311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12312
 * the content production in the XML grammar:
12313
 *
12314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12315
 *
12316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12317
 * error code otherwise
12318
 */
12319
xmlParserErrors
12320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12321
0
                      int options, xmlNodePtr *lst) {
12322
0
    xmlParserCtxtPtr ctxt;
12323
0
    xmlDocPtr doc = NULL;
12324
0
    xmlNodePtr fake, cur;
12325
0
    int nsnr = 0;
12326
12327
0
    xmlParserErrors ret = XML_ERR_OK;
12328
12329
    /*
12330
     * check all input parameters, grab the document
12331
     */
12332
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12333
0
        return(XML_ERR_ARGUMENT);
12334
0
    switch (node->type) {
12335
0
        case XML_ELEMENT_NODE:
12336
0
        case XML_ATTRIBUTE_NODE:
12337
0
        case XML_TEXT_NODE:
12338
0
        case XML_CDATA_SECTION_NODE:
12339
0
        case XML_ENTITY_REF_NODE:
12340
0
        case XML_PI_NODE:
12341
0
        case XML_COMMENT_NODE:
12342
0
        case XML_DOCUMENT_NODE:
12343
0
        case XML_HTML_DOCUMENT_NODE:
12344
0
      break;
12345
0
  default:
12346
0
      return(XML_ERR_INTERNAL_ERROR);
12347
12348
0
    }
12349
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12350
0
           (node->type != XML_DOCUMENT_NODE) &&
12351
0
     (node->type != XML_HTML_DOCUMENT_NODE))
12352
0
  node = node->parent;
12353
0
    if (node == NULL)
12354
0
  return(XML_ERR_INTERNAL_ERROR);
12355
0
    if (node->type == XML_ELEMENT_NODE)
12356
0
  doc = node->doc;
12357
0
    else
12358
0
        doc = (xmlDocPtr) node;
12359
0
    if (doc == NULL)
12360
0
  return(XML_ERR_INTERNAL_ERROR);
12361
12362
    /*
12363
     * allocate a context and set-up everything not related to the
12364
     * node position in the tree
12365
     */
12366
0
    if (doc->type == XML_DOCUMENT_NODE)
12367
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12368
0
#ifdef LIBXML_HTML_ENABLED
12369
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
12370
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12371
        /*
12372
         * When parsing in context, it makes no sense to add implied
12373
         * elements like html/body/etc...
12374
         */
12375
0
        options |= HTML_PARSE_NOIMPLIED;
12376
0
    }
12377
0
#endif
12378
0
    else
12379
0
        return(XML_ERR_INTERNAL_ERROR);
12380
12381
0
    if (ctxt == NULL)
12382
0
        return(XML_ERR_NO_MEMORY);
12383
12384
    /*
12385
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12386
     * We need a dictionary for xmlCtxtInitializeLate, so if there's no doc dict
12387
     * we must wait until the last moment to free the original one.
12388
     */
12389
0
    if (doc->dict != NULL) {
12390
0
        if (ctxt->dict != NULL)
12391
0
      xmlDictFree(ctxt->dict);
12392
0
  ctxt->dict = doc->dict;
12393
0
    } else {
12394
0
        options |= XML_PARSE_NODICT;
12395
0
        ctxt->dictNames = 0;
12396
0
    }
12397
12398
0
    if (doc->encoding != NULL)
12399
0
        xmlSwitchEncodingName(ctxt, (const char *) doc->encoding);
12400
12401
0
    xmlCtxtUseOptions(ctxt, options);
12402
0
    xmlCtxtInitializeLate(ctxt);
12403
0
    ctxt->myDoc = doc;
12404
    /* parsing in context, i.e. as within existing content */
12405
0
    ctxt->input_id = 2;
12406
12407
    /*
12408
     * TODO: Use xmlCtxtParseContent
12409
     */
12410
12411
0
    fake = xmlNewDocComment(node->doc, NULL);
12412
0
    if (fake == NULL) {
12413
0
        xmlFreeParserCtxt(ctxt);
12414
0
  return(XML_ERR_NO_MEMORY);
12415
0
    }
12416
0
    xmlAddChild(node, fake);
12417
12418
0
    if (node->type == XML_ELEMENT_NODE)
12419
0
  nodePush(ctxt, node);
12420
12421
0
    if ((ctxt->html == 0) && (node->type == XML_ELEMENT_NODE)) {
12422
  /*
12423
   * initialize the SAX2 namespaces stack
12424
   */
12425
0
  cur = node;
12426
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12427
0
      xmlNsPtr ns = cur->nsDef;
12428
0
            xmlHashedString hprefix, huri;
12429
12430
0
      while (ns != NULL) {
12431
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12432
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12433
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12434
0
                    nsnr++;
12435
0
    ns = ns->next;
12436
0
      }
12437
0
      cur = cur->parent;
12438
0
  }
12439
0
    }
12440
12441
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12442
  /*
12443
   * ID/IDREF registration will be done in xmlValidateElement below
12444
   */
12445
0
  ctxt->loadsubset |= XML_SKIP_IDS;
12446
0
    }
12447
12448
0
#ifdef LIBXML_HTML_ENABLED
12449
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
12450
0
        __htmlParseContent(ctxt);
12451
0
    else
12452
0
#endif
12453
0
  xmlParseContentInternal(ctxt);
12454
12455
0
    if (ctxt->input->cur < ctxt->input->end)
12456
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12457
12458
0
    xmlParserNsPop(ctxt, nsnr);
12459
12460
0
    if ((ctxt->wellFormed) ||
12461
0
        ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
12462
0
        ret = XML_ERR_OK;
12463
0
    } else {
12464
0
  ret = (xmlParserErrors) ctxt->errNo;
12465
0
    }
12466
12467
    /*
12468
     * Return the newly created nodeset after unlinking it from
12469
     * the pseudo sibling.
12470
     */
12471
12472
0
    cur = fake->next;
12473
0
    fake->next = NULL;
12474
0
    node->last = fake;
12475
12476
0
    if (cur != NULL) {
12477
0
  cur->prev = NULL;
12478
0
    }
12479
12480
0
    *lst = cur;
12481
12482
0
    while (cur != NULL) {
12483
0
  cur->parent = NULL;
12484
0
  cur = cur->next;
12485
0
    }
12486
12487
0
    xmlUnlinkNode(fake);
12488
0
    xmlFreeNode(fake);
12489
12490
12491
0
    if (ret != XML_ERR_OK) {
12492
0
        xmlFreeNodeList(*lst);
12493
0
  *lst = NULL;
12494
0
    }
12495
12496
0
    if (doc->dict != NULL)
12497
0
        ctxt->dict = NULL;
12498
0
    xmlFreeParserCtxt(ctxt);
12499
12500
0
    return(ret);
12501
0
}
12502
12503
#ifdef LIBXML_SAX1_ENABLED
12504
/**
12505
 * xmlParseBalancedChunkMemoryRecover:
12506
 * @doc:  the document the chunk pertains to (must not be NULL)
12507
 * @sax:  the SAX handler block (possibly NULL)
12508
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12509
 * @depth:  Used for loop detection, use 0
12510
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12511
 * @list:  the return value for the set of parsed nodes
12512
 * @recover: return nodes even if the data is broken (use 0)
12513
 *
12514
 * Parse a well-balanced chunk of an XML document
12515
 *
12516
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12517
 * the content production in the XML grammar:
12518
 *
12519
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12520
 *
12521
 * Returns 0 if the chunk is well balanced, or thehe parser error code
12522
 * otherwise.
12523
 *
12524
 * In case recover is set to 1, the nodelist will not be empty even if
12525
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12526
 * some extent.
12527
 */
12528
int
12529
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12530
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *listOut,
12531
     int recover) {
12532
    xmlParserCtxtPtr ctxt;
12533
    xmlParserInputPtr input;
12534
    xmlNodePtr list;
12535
    int ret;
12536
12537
    if (listOut != NULL)
12538
        *listOut = NULL;
12539
12540
    if (string == NULL)
12541
        return(XML_ERR_ARGUMENT);
12542
12543
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12544
    if (ctxt == NULL)
12545
        return(XML_ERR_NO_MEMORY);
12546
12547
    xmlCtxtInitializeLate(ctxt);
12548
12549
    ctxt->depth = depth;
12550
    ctxt->myDoc = doc;
12551
    if (recover) {
12552
        ctxt->options |= XML_PARSE_RECOVER;
12553
        ctxt->recovery = 1;
12554
    }
12555
12556
    input = xmlNewStringInputStream(ctxt, string);
12557
    if (input == NULL)
12558
        return(ctxt->errNo);
12559
12560
    list = xmlCtxtParseContent(ctxt, input, /* hasTextDecl */ 0, 1);
12561
    if (listOut != NULL)
12562
        *listOut = list;
12563
    else
12564
        xmlFreeNodeList(list);
12565
12566
    ret = ctxt->errNo;
12567
12568
    xmlFreeInputStream(input);
12569
    xmlFreeParserCtxt(ctxt);
12570
    return(ret);
12571
}
12572
12573
/**
12574
 * xmlSAXParseEntity:
12575
 * @sax:  the SAX handler block
12576
 * @filename:  the filename
12577
 *
12578
 * DEPRECATED: Don't use.
12579
 *
12580
 * parse an XML external entity out of context and build a tree.
12581
 * It use the given SAX function block to handle the parsing callback.
12582
 * If sax is NULL, fallback to the default DOM tree building routines.
12583
 *
12584
 * [78] extParsedEnt ::= TextDecl? content
12585
 *
12586
 * This correspond to a "Well Balanced" chunk
12587
 *
12588
 * Returns the resulting document tree
12589
 */
12590
12591
xmlDocPtr
12592
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12593
    xmlDocPtr ret;
12594
    xmlParserCtxtPtr ctxt;
12595
12596
    ctxt = xmlCreateFileParserCtxt(filename);
12597
    if (ctxt == NULL) {
12598
  return(NULL);
12599
    }
12600
    if (sax != NULL) {
12601
        if (sax->initialized == XML_SAX2_MAGIC) {
12602
            *ctxt->sax = *sax;
12603
        } else {
12604
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12605
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12606
        }
12607
        ctxt->userData = NULL;
12608
    }
12609
12610
    xmlParseExtParsedEnt(ctxt);
12611
12612
    if (ctxt->wellFormed) {
12613
  ret = ctxt->myDoc;
12614
    } else {
12615
        ret = NULL;
12616
        xmlFreeDoc(ctxt->myDoc);
12617
    }
12618
12619
    xmlFreeParserCtxt(ctxt);
12620
12621
    return(ret);
12622
}
12623
12624
/**
12625
 * xmlParseEntity:
12626
 * @filename:  the filename
12627
 *
12628
 * parse an XML external entity out of context and build a tree.
12629
 *
12630
 * [78] extParsedEnt ::= TextDecl? content
12631
 *
12632
 * This correspond to a "Well Balanced" chunk
12633
 *
12634
 * Returns the resulting document tree
12635
 */
12636
12637
xmlDocPtr
12638
xmlParseEntity(const char *filename) {
12639
    return(xmlSAXParseEntity(NULL, filename));
12640
}
12641
#endif /* LIBXML_SAX1_ENABLED */
12642
12643
/**
12644
 * xmlCreateEntityParserCtxt:
12645
 * @URL:  the entity URL
12646
 * @ID:  the entity PUBLIC ID
12647
 * @base:  a possible base for the target URI
12648
 *
12649
 * DEPRECATED: Use xmlNewInputURL.
12650
 *
12651
 * Create a parser context for an external entity
12652
 * Automatic support for ZLIB/Compress compressed document is provided
12653
 * by default if found at compile-time.
12654
 *
12655
 * Returns the new parser context or NULL
12656
 */
12657
xmlParserCtxtPtr
12658
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12659
0
                    const xmlChar *base) {
12660
0
    xmlParserCtxtPtr ctxt;
12661
0
    xmlParserInputPtr input;
12662
0
    xmlChar *uri = NULL;
12663
12664
0
    ctxt = xmlNewParserCtxt();
12665
0
    if (ctxt == NULL)
12666
0
  return(NULL);
12667
12668
0
    if (base != NULL) {
12669
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12670
0
            goto error;
12671
0
        if (uri != NULL)
12672
0
            URL = uri;
12673
0
    }
12674
12675
0
    input = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12676
0
    if (input == NULL)
12677
0
        goto error;
12678
12679
0
    if (inputPush(ctxt, input) < 0)
12680
0
        goto error;
12681
12682
0
    xmlFree(uri);
12683
0
    return(ctxt);
12684
12685
0
error:
12686
0
    xmlFree(uri);
12687
0
    xmlFreeParserCtxt(ctxt);
12688
0
    return(NULL);
12689
0
}
12690
12691
/************************************************************************
12692
 *                  *
12693
 *    Front ends when parsing from a file     *
12694
 *                  *
12695
 ************************************************************************/
12696
12697
/**
12698
 * xmlCreateURLParserCtxt:
12699
 * @filename:  the filename or URL
12700
 * @options:  a combination of xmlParserOption
12701
 *
12702
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12703
 *
12704
 * Create a parser context for a file or URL content.
12705
 * Automatic support for ZLIB/Compress compressed document is provided
12706
 * by default if found at compile-time and for file accesses
12707
 *
12708
 * Returns the new parser context or NULL
12709
 */
12710
xmlParserCtxtPtr
12711
xmlCreateURLParserCtxt(const char *filename, int options)
12712
0
{
12713
0
    xmlParserCtxtPtr ctxt;
12714
0
    xmlParserInputPtr input;
12715
12716
0
    ctxt = xmlNewParserCtxt();
12717
0
    if (ctxt == NULL)
12718
0
  return(NULL);
12719
12720
0
    xmlCtxtUseOptions(ctxt, options);
12721
0
    ctxt->linenumbers = 1;
12722
12723
0
    input = xmlLoadExternalEntity(filename, NULL, ctxt);
12724
0
    if (input == NULL) {
12725
0
  xmlFreeParserCtxt(ctxt);
12726
0
  return(NULL);
12727
0
    }
12728
0
    inputPush(ctxt, input);
12729
12730
0
    return(ctxt);
12731
0
}
12732
12733
/**
12734
 * xmlCreateFileParserCtxt:
12735
 * @filename:  the filename
12736
 *
12737
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12738
 *
12739
 * Create a parser context for a file content.
12740
 * Automatic support for ZLIB/Compress compressed document is provided
12741
 * by default if found at compile-time.
12742
 *
12743
 * Returns the new parser context or NULL
12744
 */
12745
xmlParserCtxtPtr
12746
xmlCreateFileParserCtxt(const char *filename)
12747
0
{
12748
0
    return(xmlCreateURLParserCtxt(filename, 0));
12749
0
}
12750
12751
#ifdef LIBXML_SAX1_ENABLED
12752
/**
12753
 * xmlSAXParseFileWithData:
12754
 * @sax:  the SAX handler block
12755
 * @filename:  the filename
12756
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12757
 *             documents
12758
 * @data:  the userdata
12759
 *
12760
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12761
 *
12762
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12763
 * compressed document is provided by default if found at compile-time.
12764
 * It use the given SAX function block to handle the parsing callback.
12765
 * If sax is NULL, fallback to the default DOM tree building routines.
12766
 *
12767
 * User data (void *) is stored within the parser context in the
12768
 * context's _private member, so it is available nearly everywhere in libxml
12769
 *
12770
 * Returns the resulting document tree
12771
 */
12772
12773
xmlDocPtr
12774
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12775
                        int recovery, void *data) {
12776
    xmlDocPtr ret;
12777
    xmlParserCtxtPtr ctxt;
12778
    xmlParserInputPtr input;
12779
12780
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12781
    if (ctxt == NULL)
12782
  return(NULL);
12783
12784
    if (data != NULL)
12785
  ctxt->_private = data;
12786
12787
    if (recovery) {
12788
        ctxt->options |= XML_PARSE_RECOVER;
12789
        ctxt->recovery = 1;
12790
    }
12791
12792
    input = xmlNewInputURL(ctxt, filename, NULL, NULL, 0);
12793
12794
    ret = xmlCtxtParseDocument(ctxt, input);
12795
12796
    xmlFreeParserCtxt(ctxt);
12797
    return(ret);
12798
}
12799
12800
/**
12801
 * xmlSAXParseFile:
12802
 * @sax:  the SAX handler block
12803
 * @filename:  the filename
12804
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12805
 *             documents
12806
 *
12807
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12808
 *
12809
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12810
 * compressed document is provided by default if found at compile-time.
12811
 * It use the given SAX function block to handle the parsing callback.
12812
 * If sax is NULL, fallback to the default DOM tree building routines.
12813
 *
12814
 * Returns the resulting document tree
12815
 */
12816
12817
xmlDocPtr
12818
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12819
                          int recovery) {
12820
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12821
}
12822
12823
/**
12824
 * xmlRecoverDoc:
12825
 * @cur:  a pointer to an array of xmlChar
12826
 *
12827
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
12828
 *
12829
 * parse an XML in-memory document and build a tree.
12830
 * In the case the document is not Well Formed, a attempt to build a
12831
 * tree is tried anyway
12832
 *
12833
 * Returns the resulting document tree or NULL in case of failure
12834
 */
12835
12836
xmlDocPtr
12837
xmlRecoverDoc(const xmlChar *cur) {
12838
    return(xmlSAXParseDoc(NULL, cur, 1));
12839
}
12840
12841
/**
12842
 * xmlParseFile:
12843
 * @filename:  the filename
12844
 *
12845
 * DEPRECATED: Use xmlReadFile.
12846
 *
12847
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12848
 * compressed document is provided by default if found at compile-time.
12849
 *
12850
 * Returns the resulting document tree if the file was wellformed,
12851
 * NULL otherwise.
12852
 */
12853
12854
xmlDocPtr
12855
xmlParseFile(const char *filename) {
12856
    return(xmlSAXParseFile(NULL, filename, 0));
12857
}
12858
12859
/**
12860
 * xmlRecoverFile:
12861
 * @filename:  the filename
12862
 *
12863
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
12864
 *
12865
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12866
 * compressed document is provided by default if found at compile-time.
12867
 * In the case the document is not Well Formed, it attempts to build
12868
 * a tree anyway
12869
 *
12870
 * Returns the resulting document tree or NULL in case of failure
12871
 */
12872
12873
xmlDocPtr
12874
xmlRecoverFile(const char *filename) {
12875
    return(xmlSAXParseFile(NULL, filename, 1));
12876
}
12877
12878
12879
/**
12880
 * xmlSetupParserForBuffer:
12881
 * @ctxt:  an XML parser context
12882
 * @buffer:  a xmlChar * buffer
12883
 * @filename:  a file name
12884
 *
12885
 * DEPRECATED: Don't use.
12886
 *
12887
 * Setup the parser context to parse a new buffer; Clears any prior
12888
 * contents from the parser context. The buffer parameter must not be
12889
 * NULL, but the filename parameter can be
12890
 */
12891
void
12892
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12893
                             const char* filename)
12894
{
12895
    xmlParserInputPtr input;
12896
12897
    if ((ctxt == NULL) || (buffer == NULL))
12898
        return;
12899
12900
    xmlClearParserCtxt(ctxt);
12901
12902
    input = xmlNewInputString(ctxt, filename, (const char *) buffer, NULL, 0);
12903
    if (input == NULL)
12904
        return;
12905
    inputPush(ctxt, input);
12906
}
12907
12908
/**
12909
 * xmlSAXUserParseFile:
12910
 * @sax:  a SAX handler
12911
 * @user_data:  The user data returned on SAX callbacks
12912
 * @filename:  a file name
12913
 *
12914
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12915
 *
12916
 * parse an XML file and call the given SAX handler routines.
12917
 * Automatic support for ZLIB/Compress compressed document is provided
12918
 *
12919
 * Returns 0 in case of success or a error number otherwise
12920
 */
12921
int
12922
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12923
                    const char *filename) {
12924
    int ret = 0;
12925
    xmlParserCtxtPtr ctxt;
12926
12927
    ctxt = xmlCreateFileParserCtxt(filename);
12928
    if (ctxt == NULL) return -1;
12929
    if (sax != NULL) {
12930
        if (sax->initialized == XML_SAX2_MAGIC) {
12931
            *ctxt->sax = *sax;
12932
        } else {
12933
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12934
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12935
        }
12936
  ctxt->userData = user_data;
12937
    }
12938
12939
    xmlParseDocument(ctxt);
12940
12941
    if (ctxt->wellFormed)
12942
  ret = 0;
12943
    else {
12944
        if (ctxt->errNo != 0)
12945
      ret = ctxt->errNo;
12946
  else
12947
      ret = -1;
12948
    }
12949
    if (ctxt->myDoc != NULL) {
12950
        xmlFreeDoc(ctxt->myDoc);
12951
  ctxt->myDoc = NULL;
12952
    }
12953
    xmlFreeParserCtxt(ctxt);
12954
12955
    return ret;
12956
}
12957
#endif /* LIBXML_SAX1_ENABLED */
12958
12959
/************************************************************************
12960
 *                  *
12961
 *    Front ends when parsing from memory     *
12962
 *                  *
12963
 ************************************************************************/
12964
12965
/**
12966
 * xmlCreateMemoryParserCtxt:
12967
 * @buffer:  a pointer to a char array
12968
 * @size:  the size of the array
12969
 *
12970
 * Create a parser context for an XML in-memory document. The input buffer
12971
 * must not contain a terminating null byte.
12972
 *
12973
 * Returns the new parser context or NULL
12974
 */
12975
xmlParserCtxtPtr
12976
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12977
0
    xmlParserCtxtPtr ctxt;
12978
0
    xmlParserInputPtr input;
12979
12980
0
    if (size < 0)
12981
0
  return(NULL);
12982
12983
0
    ctxt = xmlNewParserCtxt();
12984
0
    if (ctxt == NULL)
12985
0
  return(NULL);
12986
12987
0
    input = xmlNewInputMemory(ctxt, NULL, buffer, size, NULL, 0);
12988
0
    if (input == NULL) {
12989
0
  xmlFreeParserCtxt(ctxt);
12990
0
  return(NULL);
12991
0
    }
12992
0
    inputPush(ctxt, input);
12993
12994
0
    return(ctxt);
12995
0
}
12996
12997
#ifdef LIBXML_SAX1_ENABLED
12998
/**
12999
 * xmlSAXParseMemoryWithData:
13000
 * @sax:  the SAX handler block
13001
 * @buffer:  an pointer to a char array
13002
 * @size:  the size of the array
13003
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13004
 *             documents
13005
 * @data:  the userdata
13006
 *
13007
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13008
 *
13009
 * parse an XML in-memory block and use the given SAX function block
13010
 * to handle the parsing callback. If sax is NULL, fallback to the default
13011
 * DOM tree building routines.
13012
 *
13013
 * User data (void *) is stored within the parser context in the
13014
 * context's _private member, so it is available nearly everywhere in libxml
13015
 *
13016
 * Returns the resulting document tree
13017
 */
13018
13019
xmlDocPtr
13020
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13021
                          int size, int recovery, void *data) {
13022
    xmlDocPtr ret;
13023
    xmlParserCtxtPtr ctxt;
13024
    xmlParserInputPtr input;
13025
13026
    if (size < 0)
13027
        return(NULL);
13028
13029
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
13030
    if (ctxt == NULL)
13031
        return(NULL);
13032
13033
    if (data != NULL)
13034
  ctxt->_private=data;
13035
13036
    if (recovery) {
13037
        ctxt->options |= XML_PARSE_RECOVER;
13038
        ctxt->recovery = 1;
13039
    }
13040
13041
    input = xmlNewInputMemory(ctxt, NULL, buffer, size, NULL,
13042
                              XML_INPUT_BUF_STATIC);
13043
13044
    ret = xmlCtxtParseDocument(ctxt, input);
13045
13046
    xmlFreeParserCtxt(ctxt);
13047
    return(ret);
13048
}
13049
13050
/**
13051
 * xmlSAXParseMemory:
13052
 * @sax:  the SAX handler block
13053
 * @buffer:  an pointer to a char array
13054
 * @size:  the size of the array
13055
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13056
 *             documents
13057
 *
13058
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13059
 *
13060
 * parse an XML in-memory block and use the given SAX function block
13061
 * to handle the parsing callback. If sax is NULL, fallback to the default
13062
 * DOM tree building routines.
13063
 *
13064
 * Returns the resulting document tree
13065
 */
13066
xmlDocPtr
13067
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13068
            int size, int recovery) {
13069
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13070
}
13071
13072
/**
13073
 * xmlParseMemory:
13074
 * @buffer:  an pointer to a char array
13075
 * @size:  the size of the array
13076
 *
13077
 * DEPRECATED: Use xmlReadMemory.
13078
 *
13079
 * parse an XML in-memory block and build a tree.
13080
 *
13081
 * Returns the resulting document tree
13082
 */
13083
13084
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13085
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
13086
}
13087
13088
/**
13089
 * xmlRecoverMemory:
13090
 * @buffer:  an pointer to a char array
13091
 * @size:  the size of the array
13092
 *
13093
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13094
 *
13095
 * parse an XML in-memory block and build a tree.
13096
 * In the case the document is not Well Formed, an attempt to
13097
 * build a tree is tried anyway
13098
 *
13099
 * Returns the resulting document tree or NULL in case of error
13100
 */
13101
13102
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13103
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
13104
}
13105
13106
/**
13107
 * xmlSAXUserParseMemory:
13108
 * @sax:  a SAX handler
13109
 * @user_data:  The user data returned on SAX callbacks
13110
 * @buffer:  an in-memory XML document input
13111
 * @size:  the length of the XML document in bytes
13112
 *
13113
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13114
 *
13115
 * parse an XML in-memory buffer and call the given SAX handler routines.
13116
 *
13117
 * Returns 0 in case of success or a error number otherwise
13118
 */
13119
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13120
        const char *buffer, int size) {
13121
    int ret = 0;
13122
    xmlParserCtxtPtr ctxt;
13123
13124
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13125
    if (ctxt == NULL) return -1;
13126
    if (sax != NULL) {
13127
        if (sax->initialized == XML_SAX2_MAGIC) {
13128
            *ctxt->sax = *sax;
13129
        } else {
13130
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
13131
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
13132
        }
13133
  ctxt->userData = user_data;
13134
    }
13135
13136
    xmlParseDocument(ctxt);
13137
13138
    if (ctxt->wellFormed)
13139
  ret = 0;
13140
    else {
13141
        if (ctxt->errNo != 0)
13142
      ret = ctxt->errNo;
13143
  else
13144
      ret = -1;
13145
    }
13146
    if (ctxt->myDoc != NULL) {
13147
        xmlFreeDoc(ctxt->myDoc);
13148
  ctxt->myDoc = NULL;
13149
    }
13150
    xmlFreeParserCtxt(ctxt);
13151
13152
    return ret;
13153
}
13154
#endif /* LIBXML_SAX1_ENABLED */
13155
13156
/**
13157
 * xmlCreateDocParserCtxt:
13158
 * @str:  a pointer to an array of xmlChar
13159
 *
13160
 * Creates a parser context for an XML in-memory document.
13161
 *
13162
 * Returns the new parser context or NULL
13163
 */
13164
xmlParserCtxtPtr
13165
0
xmlCreateDocParserCtxt(const xmlChar *str) {
13166
0
    xmlParserCtxtPtr ctxt;
13167
0
    xmlParserInputPtr input;
13168
13169
0
    ctxt = xmlNewParserCtxt();
13170
0
    if (ctxt == NULL)
13171
0
  return(NULL);
13172
13173
0
    input = xmlNewInputString(ctxt, NULL, (const char *) str, NULL, 0);
13174
0
    if (input == NULL) {
13175
0
  xmlFreeParserCtxt(ctxt);
13176
0
  return(NULL);
13177
0
    }
13178
0
    inputPush(ctxt, input);
13179
13180
0
    return(ctxt);
13181
0
}
13182
13183
#ifdef LIBXML_SAX1_ENABLED
13184
/**
13185
 * xmlSAXParseDoc:
13186
 * @sax:  the SAX handler block
13187
 * @cur:  a pointer to an array of xmlChar
13188
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13189
 *             documents
13190
 *
13191
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13192
 *
13193
 * parse an XML in-memory document and build a tree.
13194
 * It use the given SAX function block to handle the parsing callback.
13195
 * If sax is NULL, fallback to the default DOM tree building routines.
13196
 *
13197
 * Returns the resulting document tree
13198
 */
13199
13200
xmlDocPtr
13201
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13202
    xmlDocPtr ret;
13203
    xmlParserCtxtPtr ctxt;
13204
    xmlSAXHandlerPtr oldsax = NULL;
13205
13206
    if (cur == NULL) return(NULL);
13207
13208
13209
    ctxt = xmlCreateDocParserCtxt(cur);
13210
    if (ctxt == NULL) return(NULL);
13211
    if (sax != NULL) {
13212
        oldsax = ctxt->sax;
13213
        ctxt->sax = sax;
13214
        ctxt->userData = NULL;
13215
    }
13216
13217
    xmlParseDocument(ctxt);
13218
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13219
    else {
13220
       ret = NULL;
13221
       xmlFreeDoc(ctxt->myDoc);
13222
       ctxt->myDoc = NULL;
13223
    }
13224
    if (sax != NULL)
13225
  ctxt->sax = oldsax;
13226
    xmlFreeParserCtxt(ctxt);
13227
13228
    return(ret);
13229
}
13230
13231
/**
13232
 * xmlParseDoc:
13233
 * @cur:  a pointer to an array of xmlChar
13234
 *
13235
 * DEPRECATED: Use xmlReadDoc.
13236
 *
13237
 * parse an XML in-memory document and build a tree.
13238
 *
13239
 * Returns the resulting document tree
13240
 */
13241
13242
xmlDocPtr
13243
xmlParseDoc(const xmlChar *cur) {
13244
    return(xmlSAXParseDoc(NULL, cur, 0));
13245
}
13246
#endif /* LIBXML_SAX1_ENABLED */
13247
13248
/************************************************************************
13249
 *                  *
13250
 *  New set (2.6.0) of simpler and more flexible APIs   *
13251
 *                  *
13252
 ************************************************************************/
13253
13254
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL string is ignored, and with a NULL "dict" the
 * string is always freed.
 *
 * The argument is evaluated more than once, so it must not have side
 * effects. The expansion is a single statement; terminate the
 * invocation with a semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
13265
13266
/**
13267
 * xmlCtxtReset:
13268
 * @ctxt: an XML parser context
13269
 *
13270
 * Reset a parser context
13271
 */
13272
void
13273
xmlCtxtReset(xmlParserCtxtPtr ctxt)
13274
0
{
13275
0
    xmlParserInputPtr input;
13276
0
    xmlDictPtr dict;
13277
13278
0
    if (ctxt == NULL)
13279
0
        return;
13280
13281
0
    dict = ctxt->dict;
13282
13283
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13284
0
        xmlFreeInputStream(input);
13285
0
    }
13286
0
    ctxt->inputNr = 0;
13287
0
    ctxt->input = NULL;
13288
13289
0
    ctxt->spaceNr = 0;
13290
0
    if (ctxt->spaceTab != NULL) {
13291
0
  ctxt->spaceTab[0] = -1;
13292
0
  ctxt->space = &ctxt->spaceTab[0];
13293
0
    } else {
13294
0
        ctxt->space = NULL;
13295
0
    }
13296
13297
13298
0
    ctxt->nodeNr = 0;
13299
0
    ctxt->node = NULL;
13300
13301
0
    ctxt->nameNr = 0;
13302
0
    ctxt->name = NULL;
13303
13304
0
    ctxt->nsNr = 0;
13305
0
    xmlParserNsReset(ctxt->nsdb);
13306
13307
0
    DICT_FREE(ctxt->version);
13308
0
    ctxt->version = NULL;
13309
0
    DICT_FREE(ctxt->encoding);
13310
0
    ctxt->encoding = NULL;
13311
0
    DICT_FREE(ctxt->extSubURI);
13312
0
    ctxt->extSubURI = NULL;
13313
0
    DICT_FREE(ctxt->extSubSystem);
13314
0
    ctxt->extSubSystem = NULL;
13315
0
    if (ctxt->myDoc != NULL)
13316
0
        xmlFreeDoc(ctxt->myDoc);
13317
0
    ctxt->myDoc = NULL;
13318
13319
0
    ctxt->standalone = -1;
13320
0
    ctxt->hasExternalSubset = 0;
13321
0
    ctxt->hasPErefs = 0;
13322
0
    ctxt->html = 0;
13323
0
    ctxt->instate = XML_PARSER_START;
13324
13325
0
    ctxt->wellFormed = 1;
13326
0
    ctxt->nsWellFormed = 1;
13327
0
    ctxt->disableSAX = 0;
13328
0
    ctxt->valid = 1;
13329
#if 0
13330
    ctxt->vctxt.userData = ctxt;
13331
    ctxt->vctxt.error = xmlParserValidityError;
13332
    ctxt->vctxt.warning = xmlParserValidityWarning;
13333
#endif
13334
0
    ctxt->record_info = 0;
13335
0
    ctxt->checkIndex = 0;
13336
0
    ctxt->endCheckState = 0;
13337
0
    ctxt->inSubset = 0;
13338
0
    ctxt->errNo = XML_ERR_OK;
13339
0
    ctxt->depth = 0;
13340
0
    ctxt->catalogs = NULL;
13341
0
    ctxt->sizeentities = 0;
13342
0
    ctxt->sizeentcopy = 0;
13343
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13344
13345
0
    if (ctxt->attsDefault != NULL) {
13346
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13347
0
        ctxt->attsDefault = NULL;
13348
0
    }
13349
0
    if (ctxt->attsSpecial != NULL) {
13350
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13351
0
        ctxt->attsSpecial = NULL;
13352
0
    }
13353
13354
0
#ifdef LIBXML_CATALOG_ENABLED
13355
0
    if (ctxt->catalogs != NULL)
13356
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13357
0
#endif
13358
0
    ctxt->nbErrors = 0;
13359
0
    ctxt->nbWarnings = 0;
13360
0
    if (ctxt->lastError.code != XML_ERR_OK)
13361
0
        xmlResetError(&ctxt->lastError);
13362
0
}
13363
13364
/**
13365
 * xmlCtxtResetPush:
13366
 * @ctxt: an XML parser context
13367
 * @chunk:  a pointer to an array of chars
13368
 * @size:  number of chars in the array
13369
 * @filename:  an optional file name or URI
13370
 * @encoding:  the document encoding, or NULL
13371
 *
13372
 * Reset a push parser context
13373
 *
13374
 * Returns 0 in case of success and 1 in case of error
13375
 */
13376
int
13377
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13378
                 int size, const char *filename, const char *encoding)
13379
0
{
13380
0
    xmlParserInputPtr input;
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(1);
13384
13385
0
    xmlCtxtReset(ctxt);
13386
13387
0
    input = xmlNewInputPush(ctxt, filename, chunk, size, encoding);
13388
0
    if (input == NULL)
13389
0
        return(1);
13390
0
    inputPush(ctxt, input);
13391
13392
0
    return(0);
13393
0
}
13394
13395
static int
13396
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13397
94.0k
{
13398
94.0k
    int allMask;
13399
13400
94.0k
    if (ctxt == NULL)
13401
0
        return(-1);
13402
13403
    /*
13404
     * XInclude options aren't handled by the parser.
13405
     *
13406
     * XML_PARSE_XINCLUDE
13407
     * XML_PARSE_NOXINCNODE
13408
     * XML_PARSE_NOBASEFIX
13409
     */
13410
94.0k
    allMask = XML_PARSE_RECOVER |
13411
94.0k
              XML_PARSE_NOENT |
13412
94.0k
              XML_PARSE_DTDLOAD |
13413
94.0k
              XML_PARSE_DTDATTR |
13414
94.0k
              XML_PARSE_DTDVALID |
13415
94.0k
              XML_PARSE_NOERROR |
13416
94.0k
              XML_PARSE_NOWARNING |
13417
94.0k
              XML_PARSE_PEDANTIC |
13418
94.0k
              XML_PARSE_NOBLANKS |
13419
#ifdef LIBXML_SAX1_ENABLED
13420
              XML_PARSE_SAX1 |
13421
#endif
13422
94.0k
              XML_PARSE_NONET |
13423
94.0k
              XML_PARSE_NODICT |
13424
94.0k
              XML_PARSE_NSCLEAN |
13425
94.0k
              XML_PARSE_NOCDATA |
13426
94.0k
              XML_PARSE_COMPACT |
13427
94.0k
              XML_PARSE_OLD10 |
13428
94.0k
              XML_PARSE_HUGE |
13429
94.0k
              XML_PARSE_OLDSAX |
13430
94.0k
              XML_PARSE_IGNORE_ENC |
13431
94.0k
              XML_PARSE_BIG_LINES |
13432
94.0k
              XML_PARSE_NO_XXE;
13433
13434
94.0k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13435
13436
    /*
13437
     * For some options, struct members are historically the source
13438
     * of truth. The values are initalized from global variables and
13439
     * old code could also modify them directly. Several older API
13440
     * functions that don't take an options argument rely on these
13441
     * deprecated mechanisms.
13442
     *
13443
     * Once public access to struct members and the globals are
13444
     * disabled, we can use the options bitmask as source of
13445
     * truth, making all these struct members obsolete.
13446
     *
13447
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13448
     * loading of the external subset.
13449
     */
13450
94.0k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13451
94.0k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13452
94.0k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13453
94.0k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13454
94.0k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13455
94.0k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13456
94.0k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13457
94.0k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13458
13459
    /*
13460
     * Changing SAX callbacks is a bad idea. This should be fixed.
13461
     */
13462
94.0k
    if (options & XML_PARSE_NOBLANKS) {
13463
0
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13464
0
    }
13465
94.0k
    if (options & XML_PARSE_NOCDATA) {
13466
90.9k
        ctxt->sax->cdataBlock = NULL;
13467
90.9k
    }
13468
94.0k
    if (options & XML_PARSE_HUGE) {
13469
0
        if (ctxt->dict != NULL)
13470
0
            xmlDictSetLimit(ctxt->dict, 0);
13471
0
    }
13472
13473
94.0k
    ctxt->linenumbers = 1;
13474
13475
94.0k
    return(options & ~allMask);
13476
94.0k
}
13477
13478
/**
13479
 * xmlCtxtSetOptions:
13480
 * @ctxt: an XML parser context
13481
 * @options:  a bitmask of xmlParserOption values
13482
 *
13483
 * Applies the options to the parser context. Unset options are
13484
 * cleared.
13485
 *
13486
 * Available since 2.13.0. With older versions, you can use
13487
 * xmlCtxtUseOptions.
13488
 *
13489
 * XML_PARSE_RECOVER
13490
 *
13491
 * Enable "recovery" mode which allows non-wellformed documents.
13492
 * How this mode behaves exactly is unspecified and may change
13493
 * without further notice. Use of this feature is DISCOURAGED.
13494
 *
13495
 * XML_PARSE_NOENT
13496
 *
13497
 * Despite the confusing name, this option enables substitution
13498
 * of entities. The resulting tree won't contain any entity
13499
 * reference nodes.
13500
 *
13501
 * This option also enables loading of external entities (both
13502
 * general and parameter entities) which is dangerous. If you
13503
 * process untrusted data, it's recommended to set the
13504
 * XML_PARSE_NO_XXE option to disable loading of external
13505
 * entities.
13506
 *
13507
 * XML_PARSE_DTDLOAD
13508
 *
13509
 * Enables loading of an external DTD and the loading and
13510
 * substitution of external parameter entities. Has no effect
13511
 * if XML_PARSE_NO_XXE is set.
13512
 *
13513
 * XML_PARSE_DTDATTR
13514
 *
13515
 * Adds default attributes from the DTD to the result document.
13516
 *
13517
 * Implies XML_PARSE_DTDLOAD, but loading of external content
13518
 * can be disabled with XML_PARSE_NO_XXE.
13519
 *
13520
 * XML_PARSE_DTDVALID
13521
 *
13522
 * This option enables DTD validation, which requires loading
13523
 * external DTDs and external entities (both general and
13524
 * parameter entities) unless XML_PARSE_NO_XXE was set.
13525
 *
13526
 * XML_PARSE_NO_XXE
13527
 *
13528
 * Disables loading of external DTDs or entities.
13529
 *
13530
 * XML_PARSE_NOERROR
13531
 *
13532
 * Disable error and warning reports to the error handlers.
13533
 * Errors are still accessible with xmlCtxtGetLastError.
13534
 *
13535
 * XML_PARSE_NOWARNING
13536
 *
13537
 * Disable warning reports.
13538
 *
13539
 * XML_PARSE_PEDANTIC
13540
 *
13541
 * Enable some pedantic warnings.
13542
 *
13543
 * XML_PARSE_NOBLANKS
13544
 *
13545
 * Remove some text nodes containing only whitespace from the
13546
 * result document. Which nodes are removed depends on DTD
13547
 * element declarations or a conservative heuristic. The
13548
 * reindenting feature of the serialization code relies on this
13549
 * option to be set when parsing. Use of this option is
13550
 * DISCOURAGED.
13551
 *
13552
 * XML_PARSE_SAX1
13553
 *
13554
 * Always invoke the deprecated SAX1 startElement and endElement
13555
 * handlers. This option is DEPRECATED.
13556
 *
13557
 * XML_PARSE_NONET
13558
 *
13559
 * Disable network access with the builtin HTTP and FTP clients.
13560
 *
13561
 * XML_PARSE_NODICT
13562
 *
13563
 * Create a document without interned strings, making all
13564
 * strings separate memory allocations.
13565
 *
13566
 * XML_PARSE_NSCLEAN
13567
 *
13568
 * Remove redundant namespace declarations from the result
13569
 * document.
13570
 *
13571
 * XML_PARSE_NOCDATA
13572
 *
13573
 * Output normal text nodes instead of CDATA nodes.
13574
 *
13575
 * XML_PARSE_COMPACT
13576
 *
13577
 * Store small strings directly in the node struct to save
13578
 * memory.
13579
 *
13580
 * XML_PARSE_OLD10
13581
 *
13582
 * Use old Name productions from before XML 1.0 Fifth Edition.
13583
 * This option is DEPRECATED.
13584
 *
13585
 * XML_PARSE_HUGE
13586
 *
13587
 * Relax some internal limits.
13588
 *
13589
 * Maximum size of text nodes, tags, comments, processing instructions,
13590
 * CDATA sections, entity values
13591
 *
13592
 * normal: 10M
13593
 * huge:    1B
13594
 *
13595
 * Maximum size of names, system literals, pubid literals
13596
 *
13597
 * normal: 50K
13598
 * huge:   10M
13599
 *
13600
 * Maximum nesting depth of elements
13601
 *
13602
 * normal:  256
13603
 * huge:   2048
13604
 *
13605
 * Maximum nesting depth of entities
13606
 *
13607
 * normal: 20
13608
 * huge:   40
13609
 *
13610
 * XML_PARSE_OLDSAX
13611
 *
13612
 * Enable an unspecified legacy mode for SAX parsers. This
13613
 * option is DEPRECATED.
13614
 *
13615
 * XML_PARSE_IGNORE_ENC
13616
 *
13617
 * Ignore the encoding in the XML declaration. This option is
13618
 * mostly unneeded these days. The only effect is to enforce
13619
 * UTF-8 decoding of ASCII-like data.
13620
 *
13621
 * XML_PARSE_BIG_LINES
13622
 *
13623
 * Enable reporting of line numbers larger than 65535.
13624
 *
13625
 * Returns 0 in case of success, the set of unknown or unimplemented options
13626
 *         in case of error.
13627
 */
13628
int
13629
xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
13630
0
{
13631
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13632
0
}
13633
13634
/**
13635
 * xmlCtxtUseOptions:
13636
 * @ctxt: an XML parser context
13637
 * @options:  a combination of xmlParserOption
13638
 *
13639
 * DEPRECATED: Use xmlCtxtSetOptions.
13640
 *
13641
 * Applies the options to the parser context. The following options
13642
 * are never cleared and can only be enabled:
13643
 *
13644
 * XML_PARSE_NOERROR
13645
 * XML_PARSE_NOWARNING
13646
 * XML_PARSE_NONET
13647
 * XML_PARSE_NSCLEAN
13648
 * XML_PARSE_NOCDATA
13649
 * XML_PARSE_COMPACT
13650
 * XML_PARSE_OLD10
13651
 * XML_PARSE_HUGE
13652
 * XML_PARSE_OLDSAX
13653
 * XML_PARSE_IGNORE_ENC
13654
 * XML_PARSE_BIG_LINES
13655
 *
13656
 * Returns 0 in case of success, the set of unknown or unimplemented options
13657
 *         in case of error.
13658
 */
13659
int
13660
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13661
94.0k
{
13662
94.0k
    int keepMask;
13663
13664
    /*
13665
     * For historic reasons, some options can only be enabled.
13666
     */
13667
94.0k
    keepMask = XML_PARSE_NOERROR |
13668
94.0k
               XML_PARSE_NOWARNING |
13669
94.0k
               XML_PARSE_NONET |
13670
94.0k
               XML_PARSE_NSCLEAN |
13671
94.0k
               XML_PARSE_NOCDATA |
13672
94.0k
               XML_PARSE_COMPACT |
13673
94.0k
               XML_PARSE_OLD10 |
13674
94.0k
               XML_PARSE_HUGE |
13675
94.0k
               XML_PARSE_OLDSAX |
13676
94.0k
               XML_PARSE_IGNORE_ENC |
13677
94.0k
               XML_PARSE_BIG_LINES;
13678
13679
94.0k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13680
94.0k
}
13681
13682
/**
13683
 * xmlCtxtSetMaxAmplification:
13684
 * @ctxt: an XML parser context
13685
 * @maxAmpl:  maximum amplification factor
13686
 *
13687
 * To protect against exponential entity expansion ("billion laughs"), the
13688
 * size of serialized output is (roughly) limited to the input size
13689
 * multiplied by this factor. The default value is 5.
13690
 *
13691
 * When working with documents making heavy use of entity expansion, it can
13692
 * be necessary to increase the value. For security reasons, this should only
13693
 * be considered when processing trusted input.
13694
 */
13695
void
13696
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
13697
0
{
13698
0
    ctxt->maxAmpl = maxAmpl;
13699
0
}
13700
13701
/**
13702
 * xmlCtxtParseDocument:
13703
 * @ctxt:  an XML parser context
13704
 * @input:  parser input
13705
 *
13706
 * Parse an XML document and return the resulting document tree.
13707
 * Takes ownership of the input object.
13708
 *
13709
 * Returns the resulting document tree or NULL
13710
 */
13711
xmlDocPtr
13712
xmlCtxtParseDocument(xmlParserCtxtPtr ctxt, xmlParserInputPtr input)
13713
20.4k
{
13714
20.4k
    xmlDocPtr ret = NULL;
13715
13716
20.4k
    if ((ctxt == NULL) || (input == NULL))
13717
4
        return(NULL);
13718
13719
    /* assert(ctxt->inputNr == 0); */
13720
20.4k
    while (ctxt->inputNr > 0)
13721
0
        xmlFreeInputStream(inputPop(ctxt));
13722
13723
20.4k
    if (inputPush(ctxt, input) < 0) {
13724
0
        xmlFreeInputStream(input);
13725
0
        return(NULL);
13726
0
    }
13727
13728
20.4k
    xmlParseDocument(ctxt);
13729
13730
20.4k
    if ((ctxt->wellFormed) ||
13731
20.4k
        ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
13732
16.5k
        ret = ctxt->myDoc;
13733
16.5k
    } else {
13734
3.89k
        if (ctxt->errNo == XML_ERR_OK)
13735
0
            xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, "unknown error\n");
13736
13737
3.89k
        ret = NULL;
13738
3.89k
  xmlFreeDoc(ctxt->myDoc);
13739
3.89k
    }
13740
20.4k
    ctxt->myDoc = NULL;
13741
13742
    /* assert(ctxt->inputNr == 1); */
13743
40.8k
    while (ctxt->inputNr > 0)
13744
20.4k
        xmlFreeInputStream(inputPop(ctxt));
13745
13746
20.4k
    return(ret);
13747
20.4k
}
13748
13749
/**
13750
 * xmlReadDoc:
13751
 * @cur:  a pointer to a zero terminated string
13752
 * @URL:  base URL (optional)
13753
 * @encoding:  the document encoding (optional)
13754
 * @options:  a combination of xmlParserOption
13755
 *
13756
 * Convenience function to parse an XML document from a
13757
 * zero-terminated string.
13758
 *
13759
 * See xmlCtxtReadDoc for details.
13760
 *
13761
 * Returns the resulting document tree
13762
 */
13763
xmlDocPtr
13764
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13765
           int options)
13766
0
{
13767
0
    xmlParserCtxtPtr ctxt;
13768
0
    xmlParserInputPtr input;
13769
0
    xmlDocPtr doc;
13770
13771
0
    ctxt = xmlNewParserCtxt();
13772
0
    if (ctxt == NULL)
13773
0
        return(NULL);
13774
13775
0
    xmlCtxtUseOptions(ctxt, options);
13776
13777
0
    input = xmlNewInputString(ctxt, URL, (const char *) cur, encoding,
13778
0
                              XML_INPUT_BUF_STATIC);
13779
13780
0
    doc = xmlCtxtParseDocument(ctxt, input);
13781
13782
0
    xmlFreeParserCtxt(ctxt);
13783
0
    return(doc);
13784
0
}
13785
13786
/**
13787
 * xmlReadFile:
13788
 * @filename:  a file or URL
13789
 * @encoding:  the document encoding (optional)
13790
 * @options:  a combination of xmlParserOption
13791
 *
13792
 * Convenience function to parse an XML file from the filesystem,
13793
 * the network or a global user-define resource loader.
13794
 *
13795
 * See xmlCtxtReadFile for details.
13796
 *
13797
 * Returns the resulting document tree
13798
 */
13799
xmlDocPtr
13800
xmlReadFile(const char *filename, const char *encoding, int options)
13801
0
{
13802
0
    xmlParserCtxtPtr ctxt;
13803
0
    xmlParserInputPtr input;
13804
0
    xmlDocPtr doc;
13805
13806
0
    ctxt = xmlNewParserCtxt();
13807
0
    if (ctxt == NULL)
13808
0
        return(NULL);
13809
13810
0
    xmlCtxtUseOptions(ctxt, options);
13811
13812
0
    input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0);
13813
13814
0
    doc = xmlCtxtParseDocument(ctxt, input);
13815
13816
0
    xmlFreeParserCtxt(ctxt);
13817
0
    return(doc);
13818
0
}
13819
13820
/**
13821
 * xmlReadMemory:
13822
 * @buffer:  a pointer to a char array
13823
 * @size:  the size of the array
13824
 * @url:  base URL (optional)
13825
 * @encoding:  the document encoding (optional)
13826
 * @options:  a combination of xmlParserOption
13827
 *
13828
 * Parse an XML in-memory document and build a tree. The input buffer must
13829
 * not contain a terminating null byte.
13830
 *
13831
 * See xmlCtxtReadMemory for details.
13832
 *
13833
 * Returns the resulting document tree
13834
 */
13835
xmlDocPtr
13836
xmlReadMemory(const char *buffer, int size, const char *url,
13837
              const char *encoding, int options)
13838
20.4k
{
13839
20.4k
    xmlParserCtxtPtr ctxt;
13840
20.4k
    xmlParserInputPtr input;
13841
20.4k
    xmlDocPtr doc;
13842
13843
20.4k
    if (size < 0)
13844
0
  return(NULL);
13845
13846
20.4k
    ctxt = xmlNewParserCtxt();
13847
20.4k
    if (ctxt == NULL)
13848
0
        return(NULL);
13849
13850
20.4k
    xmlCtxtUseOptions(ctxt, options);
13851
13852
20.4k
    input = xmlNewInputMemory(ctxt, url, buffer, size, encoding,
13853
20.4k
                              XML_INPUT_BUF_STATIC);
13854
13855
20.4k
    doc = xmlCtxtParseDocument(ctxt, input);
13856
13857
20.4k
    xmlFreeParserCtxt(ctxt);
13858
20.4k
    return(doc);
13859
20.4k
}
13860
13861
/**
13862
 * xmlReadFd:
13863
 * @fd:  an open file descriptor
13864
 * @URL:  base URL (optional)
13865
 * @encoding:  the document encoding (optional)
13866
 * @options:  a combination of xmlParserOption
13867
 *
13868
 * Parse an XML from a file descriptor and build a tree.
13869
 *
13870
 * See xmlCtxtReadFd for details.
13871
 *
13872
 * NOTE that the file descriptor will not be closed when the
13873
 * context is freed or reset.
13874
 *
13875
 * Returns the resulting document tree
13876
 */
13877
xmlDocPtr
13878
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13879
0
{
13880
0
    xmlParserCtxtPtr ctxt;
13881
0
    xmlParserInputPtr input;
13882
0
    xmlDocPtr doc;
13883
13884
0
    ctxt = xmlNewParserCtxt();
13885
0
    if (ctxt == NULL)
13886
0
        return(NULL);
13887
13888
0
    xmlCtxtUseOptions(ctxt, options);
13889
13890
0
    input = xmlNewInputFd(ctxt, URL, fd, encoding, 0);
13891
0
    input->buf->closecallback = NULL;
13892
13893
0
    doc = xmlCtxtParseDocument(ctxt, input);
13894
13895
0
    xmlFreeParserCtxt(ctxt);
13896
0
    return(doc);
13897
0
}
13898
13899
/**
13900
 * xmlReadIO:
13901
 * @ioread:  an I/O read function
13902
 * @ioclose:  an I/O close function (optional)
13903
 * @ioctx:  an I/O handler
13904
 * @URL:  base URL (optional)
13905
 * @encoding:  the document encoding (optional)
13906
 * @options:  a combination of xmlParserOption
13907
 *
13908
 * Parse an XML document from I/O functions and context and build a tree.
13909
 *
13910
 * See xmlCtxtReadIO for details.
13911
 *
13912
 * Returns the resulting document tree
13913
 */
13914
xmlDocPtr
13915
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13916
          void *ioctx, const char *URL, const char *encoding, int options)
13917
0
{
13918
0
    xmlParserCtxtPtr ctxt;
13919
0
    xmlParserInputPtr input;
13920
0
    xmlDocPtr doc;
13921
13922
0
    ctxt = xmlNewParserCtxt();
13923
0
    if (ctxt == NULL)
13924
0
        return(NULL);
13925
13926
0
    xmlCtxtUseOptions(ctxt, options);
13927
13928
0
    input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0);
13929
13930
0
    doc = xmlCtxtParseDocument(ctxt, input);
13931
13932
0
    xmlFreeParserCtxt(ctxt);
13933
0
    return(doc);
13934
0
}
13935
13936
/**
13937
 * xmlCtxtReadDoc:
13938
 * @ctxt:  an XML parser context
13939
 * @str:  a pointer to a zero terminated string
13940
 * @URL:  base URL (optional)
13941
 * @encoding:  the document encoding (optional)
13942
 * @options:  a combination of xmlParserOption
13943
 *
13944
 * Parse an XML in-memory document and build a tree.
13945
 *
13946
 * @URL is used as base to resolve external entities and for error
13947
 * reporting.
13948
 *
13949
 * See xmlCtxtUseOptions for details.
13950
 *
13951
 * Returns the resulting document tree
13952
 */
13953
xmlDocPtr
13954
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
13955
               const char *URL, const char *encoding, int options)
13956
0
{
13957
0
    xmlParserInputPtr input;
13958
13959
0
    if (ctxt == NULL)
13960
0
        return(NULL);
13961
13962
0
    xmlCtxtReset(ctxt);
13963
0
    xmlCtxtUseOptions(ctxt, options);
13964
13965
0
    input = xmlNewInputString(ctxt, URL, (const char *) str, encoding,
13966
0
                              XML_INPUT_BUF_STATIC);
13967
13968
0
    return(xmlCtxtParseDocument(ctxt, input));
13969
0
}
13970
13971
/**
13972
 * xmlCtxtReadFile:
13973
 * @ctxt:  an XML parser context
13974
 * @filename:  a file or URL
13975
 * @encoding:  the document encoding (optional)
13976
 * @options:  a combination of xmlParserOption
13977
 *
13978
 * Parse an XML file from the filesystem, the network or a user-defined
13979
 * resource loader.
13980
 *
13981
 * See xmlNewInputURL and xmlCtxtUseOptions for details.
13982
 *
13983
 * Returns the resulting document tree
13984
 */
13985
xmlDocPtr
13986
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13987
                const char *encoding, int options)
13988
0
{
13989
0
    xmlParserInputPtr input;
13990
13991
0
    if (ctxt == NULL)
13992
0
        return(NULL);
13993
13994
0
    xmlCtxtReset(ctxt);
13995
0
    xmlCtxtUseOptions(ctxt, options);
13996
13997
0
    input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0);
13998
13999
0
    return(xmlCtxtParseDocument(ctxt, input));
14000
0
}
14001
14002
/**
14003
 * xmlCtxtReadMemory:
14004
 * @ctxt:  an XML parser context
14005
 * @buffer:  a pointer to a char array
14006
 * @size:  the size of the array
14007
 * @URL:  base URL (optional)
14008
 * @encoding:  the document encoding (optional)
14009
 * @options:  a combination of xmlParserOption
14010
 *
14011
 * Parse an XML in-memory document and build a tree. The input buffer must
14012
 * not contain a terminating null byte.
14013
 *
14014
 * @URL is used as base to resolve external entities and for error
14015
 * reporting.
14016
 *
14017
 * See xmlCtxtUseOptions for details.
14018
 *
14019
 * Returns the resulting document tree
14020
 */
14021
xmlDocPtr
14022
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14023
                  const char *URL, const char *encoding, int options)
14024
0
{
14025
0
    xmlParserInputPtr input;
14026
14027
0
    if ((ctxt == NULL) || (size < 0))
14028
0
        return(NULL);
14029
14030
0
    xmlCtxtReset(ctxt);
14031
0
    xmlCtxtUseOptions(ctxt, options);
14032
14033
0
    input = xmlNewInputMemory(ctxt, URL, buffer, size, encoding,
14034
0
                              XML_INPUT_BUF_STATIC);
14035
14036
0
    return(xmlCtxtParseDocument(ctxt, input));
14037
0
}
14038
14039
/**
14040
 * xmlCtxtReadFd:
14041
 * @ctxt:  an XML parser context
14042
 * @fd:  an open file descriptor
14043
 * @URL:  base URL (optional)
14044
 * @encoding:  the document encoding (optional)
14045
 * @options:  a combination of xmlParserOption
14046
 *
14047
 * Parse an XML document from a file descriptor and build a tree.
14048
 *
14049
 * NOTE that the file descriptor will not be closed when the
14050
 * context is freed or reset.
14051
 *
14052
 * @URL is used as base to resolve external entities and for error
14053
 * reporting.
14054
 *
14055
 * See xmlCtxtUseOptions for details.
14056
 *
14057
 * Returns the resulting document tree
14058
 */
14059
xmlDocPtr
14060
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14061
              const char *URL, const char *encoding, int options)
14062
0
{
14063
0
    xmlParserInputPtr input;
14064
14065
0
    if (ctxt == NULL)
14066
0
        return(NULL);
14067
14068
0
    xmlCtxtReset(ctxt);
14069
0
    xmlCtxtUseOptions(ctxt, options);
14070
14071
0
    input = xmlNewInputFd(ctxt, URL, fd, encoding, 0);
14072
0
    input->buf->closecallback = NULL;
14073
14074
0
    return(xmlCtxtParseDocument(ctxt, input));
14075
0
}
14076
14077
/**
14078
 * xmlCtxtReadIO:
14079
 * @ctxt:  an XML parser context
14080
 * @ioread:  an I/O read function
14081
 * @ioclose:  an I/O close function
14082
 * @ioctx:  an I/O handler
14083
 * @URL:  the base URL to use for the document
14084
 * @encoding:  the document encoding, or NULL
14085
 * @options:  a combination of xmlParserOption
14086
 *
14087
 * parse an XML document from I/O functions and source and build a tree.
14088
 * This reuses the existing @ctxt parser context
14089
 *
14090
 * @URL is used as base to resolve external entities and for error
14091
 * reporting.
14092
 *
14093
 * See xmlCtxtUseOptions for details.
14094
 *
14095
 * Returns the resulting document tree
14096
 */
14097
xmlDocPtr
14098
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14099
              xmlInputCloseCallback ioclose, void *ioctx,
14100
        const char *URL,
14101
              const char *encoding, int options)
14102
0
{
14103
0
    xmlParserInputPtr input;
14104
14105
0
    if (ctxt == NULL)
14106
0
        return(NULL);
14107
14108
0
    xmlCtxtReset(ctxt);
14109
0
    xmlCtxtUseOptions(ctxt, options);
14110
14111
0
    input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0);
14112
14113
0
    return(xmlCtxtParseDocument(ctxt, input));
14114
0
}
14115