Coverage Report

Created: 2024-02-25 06:11

/src/libprotobuf-mutator/build/examples/libxml2/external.libxml2/src/external.libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#ifdef LIBXML_CATALOG_ENABLED
66
#include <libxml/catalog.h>
67
#endif
68
69
#include "private/buf.h"
70
#include "private/dict.h"
71
#include "private/entities.h"
72
#include "private/error.h"
73
#include "private/html.h"
74
#include "private/io.h"
75
#include "private/parser.h"
76
77
45.4k
#define NS_INDEX_EMPTY  INT_MAX
78
24.2k
#define NS_INDEX_XML    (INT_MAX - 1)
79
13.0k
#define URI_HASH_EMPTY  0xD943A04E
80
1.83k
#define URI_HASH_XML    0xF0451F02
81
82
struct _xmlStartTag {
83
    const xmlChar *prefix;
84
    const xmlChar *URI;
85
    int line;
86
    int nsNr;
87
};
88
89
/*
 * Extra per-namespace data.
 * NOTE(review): users of this struct are outside this excerpt; presumably
 * one entry per namespace binding - confirm against the namespace table
 * code.
 */
typedef struct {
    void *saxData;
    unsigned prefixHashValue;
    unsigned uriHashValue;
    unsigned elementId;
    int oldIndex;
} xmlParserNsExtra;
96
97
/*
 * A slot pairing a hash value with an index.
 * NOTE(review): presumably a slot of the xmlParserNsData hash table,
 * which holds a pointer to this type - confirm against the lookup code.
 */
typedef struct {
    unsigned hashValue;
    int index;
} xmlParserNsBucket;
101
102
struct _xmlParserNsData {
103
    xmlParserNsExtra *extra;
104
105
    unsigned hashSize;
106
    unsigned hashElems;
107
    xmlParserNsBucket *hash;
108
109
    unsigned elementId;
110
    int defaultNsIndex;
111
    int minNsIndex;
112
};
113
114
/*
 * Single-field bucket holding an index.
 * NOTE(review): presumably a slot of an attribute hash table - the code
 * using it is outside this excerpt, confirm.
 */
struct _xmlAttrHashBucket {
    int index;
};
117
118
static int
119
xmlParseElementStart(xmlParserCtxtPtr ctxt);
120
121
static void
122
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
123
124
static xmlEntityPtr
125
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
126
127
static const xmlChar *
128
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
129
130
/************************************************************************
131
 *                  *
132
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
133
 *                  *
134
 ************************************************************************/
135
136
#define XML_PARSER_BIG_ENTITY 1000
137
#define XML_PARSER_LOT_ENTITY 5000
138
139
/*
140
 * Constants for protection against abusive entity expansion
141
 * ("billion laughs").
142
 */
143
144
/*
145
 * A certain amount of entity expansion which is always allowed.
146
 */
147
130k
#define XML_PARSER_ALLOWED_EXPANSION 1000000
148
149
/*
150
 * Fixed cost for each entity reference. This crudely models processing time
151
 * as well to protect, for example, against exponential expansion of empty
152
 * or very short entities.
153
 */
154
134k
#define XML_ENT_FIXED_COST 20
155
156
/**
157
 * xmlParserMaxDepth:
158
 *
159
 * arbitrary depth limit for the XML documents that we allow to
160
 * process. This is not a limitation of the parser but a safety
161
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
162
 * parser option.
163
 */
164
const unsigned int xmlParserMaxDepth = 256;
165
166
167
168
58.7k
#define XML_PARSER_BIG_BUFFER_SIZE 300
169
56.8k
#define XML_PARSER_BUFFER_SIZE 100
170
23.4k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
171
172
/**
173
 * XML_PARSER_CHUNK_SIZE
174
 *
175
 * When calling GROW that's the minimal amount of data
176
 * the parser expects to have received. It is not a hard
177
 * limit but an optimization when reading strings like Names
178
 * It is not strictly needed as long as the input's available characters
179
 * are followed by 0, which should be provided by the I/O level
180
 */
181
#define XML_PARSER_CHUNK_SIZE 100
182
183
/**
184
 * xmlParserVersion:
185
 *
186
 * Constant string describing the internal version of the library
187
 */
188
const char *const
189
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
190
191
/*
192
 * List of XML prefixed PI allowed by W3C specs
193
 */
194
195
static const char* const xmlW3CPIs[] = {
196
    "xml-stylesheet",
197
    "xml-model",
198
    NULL
199
};
200
201
202
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
203
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
204
                                              const xmlChar **str);
205
206
static void
207
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
208
209
static int
210
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
211
212
/************************************************************************
213
 *                  *
214
 *    Some factorized error routines        *
215
 *                  *
216
 ************************************************************************/
217
218
static void
219
0
xmlErrMemory(xmlParserCtxtPtr ctxt) {
220
0
    xmlCtxtErrMemory(ctxt);
221
0
}
222
223
/**
224
 * xmlErrAttributeDup:
225
 * @ctxt:  an XML parser context
226
 * @prefix:  the attribute prefix
227
 * @localname:  the attribute localname
228
 *
229
 * Handle a redefinition of attribute error
230
 */
231
static void
232
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
233
                   const xmlChar * localname)
234
4.84k
{
235
4.84k
    if (prefix == NULL)
236
4.59k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
237
4.59k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
238
4.59k
                   "Attribute %s redefined\n", localname);
239
243
    else
240
243
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
243
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
242
243
                   "Attribute %s:%s redefined\n", prefix, localname);
243
4.84k
}
244
245
/**
246
 * xmlFatalErrMsg:
247
 * @ctxt:  an XML parser context
248
 * @error:  the error number
249
 * @msg:  the error message
250
 *
251
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
252
 */
253
static void LIBXML_ATTR_FORMAT(3,0)
254
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
255
               const char *msg)
256
288k
{
257
288k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
258
288k
               NULL, NULL, NULL, 0, "%s", msg);
259
288k
}
260
261
/**
262
 * xmlWarningMsg:
263
 * @ctxt:  an XML parser context
264
 * @error:  the error number
265
 * @msg:  the error message
266
 * @str1:  extra data
267
 * @str2:  extra data
268
 *
269
 * Handle a warning.
270
 */
271
void LIBXML_ATTR_FORMAT(3,0)
272
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
273
              const char *msg, const xmlChar *str1, const xmlChar *str2)
274
4.75k
{
275
4.75k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
276
4.75k
               str1, str2, NULL, 0, msg, str1, str2);
277
4.75k
}
278
279
/**
280
 * xmlValidityError:
281
 * @ctxt:  an XML parser context
282
 * @error:  the error number
283
 * @msg:  the error message
284
 * @str1:  extra data
285
 *
286
 * Handle a validity error.
287
 */
288
static void LIBXML_ATTR_FORMAT(3,0)
289
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
290
              const char *msg, const xmlChar *str1, const xmlChar *str2)
291
1.57k
{
292
1.57k
    ctxt->valid = 0;
293
294
1.57k
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
295
1.57k
               str1, str2, NULL, 0, msg, str1, str2);
296
1.57k
}
297
298
/**
299
 * xmlFatalErrMsgInt:
300
 * @ctxt:  an XML parser context
301
 * @error:  the error number
302
 * @msg:  the error message
303
 * @val:  an integer value
304
 *
305
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
306
 */
307
static void LIBXML_ATTR_FORMAT(3,0)
308
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
309
                  const char *msg, int val)
310
11.2k
{
311
11.2k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
312
11.2k
               NULL, NULL, NULL, val, msg, val);
313
11.2k
}
314
315
/**
316
 * xmlFatalErrMsgStrIntStr:
317
 * @ctxt:  an XML parser context
318
 * @error:  the error number
319
 * @msg:  the error message
320
 * @str1:  an string info
321
 * @val:  an integer value
322
 * @str2:  an string info
323
 *
324
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
325
 */
326
static void LIBXML_ATTR_FORMAT(3,0)
327
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
328
                  const char *msg, const xmlChar *str1, int val,
329
      const xmlChar *str2)
330
107k
{
331
107k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
332
107k
               str1, str2, NULL, val, msg, str1, val, str2);
333
107k
}
334
335
/**
336
 * xmlFatalErrMsgStr:
337
 * @ctxt:  an XML parser context
338
 * @error:  the error number
339
 * @msg:  the error message
340
 * @val:  a string value
341
 *
342
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
343
 */
344
static void LIBXML_ATTR_FORMAT(3,0)
345
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
346
                  const char *msg, const xmlChar * val)
347
77.2k
{
348
77.2k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
349
77.2k
               val, NULL, NULL, 0, msg, val);
350
77.2k
}
351
352
/**
353
 * xmlErrMsgStr:
354
 * @ctxt:  an XML parser context
355
 * @error:  the error number
356
 * @msg:  the error message
357
 * @val:  a string value
358
 *
359
 * Handle a non fatal parser error
360
 */
361
static void LIBXML_ATTR_FORMAT(3,0)
362
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363
                  const char *msg, const xmlChar * val)
364
13.9k
{
365
13.9k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366
13.9k
               val, NULL, NULL, 0, msg, val);
367
13.9k
}
368
369
/**
370
 * xmlNsErr:
371
 * @ctxt:  an XML parser context
372
 * @error:  the error number
373
 * @msg:  the message
374
 * @info1:  extra information string
375
 * @info2:  extra information string
376
 *
377
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378
 */
379
static void LIBXML_ATTR_FORMAT(3,0)
380
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381
         const char *msg,
382
         const xmlChar * info1, const xmlChar * info2,
383
         const xmlChar * info3)
384
40.4k
{
385
40.4k
    ctxt->nsWellFormed = 0;
386
387
40.4k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388
40.4k
               info1, info2, info3, 0, msg, info1, info2, info3);
389
40.4k
}
390
391
/**
392
 * xmlNsWarn
393
 * @ctxt:  an XML parser context
394
 * @error:  the error number
395
 * @msg:  the message
396
 * @info1:  extra information string
397
 * @info2:  extra information string
398
 *
399
 * Handle a namespace warning error
400
 */
401
static void LIBXML_ATTR_FORMAT(3,0)
402
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403
         const char *msg,
404
         const xmlChar * info1, const xmlChar * info2,
405
         const xmlChar * info3)
406
770
{
407
770
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408
770
               info1, info2, info3, 0, msg, info1, info2, info3);
409
770
}
410
411
/*
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * letting the unsigned addition wrap around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without wrapping. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
418
419
/*
 * Add a size_t quantity @val to the unsigned long counter *@dst,
 * clamping the result to ULONG_MAX.
 *
 * @val is taken as size_t so that, on platforms where size_t is wider
 * than unsigned long (e.g. LLP64), a value above ULONG_MAX saturates
 * the counter instead of being truncated before the check.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        /* val <= ULONG_MAX - *dst here, so the narrowing cast is exact. */
        *dst += (unsigned long) val;
}
426
427
/**
428
 * xmlParserEntityCheck:
429
 * @ctxt:  parser context
430
 * @extra:  sum of unexpanded entity sizes
431
 *
432
 * Check for non-linear entity expansion behaviour.
433
 *
434
 * In some cases like xmlExpandEntityInAttValue, this function is called
435
 * for each, possibly nested entity and its unexpanded content length.
436
 *
437
 * In other cases like xmlParseReference, it's only called for each
438
 * top-level entity with its unexpanded content length plus the sum of
439
 * the unexpanded content lengths (plus fixed cost) of all nested
440
 * entities.
441
 *
442
 * Summing the unexpanded lengths also adds the length of the reference.
443
 * This is by design. Taking the length of the entity name into account
444
 * discourages attacks that try to waste CPU time with abusively long
445
 * entity names. See test/recurse/lol6.xml for example. Each call also
446
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
447
 * short entities.
448
 *
449
 * Returns 1 on error, 0 on success.
450
 */
451
static int
452
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
453
173k
{
454
173k
    unsigned long consumed;
455
173k
    unsigned long *expandedSize;
456
173k
    xmlParserInputPtr input = ctxt->input;
457
173k
    xmlEntityPtr entity = input->entity;
458
459
173k
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
460
43.5k
        return(0);
461
462
    /*
463
     * Compute total consumed bytes so far, including input streams of
464
     * external entities.
465
     */
466
130k
    consumed = input->consumed;
467
130k
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
468
130k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
469
470
130k
    if (entity)
471
4.05k
        expandedSize = &entity->expandedSize;
472
126k
    else
473
126k
        expandedSize = &ctxt->sizeentcopy;
474
475
    /*
476
     * Add extra cost and some fixed cost.
477
     */
478
130k
    xmlSaturatedAdd(expandedSize, extra);
479
130k
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
480
481
    /*
482
     * It's important to always use saturation arithmetic when tracking
483
     * entity sizes to make the size checks reliable. If "sizeentcopy"
484
     * overflows, we have to abort.
485
     */
486
130k
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
487
130k
        ((*expandedSize >= ULONG_MAX) ||
488
16
         (*expandedSize / ctxt->maxAmpl > consumed))) {
489
16
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
490
16
                       "Maximum entity amplification factor exceeded, see "
491
16
                       "xmlCtxtSetMaxAmplification.\n");
492
16
        xmlHaltParser(ctxt);
493
16
        return(1);
494
16
    }
495
496
130k
    return(0);
497
130k
}
498
499
/************************************************************************
500
 *                  *
501
 *    Library wide options          *
502
 *                  *
503
 ************************************************************************/
504
505
/**
506
  * xmlHasFeature:
507
  * @feature: the feature to be examined
508
  *
509
  * Examines if the library has been compiled with a given feature.
510
  *
511
  * Returns a non-zero value if the feature exist, otherwise zero.
512
  * Returns zero (0) if the feature does not exist or an unknown
513
  * unknown feature is requested, non-zero otherwise.
514
  */
515
int
516
xmlHasFeature(xmlFeature feature)
517
0
{
518
0
    switch (feature) {
519
0
  case XML_WITH_THREAD:
520
0
#ifdef LIBXML_THREAD_ENABLED
521
0
      return(1);
522
#else
523
      return(0);
524
#endif
525
0
        case XML_WITH_TREE:
526
0
#ifdef LIBXML_TREE_ENABLED
527
0
            return(1);
528
#else
529
            return(0);
530
#endif
531
0
        case XML_WITH_OUTPUT:
532
0
#ifdef LIBXML_OUTPUT_ENABLED
533
0
            return(1);
534
#else
535
            return(0);
536
#endif
537
0
        case XML_WITH_PUSH:
538
0
#ifdef LIBXML_PUSH_ENABLED
539
0
            return(1);
540
#else
541
            return(0);
542
#endif
543
0
        case XML_WITH_READER:
544
0
#ifdef LIBXML_READER_ENABLED
545
0
            return(1);
546
#else
547
            return(0);
548
#endif
549
0
        case XML_WITH_PATTERN:
550
0
#ifdef LIBXML_PATTERN_ENABLED
551
0
            return(1);
552
#else
553
            return(0);
554
#endif
555
0
        case XML_WITH_WRITER:
556
0
#ifdef LIBXML_WRITER_ENABLED
557
0
            return(1);
558
#else
559
            return(0);
560
#endif
561
0
        case XML_WITH_SAX1:
562
0
#ifdef LIBXML_SAX1_ENABLED
563
0
            return(1);
564
#else
565
            return(0);
566
#endif
567
0
        case XML_WITH_FTP:
568
#ifdef LIBXML_FTP_ENABLED
569
            return(1);
570
#else
571
0
            return(0);
572
0
#endif
573
0
        case XML_WITH_HTTP:
574
0
#ifdef LIBXML_HTTP_ENABLED
575
0
            return(1);
576
#else
577
            return(0);
578
#endif
579
0
        case XML_WITH_VALID:
580
0
#ifdef LIBXML_VALID_ENABLED
581
0
            return(1);
582
#else
583
            return(0);
584
#endif
585
0
        case XML_WITH_HTML:
586
0
#ifdef LIBXML_HTML_ENABLED
587
0
            return(1);
588
#else
589
            return(0);
590
#endif
591
0
        case XML_WITH_LEGACY:
592
#ifdef LIBXML_LEGACY_ENABLED
593
            return(1);
594
#else
595
0
            return(0);
596
0
#endif
597
0
        case XML_WITH_C14N:
598
0
#ifdef LIBXML_C14N_ENABLED
599
0
            return(1);
600
#else
601
            return(0);
602
#endif
603
0
        case XML_WITH_CATALOG:
604
0
#ifdef LIBXML_CATALOG_ENABLED
605
0
            return(1);
606
#else
607
            return(0);
608
#endif
609
0
        case XML_WITH_XPATH:
610
0
#ifdef LIBXML_XPATH_ENABLED
611
0
            return(1);
612
#else
613
            return(0);
614
#endif
615
0
        case XML_WITH_XPTR:
616
0
#ifdef LIBXML_XPTR_ENABLED
617
0
            return(1);
618
#else
619
            return(0);
620
#endif
621
0
        case XML_WITH_XINCLUDE:
622
0
#ifdef LIBXML_XINCLUDE_ENABLED
623
0
            return(1);
624
#else
625
            return(0);
626
#endif
627
0
        case XML_WITH_ICONV:
628
0
#ifdef LIBXML_ICONV_ENABLED
629
0
            return(1);
630
#else
631
            return(0);
632
#endif
633
0
        case XML_WITH_ISO8859X:
634
0
#ifdef LIBXML_ISO8859X_ENABLED
635
0
            return(1);
636
#else
637
            return(0);
638
#endif
639
0
        case XML_WITH_UNICODE:
640
0
#ifdef LIBXML_UNICODE_ENABLED
641
0
            return(1);
642
#else
643
            return(0);
644
#endif
645
0
        case XML_WITH_REGEXP:
646
0
#ifdef LIBXML_REGEXP_ENABLED
647
0
            return(1);
648
#else
649
            return(0);
650
#endif
651
0
        case XML_WITH_AUTOMATA:
652
0
#ifdef LIBXML_AUTOMATA_ENABLED
653
0
            return(1);
654
#else
655
            return(0);
656
#endif
657
0
        case XML_WITH_EXPR:
658
#ifdef LIBXML_EXPR_ENABLED
659
            return(1);
660
#else
661
0
            return(0);
662
0
#endif
663
0
        case XML_WITH_SCHEMAS:
664
0
#ifdef LIBXML_SCHEMAS_ENABLED
665
0
            return(1);
666
#else
667
            return(0);
668
#endif
669
0
        case XML_WITH_SCHEMATRON:
670
0
#ifdef LIBXML_SCHEMATRON_ENABLED
671
0
            return(1);
672
#else
673
            return(0);
674
#endif
675
0
        case XML_WITH_MODULES:
676
0
#ifdef LIBXML_MODULES_ENABLED
677
0
            return(1);
678
#else
679
            return(0);
680
#endif
681
0
        case XML_WITH_DEBUG:
682
0
#ifdef LIBXML_DEBUG_ENABLED
683
0
            return(1);
684
#else
685
            return(0);
686
#endif
687
0
        case XML_WITH_DEBUG_MEM:
688
#ifdef DEBUG_MEMORY_LOCATION
689
            return(1);
690
#else
691
0
            return(0);
692
0
#endif
693
0
        case XML_WITH_ZLIB:
694
0
#ifdef LIBXML_ZLIB_ENABLED
695
0
            return(1);
696
#else
697
            return(0);
698
#endif
699
0
        case XML_WITH_LZMA:
700
0
#ifdef LIBXML_LZMA_ENABLED
701
0
            return(1);
702
#else
703
            return(0);
704
#endif
705
0
        case XML_WITH_ICU:
706
#ifdef LIBXML_ICU_ENABLED
707
            return(1);
708
#else
709
0
            return(0);
710
0
#endif
711
0
        default:
712
0
      break;
713
0
     }
714
0
     return(0);
715
0
}
716
717
/************************************************************************
718
 *                  *
719
 *      Simple string buffer        *
720
 *                  *
721
 ************************************************************************/
722
723
typedef struct {
724
    xmlChar *mem;
725
    unsigned size;
726
    unsigned cap; /* size < cap */
727
    unsigned max; /* size <= max */
728
    xmlParserErrors code;
729
} xmlSBuf;
730
731
static void
732
104k
xmlSBufInit(xmlSBuf *buf, unsigned max) {
733
104k
    buf->mem = NULL;
734
104k
    buf->size = 0;
735
104k
    buf->cap = 0;
736
104k
    buf->max = max;
737
104k
    buf->code = XML_ERR_OK;
738
104k
}
739
740
static int
741
75.7k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
742
75.7k
    xmlChar *mem;
743
75.7k
    unsigned cap;
744
745
75.7k
    if (len >= UINT_MAX / 2 - buf->size) {
746
0
        buf->code = XML_ERR_RESOURCE_LIMIT;
747
0
        return(-1);
748
0
    }
749
750
75.7k
    cap = (buf->size + len) * 2;
751
75.7k
    if (cap < 240)
752
68.4k
        cap = 240;
753
754
75.7k
    mem = xmlRealloc(buf->mem, cap);
755
75.7k
    if (mem == NULL) {
756
0
        buf->code = XML_ERR_NO_MEMORY;
757
0
        return(-1);
758
0
    }
759
760
75.7k
    buf->mem = mem;
761
75.7k
    buf->cap = cap;
762
763
75.7k
    return(0);
764
75.7k
}
765
766
static void
767
878k
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
768
878k
    if (buf->max - buf->size < len) {
769
0
        buf->code = XML_ERR_RESOURCE_LIMIT;
770
0
        return;
771
0
    }
772
773
878k
    if (buf->cap - buf->size <= len) {
774
73.5k
        if (xmlSBufGrow(buf, len) < 0)
775
0
            return;
776
73.5k
    }
777
778
878k
    if (len > 0)
779
878k
        memcpy(buf->mem + buf->size, str, len);
780
878k
    buf->size += len;
781
878k
}
782
783
static void
784
286k
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
785
286k
    xmlSBufAddString(buf, (const xmlChar *) str, len);
786
286k
}
787
788
static void
789
26.2k
xmlSBufAddChar(xmlSBuf *buf, int c) {
790
26.2k
    xmlChar *end;
791
792
26.2k
    if (buf->max - buf->size < 4) {
793
0
        buf->code = XML_ERR_RESOURCE_LIMIT;
794
0
        return;
795
0
    }
796
797
26.2k
    if (buf->cap - buf->size <= 4) {
798
2.20k
        if (xmlSBufGrow(buf, 4) < 0)
799
0
            return;
800
2.20k
    }
801
802
26.2k
    end = buf->mem + buf->size;
803
804
26.2k
    if (c < 0x80) {
805
18.6k
        *end = (xmlChar) c;
806
18.6k
        buf->size += 1;
807
18.6k
    } else {
808
7.66k
        buf->size += xmlCopyCharMultiByte(end, c);
809
7.66k
    }
810
26.2k
}
811
812
static void
813
21.0k
xmlSBufAddReplChar(xmlSBuf *buf) {
814
21.0k
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
815
21.0k
}
816
817
static void
818
0
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
819
0
    if (buf->code == XML_ERR_NO_MEMORY)
820
0
        xmlCtxtErrMemory(ctxt);
821
0
    else
822
0
        xmlFatalErr(ctxt, buf->code, errMsg);
823
0
}
824
825
static xmlChar *
826
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
827
75.7k
              const char *errMsg) {
828
75.7k
    if (buf->mem == NULL) {
829
10.6k
        buf->mem = xmlMalloc(1);
830
10.6k
        if (buf->mem == NULL) {
831
0
            buf->code = XML_ERR_NO_MEMORY;
832
10.6k
        } else {
833
10.6k
            buf->mem[0] = 0;
834
10.6k
        }
835
65.0k
    } else {
836
65.0k
        buf->mem[buf->size] = 0;
837
65.0k
    }
838
839
75.7k
    if (buf->code == XML_ERR_OK) {
840
75.7k
        if (sizeOut != NULL)
841
2.08k
            *sizeOut = buf->size;
842
75.7k
        return(buf->mem);
843
75.7k
    }
844
845
0
    xmlSBufReportError(buf, ctxt, errMsg);
846
847
0
    xmlFree(buf->mem);
848
849
0
    if (sizeOut != NULL)
850
0
        *sizeOut = 0;
851
0
    return(NULL);
852
75.7k
}
853
854
static void
855
24.2k
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
856
24.2k
    if (buf->code != XML_ERR_OK)
857
0
        xmlSBufReportError(buf, ctxt, errMsg);
858
859
24.2k
    xmlFree(buf->mem);
860
24.2k
}
861
862
static int
863
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
864
216k
                    const char *errMsg) {
865
216k
    int c = str[0];
866
216k
    int c1 = str[1];
867
868
216k
    if ((c1 & 0xC0) != 0x80)
869
7.33k
        goto encoding_error;
870
871
209k
    if (c < 0xE0) {
872
        /* 2-byte sequence */
873
14.8k
        if (c < 0xC2)
874
6.32k
            goto encoding_error;
875
876
8.56k
        return(2);
877
194k
    } else {
878
194k
        int c2 = str[2];
879
880
194k
        if ((c2 & 0xC0) != 0x80)
881
145
            goto encoding_error;
882
883
193k
        if (c < 0xF0) {
884
            /* 3-byte sequence */
885
191k
            if (c == 0xE0) {
886
                /* overlong */
887
122k
                if (c1 < 0xA0)
888
88
                    goto encoding_error;
889
122k
            } else if (c == 0xED) {
890
                /* surrogate */
891
274
                if (c1 >= 0xA0)
892
46
                    goto encoding_error;
893
69.0k
            } else if (c == 0xEF) {
894
                /* U+FFFE and U+FFFF are invalid Chars */
895
64.7k
                if ((c1 == 0xBF) && (c2 >= 0xBE))
896
246
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
897
64.7k
            }
898
899
191k
            return(3);
900
191k
        } else {
901
            /* 4-byte sequence */
902
2.06k
            if ((str[3] & 0xC0) != 0x80)
903
73
                goto encoding_error;
904
1.98k
            if (c == 0xF0) {
905
                /* overlong */
906
395
                if (c1 < 0x90)
907
74
                    goto encoding_error;
908
1.59k
            } else if (c >= 0xF4) {
909
                /* greater than 0x10FFFF */
910
543
                if ((c > 0xF4) || (c1 >= 0x90))
911
152
                    goto encoding_error;
912
543
            }
913
914
1.76k
            return(4);
915
1.98k
        }
916
193k
    }
917
918
14.2k
encoding_error:
919
    /* Only report the first error */
920
14.2k
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
921
814
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
922
814
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
923
814
    }
924
925
14.2k
    return(0);
926
209k
}
927
928
/************************************************************************
929
 *                  *
930
 *    SAX2 defaulted attributes handling      *
931
 *                  *
932
 ************************************************************************/
933
934
/**
935
 * xmlCtxtInitializeLate:
936
 * @ctxt:  an XML parser context
937
 *
938
 * Final initialization of the parser context before starting to parse.
939
 *
940
 * This accounts for users modifying struct members of parser context
941
 * directly.
942
 */
943
static void
944
18.1k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
945
18.1k
    xmlSAXHandlerPtr sax;
946
947
    /* Avoid unused variable warning if features are disabled. */
948
18.1k
    (void) sax;
949
950
    /*
951
     * Changing the SAX struct directly is still widespread practice
952
     * in internal and external code.
953
     */
954
18.1k
    if (ctxt == NULL) return;
955
18.1k
    sax = ctxt->sax;
956
18.1k
#ifdef LIBXML_SAX1_ENABLED
957
    /*
958
     * Only enable SAX2 if there SAX2 element handlers, except when there
959
     * are no element handlers at all.
960
     */
961
18.1k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
962
18.1k
        (sax) &&
963
18.1k
        (sax->initialized == XML_SAX2_MAGIC) &&
964
18.1k
        ((sax->startElementNs != NULL) ||
965
14.3k
         (sax->endElementNs != NULL) ||
966
14.3k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
967
14.3k
        ctxt->sax2 = 1;
968
#else
969
    ctxt->sax2 = 1;
970
#endif /* LIBXML_SAX1_ENABLED */
971
972
    /*
973
     * Some users replace the dictionary directly in the context struct.
974
     * We really need an API function to do that cleanly.
975
     */
976
18.1k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
977
18.1k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
978
18.1k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
979
18.1k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
980
18.1k
    (ctxt->str_xml_ns == NULL)) {
981
0
        xmlErrMemory(ctxt);
982
0
    }
983
18.1k
}
984
985
typedef struct {
986
    xmlHashedString prefix;
987
    xmlHashedString name;
988
    xmlHashedString value;
989
    const xmlChar *valueEnd;
990
    int external;
991
    int expandedSize;
992
} xmlDefAttr;
993
994
typedef struct _xmlDefAttrs xmlDefAttrs;
995
typedef xmlDefAttrs *xmlDefAttrsPtr;
996
struct _xmlDefAttrs {
997
    int nbAttrs;  /* number of defaulted attributes on that element */
998
    int maxAttrs;       /* the size of the array */
999
#if __STDC_VERSION__ >= 199901L
1000
    /* Using a C99 flexible array member avoids UBSan errors. */
1001
    xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
1002
#else
1003
    xmlDefAttr attrs[1];
1004
#endif
1005
};
1006
1007
/**
1008
 * xmlAttrNormalizeSpace:
1009
 * @src: the source string
1010
 * @dst: the target string
1011
 *
1012
 * Normalize the space in non CDATA attribute values:
1013
 * If the attribute type is not CDATA, then the XML processor MUST further
1014
 * process the normalized attribute value by discarding any leading and
1015
 * trailing space (#x20) characters, and by replacing sequences of space
1016
 * (#x20) characters by a single space (#x20) character.
1017
 * Note that the size of dst need to be at least src, and if one doesn't need
1018
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1019
 * passing src as dst is just fine.
1020
 *
1021
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1022
 *         is needed.
1023
 */
1024
static xmlChar *
1025
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1026
19.6k
{
1027
19.6k
    if ((src == NULL) || (dst == NULL))
1028
0
        return(NULL);
1029
1030
27.3k
    while (*src == 0x20) src++;
1031
161k
    while (*src != 0) {
1032
141k
  if (*src == 0x20) {
1033
25.3k
      while (*src == 0x20) src++;
1034
12.0k
      if (*src != 0)
1035
10.7k
    *dst++ = 0x20;
1036
129k
  } else {
1037
129k
      *dst++ = *src++;
1038
129k
  }
1039
141k
    }
1040
19.6k
    *dst = 0;
1041
19.6k
    if (dst == src)
1042
11.6k
       return(NULL);
1043
7.99k
    return(dst);
1044
19.6k
}
1045
1046
/**
1047
 * xmlAddDefAttrs:
1048
 * @ctxt:  an XML parser context
1049
 * @fullname:  the element fullname
1050
 * @fullattr:  the attribute fullname
1051
 * @value:  the attribute value
1052
 *
1053
 * Add a defaulted attribute for an element
1054
 */
1055
static void
1056
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1057
               const xmlChar *fullname,
1058
               const xmlChar *fullattr,
1059
16.8k
               const xmlChar *value) {
1060
16.8k
    xmlDefAttrsPtr defaults;
1061
16.8k
    xmlDefAttr *attr;
1062
16.8k
    int len, expandedSize;
1063
16.8k
    xmlHashedString name;
1064
16.8k
    xmlHashedString prefix;
1065
16.8k
    xmlHashedString hvalue;
1066
16.8k
    const xmlChar *localname;
1067
1068
    /*
1069
     * Allows to detect attribute redefinitions
1070
     */
1071
16.8k
    if (ctxt->attsSpecial != NULL) {
1072
14.8k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1073
12.4k
      return;
1074
14.8k
    }
1075
1076
4.38k
    if (ctxt->attsDefault == NULL) {
1077
2.01k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1078
2.01k
  if (ctxt->attsDefault == NULL)
1079
0
      goto mem_error;
1080
2.01k
    }
1081
1082
    /*
1083
     * split the element name into prefix:localname , the string found
1084
     * are within the DTD and then not associated to namespace names.
1085
     */
1086
4.38k
    localname = xmlSplitQName3(fullname, &len);
1087
4.38k
    if (localname == NULL) {
1088
4.21k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1089
4.21k
  prefix.name = NULL;
1090
4.21k
    } else {
1091
166
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1092
166
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1093
166
        if (prefix.name == NULL)
1094
0
            goto mem_error;
1095
166
    }
1096
4.38k
    if (name.name == NULL)
1097
0
        goto mem_error;
1098
1099
    /*
1100
     * make sure there is some storage
1101
     */
1102
4.38k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1103
4.38k
    if ((defaults == NULL) ||
1104
4.38k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1105
2.36k
        xmlDefAttrsPtr temp;
1106
2.36k
        int newSize;
1107
1108
2.36k
        newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1109
2.36k
        temp = xmlRealloc(defaults,
1110
2.36k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1111
2.36k
  if (temp == NULL)
1112
0
      goto mem_error;
1113
2.36k
        if (defaults == NULL)
1114
2.07k
            temp->nbAttrs = 0;
1115
2.36k
  temp->maxAttrs = newSize;
1116
2.36k
        defaults = temp;
1117
2.36k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1118
2.36k
                          defaults, NULL) < 0) {
1119
0
      xmlFree(defaults);
1120
0
      goto mem_error;
1121
0
  }
1122
2.36k
    }
1123
1124
    /*
1125
     * Split the attribute name into prefix:localname , the string found
1126
     * are within the DTD and hen not associated to namespace names.
1127
     */
1128
4.38k
    localname = xmlSplitQName3(fullattr, &len);
1129
4.38k
    if (localname == NULL) {
1130
3.12k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1131
3.12k
  prefix.name = NULL;
1132
3.12k
    } else {
1133
1.25k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1134
1.25k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1135
1.25k
        if (prefix.name == NULL)
1136
0
            goto mem_error;
1137
1.25k
    }
1138
4.38k
    if (name.name == NULL)
1139
0
        goto mem_error;
1140
1141
    /* intern the string and precompute the end */
1142
4.38k
    len = strlen((const char *) value);
1143
4.38k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1144
4.38k
    if (hvalue.name == NULL)
1145
0
        goto mem_error;
1146
1147
4.38k
    expandedSize = strlen((const char *) name.name);
1148
4.38k
    if (prefix.name != NULL)
1149
1.25k
        expandedSize += strlen((const char *) prefix.name);
1150
4.38k
    expandedSize += len;
1151
1152
4.38k
    attr = &defaults->attrs[defaults->nbAttrs++];
1153
4.38k
    attr->name = name;
1154
4.38k
    attr->prefix = prefix;
1155
4.38k
    attr->value = hvalue;
1156
4.38k
    attr->valueEnd = hvalue.name + len;
1157
4.38k
    attr->external = PARSER_EXTERNAL(ctxt);
1158
4.38k
    attr->expandedSize = expandedSize;
1159
1160
4.38k
    return;
1161
1162
0
mem_error:
1163
0
    xmlErrMemory(ctxt);
1164
0
    return;
1165
4.38k
}
1166
1167
/**
1168
 * xmlAddSpecialAttr:
1169
 * @ctxt:  an XML parser context
1170
 * @fullname:  the element fullname
1171
 * @fullattr:  the attribute fullname
1172
 * @type:  the attribute type
1173
 *
1174
 * Register this attribute type
1175
 */
1176
static void
1177
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1178
      const xmlChar *fullname,
1179
      const xmlChar *fullattr,
1180
      int type)
1181
26.8k
{
1182
26.8k
    if (ctxt->attsSpecial == NULL) {
1183
2.43k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1184
2.43k
  if (ctxt->attsSpecial == NULL)
1185
0
      goto mem_error;
1186
2.43k
    }
1187
1188
26.8k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1189
26.8k
                    (void *) (ptrdiff_t) type) < 0)
1190
0
        goto mem_error;
1191
26.8k
    return;
1192
1193
26.8k
mem_error:
1194
0
    xmlErrMemory(ctxt);
1195
0
    return;
1196
26.8k
}
1197
1198
/**
1199
 * xmlCleanSpecialAttrCallback:
1200
 *
1201
 * Removes CDATA attributes from the special attribute table
1202
 */
1203
static void
1204
xmlCleanSpecialAttrCallback(void *payload, void *data,
1205
                            const xmlChar *fullname, const xmlChar *fullattr,
1206
4.93k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1207
4.93k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1208
1209
4.93k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1210
674
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1211
674
    }
1212
4.93k
}
1213
1214
/**
1215
 * xmlCleanSpecialAttr:
1216
 * @ctxt:  an XML parser context
1217
 *
1218
 * Trim the list of attributes defined to remove all those of type
1219
 * CDATA as they are not special. This call should be done when finishing
1220
 * to parse the DTD and before starting to parse the document root.
1221
 */
1222
static void
1223
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1224
8.79k
{
1225
8.79k
    if (ctxt->attsSpecial == NULL)
1226
6.36k
        return;
1227
1228
2.43k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1229
1230
2.43k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1231
200
        xmlHashFree(ctxt->attsSpecial, NULL);
1232
200
        ctxt->attsSpecial = NULL;
1233
200
    }
1234
2.43k
    return;
1235
8.79k
}
1236
1237
/**
1238
 * xmlCheckLanguageID:
1239
 * @lang:  pointer to the string value
1240
 *
1241
 * DEPRECATED: Internal function, do not use.
1242
 *
1243
 * Checks that the value conforms to the LanguageID production:
1244
 *
1245
 * NOTE: this is somewhat deprecated, those productions were removed from
1246
 *       the XML Second edition.
1247
 *
1248
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1249
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1250
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1251
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1252
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1253
 * [38] Subcode ::= ([a-z] | [A-Z])+
1254
 *
1255
 * The current REC reference the successors of RFC 1766, currently 5646
1256
 *
1257
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1258
 * langtag       = language
1259
 *                 ["-" script]
1260
 *                 ["-" region]
1261
 *                 *("-" variant)
1262
 *                 *("-" extension)
1263
 *                 ["-" privateuse]
1264
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1265
 *                 ["-" extlang]       ; sometimes followed by
1266
 *                                     ; extended language subtags
1267
 *               / 4ALPHA              ; or reserved for future use
1268
 *               / 5*8ALPHA            ; or registered language subtag
1269
 *
1270
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1271
 *                 *2("-" 3ALPHA)      ; permanently reserved
1272
 *
1273
 * script        = 4ALPHA              ; ISO 15924 code
1274
 *
1275
 * region        = 2ALPHA              ; ISO 3166-1 code
1276
 *               / 3DIGIT              ; UN M.49 code
1277
 *
1278
 * variant       = 5*8alphanum         ; registered variants
1279
 *               / (DIGIT 3alphanum)
1280
 *
1281
 * extension     = singleton 1*("-" (2*8alphanum))
1282
 *
1283
 *                                     ; Single alphanumerics
1284
 *                                     ; "x" reserved for private use
1285
 * singleton     = DIGIT               ; 0 - 9
1286
 *               / %x41-57             ; A - W
1287
 *               / %x59-5A             ; Y - Z
1288
 *               / %x61-77             ; a - w
1289
 *               / %x79-7A             ; y - z
1290
 *
1291
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1292
 * The parser below doesn't try to cope with extension or privateuse
1293
 * that could be added but that's not interoperable anyway
1294
 *
1295
 * Returns 1 if correct 0 otherwise
1296
 **/
1297
int
1298
xmlCheckLanguageID(const xmlChar * lang)
1299
2.11k
{
1300
2.11k
    const xmlChar *cur = lang, *nxt;
1301
1302
2.11k
    if (cur == NULL)
1303
87
        return (0);
1304
2.02k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1305
2.02k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1306
2.02k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1307
2.02k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1308
        /*
1309
         * Still allow IANA code and user code which were coming
1310
         * from the previous version of the XML-1.0 specification
1311
         * it's deprecated but we should not fail
1312
         */
1313
146
        cur += 2;
1314
628
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1315
628
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1316
482
            cur++;
1317
146
        return(cur[0] == 0);
1318
146
    }
1319
1.88k
    nxt = cur;
1320
6.84k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1321
6.84k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1322
4.96k
           nxt++;
1323
1.88k
    if (nxt - cur >= 4) {
1324
        /*
1325
         * Reserved
1326
         */
1327
151
        if ((nxt - cur > 8) || (nxt[0] != 0))
1328
116
            return(0);
1329
35
        return(1);
1330
151
    }
1331
1.73k
    if (nxt - cur < 2)
1332
188
        return(0);
1333
    /* we got an ISO 639 code */
1334
1.54k
    if (nxt[0] == 0)
1335
41
        return(1);
1336
1.50k
    if (nxt[0] != '-')
1337
106
        return(0);
1338
1339
1.39k
    nxt++;
1340
1.39k
    cur = nxt;
1341
    /* now we can have extlang or script or region or variant */
1342
1.39k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1343
248
        goto region_m49;
1344
1345
5.63k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1346
5.63k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1347
4.48k
           nxt++;
1348
1.14k
    if (nxt - cur == 4)
1349
385
        goto script;
1350
763
    if (nxt - cur == 2)
1351
147
        goto region;
1352
616
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1353
105
        goto variant;
1354
511
    if (nxt - cur != 3)
1355
93
        return(0);
1356
    /* we parsed an extlang */
1357
418
    if (nxt[0] == 0)
1358
97
        return(1);
1359
321
    if (nxt[0] != '-')
1360
66
        return(0);
1361
1362
255
    nxt++;
1363
255
    cur = nxt;
1364
    /* now we can have script or region or variant */
1365
255
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1366
50
        goto region_m49;
1367
1368
1.38k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1369
1.38k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1370
1.18k
           nxt++;
1371
205
    if (nxt - cur == 2)
1372
38
        goto region;
1373
167
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1374
29
        goto variant;
1375
138
    if (nxt - cur != 4)
1376
102
        return(0);
1377
    /* we parsed a script */
1378
421
script:
1379
421
    if (nxt[0] == 0)
1380
71
        return(1);
1381
350
    if (nxt[0] != '-')
1382
98
        return(0);
1383
1384
252
    nxt++;
1385
252
    cur = nxt;
1386
    /* now we can have region or variant */
1387
252
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1388
68
        goto region_m49;
1389
1390
1.07k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1391
1.07k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1392
890
           nxt++;
1393
1394
184
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1395
38
        goto variant;
1396
146
    if (nxt - cur != 2)
1397
71
        return(0);
1398
    /* we parsed a region */
1399
306
region:
1400
306
    if (nxt[0] == 0)
1401
89
        return(1);
1402
217
    if (nxt[0] != '-')
1403
126
        return(0);
1404
1405
91
    nxt++;
1406
91
    cur = nxt;
1407
    /* now we can just have a variant */
1408
830
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1409
830
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1410
739
           nxt++;
1411
1412
91
    if ((nxt - cur < 5) || (nxt - cur > 8))
1413
66
        return(0);
1414
1415
    /* we parsed a variant */
1416
197
variant:
1417
197
    if (nxt[0] == 0)
1418
92
        return(1);
1419
105
    if (nxt[0] != '-')
1420
66
        return(0);
1421
    /* extensions and private use subtags not checked */
1422
39
    return (1);
1423
1424
366
region_m49:
1425
366
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1426
366
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1427
46
        nxt += 3;
1428
46
        goto region;
1429
46
    }
1430
320
    return(0);
1431
366
}
1432
1433
/************************************************************************
1434
 *                  *
1435
 *    Parser stacks related functions and macros    *
1436
 *                  *
1437
 ************************************************************************/
1438
1439
static xmlChar *
1440
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1441
1442
/**
1443
 * xmlParserNsCreate:
1444
 *
1445
 * Create a new namespace database.
1446
 *
1447
 * Returns the new obejct.
1448
 */
1449
xmlParserNsData *
1450
18.1k
xmlParserNsCreate(void) {
1451
18.1k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1452
1453
18.1k
    if (nsdb == NULL)
1454
0
        return(NULL);
1455
18.1k
    memset(nsdb, 0, sizeof(*nsdb));
1456
18.1k
    nsdb->defaultNsIndex = INT_MAX;
1457
1458
18.1k
    return(nsdb);
1459
18.1k
}
1460
1461
/**
1462
 * xmlParserNsFree:
1463
 * @nsdb: namespace database
1464
 *
1465
 * Free a namespace database.
1466
 */
1467
void
1468
18.1k
xmlParserNsFree(xmlParserNsData *nsdb) {
1469
18.1k
    if (nsdb == NULL)
1470
0
        return;
1471
1472
18.1k
    xmlFree(nsdb->extra);
1473
18.1k
    xmlFree(nsdb->hash);
1474
18.1k
    xmlFree(nsdb);
1475
18.1k
}
1476
1477
/**
1478
 * xmlParserNsReset:
1479
 * @nsdb: namespace database
1480
 *
1481
 * Reset a namespace database.
1482
 */
1483
static void
1484
0
xmlParserNsReset(xmlParserNsData *nsdb) {
1485
0
    if (nsdb == NULL)
1486
0
        return;
1487
1488
0
    nsdb->hashElems = 0;
1489
0
    nsdb->elementId = 0;
1490
0
    nsdb->defaultNsIndex = INT_MAX;
1491
1492
0
    if (nsdb->hash)
1493
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1494
0
}
1495
1496
/**
1497
 * xmlParserStartElement:
1498
 * @nsdb: namespace database
1499
 *
1500
 * Signal that a new element has started.
1501
 *
1502
 * Returns 0 on success, -1 if the element counter overflowed.
1503
 */
1504
static int
1505
96.6k
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1506
96.6k
    if (nsdb->elementId == UINT_MAX)
1507
0
        return(-1);
1508
96.6k
    nsdb->elementId++;
1509
1510
96.6k
    return(0);
1511
96.6k
}
1512
1513
/**
1514
 * xmlParserNsLookup:
1515
 * @ctxt: parser context
1516
 * @prefix: namespace prefix
1517
 * @bucketPtr: optional bucket (return value)
1518
 *
1519
 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1520
 * be set to the matching bucket, or the first empty bucket if no match
1521
 * was found.
1522
 *
1523
 * Returns the namespace index on success, INT_MAX if no namespace was
1524
 * found.
1525
 */
1526
static int
1527
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1528
186k
                  xmlParserNsBucket **bucketPtr) {
1529
186k
    xmlParserNsBucket *bucket;
1530
186k
    unsigned index, hashValue;
1531
1532
186k
    if (prefix->name == NULL)
1533
101k
        return(ctxt->nsdb->defaultNsIndex);
1534
1535
85.1k
    if (ctxt->nsdb->hashSize == 0)
1536
6.14k
        return(INT_MAX);
1537
1538
78.9k
    hashValue = prefix->hashValue;
1539
78.9k
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1540
78.9k
    bucket = &ctxt->nsdb->hash[index];
1541
1542
5.10M
    while (bucket->hashValue) {
1543
5.09M
        if ((bucket->hashValue == hashValue) &&
1544
5.09M
            (bucket->index != INT_MAX)) {
1545
61.3k
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1546
61.3k
                if (bucketPtr != NULL)
1547
46.6k
                    *bucketPtr = bucket;
1548
61.3k
                return(bucket->index);
1549
61.3k
            }
1550
61.3k
        }
1551
1552
5.03M
        index++;
1553
5.03M
        bucket++;
1554
5.03M
        if (index == ctxt->nsdb->hashSize) {
1555
12.1k
            index = 0;
1556
12.1k
            bucket = ctxt->nsdb->hash;
1557
12.1k
        }
1558
5.03M
    }
1559
1560
17.6k
    if (bucketPtr != NULL)
1561
12.7k
        *bucketPtr = bucket;
1562
17.6k
    return(INT_MAX);
1563
78.9k
}
1564
1565
/**
1566
 * xmlParserNsLookupUri:
1567
 * @ctxt: parser context
1568
 * @prefix: namespace prefix
1569
 *
1570
 * Lookup namespace URI with given prefix.
1571
 *
1572
 * Returns the namespace URI on success, NULL if no namespace was found.
1573
 */
1574
static const xmlChar *
1575
82.6k
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1576
82.6k
    const xmlChar *ret;
1577
82.6k
    int nsIndex;
1578
1579
82.6k
    if (prefix->name == ctxt->str_xml)
1580
485
        return(ctxt->str_xml_ns);
1581
1582
    /*
1583
     * minNsIndex is used when building an entity tree. We must
1584
     * ignore namespaces declared outside the entity.
1585
     */
1586
82.1k
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1587
82.1k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1588
54.6k
        return(NULL);
1589
1590
27.5k
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1591
27.5k
    if (ret[0] == 0)
1592
759
        ret = NULL;
1593
27.5k
    return(ret);
1594
82.1k
}
1595
1596
/**
1597
 * xmlParserNsLookupSax:
1598
 * @ctxt: parser context
1599
 * @prefix: namespace prefix
1600
 *
1601
 * Lookup extra data for the given prefix. This returns data stored
1602
 * with xmlParserNsUdpateSax.
1603
 *
1604
 * Returns the data on success, NULL if no namespace was found.
1605
 */
1606
void *
1607
6.60k
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1608
6.60k
    xmlHashedString hprefix;
1609
6.60k
    int nsIndex;
1610
1611
6.60k
    if (prefix == ctxt->str_xml)
1612
3.59k
        return(NULL);
1613
1614
3.01k
    hprefix.name = prefix;
1615
3.01k
    if (prefix != NULL)
1616
625
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1617
2.38k
    else
1618
2.38k
        hprefix.hashValue = 0;
1619
3.01k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1620
3.01k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1621
0
        return(NULL);
1622
1623
3.01k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1624
3.01k
}
1625
1626
/**
1627
 * xmlParserNsUpdateSax:
1628
 * @ctxt: parser context
1629
 * @prefix: namespace prefix
1630
 * @saxData: extra data for SAX handler
1631
 *
1632
 * Sets or updates extra data for the given prefix. This value will be
1633
 * returned by xmlParserNsLookupSax as long as the namespace with the
1634
 * given prefix is in scope.
1635
 *
1636
 * Returns the data on success, NULL if no namespace was found.
1637
 */
1638
int
1639
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1640
32.2k
                     void *saxData) {
1641
32.2k
    xmlHashedString hprefix;
1642
32.2k
    int nsIndex;
1643
1644
32.2k
    if (prefix == ctxt->str_xml)
1645
0
        return(-1);
1646
1647
32.2k
    hprefix.name = prefix;
1648
32.2k
    if (prefix != NULL)
1649
11.2k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1650
21.0k
    else
1651
21.0k
        hprefix.hashValue = 0;
1652
32.2k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1653
32.2k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1654
0
        return(-1);
1655
1656
32.2k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1657
32.2k
    return(0);
1658
32.2k
}
1659
1660
/**
1661
 * xmlParserNsGrow:
1662
 * @ctxt: parser context
1663
 *
1664
 * Grows the namespace tables.
1665
 *
1666
 * Returns 0 on success, -1 if a memory allocation failed.
1667
 */
1668
static int
1669
2.33k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1670
2.33k
    const xmlChar **table;
1671
2.33k
    xmlParserNsExtra *extra;
1672
2.33k
    int newSize;
1673
1674
2.33k
    if (ctxt->nsMax > INT_MAX / 2)
1675
0
        goto error;
1676
2.33k
    newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1677
1678
2.33k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1679
2.33k
    if (table == NULL)
1680
0
        goto error;
1681
2.33k
    ctxt->nsTab = table;
1682
1683
2.33k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1684
2.33k
    if (extra == NULL)
1685
0
        goto error;
1686
2.33k
    ctxt->nsdb->extra = extra;
1687
1688
2.33k
    ctxt->nsMax = newSize;
1689
2.33k
    return(0);
1690
1691
0
error:
1692
0
    xmlErrMemory(ctxt);
1693
0
    return(-1);
1694
2.33k
}
1695
1696
/**
1697
 * xmlParserNsPush:
1698
 * @ctxt: parser context
1699
 * @prefix: prefix with hash value
1700
 * @uri: uri with hash value
1701
 * @saxData: extra data for SAX handler
1702
 * @defAttr: whether the namespace comes from a default attribute
1703
 *
1704
 * Push a new namespace on the table.
1705
 *
1706
 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1707
 * -1 if a memory allocation failed.
1708
 */
1709
static int
1710
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1711
55.2k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1712
55.2k
    xmlParserNsBucket *bucket = NULL;
1713
55.2k
    xmlParserNsExtra *extra;
1714
55.2k
    const xmlChar **ns;
1715
55.2k
    unsigned hashValue, nsIndex, oldIndex;
1716
1717
55.2k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1718
203
        return(0);
1719
1720
55.0k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1721
0
        xmlErrMemory(ctxt);
1722
0
        return(-1);
1723
0
    }
1724
1725
    /*
1726
     * Default namespace and 'xml' namespace
1727
     */
1728
55.0k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1729
24.6k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1730
1731
24.6k
        if (oldIndex != INT_MAX) {
1732
15.3k
            extra = &ctxt->nsdb->extra[oldIndex];
1733
1734
15.3k
            if (extra->elementId == ctxt->nsdb->elementId) {
1735
254
                if (defAttr == 0)
1736
169
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1737
254
                return(0);
1738
254
            }
1739
1740
15.1k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1741
15.1k
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1742
334
                return(0);
1743
15.1k
        }
1744
1745
24.0k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1746
24.0k
        goto populate_entry;
1747
24.6k
    }
1748
1749
    /*
1750
     * Hash table lookup
1751
     */
1752
30.3k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1753
30.3k
    if (oldIndex != INT_MAX) {
1754
16.8k
        extra = &ctxt->nsdb->extra[oldIndex];
1755
1756
        /*
1757
         * Check for duplicate definitions on the same element.
1758
         */
1759
16.8k
        if (extra->elementId == ctxt->nsdb->elementId) {
1760
125
            if (defAttr == 0)
1761
115
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1762
125
            return(0);
1763
125
        }
1764
1765
16.7k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1766
16.7k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1767
278
            return(0);
1768
1769
16.4k
        bucket->index = ctxt->nsNr;
1770
16.4k
        goto populate_entry;
1771
16.7k
    }
1772
1773
    /*
1774
     * Insert new bucket
1775
     */
1776
1777
13.5k
    hashValue = prefix->hashValue;
1778
1779
    /*
1780
     * Grow hash table, 50% fill factor
1781
     */
1782
13.5k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1783
1.16k
        xmlParserNsBucket *newHash;
1784
1.16k
        unsigned newSize, i, index;
1785
1786
1.16k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1787
0
            xmlErrMemory(ctxt);
1788
0
            return(-1);
1789
0
        }
1790
1.16k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1791
1.16k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1792
1.16k
        if (newHash == NULL) {
1793
0
            xmlErrMemory(ctxt);
1794
0
            return(-1);
1795
0
        }
1796
1.16k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1797
1798
40.3k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1799
39.2k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1800
39.2k
            unsigned newIndex;
1801
1802
39.2k
            if (hv == 0)
1803
19.6k
                continue;
1804
19.6k
            newIndex = hv & (newSize - 1);
1805
1806
1.84M
            while (newHash[newIndex].hashValue != 0) {
1807
1.82M
                newIndex++;
1808
1.82M
                if (newIndex == newSize)
1809
3.78k
                    newIndex = 0;
1810
1.82M
            }
1811
1812
19.6k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1813
19.6k
        }
1814
1815
1.16k
        xmlFree(ctxt->nsdb->hash);
1816
1.16k
        ctxt->nsdb->hash = newHash;
1817
1.16k
        ctxt->nsdb->hashSize = newSize;
1818
1819
        /*
1820
         * Relookup
1821
         */
1822
1.16k
        index = hashValue & (newSize - 1);
1823
1824
13.2k
        while (newHash[index].hashValue != 0) {
1825
12.0k
            index++;
1826
12.0k
            if (index == newSize)
1827
135
                index = 0;
1828
12.0k
        }
1829
1830
1.16k
        bucket = &newHash[index];
1831
1.16k
    }
1832
1833
13.5k
    bucket->hashValue = hashValue;
1834
13.5k
    bucket->index = ctxt->nsNr;
1835
13.5k
    ctxt->nsdb->hashElems++;
1836
13.5k
    oldIndex = INT_MAX;
1837
1838
54.0k
populate_entry:
1839
54.0k
    nsIndex = ctxt->nsNr;
1840
1841
54.0k
    ns = &ctxt->nsTab[nsIndex * 2];
1842
54.0k
    ns[0] = prefix ? prefix->name : NULL;
1843
54.0k
    ns[1] = uri->name;
1844
1845
54.0k
    extra = &ctxt->nsdb->extra[nsIndex];
1846
54.0k
    extra->saxData = saxData;
1847
54.0k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1848
54.0k
    extra->uriHashValue = uri->hashValue;
1849
54.0k
    extra->elementId = ctxt->nsdb->elementId;
1850
54.0k
    extra->oldIndex = oldIndex;
1851
1852
54.0k
    ctxt->nsNr++;
1853
1854
54.0k
    return(1);
1855
13.5k
}
1856
1857
/**
1858
 * xmlParserNsPop:
1859
 * @ctxt: an XML parser context
1860
 * @nr:  the number to pop
1861
 *
1862
 * Pops the top @nr namespaces and restores the hash table.
1863
 *
1864
 * Returns the number of namespaces popped.
1865
 */
1866
static int
1867
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1868
36.5k
{
1869
36.5k
    int i;
1870
1871
    /* assert(nr <= ctxt->nsNr); */
1872
1873
90.1k
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1874
53.6k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1875
53.6k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1876
1877
53.6k
        if (prefix == NULL) {
1878
23.8k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1879
29.7k
        } else {
1880
29.7k
            xmlHashedString hprefix;
1881
29.7k
            xmlParserNsBucket *bucket = NULL;
1882
1883
29.7k
            hprefix.name = prefix;
1884
29.7k
            hprefix.hashValue = extra->prefixHashValue;
1885
29.7k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1886
            /* assert(bucket && bucket->hashValue); */
1887
29.7k
            bucket->index = extra->oldIndex;
1888
29.7k
        }
1889
53.6k
    }
1890
1891
36.5k
    ctxt->nsNr -= nr;
1892
36.5k
    return(nr);
1893
36.5k
}
1894
1895
static int
1896
1.96k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1897
1.96k
    const xmlChar **atts;
1898
1.96k
    unsigned *attallocs;
1899
1.96k
    int maxatts;
1900
1901
1.96k
    if (nr + 5 > ctxt->maxatts) {
1902
1.96k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1903
1.96k
  atts = (const xmlChar **) xmlMalloc(
1904
1.96k
             maxatts * sizeof(const xmlChar *));
1905
1.96k
  if (atts == NULL) goto mem_error;
1906
1.96k
  attallocs = xmlRealloc(ctxt->attallocs,
1907
1.96k
                               (maxatts / 5) * sizeof(attallocs[0]));
1908
1.96k
  if (attallocs == NULL) {
1909
0
            xmlFree(atts);
1910
0
            goto mem_error;
1911
0
        }
1912
1.96k
        if (ctxt->maxatts > 0)
1913
120
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1914
1.96k
        xmlFree(ctxt->atts);
1915
1.96k
  ctxt->atts = atts;
1916
1.96k
  ctxt->attallocs = attallocs;
1917
1.96k
  ctxt->maxatts = maxatts;
1918
1.96k
    }
1919
1.96k
    return(ctxt->maxatts);
1920
0
mem_error:
1921
0
    xmlErrMemory(ctxt);
1922
0
    return(-1);
1923
1.96k
}
1924
1925
/**
1926
 * inputPush:
1927
 * @ctxt:  an XML parser context
1928
 * @value:  the parser input
1929
 *
1930
 * Pushes a new parser input on top of the input stack
1931
 *
1932
 * Returns -1 in case of error, the index in the stack otherwise
1933
 */
1934
int
1935
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1936
72.6k
{
1937
72.6k
    if ((ctxt == NULL) || (value == NULL))
1938
0
        return(-1);
1939
72.6k
    if (ctxt->inputNr >= ctxt->inputMax) {
1940
0
        size_t newSize = ctxt->inputMax * 2;
1941
0
        xmlParserInputPtr *tmp;
1942
1943
0
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1944
0
                                               newSize * sizeof(*tmp));
1945
0
        if (tmp == NULL) {
1946
0
            xmlErrMemory(ctxt);
1947
0
            return (-1);
1948
0
        }
1949
0
        ctxt->inputTab = tmp;
1950
0
        ctxt->inputMax = newSize;
1951
0
    }
1952
72.6k
    ctxt->inputTab[ctxt->inputNr] = value;
1953
72.6k
    ctxt->input = value;
1954
72.6k
    return (ctxt->inputNr++);
1955
72.6k
}
1956
/**
1957
 * inputPop:
1958
 * @ctxt: an XML parser context
1959
 *
1960
 * Pops the top parser input from the input stack
1961
 *
1962
 * Returns the input just removed
1963
 */
1964
xmlParserInputPtr
1965
inputPop(xmlParserCtxtPtr ctxt)
1966
108k
{
1967
108k
    xmlParserInputPtr ret;
1968
1969
108k
    if (ctxt == NULL)
1970
0
        return(NULL);
1971
108k
    if (ctxt->inputNr <= 0)
1972
36.2k
        return (NULL);
1973
72.6k
    ctxt->inputNr--;
1974
72.6k
    if (ctxt->inputNr > 0)
1975
54.5k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1976
18.1k
    else
1977
18.1k
        ctxt->input = NULL;
1978
72.6k
    ret = ctxt->inputTab[ctxt->inputNr];
1979
72.6k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1980
72.6k
    return (ret);
1981
108k
}
1982
/**
1983
 * nodePush:
1984
 * @ctxt:  an XML parser context
1985
 * @value:  the element node
1986
 *
1987
 * DEPRECATED: Internal function, do not use.
1988
 *
1989
 * Pushes a new element node on top of the node stack
1990
 *
1991
 * Returns -1 in case of error, the index in the stack otherwise
1992
 */
1993
int
1994
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1995
101k
{
1996
101k
    int maxDepth;
1997
1998
101k
    if (ctxt == NULL)
1999
0
        return(0);
2000
2001
101k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2002
101k
    if (ctxt->nodeNr > maxDepth) {
2003
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2004
0
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
2005
0
                ctxt->nodeNr);
2006
0
        xmlHaltParser(ctxt);
2007
0
        return(-1);
2008
0
    }
2009
101k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
2010
406
        xmlNodePtr *tmp;
2011
2012
406
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
2013
406
                                      ctxt->nodeMax * 2 *
2014
406
                                      sizeof(ctxt->nodeTab[0]));
2015
406
        if (tmp == NULL) {
2016
0
            xmlErrMemory(ctxt);
2017
0
            return (-1);
2018
0
        }
2019
406
        ctxt->nodeTab = tmp;
2020
406
  ctxt->nodeMax *= 2;
2021
406
    }
2022
101k
    ctxt->nodeTab[ctxt->nodeNr] = value;
2023
101k
    ctxt->node = value;
2024
101k
    return (ctxt->nodeNr++);
2025
101k
}
2026
2027
/**
2028
 * nodePop:
2029
 * @ctxt: an XML parser context
2030
 *
2031
 * DEPRECATED: Internal function, do not use.
2032
 *
2033
 * Pops the top element node from the node stack
2034
 *
2035
 * Returns the node just removed
2036
 */
2037
xmlNodePtr
2038
nodePop(xmlParserCtxtPtr ctxt)
2039
117k
{
2040
117k
    xmlNodePtr ret;
2041
2042
117k
    if (ctxt == NULL) return(NULL);
2043
117k
    if (ctxt->nodeNr <= 0)
2044
19.0k
        return (NULL);
2045
98.7k
    ctxt->nodeNr--;
2046
98.7k
    if (ctxt->nodeNr > 0)
2047
95.3k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2048
3.45k
    else
2049
3.45k
        ctxt->node = NULL;
2050
98.7k
    ret = ctxt->nodeTab[ctxt->nodeNr];
2051
98.7k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2052
98.7k
    return (ret);
2053
117k
}
2054
2055
/**
2056
 * nameNsPush:
2057
 * @ctxt:  an XML parser context
2058
 * @value:  the element name
2059
 * @prefix:  the element prefix
2060
 * @URI:  the element namespace name
2061
 * @line:  the current line number for error messages
2062
 * @nsNr:  the number of namespaces pushed on the namespace table
2063
 *
2064
 * Pushes a new element name/prefix/URL on top of the name stack
2065
 *
2066
 * Returns -1 in case of error, the index in the stack otherwise
2067
 */
2068
static int
2069
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2070
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2071
135k
{
2072
135k
    xmlStartTag *tag;
2073
2074
135k
    if (ctxt->nameNr >= ctxt->nameMax) {
2075
576
        const xmlChar * *tmp;
2076
576
        xmlStartTag *tmp2;
2077
576
        ctxt->nameMax *= 2;
2078
576
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2079
576
                                    ctxt->nameMax *
2080
576
                                    sizeof(ctxt->nameTab[0]));
2081
576
        if (tmp == NULL) {
2082
0
      ctxt->nameMax /= 2;
2083
0
      goto mem_error;
2084
0
        }
2085
576
  ctxt->nameTab = tmp;
2086
576
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2087
576
                                    ctxt->nameMax *
2088
576
                                    sizeof(ctxt->pushTab[0]));
2089
576
        if (tmp2 == NULL) {
2090
0
      ctxt->nameMax /= 2;
2091
0
      goto mem_error;
2092
0
        }
2093
576
  ctxt->pushTab = tmp2;
2094
134k
    } else if (ctxt->pushTab == NULL) {
2095
11.0k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2096
11.0k
                                            sizeof(ctxt->pushTab[0]));
2097
11.0k
        if (ctxt->pushTab == NULL)
2098
0
            goto mem_error;
2099
11.0k
    }
2100
135k
    ctxt->nameTab[ctxt->nameNr] = value;
2101
135k
    ctxt->name = value;
2102
135k
    tag = &ctxt->pushTab[ctxt->nameNr];
2103
135k
    tag->prefix = prefix;
2104
135k
    tag->URI = URI;
2105
135k
    tag->line = line;
2106
135k
    tag->nsNr = nsNr;
2107
135k
    return (ctxt->nameNr++);
2108
0
mem_error:
2109
0
    xmlErrMemory(ctxt);
2110
0
    return (-1);
2111
135k
}
2112
#ifdef LIBXML_PUSH_ENABLED
2113
/**
2114
 * nameNsPop:
2115
 * @ctxt: an XML parser context
2116
 *
2117
 * Pops the top element/prefix/URI name from the name stack
2118
 *
2119
 * Returns the name just removed
2120
 */
2121
static const xmlChar *
2122
nameNsPop(xmlParserCtxtPtr ctxt)
2123
0
{
2124
0
    const xmlChar *ret;
2125
2126
0
    if (ctxt->nameNr <= 0)
2127
0
        return (NULL);
2128
0
    ctxt->nameNr--;
2129
0
    if (ctxt->nameNr > 0)
2130
0
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2131
0
    else
2132
0
        ctxt->name = NULL;
2133
0
    ret = ctxt->nameTab[ctxt->nameNr];
2134
0
    ctxt->nameTab[ctxt->nameNr] = NULL;
2135
0
    return (ret);
2136
0
}
2137
#endif /* LIBXML_PUSH_ENABLED */
2138
2139
/**
2140
 * namePush:
2141
 * @ctxt:  an XML parser context
2142
 * @value:  the element name
2143
 *
2144
 * DEPRECATED: Internal function, do not use.
2145
 *
2146
 * Pushes a new element name on top of the name stack
2147
 *
2148
 * Returns -1 in case of error, the index in the stack otherwise
2149
 */
2150
int
2151
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2152
0
{
2153
0
    if (ctxt == NULL) return (-1);
2154
2155
0
    if (ctxt->nameNr >= ctxt->nameMax) {
2156
0
        const xmlChar * *tmp;
2157
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2158
0
                                    ctxt->nameMax * 2 *
2159
0
                                    sizeof(ctxt->nameTab[0]));
2160
0
        if (tmp == NULL) {
2161
0
      goto mem_error;
2162
0
        }
2163
0
  ctxt->nameTab = tmp;
2164
0
        ctxt->nameMax *= 2;
2165
0
    }
2166
0
    ctxt->nameTab[ctxt->nameNr] = value;
2167
0
    ctxt->name = value;
2168
0
    return (ctxt->nameNr++);
2169
0
mem_error:
2170
0
    xmlErrMemory(ctxt);
2171
0
    return (-1);
2172
0
}
2173
2174
/**
2175
 * namePop:
2176
 * @ctxt: an XML parser context
2177
 *
2178
 * DEPRECATED: Internal function, do not use.
2179
 *
2180
 * Pops the top element name from the name stack
2181
 *
2182
 * Returns the name just removed
2183
 */
2184
const xmlChar *
2185
namePop(xmlParserCtxtPtr ctxt)
2186
131k
{
2187
131k
    const xmlChar *ret;
2188
2189
131k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2190
0
        return (NULL);
2191
131k
    ctxt->nameNr--;
2192
131k
    if (ctxt->nameNr > 0)
2193
124k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2194
6.65k
    else
2195
6.65k
        ctxt->name = NULL;
2196
131k
    ret = ctxt->nameTab[ctxt->nameNr];
2197
131k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2198
131k
    return (ret);
2199
131k
}
2200
2201
155k
/*
 * Pushes @val on the stack of space values and points ctxt->space at the
 * new top entry. The table is doubled in size when it is full.
 *
 * Returns -1 on allocation failure, the index in the stack otherwise.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int doubled = ctxt->spaceMax * 2;
        int *grown;

        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   doubled * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            /* The old table and capacity stay in place. */
            xmlErrMemory(ctxt);
            return(-1);
        }
        ctxt->spaceMax = doubled;
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
2219
2220
151k
/*
 * Pops the top entry of the stack of space values. ctxt->space is moved
 * to the entry below, or to the first table slot when the stack becomes
 * empty, and the vacated slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2232
2233
/*
2234
 * Macros for accessing the content. Those should be used only by the parser,
2235
 * and not exported.
2236
 *
2237
 * Dirty macros, i.e. one often needs to make assumptions about the context to
2238
 * use them
2239
 *
2240
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2241
 *           To be used with extreme caution since operations consuming
2242
 *           characters may move the input buffer to a different location !
2243
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2244
 *           This should be used internally by the parser
2245
 *           only to compare to ASCII values otherwise it would break when
2246
 *           running with UTF-8 encoding.
2247
 *   RAW     same as CUR but in the input buffer, bypass any token
2248
 *           extraction that may have been done
2249
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
2250
 *           to compare on ASCII based substring.
2251
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2252
 *           strings without newlines within the parser.
2253
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2254
 *           defined char within the parser.
2255
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2256
 *
2257
 *   NEXT    Skip to the next character, this does the proper decoding
2258
 *           in UTF-8 mode. It also pops unfinished entities on the fly.
2259
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2260
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2261
 *           to the number of xmlChars used for the encoding [0-5].
2262
 *   CUR_SCHAR  same but operate on a string instead of the context
2263
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2264
 *            the index
2265
 *   GROW, SHRINK  handling of input buffers
2266
 */
2267
2268
2.34M
/* Raw access to the byte(s) at the current input position. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn: compare the first n bytes at s with the given characters.
 * All arguments are parenthesized so that expressions can be passed.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/* Advance by val bytes on the same line; refill when a 0 byte is reached. */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/* Advance by val bytes one at a time, keeping line and column up to date. */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * SHRINK and GROW keep their historical shape (a bare if statement with
 * its own terminating semicolon) because call sites are written against it.
 */
#define SHRINK \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
	(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

/* Advance by exactly one byte; refill when a 0 byte is reached. */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);						\
    }

/* Step over the current character, which is l bytes long. */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/* Append character v to buffer b at index i, advancing i. */
#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[(i)++] = (v);						\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2347
2348
/**
2349
 * xmlSkipBlankChars:
2350
 * @ctxt:  the XML parser context
2351
 *
2352
 * DEPRECATED: Internal function, do not use.
2353
 *
2354
 * Skip whitespace in the input stream.
2355
 *
2356
 * Returns the number of space chars skipped
2357
 */
2358
int
2359
516k
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2360
516k
    const xmlChar *cur;
2361
516k
    int res = 0;
2362
2363
    /*
2364
     * It's Okay to use CUR/NEXT here since all the blanks are on
2365
     * the ASCII range.
2366
     */
2367
516k
    cur = ctxt->input->cur;
2368
516k
    while (IS_BLANK_CH(*cur)) {
2369
99.5k
        if (*cur == '\n') {
2370
6.00k
            ctxt->input->line++; ctxt->input->col = 1;
2371
93.5k
        } else {
2372
93.5k
            ctxt->input->col++;
2373
93.5k
        }
2374
99.5k
        cur++;
2375
99.5k
        if (res < INT_MAX)
2376
99.5k
            res++;
2377
99.5k
        if (*cur == 0) {
2378
1.21k
            ctxt->input->cur = cur;
2379
1.21k
            xmlParserGrow(ctxt);
2380
1.21k
            cur = ctxt->input->cur;
2381
1.21k
        }
2382
99.5k
    }
2383
516k
    ctxt->input->cur = cur;
2384
2385
516k
    return(res);
2386
516k
}
2387
2388
static void
2389
53.9k
xmlPopPE(xmlParserCtxtPtr ctxt) {
2390
53.9k
    unsigned long consumed;
2391
53.9k
    xmlEntityPtr ent;
2392
2393
53.9k
    ent = ctxt->input->entity;
2394
2395
53.9k
    ent->flags &= ~XML_ENT_EXPANDING;
2396
2397
53.9k
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2398
2.08k
        int result;
2399
2400
        /*
2401
         * Read the rest of the stream in case of errors. We want
2402
         * to account for the whole entity size.
2403
         */
2404
2.08k
        do {
2405
2.08k
            ctxt->input->cur = ctxt->input->end;
2406
2.08k
            xmlParserShrink(ctxt);
2407
2.08k
            result = xmlParserGrow(ctxt);
2408
2.08k
        } while (result > 0);
2409
2410
2.08k
        consumed = ctxt->input->consumed;
2411
2.08k
        xmlSaturatedAddSizeT(&consumed,
2412
2.08k
                             ctxt->input->end - ctxt->input->base);
2413
2414
2.08k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2415
2416
        /*
2417
         * Add to sizeentities when parsing an external entity
2418
         * for the first time.
2419
         */
2420
2.08k
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2421
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2422
0
        }
2423
2424
2.08k
        ent->flags |= XML_ENT_CHECKED;
2425
2.08k
    }
2426
2427
53.9k
    xmlPopInput(ctxt);
2428
2429
53.9k
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2430
53.9k
}
2431
2432
/**
2433
 * xmlSkipBlankCharsPE:
2434
 * @ctxt:  the XML parser context
2435
 *
2436
 * Skip whitespace in the input stream, also handling parameter
2437
 * entities.
2438
 *
2439
 * Returns the number of space chars skipped
2440
 */
2441
static int
2442
507k
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2443
507k
    int res = 0;
2444
507k
    int inParam;
2445
507k
    int expandParam;
2446
2447
507k
    inParam = PARSER_IN_PE(ctxt);
2448
507k
    expandParam = PARSER_EXTERNAL(ctxt);
2449
2450
507k
    if (!inParam && !expandParam)
2451
110k
        return(xmlSkipBlankChars(ctxt));
2452
2453
579k
    while (PARSER_STOPPED(ctxt) == 0) {
2454
579k
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2455
128k
            NEXT;
2456
450k
        } else if (CUR == '%') {
2457
52.9k
            if ((expandParam == 0) ||
2458
52.9k
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2459
52.9k
                break;
2460
2461
            /*
2462
             * Expand parameter entity. We continue to consume
2463
             * whitespace at the start of the entity and possible
2464
             * even consume the whole entity and pop it. We might
2465
             * even pop multiple PEs in this loop.
2466
             */
2467
0
            xmlParsePEReference(ctxt);
2468
2469
0
            inParam = PARSER_IN_PE(ctxt);
2470
0
            expandParam = PARSER_EXTERNAL(ctxt);
2471
397k
        } else if (CUR == 0) {
2472
53.9k
            if (inParam == 0)
2473
2
                break;
2474
2475
53.9k
            xmlPopPE(ctxt);
2476
2477
53.9k
            inParam = PARSER_IN_PE(ctxt);
2478
53.9k
            expandParam = PARSER_EXTERNAL(ctxt);
2479
343k
        } else {
2480
343k
            break;
2481
343k
        }
2482
2483
        /*
2484
         * Also increase the counter when entering or exiting a PERef.
2485
         * The spec says: "When a parameter-entity reference is recognized
2486
         * in the DTD and included, its replacement text MUST be enlarged
2487
         * by the attachment of one leading and one following space (#x20)
2488
         * character."
2489
         */
2490
182k
        if (res < INT_MAX)
2491
182k
            res++;
2492
182k
    }
2493
2494
396k
    return(res);
2495
507k
}
2496
2497
/************************************************************************
2498
 *                  *
2499
 *    Commodity functions to handle entities      *
2500
 *                  *
2501
 ************************************************************************/
2502
2503
/**
2504
 * xmlPopInput:
2505
 * @ctxt:  an XML parser context
2506
 *
2507
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2508
 *          pop it and return the next char.
2509
 *
2510
 * Returns the current xmlChar in the parser context
2511
 */
2512
xmlChar
2513
53.9k
xmlPopInput(xmlParserCtxtPtr ctxt) {
2514
53.9k
    xmlParserInputPtr input;
2515
2516
53.9k
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2517
53.9k
    input = inputPop(ctxt);
2518
53.9k
    xmlFreeInputStream(input);
2519
53.9k
    if (*ctxt->input->cur == 0)
2520
306
        xmlParserGrow(ctxt);
2521
53.9k
    return(CUR);
2522
53.9k
}
2523
2524
/**
2525
 * xmlPushInput:
2526
 * @ctxt:  an XML parser context
2527
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2528
 *
2529
 * Push an input stream onto the stack.
2530
 *
2531
 * This makes the parser use an input returned from advanced functions
2532
 * like xmlNewInputURL or xmlNewInputMemory.
2533
 *
2534
 * Returns -1 in case of error or the index in the input stack
2535
 */
2536
int
2537
54.5k
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2538
54.5k
    int maxDepth;
2539
54.5k
    int ret;
2540
2541
54.5k
    if ((ctxt == NULL) || (input == NULL))
2542
0
        return(-1);
2543
2544
54.5k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
2545
54.5k
    if (ctxt->inputNr > maxDepth) {
2546
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
2547
0
                       "Maximum entity nesting depth exceeded");
2548
0
        xmlHaltParser(ctxt);
2549
0
  return(-1);
2550
0
    }
2551
54.5k
    ret = inputPush(ctxt, input);
2552
54.5k
    GROW;
2553
54.5k
    return(ret);
2554
54.5k
}
2555
2556
/**
2557
 * xmlParseCharRef:
2558
 * @ctxt:  an XML parser context
2559
 *
2560
 * DEPRECATED: Internal function, don't use.
2561
 *
2562
 * Parse a numeric character reference. Always consumes '&'.
2563
 *
2564
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2565
 *                  '&#x' [0-9a-fA-F]+ ';'
2566
 *
2567
 * [ WFC: Legal Character ]
2568
 * Characters referred to using character references must match the
2569
 * production for Char.
2570
 *
2571
 * Returns the value parsed (as an int), 0 in case of error
2572
 */
2573
int
2574
12.7k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2575
12.7k
    int val = 0;
2576
12.7k
    int count = 0;
2577
2578
    /*
2579
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2580
     */
2581
12.7k
    if ((RAW == '&') && (NXT(1) == '#') &&
2582
12.7k
        (NXT(2) == 'x')) {
2583
7.72k
  SKIP(3);
2584
7.72k
  GROW;
2585
31.3k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2586
24.3k
      if (count++ > 20) {
2587
164
    count = 0;
2588
164
    GROW;
2589
164
      }
2590
24.3k
      if ((RAW >= '0') && (RAW <= '9'))
2591
6.21k
          val = val * 16 + (CUR - '0');
2592
18.1k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2593
4.93k
          val = val * 16 + (CUR - 'a') + 10;
2594
13.2k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2595
12.4k
          val = val * 16 + (CUR - 'A') + 10;
2596
725
      else {
2597
725
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2598
725
    val = 0;
2599
725
    break;
2600
725
      }
2601
23.6k
      if (val > 0x110000)
2602
2.45k
          val = 0x110000;
2603
2604
23.6k
      NEXT;
2605
23.6k
      count++;
2606
23.6k
  }
2607
7.72k
  if (RAW == ';') {
2608
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2609
7.00k
      ctxt->input->col++;
2610
7.00k
      ctxt->input->cur++;
2611
7.00k
  }
2612
7.72k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2613
5.00k
  SKIP(2);
2614
5.00k
  GROW;
2615
17.3k
  while (RAW != ';') { /* loop blocked by count */
2616
13.3k
      if (count++ > 20) {
2617
197
    count = 0;
2618
197
    GROW;
2619
197
      }
2620
13.3k
      if ((RAW >= '0') && (RAW <= '9'))
2621
12.3k
          val = val * 10 + (CUR - '0');
2622
1.00k
      else {
2623
1.00k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2624
1.00k
    val = 0;
2625
1.00k
    break;
2626
1.00k
      }
2627
12.3k
      if (val > 0x110000)
2628
1.87k
          val = 0x110000;
2629
2630
12.3k
      NEXT;
2631
12.3k
      count++;
2632
12.3k
  }
2633
5.00k
  if (RAW == ';') {
2634
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2635
3.99k
      ctxt->input->col++;
2636
3.99k
      ctxt->input->cur++;
2637
3.99k
  }
2638
5.00k
    } else {
2639
0
        if (RAW == '&')
2640
0
            SKIP(1);
2641
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2642
0
    }
2643
2644
    /*
2645
     * [ WFC: Legal Character ]
2646
     * Characters referred to using character references must match the
2647
     * production for Char.
2648
     */
2649
12.7k
    if (val >= 0x110000) {
2650
220
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2651
220
                "xmlParseCharRef: character reference out of bounds\n",
2652
220
          val);
2653
12.5k
    } else if (IS_CHAR(val)) {
2654
10.4k
        return(val);
2655
10.4k
    } else {
2656
2.05k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2657
2.05k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2658
2.05k
                    val);
2659
2.05k
    }
2660
2.27k
    return(0);
2661
12.7k
}
2662
2663
/**
2664
 * xmlParseStringCharRef:
2665
 * @ctxt:  an XML parser context
2666
 * @str:  a pointer to an index in the string
2667
 *
2668
 * parse Reference declarations, variant parsing from a string rather
2669
 * than an an input flow.
2670
 *
2671
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2672
 *                  '&#x' [0-9a-fA-F]+ ';'
2673
 *
2674
 * [ WFC: Legal Character ]
2675
 * Characters referred to using character references must match the
2676
 * production for Char.
2677
 *
2678
 * Returns the value parsed (as an int), 0 in case of error, str will be
2679
 *         updated to the current value of the index
2680
 */
2681
static int
2682
24.2k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2683
24.2k
    const xmlChar *ptr;
2684
24.2k
    xmlChar cur;
2685
24.2k
    int val = 0;
2686
2687
24.2k
    if ((str == NULL) || (*str == NULL)) return(0);
2688
24.2k
    ptr = *str;
2689
24.2k
    cur = *ptr;
2690
24.2k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2691
5.74k
  ptr += 3;
2692
5.74k
  cur = *ptr;
2693
26.5k
  while (cur != ';') { /* Non input consuming loop */
2694
21.4k
      if ((cur >= '0') && (cur <= '9'))
2695
5.40k
          val = val * 16 + (cur - '0');
2696
16.0k
      else if ((cur >= 'a') && (cur <= 'f'))
2697
3.41k
          val = val * 16 + (cur - 'a') + 10;
2698
12.6k
      else if ((cur >= 'A') && (cur <= 'F'))
2699
12.0k
          val = val * 16 + (cur - 'A') + 10;
2700
669
      else {
2701
669
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2702
669
    val = 0;
2703
669
    break;
2704
669
      }
2705
20.8k
      if (val > 0x110000)
2706
611
          val = 0x110000;
2707
2708
20.8k
      ptr++;
2709
20.8k
      cur = *ptr;
2710
20.8k
  }
2711
5.74k
  if (cur == ';')
2712
5.07k
      ptr++;
2713
18.5k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2714
18.5k
  ptr += 2;
2715
18.5k
  cur = *ptr;
2716
61.4k
  while (cur != ';') { /* Non input consuming loops */
2717
43.7k
      if ((cur >= '0') && (cur <= '9'))
2718
42.8k
          val = val * 10 + (cur - '0');
2719
814
      else {
2720
814
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2721
814
    val = 0;
2722
814
    break;
2723
814
      }
2724
42.8k
      if (val > 0x110000)
2725
414
          val = 0x110000;
2726
2727
42.8k
      ptr++;
2728
42.8k
      cur = *ptr;
2729
42.8k
  }
2730
18.5k
  if (cur == ';')
2731
17.7k
      ptr++;
2732
18.5k
    } else {
2733
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2734
0
  return(0);
2735
0
    }
2736
24.2k
    *str = ptr;
2737
2738
    /*
2739
     * [ WFC: Legal Character ]
2740
     * Characters referred to using character references must match the
2741
     * production for Char.
2742
     */
2743
24.2k
    if (val >= 0x110000) {
2744
209
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2745
209
                "xmlParseStringCharRef: character reference out of bounds\n",
2746
209
                val);
2747
24.0k
    } else if (IS_CHAR(val)) {
2748
21.6k
        return(val);
2749
21.6k
    } else {
2750
2.43k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2751
2.43k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2752
2.43k
        val);
2753
2.43k
    }
2754
2.64k
    return(0);
2755
24.2k
}
2756
2757
/**
2758
 * xmlParserHandlePEReference:
2759
 * @ctxt:  the parser context
2760
 *
2761
 * DEPRECATED: Internal function, do not use.
2762
 *
2763
 * [69] PEReference ::= '%' Name ';'
2764
 *
2765
 * [ WFC: No Recursion ]
2766
 * A parsed entity must not contain a recursive
2767
 * reference to itself, either directly or indirectly.
2768
 *
2769
 * [ WFC: Entity Declared ]
2770
 * In a document without any DTD, a document with only an internal DTD
2771
 * subset which contains no parameter entity references, or a document
2772
 * with "standalone='yes'", ...  ... The declaration of a parameter
2773
 * entity must precede any reference to it...
2774
 *
2775
 * [ VC: Entity Declared ]
2776
 * In a document with an external subset or external parameter entities
2777
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2778
 * must precede any reference to it...
2779
 *
2780
 * [ WFC: In DTD ]
2781
 * Parameter-entity references may only appear in the DTD.
2782
 * NOTE: misleading but this is handled.
2783
 *
2784
 * A PEReference may have been detected in the current input stream
2785
 * the handling is done accordingly to
2786
 *      http://www.w3.org/TR/REC-xml#entproc
2787
 * i.e.
2788
 *   - Included in literal in entity values
2789
 *   - Included as Parameter Entity reference within DTDs
2790
 */
2791
void
2792
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2793
0
    xmlParsePEReference(ctxt);
2794
0
}
2795
2796
/**
2797
 * xmlStringLenDecodeEntities:
2798
 * @ctxt:  the parser context
2799
 * @str:  the input string
2800
 * @len: the string length
2801
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2802
 * @end:  an end marker xmlChar, 0 if none
2803
 * @end2:  an end marker xmlChar, 0 if none
2804
 * @end3:  an end marker xmlChar, 0 if none
2805
 *
2806
 * DEPRECATED: Internal function, don't use.
2807
 *
2808
 * Returns A newly allocated string with the substitution done. The caller
2809
 *      must deallocate it !
2810
 */
2811
xmlChar *
2812
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2813
                           int what ATTRIBUTE_UNUSED,
2814
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2815
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2816
0
        return(NULL);
2817
2818
0
    if ((str[len] != 0) ||
2819
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2820
0
        return(NULL);
2821
2822
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2823
0
}
2824
2825
/**
2826
 * xmlStringDecodeEntities:
2827
 * @ctxt:  the parser context
2828
 * @str:  the input string
2829
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2830
 * @end:  an end marker xmlChar, 0 if none
2831
 * @end2:  an end marker xmlChar, 0 if none
2832
 * @end3:  an end marker xmlChar, 0 if none
2833
 *
2834
 * DEPRECATED: Internal function, don't use.
2835
 *
2836
 * Returns A newly allocated string with the substitution done. The caller
2837
 *      must deallocate it !
2838
 */
2839
xmlChar *
2840
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2841
                        int what ATTRIBUTE_UNUSED,
2842
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2843
0
    if ((ctxt == NULL) || (str == NULL))
2844
0
        return(NULL);
2845
2846
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2847
0
        return(NULL);
2848
2849
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2850
0
}
2851
2852
/************************************************************************
2853
 *                  *
2854
 *    Commodity functions, cleanup needed ?     *
2855
 *                  *
2856
 ************************************************************************/
2857
2858
/**
2859
 * areBlanks:
2860
 * @ctxt:  an XML parser context
2861
 * @str:  a xmlChar *
2862
 * @len:  the size of @str
2863
 * @blank_chars: we know the chars are blanks
2864
 *
2865
 * Is this a sequence of blank chars that one can ignore ?
2866
 *
2867
 * Returns 1 if ignorable 0 otherwise.
2868
 */
2869
2870
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2871
7.04k
                     int blank_chars) {
2872
7.04k
    int i;
2873
7.04k
    xmlNodePtr lastChild;
2874
2875
    /*
2876
     * Don't spend time trying to differentiate them, the same callback is
2877
     * used !
2878
     */
2879
7.04k
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2880
1.77k
  return(0);
2881
2882
    /*
2883
     * Check for xml:space value.
2884
     */
2885
5.27k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2886
5.27k
        (*(ctxt->space) == -2))
2887
1.48k
  return(0);
2888
2889
    /*
2890
     * Check that the string is made of blanks
2891
     */
2892
3.78k
    if (blank_chars == 0) {
2893
7.83k
  for (i = 0;i < len;i++)
2894
6.55k
      if (!(IS_BLANK_CH(str[i]))) return(0);
2895
2.27k
    }
2896
2897
    /*
2898
     * Look if the element is mixed content in the DTD if available
2899
     */
2900
2.78k
    if (ctxt->node == NULL) return(0);
2901
2.78k
    if (ctxt->myDoc != NULL) {
2902
2.78k
        xmlElementPtr elemDecl = NULL;
2903
2.78k
        xmlDocPtr doc = ctxt->myDoc;
2904
2.78k
        const xmlChar *prefix = NULL;
2905
2906
2.78k
        if (ctxt->node->ns)
2907
263
            prefix = ctxt->node->ns->prefix;
2908
2.78k
        if (doc->intSubset != NULL)
2909
900
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2910
900
                                      prefix);
2911
2.78k
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2912
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2913
0
                                      prefix);
2914
2.78k
        if (elemDecl != NULL) {
2915
593
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2916
220
                return(1);
2917
373
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2918
373
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2919
151
                return(0);
2920
373
        }
2921
2.78k
    }
2922
2923
    /*
2924
     * Otherwise, heuristic :-\
2925
     */
2926
2.41k
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2927
2.09k
    if ((ctxt->node->children == NULL) &&
2928
2.09k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2929
2930
1.86k
    lastChild = xmlGetLastChild(ctxt->node);
2931
1.86k
    if (lastChild == NULL) {
2932
1.24k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2933
1.24k
            (ctxt->node->content != NULL)) return(0);
2934
1.24k
    } else if (xmlNodeIsText(lastChild))
2935
205
        return(0);
2936
416
    else if ((ctxt->node->children != NULL) &&
2937
416
             (xmlNodeIsText(ctxt->node->children)))
2938
113
        return(0);
2939
1.54k
    return(1);
2940
1.86k
}
2941
2942
/************************************************************************
2943
 *                  *
2944
 *    Extra stuff for namespace support     *
2945
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2946
 *                  *
2947
 ************************************************************************/
2948
2949
/**
2950
 * xmlSplitQName:
2951
 * @ctxt:  an XML parser context
2952
 * @name:  an XML parser context
2953
 * @prefixOut:  a xmlChar **
2954
 *
2955
 * parse an UTF8 encoded XML qualified name string
2956
 *
2957
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2958
 *
2959
 * [NS 6] Prefix ::= NCName
2960
 *
2961
 * [NS 7] LocalPart ::= NCName
2962
 *
2963
 * Returns the local part, and prefix is updated
2964
 *   to get the Prefix if any.
2965
 */
2966
2967
xmlChar *
2968
118k
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
2969
118k
    xmlChar buf[XML_MAX_NAMELEN + 5];
2970
118k
    xmlChar *buffer = NULL;
2971
118k
    int len = 0;
2972
118k
    int max = XML_MAX_NAMELEN;
2973
118k
    xmlChar *ret = NULL;
2974
118k
    xmlChar *prefix;
2975
118k
    const xmlChar *cur = name;
2976
118k
    int c;
2977
2978
118k
    if (prefixOut == NULL) return(NULL);
2979
118k
    *prefixOut = NULL;
2980
2981
118k
    if (cur == NULL) return(NULL);
2982
2983
#ifndef XML_XML_NAMESPACE
2984
    /* xml: prefix is not really a namespace */
2985
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2986
        (cur[2] == 'l') && (cur[3] == ':'))
2987
  return(xmlStrdup(name));
2988
#endif
2989
2990
    /* nasty but well=formed */
2991
118k
    if (cur[0] == ':')
2992
9.89k
  return(xmlStrdup(name));
2993
2994
108k
    c = *cur++;
2995
433k
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2996
324k
  buf[len++] = c;
2997
324k
  c = *cur++;
2998
324k
    }
2999
108k
    if (len >= max) {
3000
  /*
3001
   * Okay someone managed to make a huge name, so he's ready to pay
3002
   * for the processing speed.
3003
   */
3004
517
  max = len * 2;
3005
3006
517
  buffer = (xmlChar *) xmlMallocAtomic(max);
3007
517
  if (buffer == NULL) {
3008
0
      xmlErrMemory(ctxt);
3009
0
      return(NULL);
3010
0
  }
3011
517
  memcpy(buffer, buf, len);
3012
55.5k
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3013
55.0k
      if (len + 10 > max) {
3014
409
          xmlChar *tmp;
3015
3016
409
    max *= 2;
3017
409
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3018
409
    if (tmp == NULL) {
3019
0
        xmlFree(buffer);
3020
0
        xmlErrMemory(ctxt);
3021
0
        return(NULL);
3022
0
    }
3023
409
    buffer = tmp;
3024
409
      }
3025
55.0k
      buffer[len++] = c;
3026
55.0k
      c = *cur++;
3027
55.0k
  }
3028
517
  buffer[len] = 0;
3029
517
    }
3030
3031
108k
    if ((c == ':') && (*cur == 0)) {
3032
804
        if (buffer != NULL)
3033
197
      xmlFree(buffer);
3034
804
  return(xmlStrdup(name));
3035
804
    }
3036
3037
108k
    if (buffer == NULL) {
3038
107k
  ret = xmlStrndup(buf, len);
3039
107k
        if (ret == NULL) {
3040
0
      xmlErrMemory(ctxt);
3041
0
      return(NULL);
3042
0
        }
3043
107k
    } else {
3044
320
  ret = buffer;
3045
320
  buffer = NULL;
3046
320
  max = XML_MAX_NAMELEN;
3047
320
    }
3048
3049
3050
108k
    if (c == ':') {
3051
14.8k
  c = *cur;
3052
14.8k
        prefix = ret;
3053
14.8k
  if (c == 0) {
3054
0
      ret = xmlStrndup(BAD_CAST "", 0);
3055
0
            if (ret == NULL) {
3056
0
                xmlFree(prefix);
3057
0
                return(NULL);
3058
0
            }
3059
0
            *prefixOut = prefix;
3060
0
            return(ret);
3061
0
  }
3062
14.8k
  len = 0;
3063
3064
  /*
3065
   * Check that the first character is proper to start
3066
   * a new name
3067
   */
3068
14.8k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3069
14.8k
        ((c >= 0x41) && (c <= 0x5A)) ||
3070
14.8k
        (c == '_') || (c == ':'))) {
3071
3.73k
      int l;
3072
3.73k
      int first = CUR_SCHAR(cur, l);
3073
3074
3.73k
      if (!IS_LETTER(first) && (first != '_')) {
3075
2.44k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3076
2.44k
          "Name %s is not XML Namespace compliant\n",
3077
2.44k
          name);
3078
2.44k
      }
3079
3.73k
  }
3080
14.8k
  cur++;
3081
3082
143k
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3083
128k
      buf[len++] = c;
3084
128k
      c = *cur++;
3085
128k
  }
3086
14.8k
  if (len >= max) {
3087
      /*
3088
       * Okay someone managed to make a huge name, so he's ready to pay
3089
       * for the processing speed.
3090
       */
3091
753
      max = len * 2;
3092
3093
753
      buffer = (xmlChar *) xmlMallocAtomic(max);
3094
753
      if (buffer == NULL) {
3095
0
          xmlErrMemory(ctxt);
3096
0
                xmlFree(prefix);
3097
0
    return(NULL);
3098
0
      }
3099
753
      memcpy(buffer, buf, len);
3100
28.4k
      while (c != 0) { /* tested bigname2.xml */
3101
27.6k
    if (len + 10 > max) {
3102
236
        xmlChar *tmp;
3103
3104
236
        max *= 2;
3105
236
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3106
236
        if (tmp == NULL) {
3107
0
      xmlErrMemory(ctxt);
3108
0
                        xmlFree(prefix);
3109
0
      xmlFree(buffer);
3110
0
      return(NULL);
3111
0
        }
3112
236
        buffer = tmp;
3113
236
    }
3114
27.6k
    buffer[len++] = c;
3115
27.6k
    c = *cur++;
3116
27.6k
      }
3117
753
      buffer[len] = 0;
3118
753
  }
3119
3120
14.8k
  if (buffer == NULL) {
3121
14.0k
      ret = xmlStrndup(buf, len);
3122
14.0k
            if (ret == NULL) {
3123
0
                xmlFree(prefix);
3124
0
                return(NULL);
3125
0
            }
3126
14.0k
  } else {
3127
753
      ret = buffer;
3128
753
  }
3129
3130
14.8k
        *prefixOut = prefix;
3131
14.8k
    }
3132
3133
108k
    return(ret);
3134
108k
}
3135
3136
/************************************************************************
3137
 *                  *
3138
 *      The parser itself       *
3139
 *  Relates to http://www.w3.org/TR/REC-xml       *
3140
 *                  *
3141
 ************************************************************************/
3142
3143
/************************************************************************
3144
 *                  *
3145
 *  Routines to parse Name, NCName and NmToken      *
3146
 *                  *
3147
 ************************************************************************/
3148
3149
/*
3150
 * The two following functions are related to the change of accepted
3151
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3152
 * They correspond to the modified production [4] and the new production [4a]
3153
 * changes in that revision. Also note that the macros used for the
3154
 * productions Letter, Digit, CombiningChar and Extender are not needed
3155
 * anymore.
3156
 * We still keep compatibility to pre-revision5 parsing semantic if the
3157
 * new XML_PARSE_OLD10 option is given to the parser.
3158
 */
3159
static int
3160
303k
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3161
303k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3162
        /*
3163
   * Use the new checks of production [4] [4a] amd [5] of the
3164
   * Update 5 of XML-1.0
3165
   */
3166
283k
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3167
283k
      (((c >= 'a') && (c <= 'z')) ||
3168
282k
       ((c >= 'A') && (c <= 'Z')) ||
3169
282k
       (c == '_') || (c == ':') ||
3170
282k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3171
282k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3172
282k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3173
282k
       ((c >= 0x370) && (c <= 0x37D)) ||
3174
282k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3175
282k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3176
282k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3177
282k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3178
282k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3179
282k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3180
282k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3181
282k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3182
227k
      return(1);
3183
283k
    } else {
3184
19.5k
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3185
16.7k
      return(1);
3186
19.5k
    }
3187
59.2k
    return(0);
3188
303k
}
3189
3190
static int
3191
526k
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3192
526k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3193
        /*
3194
   * Use the new checks of production [4] [4a] amd [5] of the
3195
   * Update 5 of XML-1.0
3196
   */
3197
495k
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3198
495k
      (((c >= 'a') && (c <= 'z')) ||
3199
487k
       ((c >= 'A') && (c <= 'Z')) ||
3200
487k
       ((c >= '0') && (c <= '9')) || /* !start */
3201
487k
       (c == '_') || (c == ':') ||
3202
487k
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3203
487k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3204
487k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3205
487k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3206
487k
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3207
487k
       ((c >= 0x370) && (c <= 0x37D)) ||
3208
487k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3209
487k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3210
487k
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3211
487k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3212
487k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3213
487k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3214
487k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3215
487k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3216
487k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3217
261k
       return(1);
3218
495k
    } else {
3219
30.2k
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3220
30.2k
            (c == '.') || (c == '-') ||
3221
30.2k
      (c == '_') || (c == ':') ||
3222
30.2k
      (IS_COMBINING(c)) ||
3223
30.2k
      (IS_EXTENDER(c)))
3224
12.4k
      return(1);
3225
30.2k
    }
3226
252k
    return(0);
3227
526k
}
3228
3229
static const xmlChar *
3230
109k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3231
109k
    const xmlChar *ret;
3232
109k
    int len = 0, l;
3233
109k
    int c;
3234
109k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3235
0
                    XML_MAX_TEXT_LENGTH :
3236
109k
                    XML_MAX_NAME_LENGTH;
3237
3238
    /*
3239
     * Handler for more complex cases
3240
     */
3241
109k
    c = CUR_CHAR(l);
3242
109k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3243
        /*
3244
   * Use the new checks of production [4] [4a] amd [5] of the
3245
   * Update 5 of XML-1.0
3246
   */
3247
97.4k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3248
97.4k
      (!(((c >= 'a') && (c <= 'z')) ||
3249
86.7k
         ((c >= 'A') && (c <= 'Z')) ||
3250
86.7k
         (c == '_') || (c == ':') ||
3251
86.7k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3252
86.7k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3253
86.7k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3254
86.7k
         ((c >= 0x370) && (c <= 0x37D)) ||
3255
86.7k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3256
86.7k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3257
86.7k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3258
86.7k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3259
86.7k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3260
86.7k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3261
86.7k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3262
87.5k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3263
87.5k
      return(NULL);
3264
87.5k
  }
3265
9.97k
  len += l;
3266
9.97k
  NEXTL(l);
3267
9.97k
  c = CUR_CHAR(l);
3268
159k
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3269
159k
         (((c >= 'a') && (c <= 'z')) ||
3270
156k
          ((c >= 'A') && (c <= 'Z')) ||
3271
156k
          ((c >= '0') && (c <= '9')) || /* !start */
3272
156k
          (c == '_') || (c == ':') ||
3273
156k
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3274
156k
          ((c >= 0xC0) && (c <= 0xD6)) ||
3275
156k
          ((c >= 0xD8) && (c <= 0xF6)) ||
3276
156k
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3277
156k
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3278
156k
          ((c >= 0x370) && (c <= 0x37D)) ||
3279
156k
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3280
156k
          ((c >= 0x200C) && (c <= 0x200D)) ||
3281
156k
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3282
156k
          ((c >= 0x2070) && (c <= 0x218F)) ||
3283
156k
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3284
156k
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3285
156k
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3286
156k
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3287
156k
          ((c >= 0x10000) && (c <= 0xEFFFF))
3288
156k
    )) {
3289
149k
            if (len <= INT_MAX - l)
3290
149k
          len += l;
3291
149k
      NEXTL(l);
3292
149k
      c = CUR_CHAR(l);
3293
149k
  }
3294
12.4k
    } else {
3295
12.4k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3296
12.4k
      (!IS_LETTER(c) && (c != '_') &&
3297
11.0k
       (c != ':'))) {
3298
8.53k
      return(NULL);
3299
8.53k
  }
3300
3.95k
  len += l;
3301
3.95k
  NEXTL(l);
3302
3.95k
  c = CUR_CHAR(l);
3303
3304
12.3k
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3305
12.3k
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3306
11.2k
    (c == '.') || (c == '-') ||
3307
11.2k
    (c == '_') || (c == ':') ||
3308
11.2k
    (IS_COMBINING(c)) ||
3309
11.2k
    (IS_EXTENDER(c)))) {
3310
8.36k
            if (len <= INT_MAX - l)
3311
8.36k
          len += l;
3312
8.36k
      NEXTL(l);
3313
8.36k
      c = CUR_CHAR(l);
3314
8.36k
  }
3315
3.95k
    }
3316
13.9k
    if (len > maxLength) {
3317
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3318
0
        return(NULL);
3319
0
    }
3320
13.9k
    if (ctxt->input->cur - ctxt->input->base < len) {
3321
        /*
3322
         * There were a couple of bugs where PERefs lead to to a change
3323
         * of the buffer. Check the buffer size to avoid passing an invalid
3324
         * pointer to xmlDictLookup.
3325
         */
3326
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3327
0
                    "unexpected change of input buffer");
3328
0
        return (NULL);
3329
0
    }
3330
13.9k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3331
195
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3332
13.7k
    else
3333
13.7k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3334
13.9k
    if (ret == NULL)
3335
0
        xmlErrMemory(ctxt);
3336
13.9k
    return(ret);
3337
13.9k
}
3338
3339
/**
3340
 * xmlParseName:
3341
 * @ctxt:  an XML parser context
3342
 *
3343
 * DEPRECATED: Internal function, don't use.
3344
 *
3345
 * parse an XML name.
3346
 *
3347
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3348
 *                  CombiningChar | Extender
3349
 *
3350
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3351
 *
3352
 * [6] Names ::= Name (#x20 Name)*
3353
 *
3354
 * Returns the Name parsed or NULL
3355
 */
3356
3357
const xmlChar *
3358
441k
xmlParseName(xmlParserCtxtPtr ctxt) {
3359
441k
    const xmlChar *in;
3360
441k
    const xmlChar *ret;
3361
441k
    size_t count = 0;
3362
441k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3363
0
                       XML_MAX_TEXT_LENGTH :
3364
441k
                       XML_MAX_NAME_LENGTH;
3365
3366
441k
    GROW;
3367
3368
    /*
3369
     * Accelerator for simple ASCII names
3370
     */
3371
441k
    in = ctxt->input->cur;
3372
441k
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3373
441k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3374
441k
  (*in == '_') || (*in == ':')) {
3375
336k
  in++;
3376
656k
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3377
656k
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3378
656k
         ((*in >= 0x30) && (*in <= 0x39)) ||
3379
656k
         (*in == '_') || (*in == '-') ||
3380
656k
         (*in == ':') || (*in == '.'))
3381
320k
      in++;
3382
336k
  if ((*in > 0) && (*in < 0x80)) {
3383
331k
      count = in - ctxt->input->cur;
3384
331k
            if (count > maxLength) {
3385
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3386
0
                return(NULL);
3387
0
            }
3388
331k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3389
331k
      ctxt->input->cur = in;
3390
331k
      ctxt->input->col += count;
3391
331k
      if (ret == NULL)
3392
0
          xmlErrMemory(ctxt);
3393
331k
      return(ret);
3394
331k
  }
3395
336k
    }
3396
    /* accelerator for special cases */
3397
109k
    return(xmlParseNameComplex(ctxt));
3398
441k
}
3399
3400
static xmlHashedString
3401
91.5k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3402
91.5k
    xmlHashedString ret;
3403
91.5k
    int len = 0, l;
3404
91.5k
    int c;
3405
91.5k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3406
0
                    XML_MAX_TEXT_LENGTH :
3407
91.5k
                    XML_MAX_NAME_LENGTH;
3408
91.5k
    size_t startPosition = 0;
3409
3410
91.5k
    ret.name = NULL;
3411
91.5k
    ret.hashValue = 0;
3412
3413
    /*
3414
     * Handler for more complex cases
3415
     */
3416
91.5k
    startPosition = CUR_PTR - BASE_PTR;
3417
91.5k
    c = CUR_CHAR(l);
3418
91.5k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3419
91.5k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3420
86.2k
  return(ret);
3421
86.2k
    }
3422
3423
67.5k
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3424
67.5k
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3425
62.1k
        if (len <= INT_MAX - l)
3426
62.1k
      len += l;
3427
62.1k
  NEXTL(l);
3428
62.1k
  c = CUR_CHAR(l);
3429
62.1k
    }
3430
5.35k
    if (len > maxLength) {
3431
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3432
0
        return(ret);
3433
0
    }
3434
5.35k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3435
5.35k
    if (ret.name == NULL)
3436
0
        xmlErrMemory(ctxt);
3437
5.35k
    return(ret);
3438
5.35k
}
3439
3440
/**
3441
 * xmlParseNCName:
3442
 * @ctxt:  an XML parser context
3443
 * @len:  length of the string parsed
3444
 *
3445
 * parse an XML name.
3446
 *
3447
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3448
 *                      CombiningChar | Extender
3449
 *
3450
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3451
 *
3452
 * Returns the Name parsed or NULL
3453
 */
3454
3455
static xmlHashedString
3456
179k
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3457
179k
    const xmlChar *in, *e;
3458
179k
    xmlHashedString ret;
3459
179k
    size_t count = 0;
3460
179k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3461
0
                       XML_MAX_TEXT_LENGTH :
3462
179k
                       XML_MAX_NAME_LENGTH;
3463
3464
179k
    ret.name = NULL;
3465
3466
    /*
3467
     * Accelerator for simple ASCII names
3468
     */
3469
179k
    in = ctxt->input->cur;
3470
179k
    e = ctxt->input->end;
3471
179k
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3472
179k
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3473
179k
   (*in == '_')) && (in < e)) {
3474
89.0k
  in++;
3475
152k
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3476
152k
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3477
152k
          ((*in >= 0x30) && (*in <= 0x39)) ||
3478
152k
          (*in == '_') || (*in == '-') ||
3479
152k
          (*in == '.')) && (in < e))
3480
63.1k
      in++;
3481
89.0k
  if (in >= e)
3482
25
      goto complex;
3483
89.0k
  if ((*in > 0) && (*in < 0x80)) {
3484
88.1k
      count = in - ctxt->input->cur;
3485
88.1k
            if (count > maxLength) {
3486
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3487
0
                return(ret);
3488
0
            }
3489
88.1k
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3490
88.1k
      ctxt->input->cur = in;
3491
88.1k
      ctxt->input->col += count;
3492
88.1k
      if (ret.name == NULL) {
3493
0
          xmlErrMemory(ctxt);
3494
0
      }
3495
88.1k
      return(ret);
3496
88.1k
  }
3497
89.0k
    }
3498
91.5k
complex:
3499
91.5k
    return(xmlParseNCNameComplex(ctxt));
3500
179k
}
3501
3502
/**
3503
 * xmlParseNameAndCompare:
3504
 * @ctxt:  an XML parser context
3505
 *
3506
 * parse an XML name and compares for match
3507
 * (specialized for endtag parsing)
3508
 *
3509
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3510
 * and the name for mismatch
3511
 */
3512
3513
static const xmlChar *
3514
12.1k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3515
12.1k
    register const xmlChar *cmp = other;
3516
12.1k
    register const xmlChar *in;
3517
12.1k
    const xmlChar *ret;
3518
3519
12.1k
    GROW;
3520
3521
12.1k
    in = ctxt->input->cur;
3522
34.1k
    while (*in != 0 && *in == *cmp) {
3523
22.0k
  ++in;
3524
22.0k
  ++cmp;
3525
22.0k
    }
3526
12.1k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3527
  /* success */
3528
2.65k
  ctxt->input->col += in - ctxt->input->cur;
3529
2.65k
  ctxt->input->cur = in;
3530
2.65k
  return (const xmlChar*) 1;
3531
2.65k
    }
3532
    /* failure (or end of input buffer), check with full function */
3533
9.51k
    ret = xmlParseName (ctxt);
3534
    /* strings coming from the dictionary direct compare possible */
3535
9.51k
    if (ret == other) {
3536
781
  return (const xmlChar*) 1;
3537
781
    }
3538
8.73k
    return ret;
3539
9.51k
}
3540
3541
/**
3542
 * xmlParseStringName:
3543
 * @ctxt:  an XML parser context
3544
 * @str:  a pointer to the string pointer (IN/OUT)
3545
 *
3546
 * parse an XML name.
3547
 *
3548
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3549
 *                  CombiningChar | Extender
3550
 *
3551
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3552
 *
3553
 * [6] Names ::= Name (#x20 Name)*
3554
 *
3555
 * Returns the Name parsed or NULL. The @str pointer
3556
 * is updated to the current location in the string.
3557
 */
3558
3559
static xmlChar *
3560
218k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3561
218k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3562
218k
    xmlChar *ret;
3563
218k
    const xmlChar *cur = *str;
3564
218k
    int len = 0, l;
3565
218k
    int c;
3566
218k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3567
0
                    XML_MAX_TEXT_LENGTH :
3568
218k
                    XML_MAX_NAME_LENGTH;
3569
3570
218k
    c = CUR_SCHAR(cur, l);
3571
218k
    if (!xmlIsNameStartChar(ctxt, c)) {
3572
1.15k
  return(NULL);
3573
1.15k
    }
3574
3575
217k
    COPY_BUF(buf, len, c);
3576
217k
    cur += l;
3577
217k
    c = CUR_SCHAR(cur, l);
3578
323k
    while (xmlIsNameChar(ctxt, c)) {
3579
106k
  COPY_BUF(buf, len, c);
3580
106k
  cur += l;
3581
106k
  c = CUR_SCHAR(cur, l);
3582
106k
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3583
      /*
3584
       * Okay someone managed to make a huge name, so he's ready to pay
3585
       * for the processing speed.
3586
       */
3587
310
      xmlChar *buffer;
3588
310
      int max = len * 2;
3589
3590
310
      buffer = (xmlChar *) xmlMallocAtomic(max);
3591
310
      if (buffer == NULL) {
3592
0
          xmlErrMemory(ctxt);
3593
0
    return(NULL);
3594
0
      }
3595
310
      memcpy(buffer, buf, len);
3596
10.1k
      while (xmlIsNameChar(ctxt, c)) {
3597
9.79k
    if (len + 10 > max) {
3598
237
        xmlChar *tmp;
3599
3600
237
        max *= 2;
3601
237
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3602
237
        if (tmp == NULL) {
3603
0
      xmlErrMemory(ctxt);
3604
0
      xmlFree(buffer);
3605
0
      return(NULL);
3606
0
        }
3607
237
        buffer = tmp;
3608
237
    }
3609
9.79k
    COPY_BUF(buffer, len, c);
3610
9.79k
    cur += l;
3611
9.79k
    c = CUR_SCHAR(cur, l);
3612
9.79k
                if (len > maxLength) {
3613
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3614
0
                    xmlFree(buffer);
3615
0
                    return(NULL);
3616
0
                }
3617
9.79k
      }
3618
310
      buffer[len] = 0;
3619
310
      *str = cur;
3620
310
      return(buffer);
3621
310
  }
3622
106k
    }
3623
216k
    if (len > maxLength) {
3624
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3625
0
        return(NULL);
3626
0
    }
3627
216k
    *str = cur;
3628
216k
    ret = xmlStrndup(buf, len);
3629
216k
    if (ret == NULL)
3630
0
        xmlErrMemory(ctxt);
3631
216k
    return(ret);
3632
216k
}
3633
3634
/**
3635
 * xmlParseNmtoken:
3636
 * @ctxt:  an XML parser context
3637
 *
3638
 * DEPRECATED: Internal function, don't use.
3639
 *
3640
 * parse an XML Nmtoken.
3641
 *
3642
 * [7] Nmtoken ::= (NameChar)+
3643
 *
3644
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3645
 *
3646
 * Returns the Nmtoken parsed or NULL
3647
 */
3648
3649
xmlChar *
3650
31.4k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3651
31.4k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3652
31.4k
    xmlChar *ret;
3653
31.4k
    int len = 0, l;
3654
31.4k
    int c;
3655
31.4k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3656
0
                    XML_MAX_TEXT_LENGTH :
3657
31.4k
                    XML_MAX_NAME_LENGTH;
3658
3659
31.4k
    c = CUR_CHAR(l);
3660
3661
91.0k
    while (xmlIsNameChar(ctxt, c)) {
3662
60.2k
  COPY_BUF(buf, len, c);
3663
60.2k
  NEXTL(l);
3664
60.2k
  c = CUR_CHAR(l);
3665
60.2k
  if (len >= XML_MAX_NAMELEN) {
3666
      /*
3667
       * Okay someone managed to make a huge token, so he's ready to pay
3668
       * for the processing speed.
3669
       */
3670
589
      xmlChar *buffer;
3671
589
      int max = len * 2;
3672
3673
589
      buffer = (xmlChar *) xmlMallocAtomic(max);
3674
589
      if (buffer == NULL) {
3675
0
          xmlErrMemory(ctxt);
3676
0
    return(NULL);
3677
0
      }
3678
589
      memcpy(buffer, buf, len);
3679
35.3k
      while (xmlIsNameChar(ctxt, c)) {
3680
34.7k
    if (len + 10 > max) {
3681
374
        xmlChar *tmp;
3682
3683
374
        max *= 2;
3684
374
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3685
374
        if (tmp == NULL) {
3686
0
      xmlErrMemory(ctxt);
3687
0
      xmlFree(buffer);
3688
0
      return(NULL);
3689
0
        }
3690
374
        buffer = tmp;
3691
374
    }
3692
34.7k
    COPY_BUF(buffer, len, c);
3693
34.7k
                if (len > maxLength) {
3694
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3695
0
                    xmlFree(buffer);
3696
0
                    return(NULL);
3697
0
                }
3698
34.7k
    NEXTL(l);
3699
34.7k
    c = CUR_CHAR(l);
3700
34.7k
      }
3701
589
      buffer[len] = 0;
3702
589
      return(buffer);
3703
589
  }
3704
60.2k
    }
3705
30.8k
    if (len == 0)
3706
1.50k
        return(NULL);
3707
29.3k
    if (len > maxLength) {
3708
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3709
0
        return(NULL);
3710
0
    }
3711
29.3k
    ret = xmlStrndup(buf, len);
3712
29.3k
    if (ret == NULL)
3713
0
        xmlErrMemory(ctxt);
3714
29.3k
    return(ret);
3715
29.3k
}
3716
3717
/**
3718
 * xmlExpandPEsInEntityValue:
3719
 * @ctxt:  parser context
3720
 * @buf:  string buffer
3721
 * @str:  entity value
3722
 * @length:  size of entity value
3723
 * @depth:  nesting depth
3724
 *
3725
 * Validate an entity value and expand parameter entities.
3726
 */
3727
static void
3728
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3729
12.8k
                          const xmlChar *str, int length, int depth) {
3730
12.8k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3731
12.8k
    const xmlChar *end, *chunk;
3732
12.8k
    int c, l;
3733
3734
12.8k
    if (str == NULL)
3735
0
        return;
3736
3737
12.8k
    depth += 1;
3738
12.8k
    if (depth > maxDepth) {
3739
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3740
0
                       "Maximum entity nesting depth exceeded");
3741
0
  return;
3742
0
    }
3743
3744
12.8k
    end = str + length;
3745
12.8k
    chunk = str;
3746
3747
295k
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3748
288k
        c = *str;
3749
3750
288k
        if (c >= 0x80) {
3751
89.8k
            l = xmlUTF8MultibyteLen(ctxt, str,
3752
89.8k
                    "invalid character in entity value\n");
3753
89.8k
            if (l == 0) {
3754
6.95k
                if (chunk < str)
3755
1.35k
                    xmlSBufAddString(buf, chunk, str - chunk);
3756
6.95k
                xmlSBufAddReplChar(buf);
3757
6.95k
                str += 1;
3758
6.95k
                chunk = str;
3759
82.9k
            } else {
3760
82.9k
                str += l;
3761
82.9k
            }
3762
198k
        } else if (c == '&') {
3763
17.0k
            if (str[1] == '#') {
3764
6.28k
                if (chunk < str)
3765
3.95k
                    xmlSBufAddString(buf, chunk, str - chunk);
3766
3767
6.28k
                c = xmlParseStringCharRef(ctxt, &str);
3768
6.28k
                if (c == 0)
3769
2.64k
                    return;
3770
3771
3.64k
                xmlSBufAddChar(buf, c);
3772
3773
3.64k
                chunk = str;
3774
10.7k
            } else {
3775
10.7k
                xmlChar *name;
3776
3777
                /*
3778
                 * General entity references are checked for
3779
                 * syntactic validity.
3780
                 */
3781
10.7k
                str++;
3782
10.7k
                name = xmlParseStringName(ctxt, &str);
3783
3784
10.7k
                if ((name == NULL) || (*str++ != ';')) {
3785
1.35k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3786
1.35k
                            "EntityValue: '&' forbidden except for entities "
3787
1.35k
                            "references\n");
3788
1.35k
                    xmlFree(name);
3789
1.35k
                    return;
3790
1.35k
                }
3791
3792
9.43k
                xmlFree(name);
3793
9.43k
            }
3794
181k
        } else if (c == '%') {
3795
2.49k
            xmlEntityPtr ent;
3796
3797
2.49k
            if (chunk < str)
3798
1.71k
                xmlSBufAddString(buf, chunk, str - chunk);
3799
3800
2.49k
            ent = xmlParseStringPEReference(ctxt, &str);
3801
2.49k
            if (ent == NULL)
3802
2.23k
                return;
3803
3804
260
            if (!PARSER_EXTERNAL(ctxt)) {
3805
260
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3806
260
                return;
3807
260
            }
3808
3809
0
            if (ent->content == NULL) {
3810
                /*
3811
                 * Note: external parsed entities will not be loaded,
3812
                 * it is not required for a non-validating parser to
3813
                 * complete external PEReferences coming from the
3814
                 * internal subset
3815
                 */
3816
0
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3817
0
                    ((ctxt->replaceEntities) ||
3818
0
                     (ctxt->validate))) {
3819
0
                    xmlLoadEntityContent(ctxt, ent);
3820
0
                } else {
3821
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3822
0
                                  "not validating will not read content for "
3823
0
                                  "PE entity %s\n", ent->name, NULL);
3824
0
                }
3825
0
            }
3826
3827
            /*
3828
             * TODO: Skip if ent->content is still NULL.
3829
             */
3830
3831
0
            if (xmlParserEntityCheck(ctxt, ent->length))
3832
0
                return;
3833
3834
0
            if (ent->flags & XML_ENT_EXPANDING) {
3835
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3836
0
                xmlHaltParser(ctxt);
3837
0
                return;
3838
0
            }
3839
3840
0
            ent->flags |= XML_ENT_EXPANDING;
3841
0
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3842
0
                                      depth);
3843
0
            ent->flags &= ~XML_ENT_EXPANDING;
3844
3845
0
            chunk = str;
3846
179k
        } else {
3847
            /* Normal ASCII char */
3848
179k
            if (!IS_BYTE_CHAR(c)) {
3849
2.21k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3850
2.21k
                        "invalid character in entity value\n");
3851
2.21k
                if (chunk < str)
3852
439
                    xmlSBufAddString(buf, chunk, str - chunk);
3853
2.21k
                xmlSBufAddReplChar(buf);
3854
2.21k
                str += 1;
3855
2.21k
                chunk = str;
3856
177k
            } else {
3857
177k
                str += 1;
3858
177k
            }
3859
179k
        }
3860
288k
    }
3861
3862
6.35k
    if (chunk < str)
3863
5.88k
        xmlSBufAddString(buf, chunk, str - chunk);
3864
3865
6.35k
    return;
3866
12.8k
}
3867
3868
/**
3869
 * xmlParseEntityValue:
3870
 * @ctxt:  an XML parser context
3871
 * @orig:  if non-NULL store a copy of the original entity value
3872
 *
3873
 * DEPRECATED: Internal function, don't use.
3874
 *
3875
 * parse a value for ENTITY declarations
3876
 *
3877
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3878
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3879
 *
3880
 * Returns the EntityValue parsed with reference substituted or NULL
3881
 */
3882
xmlChar *
3883
13.8k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3884
13.8k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3885
0
                         XML_MAX_HUGE_LENGTH :
3886
13.8k
                         XML_MAX_TEXT_LENGTH;
3887
13.8k
    xmlSBuf buf;
3888
13.8k
    const xmlChar *start;
3889
13.8k
    int quote, length;
3890
3891
13.8k
    xmlSBufInit(&buf, maxLength);
3892
3893
13.8k
    GROW;
3894
3895
13.8k
    quote = CUR;
3896
13.8k
    if ((quote != '"') && (quote != '\'')) {
3897
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3898
0
  return(NULL);
3899
0
    }
3900
13.8k
    CUR_PTR++;
3901
3902
13.8k
    length = 0;
3903
3904
    /*
3905
     * Copy raw content of the entity into a buffer
3906
     */
3907
582k
    while (1) {
3908
582k
        int c;
3909
3910
582k
        if (PARSER_STOPPED(ctxt))
3911
0
            goto error;
3912
3913
582k
        if (CUR_PTR >= ctxt->input->end) {
3914
957
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3915
957
            goto error;
3916
957
        }
3917
3918
581k
        c = CUR;
3919
3920
581k
        if (c == 0) {
3921
7
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3922
7
                    "invalid character in entity value\n");
3923
7
            goto error;
3924
7
        }
3925
581k
        if (c == quote)
3926
12.8k
            break;
3927
569k
        NEXTL(1);
3928
569k
        length += 1;
3929
3930
        /*
3931
         * TODO: Check growth threshold
3932
         */
3933
569k
        if (ctxt->input->end - CUR_PTR < 10)
3934
23.5k
            GROW;
3935
569k
    }
3936
3937
12.8k
    start = CUR_PTR - length;
3938
3939
12.8k
    if (orig != NULL) {
3940
12.8k
        *orig = xmlStrndup(start, length);
3941
12.8k
        if (*orig == NULL)
3942
0
            xmlErrMemory(ctxt);
3943
12.8k
    }
3944
3945
12.8k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3946
3947
12.8k
    NEXTL(1);
3948
3949
12.8k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3950
3951
964
error:
3952
964
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3953
964
    return(NULL);
3954
13.8k
}
3955
3956
/**
3957
 * xmlCheckEntityInAttValue:
3958
 * @ctxt:  parser context
3959
 * @pent:  entity
3960
 * @depth:  nesting depth
3961
 *
3962
 * Check an entity reference in an attribute value for validity
3963
 * without expanding it.
3964
 */
3965
static void
3966
868
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3967
868
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3968
868
    const xmlChar *str;
3969
868
    unsigned long expandedSize = pent->length;
3970
868
    int c, flags;
3971
3972
868
    depth += 1;
3973
868
    if (depth > maxDepth) {
3974
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3975
0
                       "Maximum entity nesting depth exceeded");
3976
0
  return;
3977
0
    }
3978
3979
868
    if (pent->flags & XML_ENT_EXPANDING) {
3980
9
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3981
9
        xmlHaltParser(ctxt);
3982
9
        return;
3983
9
    }
3984
3985
    /*
3986
     * If we're parsing a default attribute value in DTD content,
3987
     * the entity might reference other entities which weren't
3988
     * defined yet, so the check isn't reliable.
3989
     */
3990
859
    if (ctxt->inSubset == 0)
3991
823
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3992
36
    else
3993
36
        flags = XML_ENT_VALIDATED;
3994
3995
859
    str = pent->content;
3996
859
    if (str == NULL)
3997
0
        goto done;
3998
3999
    /*
4000
     * Note that entity values are already validated. We only check
4001
     * for illegal less-than signs and compute the expanded size
4002
     * of the entity. No special handling for multi-byte characters
4003
     * is needed.
4004
     */
4005
49.2k
    while (!PARSER_STOPPED(ctxt)) {
4006
49.2k
        c = *str;
4007
4008
49.2k
  if (c != '&') {
4009
43.2k
            if (c == 0)
4010
838
                break;
4011
4012
42.3k
            if (c == '<')
4013
496
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4014
496
                        "'<' in entity '%s' is not allowed in attributes "
4015
496
                        "values\n", pent->name);
4016
4017
42.3k
            str += 1;
4018
42.3k
        } else if (str[1] == '#') {
4019
293
            int val;
4020
4021
293
      val = xmlParseStringCharRef(ctxt, &str);
4022
293
      if (val == 0) {
4023
2
                pent->content[0] = 0;
4024
2
                break;
4025
2
            }
4026
5.74k
  } else {
4027
5.74k
            xmlChar *name;
4028
5.74k
            xmlEntityPtr ent;
4029
4030
5.74k
      name = xmlParseStringEntityRef(ctxt, &str);
4031
5.74k
      if (name == NULL) {
4032
5
                pent->content[0] = 0;
4033
5
                break;
4034
5
            }
4035
4036
5.73k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4037
5.73k
            xmlFree(name);
4038
4039
5.73k
            if ((ent != NULL) &&
4040
5.73k
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4041
4.57k
                if ((ent->flags & flags) != flags) {
4042
297
                    pent->flags |= XML_ENT_EXPANDING;
4043
297
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
4044
297
                    pent->flags &= ~XML_ENT_EXPANDING;
4045
297
                }
4046
4047
4.57k
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4048
4.57k
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4049
4.57k
            }
4050
5.73k
        }
4051
49.2k
    }
4052
4053
859
done:
4054
859
    if (ctxt->inSubset == 0)
4055
823
        pent->expandedSize = expandedSize;
4056
4057
859
    pent->flags |= flags;
4058
859
}
4059
4060
/**
4061
 * xmlExpandEntityInAttValue:
4062
 * @ctxt:  parser context
4063
 * @buf:  string buffer
4064
 * @str:  entity or attribute value
4065
 * @pent:  entity for entity value, NULL for attribute values
4066
 * @normalize:  whether to collapse whitespace
4067
 * @inSpace:  whitespace state
4068
 * @depth:  nesting depth
4069
 * @check:  whether to check for amplification
4070
 *
4071
 * Expand general entity references in an entity or attribute value.
4072
 * Perform attribute value normalization.
4073
 */
4074
static void
4075
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4076
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
4077
156k
                          int *inSpace, int depth, int check) {
4078
156k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4079
156k
    int c, chunkSize;
4080
4081
156k
    if (str == NULL)
4082
0
        return;
4083
4084
156k
    depth += 1;
4085
156k
    if (depth > maxDepth) {
4086
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4087
0
                       "Maximum entity nesting depth exceeded");
4088
0
  return;
4089
0
    }
4090
4091
156k
    if (pent != NULL) {
4092
123k
        if (pent->flags & XML_ENT_EXPANDING) {
4093
0
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4094
0
            xmlHaltParser(ctxt);
4095
0
            return;
4096
0
        }
4097
4098
123k
        if (check) {
4099
0
            if (xmlParserEntityCheck(ctxt, pent->length))
4100
0
                return;
4101
0
        }
4102
123k
    }
4103
4104
156k
    chunkSize = 0;
4105
4106
    /*
4107
     * Note that entity values are already validated. No special
4108
     * handling for multi-byte characters is needed.
4109
     */
4110
27.1M
    while (!PARSER_STOPPED(ctxt)) {
4111
27.1M
        c = *str;
4112
4113
27.1M
  if (c != '&') {
4114
26.9M
            if (c == 0)
4115
150k
                break;
4116
4117
            /*
4118
             * If this function is called without an entity, it is used to
4119
             * expand entities in an attribute content where less-than was
4120
             * already unscaped and is allowed.
4121
             */
4122
26.7M
            if ((pent != NULL) && (c == '<')) {
4123
5.86k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4124
5.86k
                        "'<' in entity '%s' is not allowed in attributes "
4125
5.86k
                        "values\n", pent->name);
4126
5.86k
                break;
4127
5.86k
            }
4128
4129
26.7M
            if (c <= 0x20) {
4130
614k
                if ((normalize) && (*inSpace)) {
4131
                    /* Skip char */
4132
0
                    if (chunkSize > 0) {
4133
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4134
0
                        chunkSize = 0;
4135
0
                    }
4136
614k
                } else if (c < 0x20) {
4137
137k
                    if (chunkSize > 0) {
4138
134k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4139
134k
                        chunkSize = 0;
4140
134k
                    }
4141
4142
137k
                    xmlSBufAddCString(buf, " ", 1);
4143
477k
                } else {
4144
477k
                    chunkSize += 1;
4145
477k
                }
4146
4147
614k
                *inSpace = 1;
4148
26.1M
            } else {
4149
26.1M
                chunkSize += 1;
4150
26.1M
                *inSpace = 0;
4151
26.1M
            }
4152
4153
26.7M
            str += 1;
4154
26.7M
        } else if (str[1] == '#') {
4155
17.7k
            int val;
4156
4157
17.7k
            if (chunkSize > 0) {
4158
12.2k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4159
12.2k
                chunkSize = 0;
4160
12.2k
            }
4161
4162
17.7k
      val = xmlParseStringCharRef(ctxt, &str);
4163
17.7k
      if (val == 0) {
4164
0
                if (pent != NULL)
4165
0
                    pent->content[0] = 0;
4166
0
                break;
4167
0
            }
4168
4169
17.7k
            if (val == ' ') {
4170
922
                if ((!normalize) || (!*inSpace))
4171
922
                    xmlSBufAddCString(buf, " ", 1);
4172
922
                *inSpace = 1;
4173
16.7k
            } else {
4174
16.7k
                xmlSBufAddChar(buf, val);
4175
16.7k
                *inSpace = 0;
4176
16.7k
            }
4177
199k
  } else {
4178
199k
            xmlChar *name;
4179
199k
            xmlEntityPtr ent;
4180
4181
199k
            if (chunkSize > 0) {
4182
108k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4183
108k
                chunkSize = 0;
4184
108k
            }
4185
4186
199k
      name = xmlParseStringEntityRef(ctxt, &str);
4187
199k
            if (name == NULL) {
4188
1
                if (pent != NULL)
4189
0
                    pent->content[0] = 0;
4190
1
                break;
4191
1
            }
4192
4193
199k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4194
199k
            xmlFree(name);
4195
4196
199k
      if ((ent != NULL) &&
4197
199k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4198
57.0k
    if (ent->content == NULL) {
4199
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4200
0
          "predefined entity has no content\n");
4201
0
                    break;
4202
0
                }
4203
4204
57.0k
                xmlSBufAddString(buf, ent->content, ent->length);
4205
4206
57.0k
                *inSpace = 0;
4207
142k
      } else if ((ent != NULL) && (ent->content != NULL)) {
4208
123k
                if (pent != NULL)
4209
116k
                    pent->flags |= XML_ENT_EXPANDING;
4210
123k
    xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4211
123k
                                          normalize, inSpace, depth, check);
4212
123k
                if (pent != NULL)
4213
116k
                    pent->flags &= ~XML_ENT_EXPANDING;
4214
123k
      }
4215
199k
        }
4216
27.1M
    }
4217
4218
156k
    if (chunkSize > 0)
4219
152k
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4220
4221
156k
    return;
4222
156k
}
4223
4224
/**
4225
 * xmlExpandEntitiesInAttValue:
4226
 * @ctxt:  parser context
4227
 * @str:  entity or attribute value
4228
 * @normalize:  whether to collapse whitespace
4229
 *
4230
 * Expand general entity references in an entity or attribute value.
4231
 * Perform attribute value normalization.
4232
 *
4233
 * Returns the expanded attribtue value.
4234
 */
4235
xmlChar *
4236
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4237
32.7k
                            int normalize) {
4238
32.7k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4239
0
                         XML_MAX_HUGE_LENGTH :
4240
32.7k
                         XML_MAX_TEXT_LENGTH;
4241
32.7k
    xmlSBuf buf;
4242
32.7k
    int inSpace = 1;
4243
4244
32.7k
    xmlSBufInit(&buf, maxLength);
4245
4246
32.7k
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4247
32.7k
                              ctxt->inputNr, /* check */ 0);
4248
4249
32.7k
    if ((normalize) && (inSpace) && (buf.size > 0))
4250
0
        buf.size--;
4251
4252
32.7k
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4253
32.7k
}
4254
4255
/**
4256
 * xmlParseAttValueInternal:
4257
 * @ctxt:  an XML parser context
4258
 * @len:  attribute len result
4259
 * @alloc:  whether the attribute was reallocated as a new string
4260
 * @normalize:  if 1 then further non-CDATA normalization must be done
4261
 *
4262
 * parse a value for an attribute.
4263
 * NOTE: if no normalization is needed, the routine will return pointers
4264
 *       directly from the data buffer.
4265
 *
4266
 * 3.3.3 Attribute-Value Normalization:
4267
 * Before the value of an attribute is passed to the application or
4268
 * checked for validity, the XML processor must normalize it as follows:
4269
 * - a character reference is processed by appending the referenced
4270
 *   character to the attribute value
4271
 * - an entity reference is processed by recursively processing the
4272
 *   replacement text of the entity
4273
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4274
 *   appending #x20 to the normalized value, except that only a single
4275
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4276
 *   parsed entity or the literal entity value of an internal parsed entity
4277
 * - other characters are processed by appending them to the normalized value
4278
 * If the declared value is not CDATA, then the XML processor must further
4279
 * process the normalized attribute value by discarding any leading and
4280
 * trailing space (#x20) characters, and by replacing sequences of space
4281
 * (#x20) characters by a single space (#x20) character.
4282
 * All attributes for which no declaration has been read should be treated
4283
 * by a non-validating parser as if declared CDATA.
4284
 *
4285
 * Returns the AttValue parsed or NULL. The value has to be freed by the
4286
 *     caller if it was copied, this can be detected by val[*len] == 0.
4287
 */
4288
static xmlChar *
4289
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4290
57.7k
                         int normalize) {
4291
57.7k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4292
0
                         XML_MAX_HUGE_LENGTH :
4293
57.7k
                         XML_MAX_TEXT_LENGTH;
4294
57.7k
    xmlSBuf buf;
4295
57.7k
    xmlChar *ret;
4296
57.7k
    int c, l, quote, flags, chunkSize;
4297
57.7k
    int inSpace = 1;
4298
4299
57.7k
    xmlSBufInit(&buf, maxLength);
4300
4301
57.7k
    GROW;
4302
4303
57.7k
    quote = CUR;
4304
57.7k
    if ((quote != '"') && (quote != '\'')) {
4305
4.31k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4306
4.31k
  return(NULL);
4307
4.31k
    }
4308
53.4k
    NEXTL(1);
4309
4310
53.4k
    if (ctxt->inSubset == 0)
4311
22.8k
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4312
30.6k
    else
4313
30.6k
        flags = XML_ENT_VALIDATED;
4314
4315
53.4k
    inSpace = 1;
4316
53.4k
    chunkSize = 0;
4317
4318
904k
    while (1) {
4319
904k
        if (PARSER_STOPPED(ctxt))
4320
9
            goto error;
4321
4322
904k
        if (CUR_PTR >= ctxt->input->end) {
4323
9.04k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4324
9.04k
                           "AttValue: ' expected\n");
4325
9.04k
            goto error;
4326
9.04k
        }
4327
4328
        /*
4329
         * TODO: Check growth threshold
4330
         */
4331
895k
        if (ctxt->input->end - CUR_PTR < 10)
4332
104k
            GROW;
4333
4334
895k
        c = CUR;
4335
4336
895k
        if (c >= 0x80) {
4337
126k
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4338
126k
                    "invalid character in attribute value\n");
4339
126k
            if (l == 0) {
4340
7.28k
                if (chunkSize > 0) {
4341
1.35k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4342
1.35k
                    chunkSize = 0;
4343
1.35k
                }
4344
7.28k
                xmlSBufAddReplChar(&buf);
4345
7.28k
                NEXTL(1);
4346
119k
            } else {
4347
119k
                chunkSize += l;
4348
119k
                NEXTL(l);
4349
119k
            }
4350
4351
126k
            inSpace = 0;
4352
768k
        } else if (c != '&') {
4353
678k
            if (c > 0x20) {
4354
500k
                if (c == quote)
4355
43.3k
                    break;
4356
4357
457k
                if (c == '<')
4358
36.1k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4359
4360
457k
                chunkSize += 1;
4361
457k
                inSpace = 0;
4362
457k
            } else if (!IS_BYTE_CHAR(c)) {
4363
4.62k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4364
4.62k
                        "invalid character in attribute value\n");
4365
4.62k
                if (chunkSize > 0) {
4366
1.01k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4367
1.01k
                    chunkSize = 0;
4368
1.01k
                }
4369
4.62k
                xmlSBufAddReplChar(&buf);
4370
4.62k
                inSpace = 0;
4371
173k
            } else {
4372
                /* Whitespace */
4373
173k
                if ((normalize) && (inSpace)) {
4374
                    /* Skip char */
4375
1.49k
                    if (chunkSize > 0) {
4376
660
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4377
660
                        chunkSize = 0;
4378
660
                    }
4379
171k
                } else if (c < 0x20) {
4380
                    /* Convert to space */
4381
7.27k
                    if (chunkSize > 0) {
4382
3.50k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4383
3.50k
                        chunkSize = 0;
4384
3.50k
                    }
4385
4386
7.27k
                    xmlSBufAddCString(&buf, " ", 1);
4387
164k
                } else {
4388
164k
                    chunkSize += 1;
4389
164k
                }
4390
4391
173k
                inSpace = 1;
4392
4393
173k
                if ((c == 0xD) && (NXT(1) == 0xA))
4394
243
                    CUR_PTR++;
4395
173k
            }
4396
4397
635k
            NEXTL(1);
4398
635k
        } else if (NXT(1) == '#') {
4399
9.17k
            int val;
4400
4401
9.17k
            if (chunkSize > 0) {
4402
6.45k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4403
6.45k
                chunkSize = 0;
4404
6.45k
            }
4405
4406
9.17k
            val = xmlParseCharRef(ctxt);
4407
9.17k
            if (val == 0)
4408
1.00k
                goto error;
4409
4410
8.17k
            if ((val == '&') && (!ctxt->replaceEntities)) {
4411
                /*
4412
                 * The reparsing will be done in xmlStringGetNodeList()
4413
                 * called by the attribute() function in SAX.c
4414
                 */
4415
1.09k
                xmlSBufAddCString(&buf, "&#38;", 5);
4416
1.09k
                inSpace = 0;
4417
7.08k
            } else if (val == ' ') {
4418
1.21k
                if ((!normalize) || (!inSpace))
4419
1.14k
                    xmlSBufAddCString(&buf, " ", 1);
4420
1.21k
                inSpace = 1;
4421
5.86k
            } else {
4422
5.86k
                xmlSBufAddChar(&buf, val);
4423
5.86k
                inSpace = 0;
4424
5.86k
            }
4425
81.0k
        } else {
4426
81.0k
            const xmlChar *name;
4427
81.0k
            xmlEntityPtr ent;
4428
4429
81.0k
            if (chunkSize > 0) {
4430
21.5k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4431
21.5k
                chunkSize = 0;
4432
21.5k
            }
4433
4434
81.0k
            name = xmlParseEntityRefInternal(ctxt);
4435
81.0k
            if (name == NULL) {
4436
                /*
4437
                 * Probably a literal '&' which wasn't escaped.
4438
                 * TODO: Handle gracefully in recovery mode.
4439
                 */
4440
4.85k
                continue;
4441
4.85k
            }
4442
4443
76.2k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4444
76.2k
            if (ent == NULL)
4445
14.2k
                continue;
4446
4447
61.9k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4448
5.29k
                if ((ent->content[0] == '&') && (!ctxt->replaceEntities))
4449
4.07k
                    xmlSBufAddCString(&buf, "&#38;", 5);
4450
1.21k
                else
4451
1.21k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4452
5.29k
                inSpace = 0;
4453
56.6k
            } else if (ctxt->replaceEntities) {
4454
0
                xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4455
0
                                          normalize, &inSpace, ctxt->inputNr,
4456
0
                                          /* check */ 1);
4457
56.6k
            } else {
4458
56.6k
                if ((ent->flags & flags) != flags)
4459
571
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4460
4461
56.6k
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4462
16
                    ent->content[0] = 0;
4463
16
                    goto error;
4464
16
                }
4465
4466
                /*
4467
                 * Just output the reference
4468
                 */
4469
56.6k
                xmlSBufAddCString(&buf, "&", 1);
4470
56.6k
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4471
56.6k
                xmlSBufAddCString(&buf, ";", 1);
4472
4473
56.6k
                inSpace = 0;
4474
56.6k
            }
4475
61.9k
  }
4476
895k
    }
4477
4478
43.3k
    if ((buf.mem == NULL) && (alloc != NULL)) {
4479
13.1k
        ret = (xmlChar *) CUR_PTR - chunkSize;
4480
4481
13.1k
        if (attlen != NULL)
4482
13.1k
            *attlen = chunkSize;
4483
13.1k
        if ((normalize) && (inSpace) && (chunkSize > 0))
4484
88
            *attlen -= 1;
4485
13.1k
        *alloc = 0;
4486
4487
        /* Report potential error */
4488
13.1k
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4489
30.1k
    } else {
4490
30.1k
        if (chunkSize > 0)
4491
21.8k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4492
4493
30.1k
        if ((normalize) && (inSpace) && (buf.size > 0))
4494
164
            buf.size--;
4495
4496
30.1k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4497
4498
30.1k
        if (ret != NULL) {
4499
30.1k
            if (attlen != NULL)
4500
2.08k
                *attlen = buf.size;
4501
30.1k
            if (alloc != NULL)
4502
2.08k
                *alloc = 1;
4503
30.1k
        }
4504
30.1k
    }
4505
4506
43.3k
    NEXTL(1);
4507
4508
43.3k
    return(ret);
4509
4510
10.0k
error:
4511
10.0k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4512
10.0k
    return(NULL);
4513
53.4k
}
4514
4515
/**
4516
 * xmlParseAttValue:
4517
 * @ctxt:  an XML parser context
4518
 *
4519
 * DEPRECATED: Internal function, don't use.
4520
 *
4521
 * parse a value for an attribute
4522
 * Note: the parser won't do substitution of entities here, this
4523
 * will be handled later in xmlStringGetNodeList
4524
 *
4525
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4526
 *                   "'" ([^<&'] | Reference)* "'"
4527
 *
4528
 * 3.3.3 Attribute-Value Normalization:
4529
 * Before the value of an attribute is passed to the application or
4530
 * checked for validity, the XML processor must normalize it as follows:
4531
 * - a character reference is processed by appending the referenced
4532
 *   character to the attribute value
4533
 * - an entity reference is processed by recursively processing the
4534
 *   replacement text of the entity
4535
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4536
 *   appending #x20 to the normalized value, except that only a single
4537
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4538
 *   parsed entity or the literal entity value of an internal parsed entity
4539
 * - other characters are processed by appending them to the normalized value
4540
 * If the declared value is not CDATA, then the XML processor must further
4541
 * process the normalized attribute value by discarding any leading and
4542
 * trailing space (#x20) characters, and by replacing sequences of space
4543
 * (#x20) characters by a single space (#x20) character.
4544
 * All attributes for which no declaration has been read should be treated
4545
 * by a non-validating parser as if declared CDATA.
4546
 *
4547
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4548
 */
4549
4550
4551
xmlChar *
4552
41.1k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4553
41.1k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4554
41.1k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4555
41.1k
}
4556
4557
/**
4558
 * xmlParseSystemLiteral:
4559
 * @ctxt:  an XML parser context
4560
 *
4561
 * DEPRECATED: Internal function, don't use.
4562
 *
4563
 * parse an XML Literal
4564
 *
4565
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4566
 *
4567
 * Returns the SystemLiteral parsed or NULL
4568
 */
4569
4570
xmlChar *
4571
4.64k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4572
4.64k
    xmlChar *buf = NULL;
4573
4.64k
    int len = 0;
4574
4.64k
    int size = XML_PARSER_BUFFER_SIZE;
4575
4.64k
    int cur, l;
4576
4.64k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4577
0
                    XML_MAX_TEXT_LENGTH :
4578
4.64k
                    XML_MAX_NAME_LENGTH;
4579
4.64k
    xmlChar stop;
4580
4581
4.64k
    if (RAW == '"') {
4582
2.68k
        NEXT;
4583
2.68k
  stop = '"';
4584
2.68k
    } else if (RAW == '\'') {
4585
532
        NEXT;
4586
532
  stop = '\'';
4587
1.42k
    } else {
4588
1.42k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4589
1.42k
  return(NULL);
4590
1.42k
    }
4591
4592
3.21k
    buf = (xmlChar *) xmlMallocAtomic(size);
4593
3.21k
    if (buf == NULL) {
4594
0
        xmlErrMemory(ctxt);
4595
0
  return(NULL);
4596
0
    }
4597
3.21k
    cur = CUR_CHAR(l);
4598
42.6k
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4599
39.4k
  if (len + 5 >= size) {
4600
230
      xmlChar *tmp;
4601
4602
230
      size *= 2;
4603
230
      tmp = (xmlChar *) xmlRealloc(buf, size);
4604
230
      if (tmp == NULL) {
4605
0
          xmlFree(buf);
4606
0
    xmlErrMemory(ctxt);
4607
0
    return(NULL);
4608
0
      }
4609
230
      buf = tmp;
4610
230
  }
4611
39.4k
  COPY_BUF(buf, len, cur);
4612
39.4k
        if (len > maxLength) {
4613
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4614
0
            xmlFree(buf);
4615
0
            return(NULL);
4616
0
        }
4617
39.4k
  NEXTL(l);
4618
39.4k
  cur = CUR_CHAR(l);
4619
39.4k
    }
4620
3.21k
    buf[len] = 0;
4621
3.21k
    if (!IS_CHAR(cur)) {
4622
940
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4623
2.27k
    } else {
4624
2.27k
  NEXT;
4625
2.27k
    }
4626
3.21k
    return(buf);
4627
3.21k
}
4628
4629
/**
4630
 * xmlParsePubidLiteral:
4631
 * @ctxt:  an XML parser context
4632
 *
4633
 * DEPRECATED: Internal function, don't use.
4634
 *
4635
 * parse an XML public literal
4636
 *
4637
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4638
 *
4639
 * Returns the PubidLiteral parsed or NULL.
4640
 */
4641
4642
xmlChar *
4643
3.23k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4644
3.23k
    xmlChar *buf = NULL;
4645
3.23k
    int len = 0;
4646
3.23k
    int size = XML_PARSER_BUFFER_SIZE;
4647
3.23k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4648
0
                    XML_MAX_TEXT_LENGTH :
4649
3.23k
                    XML_MAX_NAME_LENGTH;
4650
3.23k
    xmlChar cur;
4651
3.23k
    xmlChar stop;
4652
4653
3.23k
    if (RAW == '"') {
4654
1.84k
        NEXT;
4655
1.84k
  stop = '"';
4656
1.84k
    } else if (RAW == '\'') {
4657
905
        NEXT;
4658
905
  stop = '\'';
4659
905
    } else {
4660
487
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4661
487
  return(NULL);
4662
487
    }
4663
2.74k
    buf = (xmlChar *) xmlMallocAtomic(size);
4664
2.74k
    if (buf == NULL) {
4665
0
  xmlErrMemory(ctxt);
4666
0
  return(NULL);
4667
0
    }
4668
2.74k
    cur = CUR;
4669
54.6k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4670
54.6k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4671
51.8k
  if (len + 1 >= size) {
4672
324
      xmlChar *tmp;
4673
4674
324
      size *= 2;
4675
324
      tmp = (xmlChar *) xmlRealloc(buf, size);
4676
324
      if (tmp == NULL) {
4677
0
    xmlErrMemory(ctxt);
4678
0
    xmlFree(buf);
4679
0
    return(NULL);
4680
0
      }
4681
324
      buf = tmp;
4682
324
  }
4683
51.8k
  buf[len++] = cur;
4684
51.8k
        if (len > maxLength) {
4685
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4686
0
            xmlFree(buf);
4687
0
            return(NULL);
4688
0
        }
4689
51.8k
  NEXT;
4690
51.8k
  cur = CUR;
4691
51.8k
    }
4692
2.74k
    buf[len] = 0;
4693
2.74k
    if (cur != stop) {
4694
1.82k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4695
1.82k
    } else {
4696
927
  NEXTL(1);
4697
927
    }
4698
2.74k
    return(buf);
4699
2.74k
}
4700
4701
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4702
4703
/*
 * Lookup table for the inner loop of the character data fast path,
 * indexed by input byte. A non-zero entry means the byte can be
 * skipped over directly. Entries are zero for the control characters
 * other than TAB (so LF and CR are handled separately by the caller),
 * for '&', '<' and ']', and for every non-ASCII byte (0x80..0xFF).
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* TAB only */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no '&' */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    /* 0x88 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x98 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4740
4741
/**
4742
 * xmlParseCharDataInternal:
4743
 * @ctxt:  an XML parser context
4744
 * @partial:  buffer may contain partial UTF-8 sequences
4745
 *
4746
 * Parse character data. Always makes progress if the first char isn't
4747
 * '<' or '&'.
4748
 *
4749
 * The right angle bracket (>) may be represented using the string "&gt;",
4750
 * and must, for compatibility, be escaped using "&gt;" or a character
4751
 * reference when it appears in the string "]]>" in content, when that
4752
 * string is not marking the end of a CDATA section.
4753
 *
4754
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4755
 */
4756
static void
4757
47.7k
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4758
47.7k
    const xmlChar *in;
4759
47.7k
    int nbchar = 0;
4760
47.7k
    int line = ctxt->input->line;
4761
47.7k
    int col = ctxt->input->col;
4762
47.7k
    int ccol;
4763
4764
47.7k
    GROW;
4765
    /*
4766
     * Accelerated common case where input don't need to be
4767
     * modified before passing it to the handler.
4768
     */
4769
47.7k
    in = ctxt->input->cur;
4770
48.2k
    do {
4771
48.5k
get_more_space:
4772
55.6k
        while (*in == 0x20) { in++; ctxt->input->col++; }
4773
48.5k
        if (*in == 0xA) {
4774
1.04k
            do {
4775
1.04k
                ctxt->input->line++; ctxt->input->col = 1;
4776
1.04k
                in++;
4777
1.04k
            } while (*in == 0xA);
4778
299
            goto get_more_space;
4779
299
        }
4780
48.2k
        if (*in == '<') {
4781
3.00k
            nbchar = in - ctxt->input->cur;
4782
3.00k
            if (nbchar > 0) {
4783
3.00k
                const xmlChar *tmp = ctxt->input->cur;
4784
3.00k
                ctxt->input->cur = in;
4785
4786
3.00k
                if ((ctxt->sax != NULL) &&
4787
3.00k
                    (ctxt->disableSAX == 0) &&
4788
3.00k
                    (ctxt->sax->ignorableWhitespace !=
4789
2.35k
                     ctxt->sax->characters)) {
4790
1.80k
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4791
822
                        if (ctxt->sax->ignorableWhitespace != NULL)
4792
822
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4793
822
                                                   tmp, nbchar);
4794
982
                    } else {
4795
982
                        if (ctxt->sax->characters != NULL)
4796
982
                            ctxt->sax->characters(ctxt->userData,
4797
982
                                                  tmp, nbchar);
4798
982
                        if (*ctxt->space == -1)
4799
521
                            *ctxt->space = -2;
4800
982
                    }
4801
1.80k
                } else if ((ctxt->sax != NULL) &&
4802
1.19k
                           (ctxt->disableSAX == 0) &&
4803
1.19k
                           (ctxt->sax->characters != NULL)) {
4804
546
                    ctxt->sax->characters(ctxt->userData,
4805
546
                                          tmp, nbchar);
4806
546
                }
4807
3.00k
            }
4808
3.00k
            return;
4809
3.00k
        }
4810
4811
48.5k
get_more:
4812
48.5k
        ccol = ctxt->input->col;
4813
146k
        while (test_char_data[*in]) {
4814
98.1k
            in++;
4815
98.1k
            ccol++;
4816
98.1k
        }
4817
48.5k
        ctxt->input->col = ccol;
4818
48.5k
        if (*in == 0xA) {
4819
687
            do {
4820
687
                ctxt->input->line++; ctxt->input->col = 1;
4821
687
                in++;
4822
687
            } while (*in == 0xA);
4823
478
            goto get_more;
4824
478
        }
4825
48.0k
        if (*in == ']') {
4826
3.12k
            if ((in[1] == ']') && (in[2] == '>')) {
4827
334
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4828
334
                ctxt->input->cur = in + 1;
4829
334
                return;
4830
334
            }
4831
2.78k
            in++;
4832
2.78k
            ctxt->input->col++;
4833
2.78k
            goto get_more;
4834
3.12k
        }
4835
44.9k
        nbchar = in - ctxt->input->cur;
4836
44.9k
        if (nbchar > 0) {
4837
35.0k
            if ((ctxt->sax != NULL) &&
4838
35.0k
                (ctxt->disableSAX == 0) &&
4839
35.0k
                (ctxt->sax->ignorableWhitespace !=
4840
23.5k
                 ctxt->sax->characters) &&
4841
35.0k
                (IS_BLANK_CH(*ctxt->input->cur))) {
4842
1.85k
                const xmlChar *tmp = ctxt->input->cur;
4843
1.85k
                ctxt->input->cur = in;
4844
4845
1.85k
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4846
552
                    if (ctxt->sax->ignorableWhitespace != NULL)
4847
552
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4848
552
                                                       tmp, nbchar);
4849
1.30k
                } else {
4850
1.30k
                    if (ctxt->sax->characters != NULL)
4851
1.30k
                        ctxt->sax->characters(ctxt->userData,
4852
1.30k
                                              tmp, nbchar);
4853
1.30k
                    if (*ctxt->space == -1)
4854
722
                        *ctxt->space = -2;
4855
1.30k
                }
4856
1.85k
                line = ctxt->input->line;
4857
1.85k
                col = ctxt->input->col;
4858
33.2k
            } else if ((ctxt->sax != NULL) &&
4859
33.2k
                       (ctxt->disableSAX == 0)) {
4860
21.6k
                if (ctxt->sax->characters != NULL)
4861
21.6k
                    ctxt->sax->characters(ctxt->userData,
4862
21.6k
                                          ctxt->input->cur, nbchar);
4863
21.6k
                line = ctxt->input->line;
4864
21.6k
                col = ctxt->input->col;
4865
21.6k
            }
4866
35.0k
        }
4867
44.9k
        ctxt->input->cur = in;
4868
44.9k
        if (*in == 0xD) {
4869
1.51k
            in++;
4870
1.51k
            if (*in == 0xA) {
4871
541
                ctxt->input->cur = in;
4872
541
                in++;
4873
541
                ctxt->input->line++; ctxt->input->col = 1;
4874
541
                continue; /* while */
4875
541
            }
4876
974
            in--;
4877
974
        }
4878
44.3k
        if (*in == '<') {
4879
28.2k
            return;
4880
28.2k
        }
4881
16.1k
        if (*in == '&') {
4882
3.95k
            return;
4883
3.95k
        }
4884
12.1k
        SHRINK;
4885
12.1k
        GROW;
4886
12.1k
        in = ctxt->input->cur;
4887
12.7k
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4888
12.7k
             (*in == 0x09) || (*in == 0x0a));
4889
12.1k
    ctxt->input->line = line;
4890
12.1k
    ctxt->input->col = col;
4891
12.1k
    xmlParseCharDataComplex(ctxt, partial);
4892
12.1k
}
4893
4894
/**
4895
 * xmlParseCharDataComplex:
4896
 * @ctxt:  an XML parser context
4897
 * @cdata:  int indicating whether we are within a CDATA section
4898
 *
4899
 * Always makes progress if the first char isn't '<' or '&'.
4900
 *
4901
 * parse a CharData section.this is the fallback function
4902
 * of xmlParseCharData() when the parsing requires handling
4903
 * of non-ASCII characters.
4904
 */
4905
static void
4906
12.1k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4907
12.1k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4908
12.1k
    int nbchar = 0;
4909
12.1k
    int cur, l;
4910
4911
12.1k
    cur = CUR_CHAR(l);
4912
70.9k
    while ((cur != '<') && /* checked */
4913
70.9k
           (cur != '&') &&
4914
70.9k
     (IS_CHAR(cur))) {
4915
58.7k
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4916
106
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4917
106
  }
4918
58.7k
  COPY_BUF(buf, nbchar, cur);
4919
  /* move current position before possible calling of ctxt->sax->characters */
4920
58.7k
  NEXTL(l);
4921
58.7k
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4922
346
      buf[nbchar] = 0;
4923
4924
      /*
4925
       * OK the segment is to be consumed as chars.
4926
       */
4927
346
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4928
179
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4929
10
        if (ctxt->sax->ignorableWhitespace != NULL)
4930
10
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4931
10
                                     buf, nbchar);
4932
169
    } else {
4933
169
        if (ctxt->sax->characters != NULL)
4934
169
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4935
169
        if ((ctxt->sax->characters !=
4936
169
             ctxt->sax->ignorableWhitespace) &&
4937
169
      (*ctxt->space == -1))
4938
29
      *ctxt->space = -2;
4939
169
    }
4940
179
      }
4941
346
      nbchar = 0;
4942
346
            SHRINK;
4943
346
  }
4944
58.7k
  cur = CUR_CHAR(l);
4945
58.7k
    }
4946
12.1k
    if (nbchar != 0) {
4947
5.20k
        buf[nbchar] = 0;
4948
  /*
4949
   * OK the segment is to be consumed as chars.
4950
   */
4951
5.20k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4952
3.21k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4953
380
    if (ctxt->sax->ignorableWhitespace != NULL)
4954
380
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4955
2.83k
      } else {
4956
2.83k
    if (ctxt->sax->characters != NULL)
4957
2.83k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4958
2.83k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4959
2.83k
        (*ctxt->space == -1))
4960
408
        *ctxt->space = -2;
4961
2.83k
      }
4962
3.21k
  }
4963
5.20k
    }
4964
    /*
4965
     * cur == 0 can mean
4966
     *
4967
     * - End of buffer.
4968
     * - An actual 0 character.
4969
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4970
     */
4971
12.1k
    if (ctxt->input->cur < ctxt->input->end) {
4972
10.8k
        if ((cur == 0) && (CUR != 0)) {
4973
7
            if (partial == 0) {
4974
7
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4975
7
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4976
7
                NEXTL(1);
4977
7
            }
4978
10.8k
        } else if ((cur != '<') && (cur != '&')) {
4979
            /* Generate the error and skip the offending character */
4980
5.91k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4981
5.91k
                              "PCDATA invalid Char value %d\n", cur);
4982
5.91k
            NEXTL(l);
4983
5.91k
        }
4984
10.8k
    }
4985
12.1k
}
4986
4987
/**
4988
 * xmlParseCharData:
4989
 * @ctxt:  an XML parser context
4990
 * @cdata:  unused
4991
 *
4992
 * DEPRECATED: Internal function, don't use.
4993
 */
4994
void
4995
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4996
0
    xmlParseCharDataInternal(ctxt, 0);
4997
0
}
4998
4999
/**
5000
 * xmlParseExternalID:
5001
 * @ctxt:  an XML parser context
5002
 * @publicID:  a xmlChar** receiving PubidLiteral
5003
 * @strict: indicate whether we should restrict parsing to only
5004
 *          production [75], see NOTE below
5005
 *
5006
 * DEPRECATED: Internal function, don't use.
5007
 *
5008
 * Parse an External ID or a Public ID
5009
 *
5010
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
5011
 *       'PUBLIC' S PubidLiteral S SystemLiteral
5012
 *
5013
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5014
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
5015
 *
5016
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5017
 *
5018
 * Returns the function returns SystemLiteral and in the second
5019
 *                case publicID receives PubidLiteral, is strict is off
5020
 *                it is possible to return NULL and have publicID set.
5021
 */
5022
5023
xmlChar *
5024
15.9k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5025
15.9k
    xmlChar *URI = NULL;
5026
5027
15.9k
    *publicID = NULL;
5028
15.9k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5029
3.06k
        SKIP(6);
5030
3.06k
  if (SKIP_BLANKS == 0) {
5031
2.83k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5032
2.83k
                     "Space required after 'SYSTEM'\n");
5033
2.83k
  }
5034
3.06k
  URI = xmlParseSystemLiteral(ctxt);
5035
3.06k
  if (URI == NULL) {
5036
356
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5037
356
        }
5038
12.8k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5039
3.23k
        SKIP(6);
5040
3.23k
  if (SKIP_BLANKS == 0) {
5041
2.62k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5042
2.62k
        "Space required after 'PUBLIC'\n");
5043
2.62k
  }
5044
3.23k
  *publicID = xmlParsePubidLiteral(ctxt);
5045
3.23k
  if (*publicID == NULL) {
5046
487
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5047
487
  }
5048
3.23k
  if (strict) {
5049
      /*
5050
       * We don't handle [83] so "S SystemLiteral" is required.
5051
       */
5052
1.11k
      if (SKIP_BLANKS == 0) {
5053
919
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5054
919
      "Space required after the Public Identifier\n");
5055
919
      }
5056
2.12k
  } else {
5057
      /*
5058
       * We handle [83] so we return immediately, if
5059
       * "S SystemLiteral" is not detected. We skip blanks if no
5060
             * system literal was found, but this is harmless since we must
5061
             * be at the end of a NotationDecl.
5062
       */
5063
2.12k
      if (SKIP_BLANKS == 0) return(NULL);
5064
697
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
5065
697
  }
5066
1.57k
  URI = xmlParseSystemLiteral(ctxt);
5067
1.57k
  if (URI == NULL) {
5068
1.07k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5069
1.07k
        }
5070
1.57k
    }
5071
14.2k
    return(URI);
5072
15.9k
}
5073
5074
/**
5075
 * xmlParseCommentComplex:
5076
 * @ctxt:  an XML parser context
5077
 * @buf:  the already parsed part of the buffer
5078
 * @len:  number of bytes in the buffer
5079
 * @size:  allocated size of the buffer
5080
 *
5081
 * Skip an XML (SGML) comment <!-- .... -->
5082
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5083
 *  must not occur within comments. "
5084
 * This is the slow routine in case the accelerator for ascii didn't work
5085
 *
5086
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5087
 */
5088
static void
5089
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5090
9.93k
                       size_t len, size_t size) {
5091
9.93k
    int q, ql;
5092
9.93k
    int r, rl;
5093
9.93k
    int cur, l;
5094
9.93k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5095
0
                       XML_MAX_HUGE_LENGTH :
5096
9.93k
                       XML_MAX_TEXT_LENGTH;
5097
5098
9.93k
    if (buf == NULL) {
5099
4.73k
        len = 0;
5100
4.73k
  size = XML_PARSER_BUFFER_SIZE;
5101
4.73k
  buf = (xmlChar *) xmlMallocAtomic(size);
5102
4.73k
  if (buf == NULL) {
5103
0
      xmlErrMemory(ctxt);
5104
0
      return;
5105
0
  }
5106
4.73k
    }
5107
9.93k
    q = CUR_CHAR(ql);
5108
9.93k
    if (q == 0)
5109
6.58k
        goto not_terminated;
5110
3.35k
    if (!IS_CHAR(q)) {
5111
112
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5112
112
                          "xmlParseComment: invalid xmlChar value %d\n",
5113
112
                    q);
5114
112
  xmlFree (buf);
5115
112
  return;
5116
112
    }
5117
3.24k
    NEXTL(ql);
5118
3.24k
    r = CUR_CHAR(rl);
5119
3.24k
    if (r == 0)
5120
203
        goto not_terminated;
5121
3.03k
    if (!IS_CHAR(r)) {
5122
144
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5123
144
                          "xmlParseComment: invalid xmlChar value %d\n",
5124
144
                    r);
5125
144
  xmlFree (buf);
5126
144
  return;
5127
144
    }
5128
2.89k
    NEXTL(rl);
5129
2.89k
    cur = CUR_CHAR(l);
5130
2.89k
    if (cur == 0)
5131
458
        goto not_terminated;
5132
27.9k
    while (IS_CHAR(cur) && /* checked */
5133
27.9k
           ((cur != '>') ||
5134
27.0k
      (r != '-') || (q != '-'))) {
5135
25.4k
  if ((r == '-') && (q == '-')) {
5136
350
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5137
350
  }
5138
25.4k
  if (len + 5 >= size) {
5139
333
      xmlChar *new_buf;
5140
333
            size_t new_size;
5141
5142
333
      new_size = size * 2;
5143
333
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
5144
333
      if (new_buf == NULL) {
5145
0
    xmlFree (buf);
5146
0
    xmlErrMemory(ctxt);
5147
0
    return;
5148
0
      }
5149
333
      buf = new_buf;
5150
333
            size = new_size;
5151
333
  }
5152
25.4k
  COPY_BUF(buf, len, q);
5153
25.4k
        if (len > maxLength) {
5154
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5155
0
                         "Comment too big found", NULL);
5156
0
            xmlFree (buf);
5157
0
            return;
5158
0
        }
5159
5160
25.4k
  q = r;
5161
25.4k
  ql = rl;
5162
25.4k
  r = cur;
5163
25.4k
  rl = l;
5164
5165
25.4k
  NEXTL(l);
5166
25.4k
  cur = CUR_CHAR(l);
5167
5168
25.4k
    }
5169
2.43k
    buf[len] = 0;
5170
2.43k
    if (cur == 0) {
5171
758
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5172
758
                       "Comment not terminated \n<!--%.50s\n", buf);
5173
1.67k
    } else if (!IS_CHAR(cur)) {
5174
157
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5175
157
                          "xmlParseComment: invalid xmlChar value %d\n",
5176
157
                    cur);
5177
1.52k
    } else {
5178
1.52k
        NEXT;
5179
1.52k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5180
1.52k
      (!ctxt->disableSAX))
5181
1.30k
      ctxt->sax->comment(ctxt->userData, buf);
5182
1.52k
    }
5183
2.43k
    xmlFree(buf);
5184
2.43k
    return;
5185
7.24k
not_terminated:
5186
7.24k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5187
7.24k
       "Comment not terminated\n", NULL);
5188
7.24k
    xmlFree(buf);
5189
7.24k
    return;
5190
2.43k
}
5191
5192
/**
5193
 * xmlParseComment:
5194
 * @ctxt:  an XML parser context
5195
 *
5196
 * DEPRECATED: Internal function, don't use.
5197
 *
5198
 * Parse an XML (SGML) comment. Always consumes '<!'.
5199
 *
5200
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5201
 *  must not occur within comments. "
5202
 *
5203
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5204
 */
5205
void
5206
12.5k
xmlParseComment(xmlParserCtxtPtr ctxt) {
5207
12.5k
    xmlChar *buf = NULL;
5208
12.5k
    size_t size = XML_PARSER_BUFFER_SIZE;
5209
12.5k
    size_t len = 0;
5210
12.5k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5211
0
                       XML_MAX_HUGE_LENGTH :
5212
12.5k
                       XML_MAX_TEXT_LENGTH;
5213
12.5k
    const xmlChar *in;
5214
12.5k
    size_t nbchar = 0;
5215
12.5k
    int ccol;
5216
5217
    /*
5218
     * Check that there is a comment right here.
5219
     */
5220
12.5k
    if ((RAW != '<') || (NXT(1) != '!'))
5221
0
        return;
5222
12.5k
    SKIP(2);
5223
12.5k
    if ((RAW != '-') || (NXT(1) != '-'))
5224
3
        return;
5225
12.5k
    SKIP(2);
5226
12.5k
    GROW;
5227
5228
    /*
5229
     * Accelerated common case where input don't need to be
5230
     * modified before passing it to the handler.
5231
     */
5232
12.5k
    in = ctxt->input->cur;
5233
12.5k
    do {
5234
12.5k
  if (*in == 0xA) {
5235
443
      do {
5236
443
    ctxt->input->line++; ctxt->input->col = 1;
5237
443
    in++;
5238
443
      } while (*in == 0xA);
5239
217
  }
5240
26.1k
get_more:
5241
26.1k
        ccol = ctxt->input->col;
5242
61.6k
  while (((*in > '-') && (*in <= 0x7F)) ||
5243
61.6k
         ((*in >= 0x20) && (*in < '-')) ||
5244
61.6k
         (*in == 0x09)) {
5245
35.5k
        in++;
5246
35.5k
        ccol++;
5247
35.5k
  }
5248
26.1k
  ctxt->input->col = ccol;
5249
26.1k
  if (*in == 0xA) {
5250
650
      do {
5251
650
    ctxt->input->line++; ctxt->input->col = 1;
5252
650
    in++;
5253
650
      } while (*in == 0xA);
5254
268
      goto get_more;
5255
268
  }
5256
25.8k
  nbchar = in - ctxt->input->cur;
5257
  /*
5258
   * save current set of data
5259
   */
5260
25.8k
  if (nbchar > 0) {
5261
17.0k
            if (buf == NULL) {
5262
6.48k
                if ((*in == '-') && (in[1] == '-'))
5263
1.30k
                    size = nbchar + 1;
5264
5.18k
                else
5265
5.18k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5266
6.48k
                buf = (xmlChar *) xmlMallocAtomic(size);
5267
6.48k
                if (buf == NULL) {
5268
0
                    xmlErrMemory(ctxt);
5269
0
                    return;
5270
0
                }
5271
6.48k
                len = 0;
5272
10.5k
            } else if (len + nbchar + 1 >= size) {
5273
715
                xmlChar *new_buf;
5274
715
                size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5275
715
                new_buf = (xmlChar *) xmlRealloc(buf, size);
5276
715
                if (new_buf == NULL) {
5277
0
                    xmlFree (buf);
5278
0
                    xmlErrMemory(ctxt);
5279
0
                    return;
5280
0
                }
5281
715
                buf = new_buf;
5282
715
            }
5283
17.0k
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5284
17.0k
            len += nbchar;
5285
17.0k
            buf[len] = 0;
5286
17.0k
  }
5287
25.8k
        if (len > maxLength) {
5288
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5289
0
                         "Comment too big found", NULL);
5290
0
            xmlFree (buf);
5291
0
            return;
5292
0
        }
5293
25.8k
  ctxt->input->cur = in;
5294
25.8k
  if (*in == 0xA) {
5295
0
      in++;
5296
0
      ctxt->input->line++; ctxt->input->col = 1;
5297
0
  }
5298
25.8k
  if (*in == 0xD) {
5299
725
      in++;
5300
725
      if (*in == 0xA) {
5301
196
    ctxt->input->cur = in;
5302
196
    in++;
5303
196
    ctxt->input->line++; ctxt->input->col = 1;
5304
196
    goto get_more;
5305
196
      }
5306
529
      in--;
5307
529
  }
5308
25.6k
  SHRINK;
5309
25.6k
  GROW;
5310
25.6k
  in = ctxt->input->cur;
5311
25.6k
  if (*in == '-') {
5312
15.7k
      if (in[1] == '-') {
5313
12.6k
          if (in[2] == '>') {
5314
2.57k
        SKIP(3);
5315
2.57k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5316
2.57k
            (!ctxt->disableSAX)) {
5317
2.15k
      if (buf != NULL)
5318
1.12k
          ctxt->sax->comment(ctxt->userData, buf);
5319
1.02k
      else
5320
1.02k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5321
2.15k
        }
5322
2.57k
        if (buf != NULL)
5323
1.28k
            xmlFree(buf);
5324
2.57k
        return;
5325
2.57k
    }
5326
10.0k
    if (buf != NULL) {
5327
7.32k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5328
7.32k
                          "Double hyphen within comment: "
5329
7.32k
                                      "<!--%.50s\n",
5330
7.32k
              buf);
5331
7.32k
    } else
5332
2.74k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5333
2.74k
                          "Double hyphen within comment\n", NULL);
5334
10.0k
    in++;
5335
10.0k
    ctxt->input->col++;
5336
10.0k
      }
5337
13.1k
      in++;
5338
13.1k
      ctxt->input->col++;
5339
13.1k
      goto get_more;
5340
15.7k
  }
5341
25.6k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5342
9.93k
    xmlParseCommentComplex(ctxt, buf, len, size);
5343
9.93k
    return;
5344
12.5k
}
5345
5346
5347
/**
5348
 * xmlParsePITarget:
5349
 * @ctxt:  an XML parser context
5350
 *
5351
 * DEPRECATED: Internal function, don't use.
5352
 *
5353
 * parse the name of a PI
5354
 *
5355
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5356
 *
5357
 * Returns the PITarget name or NULL
5358
 */
5359
5360
const xmlChar *
5361
23.5k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5362
23.5k
    const xmlChar *name;
5363
5364
23.5k
    name = xmlParseName(ctxt);
5365
23.5k
    if ((name != NULL) &&
5366
23.5k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5367
23.5k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5368
23.5k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5369
1.39k
  int i;
5370
1.39k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5371
1.39k
      (name[2] == 'l') && (name[3] == 0)) {
5372
225
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5373
225
     "XML declaration allowed only at the start of the document\n");
5374
225
      return(name);
5375
1.17k
  } else if (name[3] == 0) {
5376
690
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5377
690
      return(name);
5378
690
  }
5379
1.25k
  for (i = 0;;i++) {
5380
1.25k
      if (xmlW3CPIs[i] == NULL) break;
5381
964
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5382
194
          return(name);
5383
964
  }
5384
288
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5385
288
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5386
288
          NULL, NULL);
5387
288
    }
5388
22.4k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5389
1.11k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5390
1.11k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5391
1.11k
    }
5392
22.4k
    return(name);
5393
23.5k
}
5394
5395
#ifdef LIBXML_CATALOG_ENABLED
5396
/**
5397
 * xmlParseCatalogPI:
5398
 * @ctxt:  an XML parser context
5399
 * @catalog:  the PI value string
5400
 *
5401
 * parse an XML Catalog Processing Instruction.
5402
 *
5403
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5404
 *
5405
 * Occurs only if allowed by the user and if happening in the Misc
5406
 * part of the document before any doctype information
5407
 * This will add the given catalog to the parsing context in order
5408
 * to be used if there is a resolution need further down in the document
5409
 */
5410
5411
static void
5412
525
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5413
525
    xmlChar *URL = NULL;
5414
525
    const xmlChar *tmp, *base;
5415
525
    xmlChar marker;
5416
5417
525
    tmp = catalog;
5418
525
    while (IS_BLANK_CH(*tmp)) tmp++;
5419
525
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5420
126
  goto error;
5421
399
    tmp += 7;
5422
407
    while (IS_BLANK_CH(*tmp)) tmp++;
5423
399
    if (*tmp != '=') {
5424
62
  return;
5425
62
    }
5426
337
    tmp++;
5427
521
    while (IS_BLANK_CH(*tmp)) tmp++;
5428
337
    marker = *tmp;
5429
337
    if ((marker != '\'') && (marker != '"'))
5430
56
  goto error;
5431
281
    tmp++;
5432
281
    base = tmp;
5433
814
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5434
281
    if (*tmp == 0)
5435
95
  goto error;
5436
186
    URL = xmlStrndup(base, tmp - base);
5437
186
    tmp++;
5438
500
    while (IS_BLANK_CH(*tmp)) tmp++;
5439
186
    if (*tmp != 0)
5440
55
  goto error;
5441
5442
131
    if (URL != NULL) {
5443
        /*
5444
         * Unfortunately, the catalog API doesn't report OOM errors.
5445
         * xmlGetLastError isn't very helpful since we don't know
5446
         * where the last error came from. We'd have to reset it
5447
         * before this call and restore it afterwards.
5448
         */
5449
131
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5450
131
  xmlFree(URL);
5451
131
    }
5452
131
    return;
5453
5454
332
error:
5455
332
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5456
332
            "Catalog PI syntax error: %s\n",
5457
332
      catalog, NULL);
5458
332
    if (URL != NULL)
5459
55
  xmlFree(URL);
5460
332
}
5461
#endif
5462
5463
/**
5464
 * xmlParsePI:
5465
 * @ctxt:  an XML parser context
5466
 *
5467
 * DEPRECATED: Internal function, don't use.
5468
 *
5469
 * parse an XML Processing Instruction.
5470
 *
5471
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5472
 *
5473
 * The processing is transferred to SAX once parsed.
5474
 */
5475
5476
void
5477
23.5k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5478
23.5k
    xmlChar *buf = NULL;
5479
23.5k
    size_t len = 0;
5480
23.5k
    size_t size = XML_PARSER_BUFFER_SIZE;
5481
23.5k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5482
0
                       XML_MAX_HUGE_LENGTH :
5483
23.5k
                       XML_MAX_TEXT_LENGTH;
5484
23.5k
    int cur, l;
5485
23.5k
    const xmlChar *target;
5486
5487
23.5k
    if ((RAW == '<') && (NXT(1) == '?')) {
5488
  /*
5489
   * this is a Processing Instruction.
5490
   */
5491
23.5k
  SKIP(2);
5492
5493
  /*
5494
   * Parse the target name and check for special support like
5495
   * namespace.
5496
   */
5497
23.5k
        target = xmlParsePITarget(ctxt);
5498
23.5k
  if (target != NULL) {
5499
17.8k
      if ((RAW == '?') && (NXT(1) == '>')) {
5500
4.87k
    SKIP(2);
5501
5502
    /*
5503
     * SAX: PI detected.
5504
     */
5505
4.87k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5506
4.87k
        (ctxt->sax->processingInstruction != NULL))
5507
4.42k
        ctxt->sax->processingInstruction(ctxt->userData,
5508
4.42k
                                         target, NULL);
5509
4.87k
    return;
5510
4.87k
      }
5511
13.0k
      buf = (xmlChar *) xmlMallocAtomic(size);
5512
13.0k
      if (buf == NULL) {
5513
0
    xmlErrMemory(ctxt);
5514
0
    return;
5515
0
      }
5516
13.0k
      if (SKIP_BLANKS == 0) {
5517
10.2k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5518
10.2k
        "ParsePI: PI %s space expected\n", target);
5519
10.2k
      }
5520
13.0k
      cur = CUR_CHAR(l);
5521
154k
      while (IS_CHAR(cur) && /* checked */
5522
154k
       ((cur != '?') || (NXT(1) != '>'))) {
5523
141k
    if (len + 5 >= size) {
5524
912
        xmlChar *tmp;
5525
912
                    size_t new_size = size * 2;
5526
912
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5527
912
        if (tmp == NULL) {
5528
0
      xmlErrMemory(ctxt);
5529
0
      xmlFree(buf);
5530
0
      return;
5531
0
        }
5532
912
        buf = tmp;
5533
912
                    size = new_size;
5534
912
    }
5535
141k
    COPY_BUF(buf, len, cur);
5536
141k
                if (len > maxLength) {
5537
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5538
0
                                      "PI %s too big found", target);
5539
0
                    xmlFree(buf);
5540
0
                    return;
5541
0
                }
5542
141k
    NEXTL(l);
5543
141k
    cur = CUR_CHAR(l);
5544
141k
      }
5545
13.0k
      buf[len] = 0;
5546
13.0k
      if (cur != '?') {
5547
8.88k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5548
8.88k
          "ParsePI: PI %s never end ...\n", target);
5549
8.88k
      } else {
5550
4.12k
    SKIP(2);
5551
5552
4.12k
#ifdef LIBXML_CATALOG_ENABLED
5553
4.12k
    if ((ctxt->inSubset == 0) &&
5554
4.12k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5555
525
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5556
525
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5557
525
      (allow == XML_CATA_ALLOW_ALL))
5558
525
      xmlParseCatalogPI(ctxt, buf);
5559
525
    }
5560
4.12k
#endif
5561
5562
5563
    /*
5564
     * SAX: PI detected.
5565
     */
5566
4.12k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5567
4.12k
        (ctxt->sax->processingInstruction != NULL))
5568
2.94k
        ctxt->sax->processingInstruction(ctxt->userData,
5569
2.94k
                                         target, buf);
5570
4.12k
      }
5571
13.0k
      xmlFree(buf);
5572
13.0k
  } else {
5573
5.71k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5574
5.71k
  }
5575
23.5k
    }
5576
23.5k
}
5577
5578
/**
5579
 * xmlParseNotationDecl:
5580
 * @ctxt:  an XML parser context
5581
 *
5582
 * DEPRECATED: Internal function, don't use.
5583
 *
5584
 * Parse a notation declaration. Always consumes '<!'.
5585
 *
5586
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5587
 *
5588
 * Hence there is actually 3 choices:
5589
 *     'PUBLIC' S PubidLiteral
5590
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5591
 * and 'SYSTEM' S SystemLiteral
5592
 *
5593
 * See the NOTE on xmlParseExternalID().
5594
 */
5595
5596
void
5597
4.07k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5598
4.07k
    const xmlChar *name;
5599
4.07k
    xmlChar *Pubid;
5600
4.07k
    xmlChar *Systemid;
5601
5602
4.07k
    if ((CUR != '<') || (NXT(1) != '!'))
5603
0
        return;
5604
4.07k
    SKIP(2);
5605
5606
4.07k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5607
4.05k
  int inputid = ctxt->input->id;
5608
4.05k
  SKIP(8);
5609
4.05k
  if (SKIP_BLANKS_PE == 0) {
5610
261
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5611
261
         "Space required after '<!NOTATION'\n");
5612
261
      return;
5613
261
  }
5614
5615
3.79k
        name = xmlParseName(ctxt);
5616
3.79k
  if (name == NULL) {
5617
229
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5618
229
      return;
5619
229
  }
5620
3.56k
  if (xmlStrchr(name, ':') != NULL) {
5621
230
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5622
230
         "colons are forbidden from notation names '%s'\n",
5623
230
         name, NULL, NULL);
5624
230
  }
5625
3.56k
  if (SKIP_BLANKS_PE == 0) {
5626
212
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5627
212
         "Space required after the NOTATION name'\n");
5628
212
      return;
5629
212
  }
5630
5631
  /*
5632
   * Parse the IDs.
5633
   */
5634
3.35k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5635
3.35k
  SKIP_BLANKS_PE;
5636
5637
3.35k
  if (RAW == '>') {
5638
1.20k
      if (inputid != ctxt->input->id) {
5639
122
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5640
122
                         "Notation declaration doesn't start and stop"
5641
122
                               " in the same entity\n");
5642
122
      }
5643
1.20k
      NEXT;
5644
1.20k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5645
1.20k
    (ctxt->sax->notationDecl != NULL))
5646
1.10k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5647
2.15k
  } else {
5648
2.15k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5649
2.15k
  }
5650
3.35k
  if (Systemid != NULL) xmlFree(Systemid);
5651
3.35k
  if (Pubid != NULL) xmlFree(Pubid);
5652
3.35k
    }
5653
4.07k
}
5654
5655
/**
5656
 * xmlParseEntityDecl:
5657
 * @ctxt:  an XML parser context
5658
 *
5659
 * DEPRECATED: Internal function, don't use.
5660
 *
5661
 * Parse an entity declaration. Always consumes '<!'.
5662
 *
5663
 * [70] EntityDecl ::= GEDecl | PEDecl
5664
 *
5665
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5666
 *
5667
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5668
 *
5669
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5670
 *
5671
 * [74] PEDef ::= EntityValue | ExternalID
5672
 *
5673
 * [76] NDataDecl ::= S 'NDATA' S Name
5674
 *
5675
 * [ VC: Notation Declared ]
5676
 * The Name must match the declared name of a notation.
5677
 */
5678
5679
void
5680
18.3k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5681
18.3k
    const xmlChar *name = NULL;
5682
18.3k
    xmlChar *value = NULL;
5683
18.3k
    xmlChar *URI = NULL, *literal = NULL;
5684
18.3k
    const xmlChar *ndata = NULL;
5685
18.3k
    int isParameter = 0;
5686
18.3k
    xmlChar *orig = NULL;
5687
5688
18.3k
    if ((CUR != '<') || (NXT(1) != '!'))
5689
0
        return;
5690
18.3k
    SKIP(2);
5691
5692
    /* GROW; done in the caller */
5693
18.3k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5694
18.3k
  int inputid = ctxt->input->id;
5695
18.3k
  SKIP(6);
5696
18.3k
  if (SKIP_BLANKS_PE == 0) {
5697
16.8k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5698
16.8k
         "Space required after '<!ENTITY'\n");
5699
16.8k
  }
5700
5701
18.3k
  if (RAW == '%') {
5702
4.19k
      NEXT;
5703
4.19k
      if (SKIP_BLANKS_PE == 0) {
5704
3.06k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5705
3.06k
             "Space required after '%%'\n");
5706
3.06k
      }
5707
4.19k
      isParameter = 1;
5708
4.19k
  }
5709
5710
18.3k
        name = xmlParseName(ctxt);
5711
18.3k
  if (name == NULL) {
5712
779
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5713
779
                     "xmlParseEntityDecl: no name\n");
5714
779
            return;
5715
779
  }
5716
17.5k
  if (xmlStrchr(name, ':') != NULL) {
5717
3.76k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5718
3.76k
         "colons are forbidden from entities names '%s'\n",
5719
3.76k
         name, NULL, NULL);
5720
3.76k
  }
5721
17.5k
  if (SKIP_BLANKS_PE == 0) {
5722
13.7k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5723
13.7k
         "Space required after the entity name\n");
5724
13.7k
  }
5725
5726
  /*
5727
   * handle the various case of definitions...
5728
   */
5729
17.5k
  if (isParameter) {
5730
3.82k
      if ((RAW == '"') || (RAW == '\'')) {
5731
2.70k
          value = xmlParseEntityValue(ctxt, &orig);
5732
2.70k
    if (value) {
5733
2.61k
        if ((ctxt->sax != NULL) &&
5734
2.61k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5735
2.42k
      ctxt->sax->entityDecl(ctxt->userData, name,
5736
2.42k
                        XML_INTERNAL_PARAMETER_ENTITY,
5737
2.42k
            NULL, NULL, value);
5738
2.61k
    }
5739
2.70k
      } else {
5740
1.12k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5741
1.12k
    if ((URI == NULL) && (literal == NULL)) {
5742
251
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5743
251
    }
5744
1.12k
    if (URI) {
5745
863
        xmlURIPtr uri;
5746
5747
863
                    if (xmlParseURISafe((const char *) URI, &uri) < 0) {
5748
0
                        xmlErrMemory(ctxt);
5749
863
                    } else if (uri == NULL) {
5750
                        /*
5751
                         * This really ought to be a well formedness error
5752
                         * but the XML Core WG decided otherwise c.f. issue
5753
                         * E26 of the XML erratas.
5754
                         */
5755
271
                        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5756
271
                                     "Invalid URI: %s\n", URI);
5757
592
                    } else if (uri->fragment != NULL) {
5758
                        /*
5759
                         * Okay this is foolish to block those but not
5760
                         * invalid URIs.
5761
                         */
5762
194
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5763
398
                    } else {
5764
398
                        if ((ctxt->sax != NULL) &&
5765
398
                            (!ctxt->disableSAX) &&
5766
398
                            (ctxt->sax->entityDecl != NULL))
5767
359
                            ctxt->sax->entityDecl(ctxt->userData, name,
5768
359
                                        XML_EXTERNAL_PARAMETER_ENTITY,
5769
359
                                        literal, URI, NULL);
5770
398
                    }
5771
863
        xmlFreeURI(uri);
5772
863
    }
5773
1.12k
      }
5774
13.7k
  } else {
5775
13.7k
      if ((RAW == '"') || (RAW == '\'')) {
5776
11.1k
          value = xmlParseEntityValue(ctxt, &orig);
5777
11.1k
    if ((ctxt->sax != NULL) &&
5778
11.1k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5779
10.3k
        ctxt->sax->entityDecl(ctxt->userData, name,
5780
10.3k
        XML_INTERNAL_GENERAL_ENTITY,
5781
10.3k
        NULL, NULL, value);
5782
    /*
5783
     * For expat compatibility in SAX mode.
5784
     */
5785
11.1k
    if ((ctxt->myDoc == NULL) ||
5786
11.1k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5787
241
        if (ctxt->myDoc == NULL) {
5788
118
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5789
118
      if (ctxt->myDoc == NULL) {
5790
0
          xmlErrMemory(ctxt);
5791
0
          goto done;
5792
0
      }
5793
118
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5794
118
        }
5795
241
        if (ctxt->myDoc->intSubset == NULL) {
5796
118
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5797
118
              BAD_CAST "fake", NULL, NULL);
5798
118
                        if (ctxt->myDoc->intSubset == NULL) {
5799
0
                            xmlErrMemory(ctxt);
5800
0
                            goto done;
5801
0
                        }
5802
118
                    }
5803
5804
241
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5805
241
                    NULL, NULL, value);
5806
241
    }
5807
11.1k
      } else {
5808
2.64k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5809
2.64k
    if ((URI == NULL) && (literal == NULL)) {
5810
1.21k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5811
1.21k
    }
5812
2.64k
    if (URI) {
5813
1.02k
        xmlURIPtr uri;
5814
5815
1.02k
                    if (xmlParseURISafe((const char *) URI, &uri) < 0) {
5816
0
                        xmlErrMemory(ctxt);
5817
1.02k
                    } else if (uri == NULL) {
5818
                        /*
5819
                         * This really ought to be a well formedness error
5820
                         * but the XML Core WG decided otherwise c.f. issue
5821
                         * E26 of the XML erratas.
5822
                         */
5823
163
                        xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5824
163
                                     "Invalid URI: %s\n", URI);
5825
865
                    } else if (uri->fragment != NULL) {
5826
                        /*
5827
                         * Okay this is foolish to block those but not
5828
                         * invalid URIs.
5829
                         */
5830
268
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5831
268
                    }
5832
1.02k
                    xmlFreeURI(uri);
5833
1.02k
    }
5834
2.64k
    if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5835
261
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5836
261
           "Space required before 'NDATA'\n");
5837
261
    }
5838
2.64k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5839
343
        SKIP(5);
5840
343
        if (SKIP_BLANKS_PE == 0) {
5841
336
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5842
336
               "Space required after 'NDATA'\n");
5843
336
        }
5844
343
        ndata = xmlParseName(ctxt);
5845
343
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5846
343
            (ctxt->sax->unparsedEntityDecl != NULL))
5847
258
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5848
258
            literal, URI, ndata);
5849
2.30k
    } else {
5850
2.30k
        if ((ctxt->sax != NULL) &&
5851
2.30k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5852
2.18k
      ctxt->sax->entityDecl(ctxt->userData, name,
5853
2.18k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5854
2.18k
            literal, URI, NULL);
5855
        /*
5856
         * For expat compatibility in SAX mode.
5857
         * assuming the entity replacement was asked for
5858
         */
5859
2.30k
        if ((ctxt->replaceEntities != 0) &&
5860
2.30k
      ((ctxt->myDoc == NULL) ||
5861
0
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5862
0
      if (ctxt->myDoc == NULL) {
5863
0
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5864
0
          if (ctxt->myDoc == NULL) {
5865
0
              xmlErrMemory(ctxt);
5866
0
        goto done;
5867
0
          }
5868
0
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5869
0
      }
5870
5871
0
      if (ctxt->myDoc->intSubset == NULL) {
5872
0
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5873
0
            BAD_CAST "fake", NULL, NULL);
5874
0
                            if (ctxt->myDoc->intSubset == NULL) {
5875
0
                                xmlErrMemory(ctxt);
5876
0
                                goto done;
5877
0
                            }
5878
0
                        }
5879
0
      xmlSAX2EntityDecl(ctxt, name,
5880
0
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5881
0
                  literal, URI, NULL);
5882
0
        }
5883
2.30k
    }
5884
2.64k
      }
5885
13.7k
  }
5886
17.5k
  SKIP_BLANKS_PE;
5887
17.5k
  if (RAW != '>') {
5888
1.23k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5889
1.23k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5890
1.23k
      xmlHaltParser(ctxt);
5891
16.3k
  } else {
5892
16.3k
      if (inputid != ctxt->input->id) {
5893
183
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5894
183
                         "Entity declaration doesn't start and stop in"
5895
183
                               " the same entity\n");
5896
183
      }
5897
16.3k
      NEXT;
5898
16.3k
  }
5899
17.5k
  if (orig != NULL) {
5900
      /*
5901
       * Ugly mechanism to save the raw entity value.
5902
       */
5903
12.8k
      xmlEntityPtr cur = NULL;
5904
5905
12.8k
      if (isParameter) {
5906
2.61k
          if ((ctxt->sax != NULL) &&
5907
2.61k
        (ctxt->sax->getParameterEntity != NULL))
5908
2.61k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5909
10.2k
      } else {
5910
10.2k
          if ((ctxt->sax != NULL) &&
5911
10.2k
        (ctxt->sax->getEntity != NULL))
5912
10.2k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5913
10.2k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5914
453
        cur = xmlSAX2GetEntity(ctxt, name);
5915
453
    }
5916
10.2k
      }
5917
12.8k
            if ((cur != NULL) && (cur->orig == NULL)) {
5918
4.37k
    cur->orig = orig;
5919
4.37k
                orig = NULL;
5920
4.37k
      }
5921
12.8k
  }
5922
5923
17.5k
done:
5924
17.5k
  if (value != NULL) xmlFree(value);
5925
17.5k
  if (URI != NULL) xmlFree(URI);
5926
17.5k
  if (literal != NULL) xmlFree(literal);
5927
17.5k
        if (orig != NULL) xmlFree(orig);
5928
17.5k
    }
5929
18.3k
}
5930
5931
/**
5932
 * xmlParseDefaultDecl:
5933
 * @ctxt:  an XML parser context
5934
 * @value:  Receive a possible fixed default value for the attribute
5935
 *
5936
 * DEPRECATED: Internal function, don't use.
5937
 *
5938
 * Parse an attribute default declaration
5939
 *
5940
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5941
 *
5942
 * [ VC: Required Attribute ]
5943
 * if the default declaration is the keyword #REQUIRED, then the
5944
 * attribute must be specified for all elements of the type in the
5945
 * attribute-list declaration.
5946
 *
5947
 * [ VC: Attribute Default Legal ]
5948
 * The declared default value must meet the lexical constraints of
5949
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5950
 *
5951
 * [ VC: Fixed Attribute Default ]
5952
 * if an attribute has a default value declared with the #FIXED
5953
 * keyword, instances of that attribute must match the default value.
5954
 *
5955
 * [ WFC: No < in Attribute Values ]
5956
 * handled in xmlParseAttValue()
5957
 *
5958
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5959
 *          or XML_ATTRIBUTE_FIXED.
5960
 */
5961
5962
int
5963
34.8k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5964
34.8k
    int val;
5965
34.8k
    xmlChar *ret;
5966
5967
34.8k
    *value = NULL;
5968
34.8k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5969
413
  SKIP(9);
5970
413
  return(XML_ATTRIBUTE_REQUIRED);
5971
413
    }
5972
34.4k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5973
1.14k
  SKIP(8);
5974
1.14k
  return(XML_ATTRIBUTE_IMPLIED);
5975
1.14k
    }
5976
33.2k
    val = XML_ATTRIBUTE_NONE;
5977
33.2k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5978
445
  SKIP(6);
5979
445
  val = XML_ATTRIBUTE_FIXED;
5980
445
  if (SKIP_BLANKS_PE == 0) {
5981
215
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5982
215
         "Space required after '#FIXED'\n");
5983
215
  }
5984
445
    }
5985
33.2k
    ret = xmlParseAttValue(ctxt);
5986
33.2k
    if (ret == NULL) {
5987
12.0k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5988
12.0k
           "Attribute default value declaration error\n");
5989
12.0k
    } else
5990
21.2k
        *value = ret;
5991
33.2k
    return(val);
5992
34.4k
}
5993
5994
/**
5995
 * xmlParseNotationType:
5996
 * @ctxt:  an XML parser context
5997
 *
5998
 * DEPRECATED: Internal function, don't use.
5999
 *
6000
 * parse an Notation attribute type.
6001
 *
6002
 * Note: the leading 'NOTATION' S part has already being parsed...
6003
 *
6004
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6005
 *
6006
 * [ VC: Notation Attributes ]
6007
 * Values of this type must match one of the notation names included
6008
 * in the declaration; all notation names in the declaration must be declared.
6009
 *
6010
 * Returns: the notation attribute tree built while parsing
6011
 */
6012
6013
xmlEnumerationPtr
6014
1.53k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
6015
1.53k
    const xmlChar *name;
6016
1.53k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6017
6018
1.53k
    if (RAW != '(') {
6019
247
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
6020
247
  return(NULL);
6021
247
    }
6022
2.80k
    do {
6023
2.80k
        NEXT;
6024
2.80k
  SKIP_BLANKS_PE;
6025
2.80k
        name = xmlParseName(ctxt);
6026
2.80k
  if (name == NULL) {
6027
363
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6028
363
         "Name expected in NOTATION declaration\n");
6029
363
            xmlFreeEnumeration(ret);
6030
363
      return(NULL);
6031
363
  }
6032
2.44k
  tmp = ret;
6033
4.52k
  while (tmp != NULL) {
6034
2.88k
      if (xmlStrEqual(name, tmp->name)) {
6035
794
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6036
794
    "standalone: attribute notation value token %s duplicated\n",
6037
794
         name, NULL);
6038
794
    if (!xmlDictOwns(ctxt->dict, name))
6039
0
        xmlFree((xmlChar *) name);
6040
794
    break;
6041
794
      }
6042
2.08k
      tmp = tmp->next;
6043
2.08k
  }
6044
2.44k
  if (tmp == NULL) {
6045
1.64k
      cur = xmlCreateEnumeration(name);
6046
1.64k
      if (cur == NULL) {
6047
0
                xmlErrMemory(ctxt);
6048
0
                xmlFreeEnumeration(ret);
6049
0
                return(NULL);
6050
0
            }
6051
1.64k
      if (last == NULL) ret = last = cur;
6052
616
      else {
6053
616
    last->next = cur;
6054
616
    last = cur;
6055
616
      }
6056
1.64k
  }
6057
2.44k
  SKIP_BLANKS_PE;
6058
2.44k
    } while (RAW == '|');
6059
925
    if (RAW != ')') {
6060
457
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
6061
457
        xmlFreeEnumeration(ret);
6062
457
  return(NULL);
6063
457
    }
6064
468
    NEXT;
6065
468
    return(ret);
6066
925
}
6067
6068
/**
6069
 * xmlParseEnumerationType:
6070
 * @ctxt:  an XML parser context
6071
 *
6072
 * DEPRECATED: Internal function, don't use.
6073
 *
6074
 * parse an Enumeration attribute type.
6075
 *
6076
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
6077
 *
6078
 * [ VC: Enumeration ]
6079
 * Values of this type must match one of the Nmtoken tokens in
6080
 * the declaration
6081
 *
6082
 * Returns: the enumeration attribute tree built while parsing
6083
 */
6084
6085
xmlEnumerationPtr
6086
7.50k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
6087
7.50k
    xmlChar *name;
6088
7.50k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6089
6090
7.50k
    if (RAW != '(') {
6091
623
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
6092
623
  return(NULL);
6093
623
    }
6094
8.50k
    do {
6095
8.50k
        NEXT;
6096
8.50k
  SKIP_BLANKS_PE;
6097
8.50k
        name = xmlParseNmtoken(ctxt);
6098
8.50k
  if (name == NULL) {
6099
293
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
6100
293
      return(ret);
6101
293
  }
6102
8.21k
  tmp = ret;
6103
11.5k
  while (tmp != NULL) {
6104
4.08k
      if (xmlStrEqual(name, tmp->name)) {
6105
777
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6106
777
    "standalone: attribute enumeration value token %s duplicated\n",
6107
777
         name, NULL);
6108
777
    if (!xmlDictOwns(ctxt->dict, name))
6109
777
        xmlFree(name);
6110
777
    break;
6111
777
      }
6112
3.30k
      tmp = tmp->next;
6113
3.30k
  }
6114
8.21k
  if (tmp == NULL) {
6115
7.43k
      cur = xmlCreateEnumeration(name);
6116
7.43k
      if (!xmlDictOwns(ctxt->dict, name))
6117
7.43k
    xmlFree(name);
6118
7.43k
      if (cur == NULL) {
6119
0
                xmlErrMemory(ctxt);
6120
0
                xmlFreeEnumeration(ret);
6121
0
                return(NULL);
6122
0
            }
6123
7.43k
      if (last == NULL) ret = last = cur;
6124
802
      else {
6125
802
    last->next = cur;
6126
802
    last = cur;
6127
802
      }
6128
7.43k
  }
6129
8.21k
  SKIP_BLANKS_PE;
6130
8.21k
    } while (RAW == '|');
6131
6.58k
    if (RAW != ')') {
6132
1.05k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
6133
1.05k
  return(ret);
6134
1.05k
    }
6135
5.53k
    NEXT;
6136
5.53k
    return(ret);
6137
6.58k
}
6138
6139
/**
6140
 * xmlParseEnumeratedType:
6141
 * @ctxt:  an XML parser context
6142
 * @tree:  the enumeration tree built while parsing
6143
 *
6144
 * DEPRECATED: Internal function, don't use.
6145
 *
6146
 * parse an Enumerated attribute type.
6147
 *
6148
 * [57] EnumeratedType ::= NotationType | Enumeration
6149
 *
6150
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6151
 *
6152
 *
6153
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6154
 */
6155
6156
int
6157
9.23k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6158
9.23k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6159
1.73k
  SKIP(8);
6160
1.73k
  if (SKIP_BLANKS_PE == 0) {
6161
200
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6162
200
         "Space required after 'NOTATION'\n");
6163
200
      return(0);
6164
200
  }
6165
1.53k
  *tree = xmlParseNotationType(ctxt);
6166
1.53k
  if (*tree == NULL) return(0);
6167
468
  return(XML_ATTRIBUTE_NOTATION);
6168
1.53k
    }
6169
7.50k
    *tree = xmlParseEnumerationType(ctxt);
6170
7.50k
    if (*tree == NULL) return(0);
6171
6.63k
    return(XML_ATTRIBUTE_ENUMERATION);
6172
7.50k
}
6173
6174
/**
6175
 * xmlParseAttributeType:
6176
 * @ctxt:  an XML parser context
6177
 * @tree:  the enumeration tree built while parsing
6178
 *
6179
 * DEPRECATED: Internal function, don't use.
6180
 *
6181
 * parse the Attribute list def for an element
6182
 *
6183
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6184
 *
6185
 * [55] StringType ::= 'CDATA'
6186
 *
6187
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6188
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6189
 *
6190
 * Validity constraints for attribute values syntax are checked in
6191
 * xmlValidateAttributeValue()
6192
 *
6193
 * [ VC: ID ]
6194
 * Values of type ID must match the Name production. A name must not
6195
 * appear more than once in an XML document as a value of this type;
6196
 * i.e., ID values must uniquely identify the elements which bear them.
6197
 *
6198
 * [ VC: One ID per Element Type ]
6199
 * No element type may have more than one ID attribute specified.
6200
 *
6201
 * [ VC: ID Attribute Default ]
6202
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6203
 *
6204
 * [ VC: IDREF ]
6205
 * Values of type IDREF must match the Name production, and values
6206
 * of type IDREFS must match Names; each IDREF Name must match the value
6207
 * of an ID attribute on some element in the XML document; i.e. IDREF
6208
 * values must match the value of some ID attribute.
6209
 *
6210
 * [ VC: Entity Name ]
6211
 * Values of type ENTITY must match the Name production, values
6212
 * of type ENTITIES must match Names; each Entity Name must match the
6213
 * name of an unparsed entity declared in the DTD.
6214
 *
6215
 * [ VC: Name Token ]
6216
 * Values of type NMTOKEN must match the Nmtoken production; values
6217
 * of type NMTOKENS must match Nmtokens.
6218
 *
6219
 * Returns the attribute type
6220
 */
6221
int
6222
38.2k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6223
38.2k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6224
1.85k
  SKIP(5);
6225
1.85k
  return(XML_ATTRIBUTE_CDATA);
6226
36.4k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6227
3.66k
  SKIP(6);
6228
3.66k
  return(XML_ATTRIBUTE_IDREFS);
6229
32.7k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6230
778
  SKIP(5);
6231
778
  return(XML_ATTRIBUTE_IDREF);
6232
31.9k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6233
19.7k
        SKIP(2);
6234
19.7k
  return(XML_ATTRIBUTE_ID);
6235
19.7k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6236
686
  SKIP(6);
6237
686
  return(XML_ATTRIBUTE_ENTITY);
6238
11.5k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6239
435
  SKIP(8);
6240
435
  return(XML_ATTRIBUTE_ENTITIES);
6241
11.0k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6242
422
  SKIP(8);
6243
422
  return(XML_ATTRIBUTE_NMTOKENS);
6244
10.6k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6245
1.43k
  SKIP(7);
6246
1.43k
  return(XML_ATTRIBUTE_NMTOKEN);
6247
1.43k
     }
6248
9.23k
     return(xmlParseEnumeratedType(ctxt, tree));
6249
38.2k
}
6250
6251
/**
6252
 * xmlParseAttributeListDecl:
6253
 * @ctxt:  an XML parser context
6254
 *
6255
 * DEPRECATED: Internal function, don't use.
6256
 *
6257
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6258
 *
6259
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6260
 *
6261
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6262
 *
6263
 */
6264
void
6265
31.8k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6266
31.8k
    const xmlChar *elemName;
6267
31.8k
    const xmlChar *attrName;
6268
31.8k
    xmlEnumerationPtr tree;
6269
6270
31.8k
    if ((CUR != '<') || (NXT(1) != '!'))
6271
0
        return;
6272
31.8k
    SKIP(2);
6273
6274
31.8k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6275
31.7k
  int inputid = ctxt->input->id;
6276
6277
31.7k
  SKIP(7);
6278
31.7k
  if (SKIP_BLANKS_PE == 0) {
6279
30.4k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6280
30.4k
                     "Space required after '<!ATTLIST'\n");
6281
30.4k
  }
6282
31.7k
        elemName = xmlParseName(ctxt);
6283
31.7k
  if (elemName == NULL) {
6284
535
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6285
535
         "ATTLIST: no name for Element\n");
6286
535
      return;
6287
535
  }
6288
31.2k
  SKIP_BLANKS_PE;
6289
31.2k
  GROW;
6290
62.7k
  while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6291
57.9k
      int type;
6292
57.9k
      int def;
6293
57.9k
      xmlChar *defaultValue = NULL;
6294
6295
57.9k
      GROW;
6296
57.9k
            tree = NULL;
6297
57.9k
      attrName = xmlParseName(ctxt);
6298
57.9k
      if (attrName == NULL) {
6299
19.0k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6300
19.0k
             "ATTLIST: no name for Attribute\n");
6301
19.0k
    break;
6302
19.0k
      }
6303
38.9k
      GROW;
6304
38.9k
      if (SKIP_BLANKS_PE == 0) {
6305
616
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6306
616
            "Space required after the attribute name\n");
6307
616
    break;
6308
616
      }
6309
6310
38.2k
      type = xmlParseAttributeType(ctxt, &tree);
6311
38.2k
      if (type <= 0) {
6312
2.13k
          break;
6313
2.13k
      }
6314
6315
36.1k
      GROW;
6316
36.1k
      if (SKIP_BLANKS_PE == 0) {
6317
1.33k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6318
1.33k
             "Space required after the attribute type\n");
6319
1.33k
          if (tree != NULL)
6320
1.10k
        xmlFreeEnumeration(tree);
6321
1.33k
    break;
6322
1.33k
      }
6323
6324
34.8k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6325
34.8k
      if (def <= 0) {
6326
0
                if (defaultValue != NULL)
6327
0
        xmlFree(defaultValue);
6328
0
          if (tree != NULL)
6329
0
        xmlFreeEnumeration(tree);
6330
0
          break;
6331
0
      }
6332
34.8k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6333
19.6k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6334
6335
34.8k
      GROW;
6336
34.8k
            if (RAW != '>') {
6337
30.4k
    if (SKIP_BLANKS_PE == 0) {
6338
3.34k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6339
3.34k
      "Space required after the attribute default value\n");
6340
3.34k
        if (defaultValue != NULL)
6341
740
      xmlFree(defaultValue);
6342
3.34k
        if (tree != NULL)
6343
428
      xmlFreeEnumeration(tree);
6344
3.34k
        break;
6345
3.34k
    }
6346
30.4k
      }
6347
31.4k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6348
31.4k
    (ctxt->sax->attributeDecl != NULL))
6349
30.0k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6350
30.0k
                          type, def, defaultValue, tree);
6351
1.44k
      else if (tree != NULL)
6352
133
    xmlFreeEnumeration(tree);
6353
6354
31.4k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6355
31.4k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6356
31.4k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6357
16.8k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6358
16.8k
      }
6359
31.4k
      if (ctxt->sax2) {
6360
26.8k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6361
26.8k
      }
6362
31.4k
      if (defaultValue != NULL)
6363
20.5k
          xmlFree(defaultValue);
6364
31.4k
      GROW;
6365
31.4k
  }
6366
31.2k
  if (RAW == '>') {
6367
5.89k
      if (inputid != ctxt->input->id) {
6368
214
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6369
214
                               "Attribute list declaration doesn't start and"
6370
214
                               " stop in the same entity\n");
6371
214
      }
6372
5.89k
      NEXT;
6373
5.89k
  }
6374
31.2k
    }
6375
31.8k
}
6376
6377
/**
6378
 * xmlParseElementMixedContentDecl:
6379
 * @ctxt:  an XML parser context
6380
 * @inputchk:  the input used for the current entity, needed for boundary checks
6381
 *
6382
 * DEPRECATED: Internal function, don't use.
6383
 *
6384
 * parse the declaration for a Mixed Element content
6385
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6386
 *
6387
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6388
 *                '(' S? '#PCDATA' S? ')'
6389
 *
6390
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6391
 *
6392
 * [ VC: No Duplicate Types ]
6393
 * The same name must not appear more than once in a single
6394
 * mixed-content declaration.
6395
 *
6396
 * returns: the list of the xmlElementContentPtr describing the element choices
6397
 */
6398
xmlElementContentPtr
6399
1.65k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6400
1.65k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6401
1.65k
    const xmlChar *elem = NULL;
6402
6403
1.65k
    GROW;
6404
1.65k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6405
1.65k
  SKIP(7);
6406
1.65k
  SKIP_BLANKS_PE;
6407
1.65k
  if (RAW == ')') {
6408
491
      if (ctxt->input->id != inputchk) {
6409
66
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6410
66
                               "Element content declaration doesn't start and"
6411
66
                               " stop in the same entity\n");
6412
66
      }
6413
491
      NEXT;
6414
491
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6415
491
      if (ret == NULL)
6416
0
                goto mem_error;
6417
491
      if (RAW == '*') {
6418
202
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6419
202
    NEXT;
6420
202
      }
6421
491
      return(ret);
6422
491
  }
6423
1.16k
  if ((RAW == '(') || (RAW == '|')) {
6424
795
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6425
795
      if (ret == NULL)
6426
0
                goto mem_error;
6427
795
  }
6428
1.86k
  while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6429
954
      NEXT;
6430
954
            n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6431
954
            if (n == NULL)
6432
0
                goto mem_error;
6433
954
      if (elem == NULL) {
6434
794
    n->c1 = cur;
6435
794
    if (cur != NULL)
6436
794
        cur->parent = n;
6437
794
    ret = cur = n;
6438
794
      } else {
6439
160
          cur->c2 = n;
6440
160
    n->parent = cur;
6441
160
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6442
160
                if (n->c1 == NULL)
6443
0
                    goto mem_error;
6444
160
    n->c1->parent = n;
6445
160
    cur = n;
6446
160
      }
6447
954
      SKIP_BLANKS_PE;
6448
954
      elem = xmlParseName(ctxt);
6449
954
      if (elem == NULL) {
6450
249
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6451
249
      "xmlParseElementMixedContentDecl : Name expected\n");
6452
249
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6453
249
    return(NULL);
6454
249
      }
6455
705
      SKIP_BLANKS_PE;
6456
705
      GROW;
6457
705
  }
6458
911
  if ((RAW == ')') && (NXT(1) == '*')) {
6459
510
      if (elem != NULL) {
6460
510
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6461
510
                                   XML_ELEMENT_CONTENT_ELEMENT);
6462
510
    if (cur->c2 == NULL)
6463
0
                    goto mem_error;
6464
510
    cur->c2->parent = cur;
6465
510
            }
6466
510
            if (ret != NULL)
6467
510
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6468
510
      if (ctxt->input->id != inputchk) {
6469
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6470
0
                               "Element content declaration doesn't start and"
6471
0
                               " stop in the same entity\n");
6472
0
      }
6473
510
      SKIP(2);
6474
510
  } else {
6475
401
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6476
401
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6477
401
      return(NULL);
6478
401
  }
6479
6480
911
    } else {
6481
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6482
0
    }
6483
510
    return(ret);
6484
6485
0
mem_error:
6486
0
    xmlErrMemory(ctxt);
6487
0
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6488
0
    return(NULL);
6489
1.65k
}
6490
6491
/**
6492
 * xmlParseElementChildrenContentDeclPriv:
6493
 * @ctxt:  an XML parser context
6494
 * @inputchk:  the input used for the current entity, needed for boundary checks
6495
 * @depth: the level of recursion
6496
 *
6497
 * Parse the declaration of an element's children content (choice or seq).
6498
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6499
 *
6500
 *
6501
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6502
 *
6503
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6504
 *
6505
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6506
 *
6507
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6508
 *
6509
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6510
 * TODO Parameter-entity replacement text must be properly nested
6511
 *  with parenthesized groups. That is to say, if either of the
6512
 *  opening or closing parentheses in a choice, seq, or Mixed
6513
 *  construct is contained in the replacement text for a parameter
6514
 *  entity, both must be contained in the same replacement text. For
6515
 *  interoperability, if a parameter-entity reference appears in a
6516
 *  choice, seq, or Mixed construct, its replacement text should not
6517
 *  be empty, and neither the first nor last non-blank character of
6518
 *  the replacement text should be a connector (| or ,).
6519
 *
6520
 * Returns the tree of xmlElementContentPtr describing the element
6521
 *          hierarchy.
6522
 */
6523
static xmlElementContentPtr
6524
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6525
13.9k
                                       int depth) {
6526
13.9k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6527
13.9k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6528
13.9k
    const xmlChar *elem;
6529
13.9k
    xmlChar type = 0;
6530
6531
13.9k
    if (depth > maxDepth) {
6532
1
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6533
1
                "xmlParseElementChildrenContentDecl : depth %d too deep, "
6534
1
                "use XML_PARSE_HUGE\n", depth);
6535
1
  return(NULL);
6536
1
    }
6537
13.9k
    SKIP_BLANKS_PE;
6538
13.9k
    GROW;
6539
13.9k
    if (RAW == '(') {
6540
5.58k
  int inputid = ctxt->input->id;
6541
6542
        /* Recurse on first child */
6543
5.58k
  NEXT;
6544
5.58k
  SKIP_BLANKS_PE;
6545
5.58k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6546
5.58k
                                                           depth + 1);
6547
5.58k
        if (cur == NULL)
6548
3.03k
            return(NULL);
6549
2.54k
  SKIP_BLANKS_PE;
6550
2.54k
  GROW;
6551
8.35k
    } else {
6552
8.35k
  elem = xmlParseName(ctxt);
6553
8.35k
  if (elem == NULL) {
6554
547
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6555
547
      return(NULL);
6556
547
  }
6557
7.80k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6558
7.80k
  if (cur == NULL) {
6559
0
      xmlErrMemory(ctxt);
6560
0
      return(NULL);
6561
0
  }
6562
7.80k
  GROW;
6563
7.80k
  if (RAW == '?') {
6564
1.90k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6565
1.90k
      NEXT;
6566
5.90k
  } else if (RAW == '*') {
6567
1.31k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6568
1.31k
      NEXT;
6569
4.58k
  } else if (RAW == '+') {
6570
429
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6571
429
      NEXT;
6572
4.15k
  } else {
6573
4.15k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6574
4.15k
  }
6575
7.80k
  GROW;
6576
7.80k
    }
6577
10.3k
    SKIP_BLANKS_PE;
6578
17.4k
    while ((RAW != ')') && (PARSER_STOPPED(ctxt) == 0)) {
6579
        /*
6580
   * Each loop we parse one separator and one element.
6581
   */
6582
10.2k
        if (RAW == ',') {
6583
1.70k
      if (type == 0) type = CUR;
6584
6585
      /*
6586
       * Detect "Name | Name , Name" error
6587
       */
6588
1.01k
      else if (type != CUR) {
6589
1
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6590
1
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6591
1
                      type);
6592
1
    if ((last != NULL) && (last != ret))
6593
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6594
1
    if (ret != NULL)
6595
1
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6596
1
    return(NULL);
6597
1
      }
6598
1.70k
      NEXT;
6599
6600
1.70k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6601
1.70k
      if (op == NULL) {
6602
0
                xmlErrMemory(ctxt);
6603
0
    if ((last != NULL) && (last != ret))
6604
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6605
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6606
0
    return(NULL);
6607
0
      }
6608
1.70k
      if (last == NULL) {
6609
689
    op->c1 = ret;
6610
689
    if (ret != NULL)
6611
689
        ret->parent = op;
6612
689
    ret = cur = op;
6613
1.01k
      } else {
6614
1.01k
          cur->c2 = op;
6615
1.01k
    if (op != NULL)
6616
1.01k
        op->parent = cur;
6617
1.01k
    op->c1 = last;
6618
1.01k
    if (last != NULL)
6619
1.01k
        last->parent = op;
6620
1.01k
    cur =op;
6621
1.01k
    last = NULL;
6622
1.01k
      }
6623
8.54k
  } else if (RAW == '|') {
6624
7.12k
      if (type == 0) type = CUR;
6625
6626
      /*
6627
       * Detect "Name , Name | Name" error
6628
       */
6629
2.19k
      else if (type != CUR) {
6630
3
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6631
3
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6632
3
          type);
6633
3
    if ((last != NULL) && (last != ret))
6634
3
        xmlFreeDocElementContent(ctxt->myDoc, last);
6635
3
    if (ret != NULL)
6636
3
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6637
3
    return(NULL);
6638
3
      }
6639
7.12k
      NEXT;
6640
6641
7.12k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6642
7.12k
      if (op == NULL) {
6643
0
                xmlErrMemory(ctxt);
6644
0
    if ((last != NULL) && (last != ret))
6645
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6646
0
    if (ret != NULL)
6647
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6648
0
    return(NULL);
6649
0
      }
6650
7.12k
      if (last == NULL) {
6651
4.93k
    op->c1 = ret;
6652
4.93k
    if (ret != NULL)
6653
4.93k
        ret->parent = op;
6654
4.93k
    ret = cur = op;
6655
4.93k
      } else {
6656
2.19k
          cur->c2 = op;
6657
2.19k
    if (op != NULL)
6658
2.19k
        op->parent = cur;
6659
2.19k
    op->c1 = last;
6660
2.19k
    if (last != NULL)
6661
2.19k
        last->parent = op;
6662
2.19k
    cur =op;
6663
2.19k
    last = NULL;
6664
2.19k
      }
6665
7.12k
  } else {
6666
1.42k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6667
1.42k
      if ((last != NULL) && (last != ret))
6668
724
          xmlFreeDocElementContent(ctxt->myDoc, last);
6669
1.42k
      if (ret != NULL)
6670
1.42k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6671
1.42k
      return(NULL);
6672
1.42k
  }
6673
8.82k
  GROW;
6674
8.82k
  SKIP_BLANKS_PE;
6675
8.82k
  GROW;
6676
8.82k
  if (RAW == '(') {
6677
3.16k
      int inputid = ctxt->input->id;
6678
      /* Recurse on second child */
6679
3.16k
      NEXT;
6680
3.16k
      SKIP_BLANKS_PE;
6681
3.16k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6682
3.16k
                                                          depth + 1);
6683
3.16k
            if (last == NULL) {
6684
1.46k
    if (ret != NULL)
6685
1.46k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6686
1.46k
    return(NULL);
6687
1.46k
            }
6688
1.69k
      SKIP_BLANKS_PE;
6689
5.66k
  } else {
6690
5.66k
      elem = xmlParseName(ctxt);
6691
5.66k
      if (elem == NULL) {
6692
296
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6693
296
    if (ret != NULL)
6694
296
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6695
296
    return(NULL);
6696
296
      }
6697
5.36k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6698
5.36k
      if (last == NULL) {
6699
0
                xmlErrMemory(ctxt);
6700
0
    if (ret != NULL)
6701
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6702
0
    return(NULL);
6703
0
      }
6704
5.36k
      if (RAW == '?') {
6705
1.27k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6706
1.27k
    NEXT;
6707
4.08k
      } else if (RAW == '*') {
6708
634
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6709
634
    NEXT;
6710
3.45k
      } else if (RAW == '+') {
6711
404
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6712
404
    NEXT;
6713
3.05k
      } else {
6714
3.05k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6715
3.05k
      }
6716
5.36k
  }
6717
7.06k
  SKIP_BLANKS_PE;
6718
7.06k
  GROW;
6719
7.06k
    }
6720
7.16k
    if ((cur != NULL) && (last != NULL)) {
6721
3.12k
        cur->c2 = last;
6722
3.12k
  if (last != NULL)
6723
3.12k
      last->parent = cur;
6724
3.12k
    }
6725
7.16k
    if (ctxt->input->id != inputchk) {
6726
38
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6727
38
                       "Element content declaration doesn't start and stop in"
6728
38
                       " the same entity\n");
6729
38
    }
6730
7.16k
    NEXT;
6731
7.16k
    if (RAW == '?') {
6732
496
  if (ret != NULL) {
6733
496
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6734
496
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6735
294
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6736
202
      else
6737
202
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6738
496
  }
6739
496
  NEXT;
6740
6.66k
    } else if (RAW == '*') {
6741
2.19k
  if (ret != NULL) {
6742
2.19k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6743
2.19k
      cur = ret;
6744
      /*
6745
       * Some normalization:
6746
       * (a | b* | c?)* == (a | b | c)*
6747
       */
6748
4.40k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6749
2.21k
    if ((cur->c1 != NULL) &&
6750
2.21k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6751
2.21k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6752
1.02k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6753
2.21k
    if ((cur->c2 != NULL) &&
6754
2.21k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6755
2.21k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6756
855
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6757
2.21k
    cur = cur->c2;
6758
2.21k
      }
6759
2.19k
  }
6760
2.19k
  NEXT;
6761
4.46k
    } else if (RAW == '+') {
6762
2.28k
  if (ret != NULL) {
6763
2.28k
      int found = 0;
6764
6765
2.28k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6766
2.28k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6767
981
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6768
1.30k
      else
6769
1.30k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6770
      /*
6771
       * Some normalization:
6772
       * (a | b*)+ == (a | b)*
6773
       * (a | b?)+ == (a | b)*
6774
       */
6775
3.51k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6776
1.23k
    if ((cur->c1 != NULL) &&
6777
1.23k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6778
1.23k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6779
787
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6780
787
        found = 1;
6781
787
    }
6782
1.23k
    if ((cur->c2 != NULL) &&
6783
1.23k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6784
1.23k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6785
742
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6786
742
        found = 1;
6787
742
    }
6788
1.23k
    cur = cur->c2;
6789
1.23k
      }
6790
2.28k
      if (found)
6791
888
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6792
2.28k
  }
6793
2.28k
  NEXT;
6794
2.28k
    }
6795
7.16k
    return(ret);
6796
10.3k
}
6797
6798
/**
6799
 * xmlParseElementChildrenContentDecl:
6800
 * @ctxt:  an XML parser context
6801
 * @inputchk:  the input used for the current entity, needed for boundary checks
6802
 *
6803
 * DEPRECATED: Internal function, don't use.
6804
 *
6805
 * Parse the declaration of an element's children content (choice or seq).
6806
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6807
 *
6808
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6809
 *
6810
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6811
 *
6812
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6813
 *
6814
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6815
 *
6816
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6817
 * TODO Parameter-entity replacement text must be properly nested
6818
 *  with parenthesized groups. That is to say, if either of the
6819
 *  opening or closing parentheses in a choice, seq, or Mixed
6820
 *  construct is contained in the replacement text for a parameter
6821
 *  entity, both must be contained in the same replacement text. For
6822
 *  interoperability, if a parameter-entity reference appears in a
6823
 *  choice, seq, or Mixed construct, its replacement text should not
6824
 *  be empty, and neither the first nor last non-blank character of
6825
 *  the replacement text should be a connector (| or ,).
6826
 *
6827
 * Returns the tree of xmlElementContentPtr describing the element
6828
 *          hierarchy.
6829
 */
6830
xmlElementContentPtr
6831
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6832
    /* stub left for API/ABI compat */
6833
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6834
0
}
6835
6836
/**
6837
 * xmlParseElementContentDecl:
6838
 * @ctxt:  an XML parser context
6839
 * @name:  the name of the element being defined.
6840
 * @result:  the Element Content pointer will be stored here if any
6841
 *
6842
 * DEPRECATED: Internal function, don't use.
6843
 *
6844
 * Parse the content declaration of an element, either Mixed or Children;
6845
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6846
 *
6847
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6848
 *
6849
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6850
 */
6851
6852
int
6853
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6854
6.83k
                           xmlElementContentPtr *result) {
6855
6856
6.83k
    xmlElementContentPtr tree = NULL;
6857
6.83k
    int inputid = ctxt->input->id;
6858
6.83k
    int res;
6859
6860
6.83k
    *result = NULL;
6861
6862
6.83k
    if (RAW != '(') {
6863
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6864
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6865
0
  return(-1);
6866
0
    }
6867
6.83k
    NEXT;
6868
6.83k
    GROW;
6869
6.83k
    SKIP_BLANKS_PE;
6870
6.83k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6871
1.65k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6872
1.65k
  res = XML_ELEMENT_TYPE_MIXED;
6873
5.18k
    } else {
6874
5.18k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6875
5.18k
  res = XML_ELEMENT_TYPE_ELEMENT;
6876
5.18k
    }
6877
6.83k
    SKIP_BLANKS_PE;
6878
6.83k
    *result = tree;
6879
6.83k
    return(res);
6880
6.83k
}
6881
6882
/**
6883
 * xmlParseElementDecl:
6884
 * @ctxt:  an XML parser context
6885
 *
6886
 * DEPRECATED: Internal function, don't use.
6887
 *
6888
 * Parse an element declaration. Always consumes '<!'.
6889
 *
6890
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6891
 *
6892
 * [ VC: Unique Element Type Declaration ]
6893
 * No element type may be declared more than once
6894
 *
6895
 * Returns the type of the element, or -1 in case of error
6896
 */
6897
int
6898
8.41k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6899
8.41k
    const xmlChar *name;
6900
8.41k
    int ret = -1;
6901
8.41k
    xmlElementContentPtr content  = NULL;
6902
6903
8.41k
    if ((CUR != '<') || (NXT(1) != '!'))
6904
0
        return(ret);
6905
8.41k
    SKIP(2);
6906
6907
    /* GROW; done in the caller */
6908
8.41k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6909
8.39k
  int inputid = ctxt->input->id;
6910
6911
8.39k
  SKIP(7);
6912
8.39k
  if (SKIP_BLANKS_PE == 0) {
6913
256
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6914
256
               "Space required after 'ELEMENT'\n");
6915
256
      return(-1);
6916
256
  }
6917
8.14k
        name = xmlParseName(ctxt);
6918
8.14k
  if (name == NULL) {
6919
270
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6920
270
         "xmlParseElementDecl: no name for Element\n");
6921
270
      return(-1);
6922
270
  }
6923
7.87k
  if (SKIP_BLANKS_PE == 0) {
6924
6.80k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6925
6.80k
         "Space required after the element name\n");
6926
6.80k
  }
6927
7.87k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6928
484
      SKIP(5);
6929
      /*
6930
       * Element must always be empty.
6931
       */
6932
484
      ret = XML_ELEMENT_TYPE_EMPTY;
6933
7.38k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6934
7.38k
             (NXT(2) == 'Y')) {
6935
297
      SKIP(3);
6936
      /*
6937
       * Element is a generic container.
6938
       */
6939
297
      ret = XML_ELEMENT_TYPE_ANY;
6940
7.09k
  } else if (RAW == '(') {
6941
6.83k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6942
6.83k
  } else {
6943
      /*
6944
       * [ WFC: PEs in Internal Subset ] error handling.
6945
       */
6946
252
            xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6947
252
                  "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6948
252
      return(-1);
6949
252
  }
6950
6951
7.62k
  SKIP_BLANKS_PE;
6952
6953
7.62k
  if (RAW != '>') {
6954
3.93k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6955
3.93k
      if (content != NULL) {
6956
1.72k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6957
1.72k
      }
6958
3.93k
  } else {
6959
3.68k
      if (inputid != ctxt->input->id) {
6960
117
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6961
117
                               "Element declaration doesn't start and stop in"
6962
117
                               " the same entity\n");
6963
117
      }
6964
6965
3.68k
      NEXT;
6966
3.68k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6967
3.68k
    (ctxt->sax->elementDecl != NULL)) {
6968
3.43k
    if (content != NULL)
6969
2.06k
        content->parent = NULL;
6970
3.43k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6971
3.43k
                           content);
6972
3.43k
    if ((content != NULL) && (content->parent == NULL)) {
6973
        /*
6974
         * this is a trick: if xmlAddElementDecl is called,
6975
         * instead of copying the full tree it is plugged directly
6976
         * if called from the parser. Avoid duplicating the
6977
         * interfaces or changing the API/ABI.
6978
         */
6979
1.80k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6980
1.80k
    }
6981
3.43k
      } else if (content != NULL) {
6982
127
    xmlFreeDocElementContent(ctxt->myDoc, content);
6983
127
      }
6984
3.68k
  }
6985
7.62k
    }
6986
7.63k
    return(ret);
6987
8.41k
}
6988
6989
/**
6990
 * xmlParseConditionalSections:
6991
 * @ctxt:  an XML parser context
6992
 *
6993
 * Parse a conditional section. Always consumes '<!['.
6994
 *
6995
 * [61] conditionalSect ::= includeSect | ignoreSect
6996
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6997
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6998
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6999
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
7000
 */
7001
7002
static void
7003
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
7004
0
    int *inputIds = NULL;
7005
0
    size_t inputIdsSize = 0;
7006
0
    size_t depth = 0;
7007
7008
0
    while (PARSER_STOPPED(ctxt) == 0) {
7009
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7010
0
            int id = ctxt->input->id;
7011
7012
0
            SKIP(3);
7013
0
            SKIP_BLANKS_PE;
7014
7015
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
7016
0
                SKIP(7);
7017
0
                SKIP_BLANKS_PE;
7018
0
                if (RAW != '[') {
7019
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7020
0
                    xmlHaltParser(ctxt);
7021
0
                    goto error;
7022
0
                }
7023
0
                if (ctxt->input->id != id) {
7024
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7025
0
                                   "All markup of the conditional section is"
7026
0
                                   " not in the same entity\n");
7027
0
                }
7028
0
                NEXT;
7029
7030
0
                if (inputIdsSize <= depth) {
7031
0
                    int *tmp;
7032
7033
0
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
7034
0
                    tmp = (int *) xmlRealloc(inputIds,
7035
0
                            inputIdsSize * sizeof(int));
7036
0
                    if (tmp == NULL) {
7037
0
                        xmlErrMemory(ctxt);
7038
0
                        goto error;
7039
0
                    }
7040
0
                    inputIds = tmp;
7041
0
                }
7042
0
                inputIds[depth] = id;
7043
0
                depth++;
7044
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
7045
0
                size_t ignoreDepth = 0;
7046
7047
0
                SKIP(6);
7048
0
                SKIP_BLANKS_PE;
7049
0
                if (RAW != '[') {
7050
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7051
0
                    xmlHaltParser(ctxt);
7052
0
                    goto error;
7053
0
                }
7054
0
                if (ctxt->input->id != id) {
7055
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7056
0
                                   "All markup of the conditional section is"
7057
0
                                   " not in the same entity\n");
7058
0
                }
7059
0
                NEXT;
7060
7061
0
                while (PARSER_STOPPED(ctxt) == 0) {
7062
0
                    if (RAW == 0) {
7063
0
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
7064
0
                        goto error;
7065
0
                    }
7066
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7067
0
                        SKIP(3);
7068
0
                        ignoreDepth++;
7069
                        /* Check for integer overflow */
7070
0
                        if (ignoreDepth == 0) {
7071
0
                            xmlErrMemory(ctxt);
7072
0
                            goto error;
7073
0
                        }
7074
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
7075
0
                               (NXT(2) == '>')) {
7076
0
                        SKIP(3);
7077
0
                        if (ignoreDepth == 0)
7078
0
                            break;
7079
0
                        ignoreDepth--;
7080
0
                    } else {
7081
0
                        NEXT;
7082
0
                    }
7083
0
                }
7084
7085
0
                if (ctxt->input->id != id) {
7086
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7087
0
                                   "All markup of the conditional section is"
7088
0
                                   " not in the same entity\n");
7089
0
                }
7090
0
            } else {
7091
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
7092
0
                xmlHaltParser(ctxt);
7093
0
                goto error;
7094
0
            }
7095
0
        } else if ((depth > 0) &&
7096
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
7097
0
            depth--;
7098
0
            if (ctxt->input->id != inputIds[depth]) {
7099
0
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7100
0
                               "All markup of the conditional section is not"
7101
0
                               " in the same entity\n");
7102
0
            }
7103
0
            SKIP(3);
7104
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7105
0
            xmlParseMarkupDecl(ctxt);
7106
0
        } else {
7107
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7108
0
            xmlHaltParser(ctxt);
7109
0
            goto error;
7110
0
        }
7111
7112
0
        if (depth == 0)
7113
0
            break;
7114
7115
0
        SKIP_BLANKS_PE;
7116
0
        SHRINK;
7117
0
        GROW;
7118
0
    }
7119
7120
0
error:
7121
0
    xmlFree(inputIds);
7122
0
}
7123
7124
/**
7125
 * xmlParseMarkupDecl:
7126
 * @ctxt:  an XML parser context
7127
 *
7128
 * DEPRECATED: Internal function, don't use.
7129
 *
7130
 * Parse markup declarations. Always consumes '<!' or '<?'.
7131
 *
7132
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7133
 *                     NotationDecl | PI | Comment
7134
 *
7135
 * [ VC: Proper Declaration/PE Nesting ]
7136
 * Parameter-entity replacement text must be properly nested with
7137
 * markup declarations. That is to say, if either the first character
7138
 * or the last character of a markup declaration (markupdecl above) is
7139
 * contained in the replacement text for a parameter-entity reference,
7140
 * both must be contained in the same replacement text.
7141
 *
7142
 * [ WFC: PEs in Internal Subset ]
7143
 * In the internal DTD subset, parameter-entity references can occur
7144
 * only where markup declarations can occur, not within markup declarations.
7145
 * (This does not apply to references that occur in external parameter
7146
 * entities or to the external subset.)
7147
 */
7148
void
7149
93.5k
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7150
93.5k
    GROW;
7151
93.5k
    if (CUR == '<') {
7152
93.5k
        if (NXT(1) == '!') {
7153
73.9k
      switch (NXT(2)) {
7154
26.8k
          case 'E':
7155
26.8k
        if (NXT(3) == 'L')
7156
8.41k
      xmlParseElementDecl(ctxt);
7157
18.3k
        else if (NXT(3) == 'N')
7158
18.3k
      xmlParseEntityDecl(ctxt);
7159
8
                    else
7160
8
                        SKIP(2);
7161
26.8k
        break;
7162
31.8k
          case 'A':
7163
31.8k
        xmlParseAttributeListDecl(ctxt);
7164
31.8k
        break;
7165
4.07k
          case 'N':
7166
4.07k
        xmlParseNotationDecl(ctxt);
7167
4.07k
        break;
7168
10.4k
          case '-':
7169
10.4k
        xmlParseComment(ctxt);
7170
10.4k
        break;
7171
810
    default:
7172
        /* there is an error but it will be detected later */
7173
810
                    SKIP(2);
7174
810
        break;
7175
73.9k
      }
7176
73.9k
  } else if (NXT(1) == '?') {
7177
19.6k
      xmlParsePI(ctxt);
7178
19.6k
  }
7179
93.5k
    }
7180
93.5k
}
7181
7182
/**
7183
 * xmlParseTextDecl:
7184
 * @ctxt:  an XML parser context
7185
 *
7186
 * DEPRECATED: Internal function, don't use.
7187
 *
7188
 * parse an XML declaration header for external entities
7189
 *
7190
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7191
 */
7192
7193
void
7194
0
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7195
0
    xmlChar *version;
7196
7197
    /*
7198
     * We know that '<?xml' is here.
7199
     */
7200
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7201
0
  SKIP(5);
7202
0
    } else {
7203
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7204
0
  return;
7205
0
    }
7206
7207
0
    if (SKIP_BLANKS == 0) {
7208
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7209
0
           "Space needed after '<?xml'\n");
7210
0
    }
7211
7212
    /*
7213
     * We may have the VersionInfo here.
7214
     */
7215
0
    version = xmlParseVersionInfo(ctxt);
7216
0
    if (version == NULL) {
7217
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7218
0
        if (version == NULL) {
7219
0
            xmlErrMemory(ctxt);
7220
0
            return;
7221
0
        }
7222
0
    } else {
7223
0
  if (SKIP_BLANKS == 0) {
7224
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7225
0
               "Space needed here\n");
7226
0
  }
7227
0
    }
7228
0
    ctxt->input->version = version;
7229
7230
    /*
7231
     * We must have the encoding declaration
7232
     */
7233
0
    xmlParseEncodingDecl(ctxt);
7234
7235
0
    SKIP_BLANKS;
7236
0
    if ((RAW == '?') && (NXT(1) == '>')) {
7237
0
        SKIP(2);
7238
0
    } else if (RAW == '>') {
7239
        /* Deprecated old WD ... */
7240
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7241
0
  NEXT;
7242
0
    } else {
7243
0
        int c;
7244
7245
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7246
0
        while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7247
0
            NEXT;
7248
0
            if (c == '>')
7249
0
                break;
7250
0
        }
7251
0
    }
7252
0
}
7253
7254
/**
7255
 * xmlParseExternalSubset:
7256
 * @ctxt:  an XML parser context
7257
 * @ExternalID: the external identifier
7258
 * @SystemID: the system identifier (or URL)
7259
 *
7260
 * parse Markup declarations from an external subset
7261
 *
7262
 * [30] extSubset ::= textDecl? extSubsetDecl
7263
 *
7264
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7265
 */
7266
void
7267
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7268
0
                       const xmlChar *SystemID) {
7269
0
    int oldInputNr;
7270
7271
0
    xmlCtxtInitializeLate(ctxt);
7272
7273
0
    xmlDetectEncoding(ctxt);
7274
7275
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7276
0
  xmlParseTextDecl(ctxt);
7277
0
    }
7278
0
    if (ctxt->myDoc == NULL) {
7279
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7280
0
  if (ctxt->myDoc == NULL) {
7281
0
      xmlErrMemory(ctxt);
7282
0
      return;
7283
0
  }
7284
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7285
0
    }
7286
0
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) &&
7287
0
        (xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID) == NULL)) {
7288
0
        xmlErrMemory(ctxt);
7289
0
    }
7290
7291
0
    ctxt->inSubset = 2;
7292
0
    oldInputNr = ctxt->inputNr;
7293
7294
0
    SKIP_BLANKS_PE;
7295
0
    while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) &&
7296
0
           (!PARSER_STOPPED(ctxt))) {
7297
0
  GROW;
7298
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7299
0
            xmlParseConditionalSections(ctxt);
7300
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7301
0
            xmlParseMarkupDecl(ctxt);
7302
0
        } else {
7303
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7304
0
            xmlHaltParser(ctxt);
7305
0
            return;
7306
0
        }
7307
0
        SKIP_BLANKS_PE;
7308
0
        SHRINK;
7309
0
    }
7310
7311
0
    while (ctxt->inputNr > oldInputNr)
7312
0
        xmlPopPE(ctxt);
7313
7314
0
    if (RAW != 0) {
7315
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7316
0
    }
7317
0
}
7318
7319
/**
7320
 * xmlParseReference:
7321
 * @ctxt:  an XML parser context
7322
 *
7323
 * DEPRECATED: Internal function, don't use.
7324
 *
7325
 * parse and handle entity references in content, depending on the SAX
7326
 * interface, this may end-up in a call to character() if this is a
7327
 * CharRef, a predefined entity, if there is no reference() callback.
7328
 * or if the parser was asked to switch to that mode.
7329
 *
7330
 * Always consumes '&'.
7331
 *
7332
 * [67] Reference ::= EntityRef | CharRef
7333
 */
7334
void
7335
15.6k
xmlParseReference(xmlParserCtxtPtr ctxt) {
7336
15.6k
    xmlEntityPtr ent = NULL;
7337
15.6k
    const xmlChar *name;
7338
15.6k
    xmlChar *val;
7339
7340
15.6k
    if (RAW != '&')
7341
0
        return;
7342
7343
    /*
7344
     * Simple case of a CharRef
7345
     */
7346
15.6k
    if (NXT(1) == '#') {
7347
3.55k
  int i = 0;
7348
3.55k
  xmlChar out[16];
7349
3.55k
  int value = xmlParseCharRef(ctxt);
7350
7351
3.55k
  if (value == 0)
7352
1.27k
      return;
7353
7354
        /*
7355
         * Just encode the value in UTF-8
7356
         */
7357
2.28k
        COPY_BUF(out, i, value);
7358
2.28k
        out[i] = 0;
7359
2.28k
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7360
2.28k
            (!ctxt->disableSAX))
7361
1.61k
            ctxt->sax->characters(ctxt->userData, out, i);
7362
2.28k
  return;
7363
3.55k
    }
7364
7365
    /*
7366
     * We are seeing an entity reference
7367
     */
7368
12.1k
    name = xmlParseEntityRefInternal(ctxt);
7369
12.1k
    if (name != NULL)
7370
7.60k
        ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7371
12.1k
    if (ent == NULL) return;
7372
5.55k
    if (!ctxt->wellFormed)
7373
4.08k
  return;
7374
7375
    /* special case of predefined entities */
7376
1.47k
    if ((ent->name == NULL) ||
7377
1.47k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7378
126
  val = ent->content;
7379
126
  if (val == NULL) return;
7380
  /*
7381
   * inline the entity.
7382
   */
7383
126
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7384
126
      (!ctxt->disableSAX))
7385
126
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7386
126
  return;
7387
126
    }
7388
7389
    /*
7390
     * The first reference to the entity trigger a parsing phase
7391
     * where the ent->children is filled with the result from
7392
     * the parsing.
7393
     * Note: external parsed entities will not be loaded, it is not
7394
     * required for a non-validating parser, unless the parsing option
7395
     * of validating, or substituting entities were given. Doing so is
7396
     * far more secure as the parser will only process data coming from
7397
     * the document entity by default.
7398
     *
7399
     * FIXME: This doesn't work correctly since entities can be
7400
     * expanded with different namespace declarations in scope.
7401
     * For example:
7402
     *
7403
     * <!DOCTYPE doc [
7404
     *   <!ENTITY ent "<ns:elem/>">
7405
     * ]>
7406
     * <doc>
7407
     *   <decl1 xmlns:ns="urn:ns1">
7408
     *     &ent;
7409
     *   </decl1>
7410
     *   <decl2 xmlns:ns="urn:ns2">
7411
     *     &ent;
7412
     *   </decl2>
7413
     * </doc>
7414
     *
7415
     * Proposed fix:
7416
     *
7417
     * - Ignore current namespace declarations when parsing the
7418
     *   entity. If a prefix can't be resolved, don't report an error
7419
     *   but mark it as unresolved.
7420
     * - Try to resolve these prefixes when expanding the entity.
7421
     *   This will require a specialized version of xmlStaticCopyNode
7422
     *   which can also make use of the namespace hash table to avoid
7423
     *   quadratic behavior.
7424
     *
7425
     * Alternatively, we could simply reparse the entity on each
7426
     * expansion like we already do with custom SAX callbacks.
7427
     * External entity content should be cached in this case.
7428
     */
7429
1.34k
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7430
1.34k
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7431
439
         ((ctxt->replaceEntities) ||
7432
908
          (ctxt->validate)))) {
7433
908
        if ((ent->flags & XML_ENT_PARSED) == 0) {
7434
329
            xmlCtxtParseEntity(ctxt, ent);
7435
579
        } else if (ent->children == NULL) {
7436
            /*
7437
             * Probably running in SAX mode and the callbacks don't
7438
             * build the entity content. Parse the entity again.
7439
             *
7440
             * This will also be triggered in normal tree builder mode
7441
             * if an entity happens to be empty, causing unnecessary
7442
             * reloads. It's hard to come up with a reliable check in
7443
             * which mode we're running.
7444
             */
7445
270
            xmlCtxtParseEntity(ctxt, ent);
7446
270
        }
7447
908
    }
7448
7449
    /*
7450
     * We also check for amplification if entities aren't substituted.
7451
     * They might be expanded later.
7452
     */
7453
1.34k
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7454
0
        return;
7455
7456
1.34k
    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7457
160
        return;
7458
7459
1.18k
    if (ctxt->replaceEntities == 0) {
7460
  /*
7461
   * Create a reference
7462
   */
7463
1.18k
        if (ctxt->sax->reference != NULL)
7464
1.18k
      ctxt->sax->reference(ctxt->userData, ent->name);
7465
1.18k
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7466
0
        xmlNodePtr copy, cur;
7467
7468
        /*
7469
         * Seems we are generating the DOM content, copy the tree
7470
   */
7471
0
        cur = ent->children;
7472
7473
        /*
7474
         * Handle first text node with SAX to coalesce text efficiently
7475
         */
7476
0
        if ((cur->type == XML_TEXT_NODE) ||
7477
0
            (cur->type == XML_CDATA_SECTION_NODE)) {
7478
0
            int len = xmlStrlen(cur->content);
7479
7480
0
            if ((cur->type == XML_TEXT_NODE) ||
7481
0
                (ctxt->sax->cdataBlock == NULL)) {
7482
0
                if (ctxt->sax->characters != NULL)
7483
0
                    ctxt->sax->characters(ctxt, cur->content, len);
7484
0
            } else {
7485
0
                if (ctxt->sax->cdataBlock != NULL)
7486
0
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
7487
0
            }
7488
7489
0
            cur = cur->next;
7490
0
        }
7491
7492
0
        while (cur != NULL) {
7493
0
            xmlNodePtr last;
7494
7495
            /*
7496
             * Handle last text node with SAX to coalesce text efficiently
7497
             */
7498
0
            if ((cur->next == NULL) &&
7499
0
                ((cur->type == XML_TEXT_NODE) ||
7500
0
                 (cur->type == XML_CDATA_SECTION_NODE))) {
7501
0
                int len = xmlStrlen(cur->content);
7502
7503
0
                if ((cur->type == XML_TEXT_NODE) ||
7504
0
                    (ctxt->sax->cdataBlock == NULL)) {
7505
0
                    if (ctxt->sax->characters != NULL)
7506
0
                        ctxt->sax->characters(ctxt, cur->content, len);
7507
0
                } else {
7508
0
                    if (ctxt->sax->cdataBlock != NULL)
7509
0
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
7510
0
                }
7511
7512
0
                break;
7513
0
            }
7514
7515
            /*
7516
             * Reset coalesce buffer stats only for non-text nodes.
7517
             */
7518
0
            ctxt->nodemem = 0;
7519
0
            ctxt->nodelen = 0;
7520
7521
0
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7522
7523
0
            if (copy == NULL) {
7524
0
                xmlErrMemory(ctxt);
7525
0
                break;
7526
0
            }
7527
7528
0
            if (ctxt->parseMode == XML_PARSE_READER) {
7529
                /* Needed for reader */
7530
0
                copy->extra = cur->extra;
7531
                /* Maybe needed for reader */
7532
0
                copy->_private = cur->_private;
7533
0
            }
7534
7535
0
            copy->parent = ctxt->node;
7536
0
            last = ctxt->node->last;
7537
0
            if (last == NULL) {
7538
0
                ctxt->node->children = copy;
7539
0
            } else {
7540
0
                last->next = copy;
7541
0
                copy->prev = last;
7542
0
            }
7543
0
            ctxt->node->last = copy;
7544
7545
0
            cur = cur->next;
7546
0
        }
7547
0
    }
7548
1.18k
}
7549
7550
static xmlEntityPtr
7551
288k
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7552
288k
    xmlEntityPtr ent;
7553
7554
    /*
7555
     * Predefined entities override any extra definition
7556
     */
7557
288k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7558
283k
        ent = xmlGetPredefinedEntity(name);
7559
283k
        if (ent != NULL)
7560
62.6k
            return(ent);
7561
283k
    }
7562
7563
    /*
7564
     * Ask first SAX for entity resolution, otherwise try the
7565
     * entities which may have stored in the parser context.
7566
     */
7567
226k
    if (ctxt->sax != NULL) {
7568
226k
  if (ctxt->sax->getEntity != NULL)
7569
226k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7570
226k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7571
226k
      (ctxt->options & XML_PARSE_OLDSAX))
7572
194
      ent = xmlGetPredefinedEntity(name);
7573
226k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7574
226k
      (ctxt->userData==ctxt)) {
7575
439
      ent = xmlSAX2GetEntity(ctxt, name);
7576
439
  }
7577
226k
    }
7578
    /*
7579
     * [ WFC: Entity Declared ]
7580
     * In a document without any DTD, a document with only an
7581
     * internal DTD subset which contains no parameter entity
7582
     * references, or a document with "standalone='yes'", the
7583
     * Name given in the entity reference must match that in an
7584
     * entity declaration, except that well-formed documents
7585
     * need not declare any of the following entities: amp, lt,
7586
     * gt, apos, quot.
7587
     * The declaration of a parameter entity must precede any
7588
     * reference to it.
7589
     * Similarly, the declaration of a general entity must
7590
     * precede any reference to it which appears in a default
7591
     * value in an attribute-list declaration. Note that if
7592
     * entities are declared in the external subset or in
7593
     * external parameter entities, a non-validating processor
7594
     * is not obligated to read and process their declarations;
7595
     * for such documents, the rule that an entity must be
7596
     * declared is a well-formedness constraint only if
7597
     * standalone='yes'.
7598
     */
7599
226k
    if (ent == NULL) {
7600
35.6k
  if ((ctxt->standalone == 1) ||
7601
35.6k
      ((ctxt->hasExternalSubset == 0) &&
7602
35.3k
       (ctxt->hasPErefs == 0))) {
7603
22.2k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7604
22.2k
         "Entity '%s' not defined\n", name);
7605
22.2k
  } else {
7606
13.4k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7607
13.4k
         "Entity '%s' not defined\n", name);
7608
13.4k
      if ((ctxt->inSubset == 0) &&
7609
13.4k
    (ctxt->sax != NULL) &&
7610
13.4k
                (ctxt->disableSAX == 0) &&
7611
13.4k
    (ctxt->sax->reference != NULL)) {
7612
1.15k
    ctxt->sax->reference(ctxt->userData, name);
7613
1.15k
      }
7614
13.4k
  }
7615
35.6k
  ctxt->valid = 0;
7616
35.6k
    }
7617
7618
    /*
7619
     * [ WFC: Parsed Entity ]
7620
     * An entity reference must not contain the name of an
7621
     * unparsed entity
7622
     */
7623
190k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7624
206
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7625
206
     "Entity reference to unparsed entity %s\n", name);
7626
206
        ent = NULL;
7627
206
    }
7628
7629
    /*
7630
     * [ WFC: No External Entity References ]
7631
     * Attribute values cannot contain direct or indirect
7632
     * entity references to external entities.
7633
     */
7634
190k
    else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7635
661
        if (inAttr) {
7636
207
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7637
207
                 "Attribute references external entity '%s'\n", name);
7638
207
            ent = NULL;
7639
207
        }
7640
661
    }
7641
7642
226k
    return(ent);
7643
288k
}
7644
7645
/**
7646
 * xmlParseEntityRefInternal:
7647
 * @ctxt:  an XML parser context
7648
 * @inAttr:  whether we are in an attribute value
7649
 *
7650
 * Parse an entity reference. Always consumes '&'.
7651
 *
7652
 * [68] EntityRef ::= '&' Name ';'
7653
 *
7654
 * Returns the name, or NULL in case of error.
7655
 */
7656
static const xmlChar *
7657
93.2k
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7658
93.2k
    const xmlChar *name;
7659
7660
93.2k
    GROW;
7661
7662
93.2k
    if (RAW != '&')
7663
0
        return(NULL);
7664
93.2k
    NEXT;
7665
93.2k
    name = xmlParseName(ctxt);
7666
93.2k
    if (name == NULL) {
7667
7.06k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7668
7.06k
           "xmlParseEntityRef: no name\n");
7669
7.06k
        return(NULL);
7670
7.06k
    }
7671
86.1k
    if (RAW != ';') {
7672
2.31k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7673
2.31k
  return(NULL);
7674
2.31k
    }
7675
83.8k
    NEXT;
7676
7677
83.8k
    return(name);
7678
86.1k
}
7679
7680
/**
7681
 * xmlParseEntityRef:
7682
 * @ctxt:  an XML parser context
7683
 *
7684
 * DEPRECATED: Internal function, don't use.
7685
 *
7686
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7687
 */
7688
xmlEntityPtr
7689
0
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7690
0
    const xmlChar *name;
7691
7692
0
    if (ctxt == NULL)
7693
0
        return(NULL);
7694
7695
0
    name = xmlParseEntityRefInternal(ctxt);
7696
0
    if (name == NULL)
7697
0
        return(NULL);
7698
7699
0
    return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7700
0
}
7701
7702
/**
7703
 * xmlParseStringEntityRef:
7704
 * @ctxt:  an XML parser context
7705
 * @str:  a pointer to an index in the string
7706
 *
7707
 * parse ENTITY references declarations, but this version parses it from
7708
 * a string value.
7709
 *
7710
 * [68] EntityRef ::= '&' Name ';'
7711
 *
7712
 * [ WFC: Entity Declared ]
7713
 * In a document without any DTD, a document with only an internal DTD
7714
 * subset which contains no parameter entity references, or a document
7715
 * with "standalone='yes'", the Name given in the entity reference
7716
 * must match that in an entity declaration, except that well-formed
7717
 * documents need not declare any of the following entities: amp, lt,
7718
 * gt, apos, quot.  The declaration of a parameter entity must precede
7719
 * any reference to it.  Similarly, the declaration of a general entity
7720
 * must precede any reference to it which appears in a default value in an
7721
 * attribute-list declaration. Note that if entities are declared in the
7722
 * external subset or in external parameter entities, a non-validating
7723
 * processor is not obligated to read and process their declarations;
7724
 * for such documents, the rule that an entity must be declared is a
7725
 * well-formedness constraint only if standalone='yes'.
7726
 *
7727
 * [ WFC: Parsed Entity ]
7728
 * An entity reference must not contain the name of an unparsed entity
7729
 *
7730
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7731
 * is updated to the current location in the string.
7732
 */
7733
static xmlChar *
7734
205k
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7735
205k
    xmlChar *name;
7736
205k
    const xmlChar *ptr;
7737
205k
    xmlChar cur;
7738
7739
205k
    if ((str == NULL) || (*str == NULL))
7740
0
        return(NULL);
7741
205k
    ptr = *str;
7742
205k
    cur = *ptr;
7743
205k
    if (cur != '&')
7744
0
  return(NULL);
7745
7746
205k
    ptr++;
7747
205k
    name = xmlParseStringName(ctxt, &ptr);
7748
205k
    if (name == NULL) {
7749
2
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7750
2
           "xmlParseStringEntityRef: no name\n");
7751
2
  *str = ptr;
7752
2
  return(NULL);
7753
2
    }
7754
205k
    if (*ptr != ';') {
7755
4
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7756
4
        xmlFree(name);
7757
4
  *str = ptr;
7758
4
  return(NULL);
7759
4
    }
7760
205k
    ptr++;
7761
7762
205k
    *str = ptr;
7763
205k
    return(name);
7764
205k
}
7765
7766
/**
7767
 * xmlParsePEReference:
7768
 * @ctxt:  an XML parser context
7769
 *
7770
 * DEPRECATED: Internal function, don't use.
7771
 *
7772
 * Parse a parameter entity reference. Always consumes '%'.
7773
 *
7774
 * The entity content is handled directly by pushing it's content as
7775
 * a new input stream.
7776
 *
7777
 * [69] PEReference ::= '%' Name ';'
7778
 *
7779
 * [ WFC: No Recursion ]
7780
 * A parsed entity must not contain a recursive
7781
 * reference to itself, either directly or indirectly.
7782
 *
7783
 * [ WFC: Entity Declared ]
7784
 * In a document without any DTD, a document with only an internal DTD
7785
 * subset which contains no parameter entity references, or a document
7786
 * with "standalone='yes'", ...  ... The declaration of a parameter
7787
 * entity must precede any reference to it...
7788
 *
7789
 * [ VC: Entity Declared ]
7790
 * In a document with an external subset or external parameter entities
7791
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7792
 * must precede any reference to it...
7793
 *
7794
 * [ WFC: In DTD ]
7795
 * Parameter-entity references may only appear in the DTD.
7796
 * NOTE: misleading but this is handled.
7797
 */
7798
void
7799
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7800
57.5k
{
7801
57.5k
    const xmlChar *name;
7802
57.5k
    xmlEntityPtr entity = NULL;
7803
57.5k
    xmlParserInputPtr input;
7804
7805
57.5k
    if (RAW != '%')
7806
0
        return;
7807
57.5k
    NEXT;
7808
57.5k
    name = xmlParseName(ctxt);
7809
57.5k
    if (name == NULL) {
7810
678
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7811
678
  return;
7812
678
    }
7813
56.8k
    if (RAW != ';') {
7814
521
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7815
521
        return;
7816
521
    }
7817
7818
56.3k
    NEXT;
7819
7820
    /*
7821
     * Request the entity from SAX
7822
     */
7823
56.3k
    if ((ctxt->sax != NULL) &&
7824
56.3k
  (ctxt->sax->getParameterEntity != NULL))
7825
56.3k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7826
56.3k
    if (entity == NULL) {
7827
  /*
7828
   * [ WFC: Entity Declared ]
7829
   * In a document without any DTD, a document with only an
7830
   * internal DTD subset which contains no parameter entity
7831
   * references, or a document with "standalone='yes'", ...
7832
   * ... The declaration of a parameter entity must precede
7833
   * any reference to it...
7834
   */
7835
2.18k
  if ((ctxt->standalone == 1) ||
7836
2.18k
      ((ctxt->hasExternalSubset == 0) &&
7837
1.98k
       (ctxt->hasPErefs == 0))) {
7838
337
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7839
337
            "PEReference: %%%s; not found\n",
7840
337
            name);
7841
1.85k
  } else {
7842
      /*
7843
       * [ VC: Entity Declared ]
7844
       * In a document with an external subset or external
7845
       * parameter entities with "standalone='no'", ...
7846
       * ... The declaration of a parameter entity must
7847
       * precede any reference to it...
7848
       */
7849
1.85k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7850
0
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7851
0
                                 "PEReference: %%%s; not found\n",
7852
0
                                 name, NULL);
7853
0
            } else
7854
1.85k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7855
1.85k
                              "PEReference: %%%s; not found\n",
7856
1.85k
                              name, NULL);
7857
1.85k
            ctxt->valid = 0;
7858
1.85k
  }
7859
54.1k
    } else {
7860
  /*
7861
   * Internal checking in case the entity quest barfed
7862
   */
7863
54.1k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7864
54.1k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7865
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7866
0
      "Internal: %%%s; is not a parameter entity\n",
7867
0
        name, NULL);
7868
54.1k
  } else {
7869
54.1k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7870
54.1k
                ((ctxt->options & XML_PARSE_NO_XXE) ||
7871
138
     ((ctxt->loadsubset == 0) &&
7872
67
      (ctxt->replaceEntities == 0) &&
7873
67
      (ctxt->validate == 0))))
7874
138
    return;
7875
7876
53.9k
            if (entity->flags & XML_ENT_EXPANDING) {
7877
2
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7878
2
                xmlHaltParser(ctxt);
7879
2
                return;
7880
2
            }
7881
7882
53.9k
      input = xmlNewEntityInputStream(ctxt, entity);
7883
53.9k
      if (xmlPushInput(ctxt, input) < 0) {
7884
0
                xmlFreeInputStream(input);
7885
0
    return;
7886
0
            }
7887
7888
53.9k
            entity->flags |= XML_ENT_EXPANDING;
7889
7890
53.9k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7891
0
                xmlDetectEncoding(ctxt);
7892
7893
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7894
0
                    (IS_BLANK_CH(NXT(5)))) {
7895
0
                    xmlParseTextDecl(ctxt);
7896
0
                }
7897
0
            }
7898
53.9k
  }
7899
54.1k
    }
7900
56.1k
    ctxt->hasPErefs = 1;
7901
56.1k
}
7902
7903
/**
7904
 * xmlLoadEntityContent:
7905
 * @ctxt:  an XML parser context
7906
 * @entity: an unloaded system entity
7907
 *
7908
 * Load the original content of the given system entity from the
7909
 * ExternalID/SystemID given. This is to be used for Included in Literal
7910
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7911
 *
7912
 * Returns 0 in case of success and -1 in case of failure
7913
 */
7914
static int
7915
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7916
0
    xmlParserInputPtr oldinput, input = NULL;
7917
0
    xmlParserInputPtr *oldinputTab;
7918
0
    const xmlChar *oldencoding;
7919
0
    xmlChar *content = NULL;
7920
0
    size_t length, i;
7921
0
    int oldinputNr, oldinputMax;
7922
0
    int ret = -1;
7923
0
    int res;
7924
7925
0
    if ((ctxt == NULL) || (entity == NULL) ||
7926
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7927
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7928
0
  (entity->content != NULL)) {
7929
0
  xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7930
0
              "xmlLoadEntityContent parameter error");
7931
0
        return(-1);
7932
0
    }
7933
7934
0
    input = xmlLoadExternalEntity((char *) entity->URI,
7935
0
           (char *) entity->ExternalID, ctxt);
7936
0
    if (input == NULL)
7937
0
        return(-1);
7938
7939
0
    oldinput = ctxt->input;
7940
0
    oldinputNr = ctxt->inputNr;
7941
0
    oldinputMax = ctxt->inputMax;
7942
0
    oldinputTab = ctxt->inputTab;
7943
0
    oldencoding = ctxt->encoding;
7944
7945
0
    ctxt->input = NULL;
7946
0
    ctxt->inputNr = 0;
7947
0
    ctxt->inputMax = 1;
7948
0
    ctxt->encoding = NULL;
7949
0
    ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7950
0
    if (ctxt->inputTab == NULL) {
7951
0
        xmlErrMemory(ctxt);
7952
0
        xmlFreeInputStream(input);
7953
0
        goto error;
7954
0
    }
7955
7956
0
    xmlBufResetInput(input->buf->buffer, input);
7957
7958
0
    inputPush(ctxt, input);
7959
7960
0
    xmlDetectEncoding(ctxt);
7961
7962
    /*
7963
     * Parse a possible text declaration first
7964
     */
7965
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7966
0
  xmlParseTextDecl(ctxt);
7967
        /*
7968
         * An XML-1.0 document can't reference an entity not XML-1.0
7969
         */
7970
0
        if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7971
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7972
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7973
0
                           "Version mismatch between document and entity\n");
7974
0
        }
7975
0
    }
7976
7977
0
    length = input->cur - input->base;
7978
0
    xmlBufShrink(input->buf->buffer, length);
7979
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
7980
7981
0
    while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7982
0
        ;
7983
7984
0
    xmlBufResetInput(input->buf->buffer, input);
7985
7986
0
    if (res < 0) {
7987
0
        xmlCtxtErrIO(ctxt, input->buf->error, NULL);
7988
0
        goto error;
7989
0
    }
7990
7991
0
    length = xmlBufUse(input->buf->buffer);
7992
0
    content = xmlBufDetach(input->buf->buffer);
7993
7994
0
    if (length > INT_MAX) {
7995
0
        xmlErrMemory(ctxt);
7996
0
        goto error;
7997
0
    }
7998
7999
0
    for (i = 0; i < length; ) {
8000
0
        int clen = length - i;
8001
0
        int c = xmlGetUTF8Char(content + i, &clen);
8002
8003
0
        if ((c < 0) || (!IS_CHAR(c))) {
8004
0
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8005
0
                              "xmlLoadEntityContent: invalid char value %d\n",
8006
0
                              content[i]);
8007
0
            goto error;
8008
0
        }
8009
0
        i += clen;
8010
0
    }
8011
8012
0
    xmlSaturatedAdd(&ctxt->sizeentities, length);
8013
0
    entity->content = content;
8014
0
    entity->length = length;
8015
0
    content = NULL;
8016
0
    ret = 0;
8017
8018
0
error:
8019
0
    while (ctxt->inputNr > 0)
8020
0
        xmlFreeInputStream(inputPop(ctxt));
8021
0
    xmlFree(ctxt->inputTab);
8022
0
    xmlFree((xmlChar *) ctxt->encoding);
8023
8024
0
    ctxt->input = oldinput;
8025
0
    ctxt->inputNr = oldinputNr;
8026
0
    ctxt->inputMax = oldinputMax;
8027
0
    ctxt->inputTab = oldinputTab;
8028
0
    ctxt->encoding = oldencoding;
8029
8030
0
    xmlFree(content);
8031
8032
0
    return(ret);
8033
0
}
8034
8035
/**
8036
 * xmlParseStringPEReference:
8037
 * @ctxt:  an XML parser context
8038
 * @str:  a pointer to an index in the string
8039
 *
8040
 * parse PEReference declarations
8041
 *
8042
 * [69] PEReference ::= '%' Name ';'
8043
 *
8044
 * [ WFC: No Recursion ]
8045
 * A parsed entity must not contain a recursive
8046
 * reference to itself, either directly or indirectly.
8047
 *
8048
 * [ WFC: Entity Declared ]
8049
 * In a document without any DTD, a document with only an internal DTD
8050
 * subset which contains no parameter entity references, or a document
8051
 * with "standalone='yes'", ...  ... The declaration of a parameter
8052
 * entity must precede any reference to it...
8053
 *
8054
 * [ VC: Entity Declared ]
8055
 * In a document with an external subset or external parameter entities
8056
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8057
 * must precede any reference to it...
8058
 *
8059
 * [ WFC: In DTD ]
8060
 * Parameter-entity references may only appear in the DTD.
8061
 * NOTE: misleading but this is handled.
8062
 *
8063
 * Returns the string of the entity content.
8064
 *         str is updated to the current value of the index
8065
 */
8066
static xmlEntityPtr
8067
2.49k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8068
2.49k
    const xmlChar *ptr;
8069
2.49k
    xmlChar cur;
8070
2.49k
    xmlChar *name;
8071
2.49k
    xmlEntityPtr entity = NULL;
8072
8073
2.49k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8074
2.49k
    ptr = *str;
8075
2.49k
    cur = *ptr;
8076
2.49k
    if (cur != '%')
8077
0
        return(NULL);
8078
2.49k
    ptr++;
8079
2.49k
    name = xmlParseStringName(ctxt, &ptr);
8080
2.49k
    if (name == NULL) {
8081
753
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8082
753
           "xmlParseStringPEReference: no name\n");
8083
753
  *str = ptr;
8084
753
  return(NULL);
8085
753
    }
8086
1.74k
    cur = *ptr;
8087
1.74k
    if (cur != ';') {
8088
809
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8089
809
  xmlFree(name);
8090
809
  *str = ptr;
8091
809
  return(NULL);
8092
809
    }
8093
934
    ptr++;
8094
8095
    /*
8096
     * Request the entity from SAX
8097
     */
8098
934
    if ((ctxt->sax != NULL) &&
8099
934
  (ctxt->sax->getParameterEntity != NULL))
8100
934
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8101
934
    if (entity == NULL) {
8102
  /*
8103
   * [ WFC: Entity Declared ]
8104
   * In a document without any DTD, a document with only an
8105
   * internal DTD subset which contains no parameter entity
8106
   * references, or a document with "standalone='yes'", ...
8107
   * ... The declaration of a parameter entity must precede
8108
   * any reference to it...
8109
   */
8110
674
  if ((ctxt->standalone == 1) ||
8111
674
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8112
213
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8113
213
     "PEReference: %%%s; not found\n", name);
8114
461
  } else {
8115
      /*
8116
       * [ VC: Entity Declared ]
8117
       * In a document with an external subset or external
8118
       * parameter entities with "standalone='no'", ...
8119
       * ... The declaration of a parameter entity must
8120
       * precede any reference to it...
8121
       */
8122
461
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8123
461
        "PEReference: %%%s; not found\n",
8124
461
        name, NULL);
8125
461
      ctxt->valid = 0;
8126
461
  }
8127
674
    } else {
8128
  /*
8129
   * Internal checking in case the entity quest barfed
8130
   */
8131
260
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8132
260
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8133
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8134
0
        "%%%s; is not a parameter entity\n",
8135
0
        name, NULL);
8136
0
  }
8137
260
    }
8138
934
    ctxt->hasPErefs = 1;
8139
934
    xmlFree(name);
8140
934
    *str = ptr;
8141
934
    return(entity);
8142
1.74k
}
8143
8144
/**
8145
 * xmlParseDocTypeDecl:
8146
 * @ctxt:  an XML parser context
8147
 *
8148
 * DEPRECATED: Internal function, don't use.
8149
 *
8150
 * parse a DOCTYPE declaration
8151
 *
8152
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8153
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8154
 *
8155
 * [ VC: Root Element Type ]
8156
 * The Name in the document type declaration must match the element
8157
 * type of the root element.
8158
 */
8159
8160
void
8161
8.79k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8162
8.79k
    const xmlChar *name = NULL;
8163
8.79k
    xmlChar *ExternalID = NULL;
8164
8.79k
    xmlChar *URI = NULL;
8165
8166
    /*
8167
     * We know that '<!DOCTYPE' has been detected.
8168
     */
8169
8.79k
    SKIP(9);
8170
8171
8.79k
    SKIP_BLANKS;
8172
8173
    /*
8174
     * Parse the DOCTYPE name.
8175
     */
8176
8.79k
    name = xmlParseName(ctxt);
8177
8.79k
    if (name == NULL) {
8178
6.29k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8179
6.29k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8180
6.29k
    }
8181
8.79k
    ctxt->intSubName = name;
8182
8183
8.79k
    SKIP_BLANKS;
8184
8185
    /*
8186
     * Check for SystemID and ExternalID
8187
     */
8188
8.79k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8189
8190
8.79k
    if ((URI != NULL) || (ExternalID != NULL)) {
8191
739
        ctxt->hasExternalSubset = 1;
8192
739
    }
8193
8.79k
    ctxt->extSubURI = URI;
8194
8.79k
    ctxt->extSubSystem = ExternalID;
8195
8196
8.79k
    SKIP_BLANKS;
8197
8198
    /*
8199
     * Create and update the internal subset.
8200
     */
8201
8.79k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8202
8.79k
  (!ctxt->disableSAX))
8203
6.68k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8204
8205
    /*
8206
     * Is there any internal subset declarations ?
8207
     * they are handled separately in xmlParseInternalSubset()
8208
     */
8209
8.79k
    if (RAW == '[')
8210
7.00k
  return;
8211
8212
    /*
8213
     * We should be at the end of the DOCTYPE declaration.
8214
     */
8215
1.78k
    if (RAW != '>') {
8216
390
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8217
390
    }
8218
1.78k
    NEXT;
8219
1.78k
}
8220
8221
/**
8222
 * xmlParseInternalSubset:
8223
 * @ctxt:  an XML parser context
8224
 *
8225
 * parse the internal subset declaration
8226
 *
8227
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8228
 */
8229
8230
static void
8231
7.15k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8232
    /*
8233
     * Is there any DTD definition ?
8234
     */
8235
7.15k
    if (RAW == '[') {
8236
7.15k
        int oldInputNr = ctxt->inputNr;
8237
8238
7.15k
        NEXT;
8239
  /*
8240
   * Parse the succession of Markup declarations and
8241
   * PEReferences.
8242
   * Subsequence (markupdecl | PEReference | S)*
8243
   */
8244
7.15k
  SKIP_BLANKS;
8245
158k
  while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) &&
8246
158k
               (PARSER_STOPPED(ctxt) == 0)) {
8247
8248
            /*
8249
             * Conditional sections are allowed from external entities included
8250
             * by PE References in the internal subset.
8251
             */
8252
154k
            if ((PARSER_EXTERNAL(ctxt)) &&
8253
154k
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8254
0
                xmlParseConditionalSections(ctxt);
8255
154k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8256
93.5k
          xmlParseMarkupDecl(ctxt);
8257
93.5k
            } else if (RAW == '%') {
8258
57.5k
          xmlParsePEReference(ctxt);
8259
57.5k
            } else {
8260
3.45k
    xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8261
3.45k
                break;
8262
3.45k
            }
8263
151k
      SKIP_BLANKS_PE;
8264
151k
            SHRINK;
8265
151k
            GROW;
8266
151k
  }
8267
8268
7.23k
        while (ctxt->inputNr > oldInputNr)
8269
85
            xmlPopPE(ctxt);
8270
8271
7.15k
  if (RAW == ']') {
8272
2.52k
      NEXT;
8273
2.52k
      SKIP_BLANKS;
8274
2.52k
  }
8275
7.15k
    }
8276
8277
    /*
8278
     * We should be at the end of the DOCTYPE declaration.
8279
     */
8280
7.15k
    if ((ctxt->wellFormed) && (RAW != '>')) {
8281
172
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8282
172
  return;
8283
172
    }
8284
6.97k
    NEXT;
8285
6.97k
}
8286
8287
#ifdef LIBXML_SAX1_ENABLED
8288
/**
8289
 * xmlParseAttribute:
8290
 * @ctxt:  an XML parser context
8291
 * @value:  a xmlChar ** used to store the value of the attribute
8292
 *
8293
 * DEPRECATED: Internal function, don't use.
8294
 *
8295
 * parse an attribute
8296
 *
8297
 * [41] Attribute ::= Name Eq AttValue
8298
 *
8299
 * [ WFC: No External Entity References ]
8300
 * Attribute values cannot contain direct or indirect entity references
8301
 * to external entities.
8302
 *
8303
 * [ WFC: No < in Attribute Values ]
8304
 * The replacement text of any entity referred to directly or indirectly in
8305
 * an attribute value (other than "&lt;") must not contain a <.
8306
 *
8307
 * [ VC: Attribute Value Type ]
8308
 * The attribute must have been declared; the value must be of the type
8309
 * declared for it.
8310
 *
8311
 * [25] Eq ::= S? '=' S?
8312
 *
8313
 * With namespace:
8314
 *
8315
 * [NS 11] Attribute ::= QName Eq AttValue
8316
 *
8317
 * Also the case QName == xmlns:??? is handled independently as a namespace
8318
 * definition.
8319
 *
8320
 * Returns the attribute name, and the value in *value.
8321
 */
8322
8323
const xmlChar *
8324
51.7k
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8325
51.7k
    const xmlChar *name;
8326
51.7k
    xmlChar *val;
8327
8328
51.7k
    *value = NULL;
8329
51.7k
    GROW;
8330
51.7k
    name = xmlParseName(ctxt);
8331
51.7k
    if (name == NULL) {
8332
39.9k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8333
39.9k
                 "error parsing attribute name\n");
8334
39.9k
        return(NULL);
8335
39.9k
    }
8336
8337
    /*
8338
     * read the value
8339
     */
8340
11.8k
    SKIP_BLANKS;
8341
11.8k
    if (RAW == '=') {
8342
7.93k
        NEXT;
8343
7.93k
  SKIP_BLANKS;
8344
7.93k
  val = xmlParseAttValue(ctxt);
8345
7.93k
    } else {
8346
3.91k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8347
3.91k
         "Specification mandates value for attribute %s\n", name);
8348
3.91k
  return(name);
8349
3.91k
    }
8350
8351
    /*
8352
     * Check that xml:lang conforms to the specification
8353
     * No more registered as an error, just generate a warning now
8354
     * since this was deprecated in XML second edition
8355
     */
8356
7.93k
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8357
1.64k
  if (!xmlCheckLanguageID(val)) {
8358
1.26k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8359
1.26k
              "Malformed value for xml:lang : %s\n",
8360
1.26k
        val, NULL);
8361
1.26k
  }
8362
1.64k
    }
8363
8364
    /*
8365
     * Check that xml:space conforms to the specification
8366
     */
8367
7.93k
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8368
174
  if (xmlStrEqual(val, BAD_CAST "default"))
8369
74
      *(ctxt->space) = 0;
8370
100
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8371
26
      *(ctxt->space) = 1;
8372
74
  else {
8373
74
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8374
74
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8375
74
                                 val, NULL);
8376
74
  }
8377
174
    }
8378
8379
7.93k
    *value = val;
8380
7.93k
    return(name);
8381
11.8k
}
8382
8383
/**
8384
 * xmlParseStartTag:
8385
 * @ctxt:  an XML parser context
8386
 *
8387
 * DEPRECATED: Internal function, don't use.
8388
 *
8389
 * Parse a start tag. Always consumes '<'.
8390
 *
8391
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8392
 *
8393
 * [ WFC: Unique Att Spec ]
8394
 * No attribute name may appear more than once in the same start-tag or
8395
 * empty-element tag.
8396
 *
8397
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8398
 *
8399
 * [ WFC: Unique Att Spec ]
8400
 * No attribute name may appear more than once in the same start-tag or
8401
 * empty-element tag.
8402
 *
8403
 * With namespace:
8404
 *
8405
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8406
 *
8407
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8408
 *
8409
 * Returns the element name parsed
8410
 */
8411
8412
const xmlChar *
8413
58.6k
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8414
58.6k
    const xmlChar *name;
8415
58.6k
    const xmlChar *attname;
8416
58.6k
    xmlChar *attvalue;
8417
58.6k
    const xmlChar **atts = ctxt->atts;
8418
58.6k
    int nbatts = 0;
8419
58.6k
    int maxatts = ctxt->maxatts;
8420
58.6k
    int i;
8421
8422
58.6k
    if (RAW != '<') return(NULL);
8423
58.6k
    NEXT1;
8424
8425
58.6k
    name = xmlParseName(ctxt);
8426
58.6k
    if (name == NULL) {
8427
6.36k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8428
6.36k
       "xmlParseStartTag: invalid element name\n");
8429
6.36k
        return(NULL);
8430
6.36k
    }
8431
8432
    /*
8433
     * Now parse the attributes, it ends up with the ending
8434
     *
8435
     * (S Attribute)* S?
8436
     */
8437
52.3k
    SKIP_BLANKS;
8438
52.3k
    GROW;
8439
8440
62.1k
    while (((RAW != '>') &&
8441
62.1k
     ((RAW != '/') || (NXT(1) != '>')) &&
8442
62.1k
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8443
51.7k
  attname = xmlParseAttribute(ctxt, &attvalue);
8444
51.7k
        if (attname == NULL)
8445
39.9k
      break;
8446
11.8k
        if (attvalue != NULL) {
8447
      /*
8448
       * [ WFC: Unique Att Spec ]
8449
       * No attribute name may appear more than once in the same
8450
       * start-tag or empty-element tag.
8451
       */
8452
10.4k
      for (i = 0; i < nbatts;i += 2) {
8453
4.47k
          if (xmlStrEqual(atts[i], attname)) {
8454
823
        xmlErrAttributeDup(ctxt, NULL, attname);
8455
823
        xmlFree(attvalue);
8456
823
        goto failed;
8457
823
    }
8458
4.47k
      }
8459
      /*
8460
       * Add the pair to atts
8461
       */
8462
6.02k
      if (atts == NULL) {
8463
1.23k
          maxatts = 22; /* allow for 10 attrs by default */
8464
1.23k
          atts = (const xmlChar **)
8465
1.23k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8466
1.23k
    if (atts == NULL) {
8467
0
        xmlErrMemory(ctxt);
8468
0
        if (attvalue != NULL)
8469
0
      xmlFree(attvalue);
8470
0
        goto failed;
8471
0
    }
8472
1.23k
    ctxt->atts = atts;
8473
1.23k
    ctxt->maxatts = maxatts;
8474
4.78k
      } else if (nbatts + 4 > maxatts) {
8475
4
          const xmlChar **n;
8476
8477
4
          maxatts *= 2;
8478
4
          n = (const xmlChar **) xmlRealloc((void *) atts,
8479
4
               maxatts * sizeof(const xmlChar *));
8480
4
    if (n == NULL) {
8481
0
        xmlErrMemory(ctxt);
8482
0
        if (attvalue != NULL)
8483
0
      xmlFree(attvalue);
8484
0
        goto failed;
8485
0
    }
8486
4
    atts = n;
8487
4
    ctxt->atts = atts;
8488
4
    ctxt->maxatts = maxatts;
8489
4
      }
8490
6.02k
      atts[nbatts++] = attname;
8491
6.02k
      atts[nbatts++] = attvalue;
8492
6.02k
      atts[nbatts] = NULL;
8493
6.02k
      atts[nbatts + 1] = NULL;
8494
6.02k
  } else {
8495
5.00k
      if (attvalue != NULL)
8496
0
    xmlFree(attvalue);
8497
5.00k
  }
8498
8499
11.8k
failed:
8500
8501
11.8k
  GROW
8502
11.8k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8503
2.03k
      break;
8504
9.81k
  if (SKIP_BLANKS == 0) {
8505
7.91k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8506
7.91k
         "attributes construct error\n");
8507
7.91k
  }
8508
9.81k
  SHRINK;
8509
9.81k
        GROW;
8510
9.81k
    }
8511
8512
    /*
8513
     * SAX: Start of Element !
8514
     */
8515
52.3k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8516
52.3k
  (!ctxt->disableSAX)) {
8517
51.1k
  if (nbatts > 0)
8518
4.70k
      ctxt->sax->startElement(ctxt->userData, name, atts);
8519
46.4k
  else
8520
46.4k
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8521
51.1k
    }
8522
8523
52.3k
    if (atts != NULL) {
8524
        /* Free only the content strings */
8525
12.8k
        for (i = 1;i < nbatts;i+=2)
8526
6.02k
      if (atts[i] != NULL)
8527
6.02k
         xmlFree((xmlChar *) atts[i]);
8528
6.86k
    }
8529
52.3k
    return(name);
8530
52.3k
}
8531
8532
/**
8533
 * xmlParseEndTag1:
8534
 * @ctxt:  an XML parser context
8535
 * @line:  line of the start tag
8536
 * @nsNr:  number of namespaces on the start tag
8537
 *
8538
 * Parse an end tag. Always consumes '</'.
8539
 *
8540
 * [42] ETag ::= '</' Name S? '>'
8541
 *
8542
 * With namespace
8543
 *
8544
 * [NS 9] ETag ::= '</' QName S? '>'
8545
 */
8546
8547
static void
8548
4.82k
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8549
4.82k
    const xmlChar *name;
8550
8551
4.82k
    GROW;
8552
4.82k
    if ((RAW != '<') || (NXT(1) != '/')) {
8553
7
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8554
7
           "xmlParseEndTag: '</' not found\n");
8555
7
  return;
8556
7
    }
8557
4.81k
    SKIP(2);
8558
8559
4.81k
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8560
8561
    /*
8562
     * We should definitely be at the ending "S? '>'" part
8563
     */
8564
4.81k
    GROW;
8565
4.81k
    SKIP_BLANKS;
8566
4.81k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8567
2.17k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8568
2.17k
    } else
8569
2.63k
  NEXT1;
8570
8571
    /*
8572
     * [ WFC: Element Type Match ]
8573
     * The Name in an element's end-tag must match the element type in the
8574
     * start-tag.
8575
     *
8576
     */
8577
4.81k
    if (name != (xmlChar*)1) {
8578
3.49k
        if (name == NULL) name = BAD_CAST "unparsable";
8579
3.49k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8580
3.49k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8581
3.49k
                    ctxt->name, line, name);
8582
3.49k
    }
8583
8584
    /*
8585
     * SAX: End of Tag
8586
     */
8587
4.81k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8588
4.81k
  (!ctxt->disableSAX))
8589
4.28k
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8590
8591
4.81k
    namePop(ctxt);
8592
4.81k
    spacePop(ctxt);
8593
4.81k
    return;
8594
4.82k
}
8595
8596
/**
8597
 * xmlParseEndTag:
8598
 * @ctxt:  an XML parser context
8599
 *
8600
 * DEPRECATED: Internal function, don't use.
8601
 *
8602
 * parse an end of tag
8603
 *
8604
 * [42] ETag ::= '</' Name S? '>'
8605
 *
8606
 * With namespace
8607
 *
8608
 * [NS 9] ETag ::= '</' QName S? '>'
8609
 */
8610
8611
void
8612
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8613
0
    xmlParseEndTag1(ctxt, 0);
8614
0
}
8615
#endif /* LIBXML_SAX1_ENABLED */
8616
8617
/************************************************************************
8618
 *                  *
8619
 *          SAX 2 specific operations       *
8620
 *                  *
8621
 ************************************************************************/
8622
8623
/**
8624
 * xmlParseQNameHashed:
8625
 * @ctxt:  an XML parser context
8626
 * @prefix:  pointer to store the prefix part
8627
 *
8628
 * parse an XML Namespace QName
8629
 *
8630
 * [6]  QName  ::= (Prefix ':')? LocalPart
8631
 * [7]  Prefix  ::= NCName
8632
 * [8]  LocalPart  ::= NCName
8633
 *
8634
 * Returns the Name parsed or NULL
8635
 */
8636
8637
static xmlHashedString
8638
165k
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8639
165k
    xmlHashedString l, p;
8640
165k
    int start, isNCName = 0;
8641
8642
165k
    l.name = NULL;
8643
165k
    p.name = NULL;
8644
8645
165k
    GROW;
8646
165k
    start = CUR_PTR - BASE_PTR;
8647
8648
165k
    l = xmlParseNCName(ctxt);
8649
165k
    if (l.name != NULL) {
8650
80.9k
        isNCName = 1;
8651
80.9k
        if (CUR == ':') {
8652
13.9k
            NEXT;
8653
13.9k
            p = l;
8654
13.9k
            l = xmlParseNCName(ctxt);
8655
13.9k
        }
8656
80.9k
    }
8657
165k
    if ((l.name == NULL) || (CUR == ':')) {
8658
86.5k
        xmlChar *tmp;
8659
8660
86.5k
        l.name = NULL;
8661
86.5k
        p.name = NULL;
8662
86.5k
        if ((isNCName == 0) && (CUR != ':'))
8663
63.6k
            return(l);
8664
22.9k
        tmp = xmlParseNmtoken(ctxt);
8665
22.9k
        if (tmp != NULL)
8666
21.7k
            xmlFree(tmp);
8667
22.9k
        l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8668
22.9k
                                CUR_PTR - (BASE_PTR + start));
8669
22.9k
        if (l.name == NULL) {
8670
0
            xmlErrMemory(ctxt);
8671
0
            return(l);
8672
0
        }
8673
22.9k
        xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8674
22.9k
                 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8675
22.9k
    }
8676
8677
102k
    *prefix = p;
8678
102k
    return(l);
8679
165k
}
8680
8681
/**
8682
 * xmlParseQName:
8683
 * @ctxt:  an XML parser context
8684
 * @prefix:  pointer to store the prefix part
8685
 *
8686
 * parse an XML Namespace QName
8687
 *
8688
 * [6]  QName  ::= (Prefix ':')? LocalPart
8689
 * [7]  Prefix  ::= NCName
8690
 * [8]  LocalPart  ::= NCName
8691
 *
8692
 * Returns the Name parsed or NULL
8693
 */
8694
8695
static const xmlChar *
8696
1.01k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8697
1.01k
    xmlHashedString n, p;
8698
8699
1.01k
    n = xmlParseQNameHashed(ctxt, &p);
8700
1.01k
    if (n.name == NULL)
8701
497
        return(NULL);
8702
520
    *prefix = p.name;
8703
520
    return(n.name);
8704
1.01k
}
8705
8706
/**
8707
 * xmlParseQNameAndCompare:
8708
 * @ctxt:  an XML parser context
8709
 * @name:  the localname
8710
 * @prefix:  the prefix, if any.
8711
 *
8712
 * parse an XML name and compares for match
8713
 * (specialized for endtag parsing)
8714
 *
8715
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8716
 * and the name for mismatch
8717
 */
8718
8719
static const xmlChar *
8720
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8721
1.31k
                        xmlChar const *prefix) {
8722
1.31k
    const xmlChar *cmp;
8723
1.31k
    const xmlChar *in;
8724
1.31k
    const xmlChar *ret;
8725
1.31k
    const xmlChar *prefix2;
8726
8727
1.31k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8728
8729
1.31k
    GROW;
8730
1.31k
    in = ctxt->input->cur;
8731
8732
1.31k
    cmp = prefix;
8733
2.59k
    while (*in != 0 && *in == *cmp) {
8734
1.27k
  ++in;
8735
1.27k
  ++cmp;
8736
1.27k
    }
8737
1.31k
    if ((*cmp == 0) && (*in == ':')) {
8738
627
        in++;
8739
627
  cmp = name;
8740
1.65k
  while (*in != 0 && *in == *cmp) {
8741
1.02k
      ++in;
8742
1.02k
      ++cmp;
8743
1.02k
  }
8744
627
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8745
      /* success */
8746
302
            ctxt->input->col += in - ctxt->input->cur;
8747
302
      ctxt->input->cur = in;
8748
302
      return((const xmlChar*) 1);
8749
302
  }
8750
627
    }
8751
    /*
8752
     * all strings coms from the dictionary, equality can be done directly
8753
     */
8754
1.01k
    ret = xmlParseQName (ctxt, &prefix2);
8755
1.01k
    if (ret == NULL)
8756
497
        return(NULL);
8757
520
    if ((ret == name) && (prefix == prefix2))
8758
178
  return((const xmlChar*) 1);
8759
342
    return ret;
8760
520
}
8761
8762
/**
8763
 * xmlParseAttribute2:
8764
 * @ctxt:  an XML parser context
8765
 * @pref:  the element prefix
8766
 * @elem:  the element name
8767
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
8768
 * @value:  a xmlChar ** used to store the value of the attribute
8769
 * @len:  an int * to save the length of the attribute
8770
 * @alloc:  an int * to indicate if the attribute was allocated
8771
 *
8772
 * parse an attribute in the new SAX2 framework.
8773
 *
8774
 * Returns the attribute name, and the value in *value, .
8775
 */
8776
8777
static xmlHashedString
8778
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8779
                   const xmlChar * pref, const xmlChar * elem,
8780
                   xmlHashedString * hprefix, xmlChar ** value,
8781
                   int *len, int *alloc)
8782
68.0k
{
8783
68.0k
    xmlHashedString hname;
8784
68.0k
    const xmlChar *prefix, *name;
8785
68.0k
    xmlChar *val = NULL, *internal_val = NULL;
8786
68.0k
    int normalize = 0;
8787
8788
68.0k
    *value = NULL;
8789
68.0k
    GROW;
8790
68.0k
    hname = xmlParseQNameHashed(ctxt, hprefix);
8791
68.0k
    if (hname.name == NULL) {
8792
49.1k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8793
49.1k
                       "error parsing attribute name\n");
8794
49.1k
        return(hname);
8795
49.1k
    }
8796
18.9k
    name = hname.name;
8797
18.9k
    if (hprefix->name != NULL)
8798
7.50k
        prefix = hprefix->name;
8799
11.4k
    else
8800
11.4k
        prefix = NULL;
8801
8802
    /*
8803
     * get the type if needed
8804
     */
8805
18.9k
    if (ctxt->attsSpecial != NULL) {
8806
3.05k
        int type;
8807
8808
3.05k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
8809
3.05k
                                                 pref, elem,
8810
3.05k
                                                 prefix, name);
8811
3.05k
        if (type != 0)
8812
1.04k
            normalize = 1;
8813
3.05k
    }
8814
8815
    /*
8816
     * read the value
8817
     */
8818
18.9k
    SKIP_BLANKS;
8819
18.9k
    if (RAW == '=') {
8820
16.5k
        NEXT;
8821
16.5k
        SKIP_BLANKS;
8822
16.5k
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8823
16.5k
        if (val == NULL)
8824
1.29k
            goto error;
8825
16.5k
    } else {
8826
2.38k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8827
2.38k
                          "Specification mandates value for attribute %s\n",
8828
2.38k
                          name);
8829
2.38k
        goto error;
8830
2.38k
    }
8831
8832
15.2k
    if (prefix == ctxt->str_xml) {
8833
        /*
8834
         * Check that xml:lang conforms to the specification
8835
         * No more registered as an error, just generate a warning now
8836
         * since this was deprecated in XML second edition
8837
         */
8838
3.71k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8839
471
            internal_val = xmlStrndup(val, *len);
8840
471
            if (internal_val == NULL)
8841
0
                goto mem_error;
8842
471
            if (!xmlCheckLanguageID(internal_val)) {
8843
365
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8844
365
                              "Malformed value for xml:lang : %s\n",
8845
365
                              internal_val, NULL);
8846
365
            }
8847
471
        }
8848
8849
        /*
8850
         * Check that xml:space conforms to the specification
8851
         */
8852
3.71k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8853
159
            internal_val = xmlStrndup(val, *len);
8854
159
            if (internal_val == NULL)
8855
0
                goto mem_error;
8856
159
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8857
67
                *(ctxt->space) = 0;
8858
92
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8859
18
                *(ctxt->space) = 1;
8860
74
            else {
8861
74
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8862
74
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8863
74
                              internal_val, NULL);
8864
74
            }
8865
159
        }
8866
3.71k
        if (internal_val) {
8867
630
            xmlFree(internal_val);
8868
630
        }
8869
3.71k
    }
8870
8871
15.2k
    *value = val;
8872
15.2k
    return (hname);
8873
8874
0
mem_error:
8875
0
    xmlErrMemory(ctxt);
8876
3.68k
error:
8877
3.68k
    if ((val != NULL) && (*alloc != 0))
8878
0
        xmlFree(val);
8879
3.68k
    return(hname);
8880
0
}
8881
8882
/**
8883
 * xmlAttrHashInsert:
8884
 * @ctxt: parser context
8885
 * @size: size of the hash table
8886
 * @name: attribute name
8887
 * @uri: namespace uri
8888
 * @hashValue: combined hash value of name and uri
8889
 * @aindex: attribute index (this is a multiple of 5)
8890
 *
8891
 * Inserts a new attribute into the hash table.
8892
 *
8893
 * Returns INT_MAX if no existing attribute was found, the attribute
8894
 * index if an attribute was found, -1 if a memory allocation failed.
8895
 */
8896
static int
8897
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8898
13.3k
                  const xmlChar *uri, unsigned hashValue, int aindex) {
8899
13.3k
    xmlAttrHashBucket *table = ctxt->attrHash;
8900
13.3k
    xmlAttrHashBucket *bucket;
8901
13.3k
    unsigned hindex;
8902
8903
13.3k
    hindex = hashValue & (size - 1);
8904
13.3k
    bucket = &table[hindex];
8905
8906
33.4k
    while (bucket->index >= 0) {
8907
24.9k
        const xmlChar **atts = &ctxt->atts[bucket->index];
8908
8909
24.9k
        if (name == atts[0]) {
8910
22.8k
            int nsIndex = (int) (ptrdiff_t) atts[2];
8911
8912
22.8k
            if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8913
22.8k
                (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml) :
8914
18.1k
                (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8915
4.79k
                return(bucket->index);
8916
22.8k
        }
8917
8918
20.1k
        hindex++;
8919
20.1k
        bucket++;
8920
20.1k
        if (hindex >= size) {
8921
343
            hindex = 0;
8922
343
            bucket = table;
8923
343
        }
8924
20.1k
    }
8925
8926
8.55k
    bucket->index = aindex;
8927
8928
8.55k
    return(INT_MAX);
8929
13.3k
}
8930
8931
/**
8932
 * xmlParseStartTag2:
8933
 * @ctxt:  an XML parser context
8934
 *
8935
 * Parse a start tag. Always consumes '<'.
8936
 *
8937
 * This routine is called when running SAX2 parsing
8938
 *
8939
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8940
 *
8941
 * [ WFC: Unique Att Spec ]
8942
 * No attribute name may appear more than once in the same start-tag or
8943
 * empty-element tag.
8944
 *
8945
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8946
 *
8947
 * [ WFC: Unique Att Spec ]
8948
 * No attribute name may appear more than once in the same start-tag or
8949
 * empty-element tag.
8950
 *
8951
 * With namespace:
8952
 *
8953
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8954
 *
8955
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8956
 *
8957
 * Returns the element name parsed
8958
 */
8959
8960
static const xmlChar *
8961
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8962
96.6k
                  const xmlChar **URI, int *nbNsPtr) {
8963
96.6k
    xmlHashedString hlocalname;
8964
96.6k
    xmlHashedString hprefix;
8965
96.6k
    xmlHashedString hattname;
8966
96.6k
    xmlHashedString haprefix;
8967
96.6k
    const xmlChar *localname;
8968
96.6k
    const xmlChar *prefix;
8969
96.6k
    const xmlChar *attname;
8970
96.6k
    const xmlChar *aprefix;
8971
96.6k
    const xmlChar *uri;
8972
96.6k
    xmlChar *attvalue = NULL;
8973
96.6k
    const xmlChar **atts = ctxt->atts;
8974
96.6k
    unsigned attrHashSize = 0;
8975
96.6k
    int maxatts = ctxt->maxatts;
8976
96.6k
    int nratts, nbatts, nbdef;
8977
96.6k
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8978
96.6k
    int alloc = 0;
8979
8980
96.6k
    if (RAW != '<') return(NULL);
8981
96.6k
    NEXT1;
8982
8983
96.6k
    nbatts = 0;
8984
96.6k
    nratts = 0;
8985
96.6k
    nbdef = 0;
8986
96.6k
    nbNs = 0;
8987
96.6k
    nbTotalDef = 0;
8988
96.6k
    attval = 0;
8989
8990
96.6k
    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8991
0
        xmlErrMemory(ctxt);
8992
0
        return(NULL);
8993
0
    }
8994
8995
96.6k
    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8996
96.6k
    if (hlocalname.name == NULL) {
8997
14.0k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8998
14.0k
           "StartTag: invalid element name\n");
8999
14.0k
        return(NULL);
9000
14.0k
    }
9001
82.6k
    localname = hlocalname.name;
9002
82.6k
    prefix = hprefix.name;
9003
9004
    /*
9005
     * Now parse the attributes, it ends up with the ending
9006
     *
9007
     * (S Attribute)* S?
9008
     */
9009
82.6k
    SKIP_BLANKS;
9010
82.6k
    GROW;
9011
9012
    /*
9013
     * The ctxt->atts array will be ultimately passed to the SAX callback
9014
     * containing five xmlChar pointers for each attribute:
9015
     *
9016
     * [0] attribute name
9017
     * [1] attribute prefix
9018
     * [2] namespace URI
9019
     * [3] attribute value
9020
     * [4] end of attribute value
9021
     *
9022
     * To save memory, we reuse this array temporarily and store integers
9023
     * in these pointer variables.
9024
     *
9025
     * [0] attribute name
9026
     * [1] attribute prefix
9027
     * [2] hash value of attribute prefix, and later namespace index
9028
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
9029
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
9030
     *
9031
     * The ctxt->attallocs array contains an additional unsigned int for
9032
     * each attribute, containing the hash value of the attribute name
9033
     * and the alloc flag in bit 31.
9034
     */
9035
9036
90.8k
    while (((RAW != '>') &&
9037
90.8k
     ((RAW != '/') || (NXT(1) != '>')) &&
9038
90.8k
     (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
9039
68.0k
  int len = -1;
9040
9041
68.0k
  hattname = xmlParseAttribute2(ctxt, prefix, localname,
9042
68.0k
                                          &haprefix, &attvalue, &len,
9043
68.0k
                                          &alloc);
9044
68.0k
        if (hattname.name == NULL)
9045
49.1k
      break;
9046
18.9k
        if (attvalue == NULL)
9047
3.68k
            goto next_attr;
9048
15.2k
        attname = hattname.name;
9049
15.2k
        aprefix = haprefix.name;
9050
15.2k
  if (len < 0) len = xmlStrlen(attvalue);
9051
9052
15.2k
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9053
1.95k
            xmlHashedString huri;
9054
1.95k
            xmlURIPtr parsedUri;
9055
9056
1.95k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9057
1.95k
            uri = huri.name;
9058
1.95k
            if (uri == NULL) {
9059
0
                xmlErrMemory(ctxt);
9060
0
                goto next_attr;
9061
0
            }
9062
1.95k
            if (*uri != 0) {
9063
1.74k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9064
0
                    xmlErrMemory(ctxt);
9065
0
                    goto next_attr;
9066
0
                }
9067
1.74k
                if (parsedUri == NULL) {
9068
805
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9069
805
                             "xmlns: '%s' is not a valid URI\n",
9070
805
                                       uri, NULL, NULL);
9071
939
                } else {
9072
939
                    if (parsedUri->scheme == NULL) {
9073
672
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9074
672
                                  "xmlns: URI %s is not absolute\n",
9075
672
                                  uri, NULL, NULL);
9076
672
                    }
9077
939
                    xmlFreeURI(parsedUri);
9078
939
                }
9079
1.74k
                if (uri == ctxt->str_xml_ns) {
9080
34
                    if (attname != ctxt->str_xml) {
9081
34
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9082
34
                     "xml namespace URI cannot be the default namespace\n",
9083
34
                                 NULL, NULL, NULL);
9084
34
                    }
9085
34
                    goto next_attr;
9086
34
                }
9087
1.71k
                if ((len == 29) &&
9088
1.71k
                    (xmlStrEqual(uri,
9089
60
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9090
18
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9091
18
                         "reuse of the xmlns namespace name is forbidden\n",
9092
18
                             NULL, NULL, NULL);
9093
18
                    goto next_attr;
9094
18
                }
9095
1.71k
            }
9096
9097
1.89k
            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9098
1.67k
                nbNs++;
9099
13.3k
        } else if (aprefix == ctxt->str_xmlns) {
9100
1.96k
            xmlHashedString huri;
9101
1.96k
            xmlURIPtr parsedUri;
9102
9103
1.96k
            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9104
1.96k
            uri = huri.name;
9105
1.96k
            if (uri == NULL) {
9106
0
                xmlErrMemory(ctxt);
9107
0
                goto next_attr;
9108
0
            }
9109
9110
1.96k
            if (attname == ctxt->str_xml) {
9111
94
                if (uri != ctxt->str_xml_ns) {
9112
76
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9113
76
                             "xml namespace prefix mapped to wrong URI\n",
9114
76
                             NULL, NULL, NULL);
9115
76
                }
9116
                /*
9117
                 * Do not keep a namespace definition node
9118
                 */
9119
94
                goto next_attr;
9120
94
            }
9121
1.86k
            if (uri == ctxt->str_xml_ns) {
9122
38
                if (attname != ctxt->str_xml) {
9123
38
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9124
38
                             "xml namespace URI mapped to wrong prefix\n",
9125
38
                             NULL, NULL, NULL);
9126
38
                }
9127
38
                goto next_attr;
9128
38
            }
9129
1.82k
            if (attname == ctxt->str_xmlns) {
9130
34
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9131
34
                         "redefinition of the xmlns prefix is forbidden\n",
9132
34
                         NULL, NULL, NULL);
9133
34
                goto next_attr;
9134
34
            }
9135
1.79k
            if ((len == 29) &&
9136
1.79k
                (xmlStrEqual(uri,
9137
74
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9138
39
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9139
39
                         "reuse of the xmlns namespace name is forbidden\n",
9140
39
                         NULL, NULL, NULL);
9141
39
                goto next_attr;
9142
39
            }
9143
1.75k
            if ((uri == NULL) || (uri[0] == 0)) {
9144
38
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9145
38
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9146
38
                              attname, NULL, NULL);
9147
38
                goto next_attr;
9148
1.71k
            } else {
9149
1.71k
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9150
0
                    xmlErrMemory(ctxt);
9151
0
                    goto next_attr;
9152
0
                }
9153
1.71k
                if (parsedUri == NULL) {
9154
262
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9155
262
                         "xmlns:%s: '%s' is not a valid URI\n",
9156
262
                                       attname, uri, NULL);
9157
1.45k
                } else {
9158
1.45k
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9159
98
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9160
98
                                  "xmlns:%s: URI %s is not absolute\n",
9161
98
                                  attname, uri, NULL);
9162
98
                    }
9163
1.45k
                    xmlFreeURI(parsedUri);
9164
1.45k
                }
9165
1.71k
            }
9166
9167
1.71k
            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9168
1.59k
                nbNs++;
9169
11.3k
        } else {
9170
            /*
9171
             * Populate attributes array, see above for repurposing
9172
             * of xmlChar pointers.
9173
             */
9174
11.3k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9175
1.71k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9176
0
                    goto next_attr;
9177
0
                }
9178
1.71k
                maxatts = ctxt->maxatts;
9179
1.71k
                atts = ctxt->atts;
9180
1.71k
            }
9181
11.3k
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9182
11.3k
                                        ((unsigned) alloc << 31);
9183
11.3k
            atts[nbatts++] = attname;
9184
11.3k
            atts[nbatts++] = aprefix;
9185
11.3k
            atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9186
11.3k
            if (alloc) {
9187
1.78k
                atts[nbatts++] = attvalue;
9188
1.78k
                attvalue += len;
9189
1.78k
                atts[nbatts++] = attvalue;
9190
9.56k
            } else {
9191
                /*
9192
                 * attvalue points into the input buffer which can be
9193
                 * reallocated. Store differences to input->base instead.
9194
                 * The pointers will be reconstructed later.
9195
                 */
9196
9.56k
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9197
9.56k
                attvalue += len;
9198
9.56k
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9199
9.56k
            }
9200
            /*
9201
             * tag if some deallocation is needed
9202
             */
9203
11.3k
            if (alloc != 0) attval = 1;
9204
11.3k
            attvalue = NULL; /* moved into atts */
9205
11.3k
        }
9206
9207
18.9k
next_attr:
9208
18.9k
        if ((attvalue != NULL) && (alloc != 0)) {
9209
301
            xmlFree(attvalue);
9210
301
            attvalue = NULL;
9211
301
        }
9212
9213
18.9k
  GROW
9214
18.9k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9215
2.80k
      break;
9216
16.1k
  if (SKIP_BLANKS == 0) {
9217
7.89k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9218
7.89k
         "attributes construct error\n");
9219
7.89k
      break;
9220
7.89k
  }
9221
8.22k
        GROW;
9222
8.22k
    }
9223
9224
    /*
9225
     * Namespaces from default attributes
9226
     */
9227
82.6k
    if (ctxt->attsDefault != NULL) {
9228
41.8k
        xmlDefAttrsPtr defaults;
9229
9230
41.8k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9231
41.8k
  if (defaults != NULL) {
9232
100k
      for (i = 0; i < defaults->nbAttrs; i++) {
9233
62.0k
                xmlDefAttr *attr = &defaults->attrs[i];
9234
9235
62.0k
          attname = attr->name.name;
9236
62.0k
    aprefix = attr->prefix.name;
9237
9238
62.0k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9239
22.7k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9240
9241
22.7k
                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9242
22.3k
                        nbNs++;
9243
39.2k
    } else if (aprefix == ctxt->str_xmlns) {
9244
28.8k
                    xmlParserEntityCheck(ctxt, attr->expandedSize);
9245
9246
28.8k
                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9247
28.8k
                                      NULL, 1) > 0)
9248
28.3k
                        nbNs++;
9249
28.8k
    } else {
9250
10.4k
                    nbTotalDef += 1;
9251
10.4k
                }
9252
62.0k
      }
9253
37.9k
  }
9254
41.8k
    }
9255
9256
    /*
9257
     * Resolve attribute namespaces
9258
     */
9259
93.9k
    for (i = 0; i < nbatts; i += 5) {
9260
11.3k
        attname = atts[i];
9261
11.3k
        aprefix = atts[i+1];
9262
9263
        /*
9264
  * The default namespace does not apply to attribute names.
9265
  */
9266
11.3k
  if (aprefix == NULL) {
9267
6.30k
            nsIndex = NS_INDEX_EMPTY;
9268
6.30k
        } else if (aprefix == ctxt->str_xml) {
9269
3.71k
            nsIndex = NS_INDEX_XML;
9270
3.71k
        } else {
9271
1.32k
            haprefix.name = aprefix;
9272
1.32k
            haprefix.hashValue = (size_t) atts[i+2];
9273
1.32k
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9274
9275
1.32k
      if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9276
755
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9277
755
        "Namespace prefix %s for %s on %s is not defined\n",
9278
755
        aprefix, attname, localname);
9279
755
                nsIndex = NS_INDEX_EMPTY;
9280
755
            }
9281
1.32k
        }
9282
9283
11.3k
        atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex;
9284
11.3k
    }
9285
9286
    /*
9287
     * Maximum number of attributes including default attributes.
9288
     */
9289
82.6k
    maxAtts = nratts + nbTotalDef;
9290
9291
    /*
9292
     * Verify that attribute names are unique.
9293
     */
9294
82.6k
    if (maxAtts > 1) {
9295
3.88k
        attrHashSize = 4;
9296
5.56k
        while (attrHashSize / 2 < (unsigned) maxAtts)
9297
1.68k
            attrHashSize *= 2;
9298
9299
3.88k
        if (attrHashSize > ctxt->attrHashMax) {
9300
580
            xmlAttrHashBucket *tmp;
9301
9302
580
            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9303
580
            if (tmp == NULL) {
9304
0
                xmlErrMemory(ctxt);
9305
0
                goto done;
9306
0
            }
9307
9308
580
            ctxt->attrHash = tmp;
9309
580
            ctxt->attrHashMax = attrHashSize;
9310
580
        }
9311
9312
3.88k
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9313
9314
11.0k
        for (i = 0, j = 0; j < nratts; i += 5, j++) {
9315
7.11k
            const xmlChar *nsuri;
9316
7.11k
            unsigned hashValue, nameHashValue, uriHashValue;
9317
7.11k
            int res;
9318
9319
7.11k
            attname = atts[i];
9320
7.11k
            aprefix = atts[i+1];
9321
7.11k
            nsIndex = (ptrdiff_t) atts[i+2];
9322
            /* Hash values always have bit 31 set, see dict.c */
9323
7.11k
            nameHashValue = ctxt->attallocs[j] | 0x80000000;
9324
9325
7.11k
            if (nsIndex == NS_INDEX_EMPTY) {
9326
                /*
9327
                 * Prefix with empty namespace means an undeclared
9328
                 * prefix which was already reported above.
9329
                 */
9330
5.18k
                if (aprefix != NULL)
9331
533
                    continue;
9332
4.64k
                nsuri = NULL;
9333
4.64k
                uriHashValue = URI_HASH_EMPTY;
9334
4.64k
            } else if (nsIndex == NS_INDEX_XML) {
9335
1.46k
                nsuri = ctxt->str_xml_ns;
9336
1.46k
                uriHashValue = URI_HASH_XML;
9337
1.46k
            } else {
9338
477
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9339
477
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9340
477
            }
9341
9342
6.58k
            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9343
6.58k
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9344
6.58k
                                    hashValue, i);
9345
6.58k
            if (res < 0)
9346
0
                continue;
9347
9348
            /*
9349
             * [ WFC: Unique Att Spec ]
9350
             * No attribute name may appear more than once in the same
9351
             * start-tag or empty-element tag.
9352
             * As extended by the Namespace in XML REC.
9353
             */
9354
6.58k
            if (res < INT_MAX) {
9355
3.80k
                if (aprefix == atts[res+1]) {
9356
3.73k
                    xmlErrAttributeDup(ctxt, aprefix, attname);
9357
3.73k
                } else {
9358
67
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9359
67
                             "Namespaced Attribute %s in '%s' redefined\n",
9360
67
                             attname, nsuri, NULL);
9361
67
                }
9362
3.80k
            }
9363
6.58k
        }
9364
3.88k
    }
9365
9366
    /*
9367
     * Default attributes
9368
     */
9369
82.6k
    if (ctxt->attsDefault != NULL) {
9370
41.8k
        xmlDefAttrsPtr defaults;
9371
9372
41.8k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9373
41.8k
  if (defaults != NULL) {
9374
100k
      for (i = 0; i < defaults->nbAttrs; i++) {
9375
62.0k
                xmlDefAttr *attr = &defaults->attrs[i];
9376
62.0k
                const xmlChar *nsuri;
9377
62.0k
                unsigned hashValue, uriHashValue;
9378
62.0k
                int res;
9379
9380
62.0k
          attname = attr->name.name;
9381
62.0k
    aprefix = attr->prefix.name;
9382
9383
62.0k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9384
22.7k
                    continue;
9385
39.2k
    if (aprefix == ctxt->str_xmlns)
9386
28.8k
                    continue;
9387
9388
10.4k
                if (aprefix == NULL) {
9389
2.26k
                    nsIndex = NS_INDEX_EMPTY;
9390
2.26k
                    nsuri = NULL;
9391
2.26k
                    uriHashValue = URI_HASH_EMPTY;
9392
10.4k
                } if (aprefix == ctxt->str_xml) {
9393
370
                    nsIndex = NS_INDEX_XML;
9394
370
                    nsuri = ctxt->str_xml_ns;
9395
370
                    uriHashValue = URI_HASH_XML;
9396
10.0k
                } else if (aprefix != NULL) {
9397
7.76k
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9398
7.76k
                    if ((nsIndex == INT_MAX) ||
9399
7.76k
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
9400
6.14k
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9401
6.14k
                                 "Namespace prefix %s for %s on %s is not "
9402
6.14k
                                 "defined\n",
9403
6.14k
                                 aprefix, attname, localname);
9404
6.14k
                        nsIndex = NS_INDEX_EMPTY;
9405
6.14k
                        nsuri = NULL;
9406
6.14k
                        uriHashValue = URI_HASH_EMPTY;
9407
6.14k
                    } else {
9408
1.62k
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9409
1.62k
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9410
1.62k
                    }
9411
7.76k
                }
9412
9413
                /*
9414
                 * Check whether the attribute exists
9415
                 */
9416
10.4k
                if (maxAtts > 1) {
9417
6.76k
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
9418
6.76k
                                                   uriHashValue);
9419
6.76k
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9420
6.76k
                                            hashValue, nbatts);
9421
6.76k
                    if (res < 0)
9422
0
                        continue;
9423
6.76k
                    if (res < INT_MAX) {
9424
993
                        if (aprefix == atts[res+1])
9425
305
                            continue;
9426
688
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9427
688
                                 "Namespaced Attribute %s in '%s' redefined\n",
9428
688
                                 attname, nsuri, NULL);
9429
688
                    }
9430
6.76k
                }
9431
9432
10.0k
                xmlParserEntityCheck(ctxt, attr->expandedSize);
9433
9434
10.0k
                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9435
253
                    if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9436
0
                        localname = NULL;
9437
0
                        goto done;
9438
0
                    }
9439
253
                    maxatts = ctxt->maxatts;
9440
253
                    atts = ctxt->atts;
9441
253
                }
9442
9443
10.0k
                atts[nbatts++] = attname;
9444
10.0k
                atts[nbatts++] = aprefix;
9445
10.0k
                atts[nbatts++] = (const xmlChar *) (ptrdiff_t) nsIndex;
9446
10.0k
                atts[nbatts++] = attr->value.name;
9447
10.0k
                atts[nbatts++] = attr->valueEnd;
9448
10.0k
                if ((ctxt->standalone == 1) && (attr->external != 0)) {
9449
0
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9450
0
                            "standalone: attribute %s on %s defaulted "
9451
0
                            "from external subset\n",
9452
0
                            attname, localname);
9453
0
                }
9454
10.0k
                nbdef++;
9455
10.0k
      }
9456
37.9k
  }
9457
41.8k
    }
9458
9459
    /*
9460
     * Reconstruct attribute pointers
9461
     */
9462
104k
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9463
        /* namespace URI */
9464
21.4k
        nsIndex = (ptrdiff_t) atts[i+2];
9465
21.4k
        if (nsIndex == INT_MAX)
9466
15.1k
            atts[i+2] = NULL;
9467
6.27k
        else if (nsIndex == INT_MAX - 1)
9468
4.08k
            atts[i+2] = ctxt->str_xml_ns;
9469
2.19k
        else
9470
2.19k
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9471
9472
21.4k
        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9473
9.56k
            atts[i+3] = BASE_PTR + (ptrdiff_t) atts[i+3];  /* value */
9474
9.56k
            atts[i+4] = BASE_PTR + (ptrdiff_t) atts[i+4];  /* valuend */
9475
9.56k
        }
9476
21.4k
    }
9477
9478
82.6k
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
9479
82.6k
    if ((prefix != NULL) && (uri == NULL)) {
9480
3.40k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9481
3.40k
           "Namespace prefix %s on %s is not defined\n",
9482
3.40k
     prefix, localname, NULL);
9483
3.40k
    }
9484
82.6k
    *pref = prefix;
9485
82.6k
    *URI = uri;
9486
9487
    /*
9488
     * SAX callback
9489
     */
9490
82.6k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9491
82.6k
  (!ctxt->disableSAX)) {
9492
50.2k
  if (nbNs > 0)
9493
26.1k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9494
26.1k
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9495
26.1k
        nbatts / 5, nbdef, atts);
9496
24.0k
  else
9497
24.0k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9498
24.0k
                          0, NULL, nbatts / 5, nbdef, atts);
9499
50.2k
    }
9500
9501
82.6k
done:
9502
    /*
9503
     * Free allocated attribute values
9504
     */
9505
82.6k
    if (attval != 0) {
9506
3.86k
  for (i = 0, j = 0; j < nratts; i += 5, j++)
9507
2.24k
      if (ctxt->attallocs[j] & 0x80000000)
9508
1.78k
          xmlFree((xmlChar *) atts[i+3]);
9509
1.62k
    }
9510
9511
82.6k
    *nbNsPtr = nbNs;
9512
82.6k
    return(localname);
9513
82.6k
}
9514
9515
/**
9516
 * xmlParseEndTag2:
9517
 * @ctxt:  an XML parser context
9518
 * @line:  line of the start tag
9519
 * @nsNr:  number of namespaces on the start tag
9520
 *
9521
 * Parse an end tag. Always consumes '</'.
9522
 *
9523
 * [42] ETag ::= '</' Name S? '>'
9524
 *
9525
 * With namespace
9526
 *
9527
 * [NS 9] ETag ::= '</' QName S? '>'
9528
 */
9529
9530
static void
9531
8.70k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9532
8.70k
    const xmlChar *name;
9533
9534
8.70k
    GROW;
9535
8.70k
    if ((RAW != '<') || (NXT(1) != '/')) {
9536
26
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9537
26
  return;
9538
26
    }
9539
8.67k
    SKIP(2);
9540
9541
8.67k
    if (tag->prefix == NULL)
9542
7.35k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9543
1.31k
    else
9544
1.31k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9545
9546
    /*
9547
     * We should definitely be at the ending "S? '>'" part
9548
     */
9549
8.67k
    GROW;
9550
8.67k
    SKIP_BLANKS;
9551
8.67k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9552
3.18k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9553
3.18k
    } else
9554
5.49k
  NEXT1;
9555
9556
    /*
9557
     * [ WFC: Element Type Match ]
9558
     * The Name in an element's end-tag must match the element type in the
9559
     * start-tag.
9560
     *
9561
     */
9562
8.67k
    if (name != (xmlChar*)1) {
9563
6.08k
        if (name == NULL) name = BAD_CAST "unparsable";
9564
6.08k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9565
6.08k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9566
6.08k
                    ctxt->name, tag->line, name);
9567
6.08k
    }
9568
9569
    /*
9570
     * SAX: End of Tag
9571
     */
9572
8.67k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9573
8.67k
  (!ctxt->disableSAX))
9574
2.43k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9575
2.43k
                                tag->URI);
9576
9577
8.67k
    spacePop(ctxt);
9578
8.67k
    if (tag->nsNr != 0)
9579
1.25k
  xmlParserNsPop(ctxt, tag->nsNr);
9580
8.67k
}
9581
9582
/**
9583
 * xmlParseCDSect:
9584
 * @ctxt:  an XML parser context
9585
 *
9586
 * DEPRECATED: Internal function, don't use.
9587
 *
9588
 * Parse escaped pure raw content. Always consumes '<!['.
9589
 *
9590
 * [18] CDSect ::= CDStart CData CDEnd
9591
 *
9592
 * [19] CDStart ::= '<![CDATA['
9593
 *
9594
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9595
 *
9596
 * [21] CDEnd ::= ']]>'
9597
 */
9598
void
9599
2.22k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9600
2.22k
    xmlChar *buf = NULL;
9601
2.22k
    int len = 0;
9602
2.22k
    int size = XML_PARSER_BUFFER_SIZE;
9603
2.22k
    int r, rl;
9604
2.22k
    int s, sl;
9605
2.22k
    int cur, l;
9606
2.22k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9607
0
                    XML_MAX_HUGE_LENGTH :
9608
2.22k
                    XML_MAX_TEXT_LENGTH;
9609
9610
2.22k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9611
0
        return;
9612
2.22k
    SKIP(3);
9613
9614
2.22k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9615
0
        return;
9616
2.22k
    SKIP(6);
9617
9618
2.22k
    r = CUR_CHAR(rl);
9619
2.22k
    if (!IS_CHAR(r)) {
9620
158
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9621
158
        goto out;
9622
158
    }
9623
2.06k
    NEXTL(rl);
9624
2.06k
    s = CUR_CHAR(sl);
9625
2.06k
    if (!IS_CHAR(s)) {
9626
246
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9627
246
        goto out;
9628
246
    }
9629
1.82k
    NEXTL(sl);
9630
1.82k
    cur = CUR_CHAR(l);
9631
1.82k
    buf = (xmlChar *) xmlMallocAtomic(size);
9632
1.82k
    if (buf == NULL) {
9633
0
  xmlErrMemory(ctxt);
9634
0
        goto out;
9635
0
    }
9636
17.0k
    while (IS_CHAR(cur) &&
9637
17.0k
           ((r != ']') || (s != ']') || (cur != '>'))) {
9638
15.2k
  if (len + 5 >= size) {
9639
80
      xmlChar *tmp;
9640
9641
80
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9642
80
      if (tmp == NULL) {
9643
0
    xmlErrMemory(ctxt);
9644
0
                goto out;
9645
0
      }
9646
80
      buf = tmp;
9647
80
      size *= 2;
9648
80
  }
9649
15.2k
  COPY_BUF(buf, len, r);
9650
15.2k
        if (len > maxLength) {
9651
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9652
0
                           "CData section too big found\n");
9653
0
            goto out;
9654
0
        }
9655
15.2k
  r = s;
9656
15.2k
  rl = sl;
9657
15.2k
  s = cur;
9658
15.2k
  sl = l;
9659
15.2k
  NEXTL(l);
9660
15.2k
  cur = CUR_CHAR(l);
9661
15.2k
    }
9662
1.82k
    buf[len] = 0;
9663
1.82k
    if (cur != '>') {
9664
491
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9665
491
                       "CData section not finished\n%.50s\n", buf);
9666
491
        goto out;
9667
491
    }
9668
1.33k
    NEXTL(l);
9669
9670
    /*
9671
     * OK the buffer is to be consumed as cdata.
9672
     */
9673
1.33k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9674
938
  if (ctxt->sax->cdataBlock != NULL)
9675
653
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9676
285
  else if (ctxt->sax->characters != NULL)
9677
285
      ctxt->sax->characters(ctxt->userData, buf, len);
9678
938
    }
9679
9680
2.22k
out:
9681
2.22k
    xmlFree(buf);
9682
2.22k
}
9683
9684
/**
9685
 * xmlParseContentInternal:
9686
 * @ctxt:  an XML parser context
9687
 *
9688
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9689
 * unexpected EOF to the caller.
9690
 */
9691
9692
static void
9693
8.61k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9694
8.61k
    int oldNameNr = ctxt->nameNr;
9695
8.61k
    int oldSpaceNr = ctxt->spaceNr;
9696
8.61k
    int oldNodeNr = ctxt->nodeNr;
9697
9698
8.61k
    GROW;
9699
227k
    while ((ctxt->input->cur < ctxt->input->end) &&
9700
227k
     (PARSER_STOPPED(ctxt) == 0)) {
9701
222k
  const xmlChar *cur = ctxt->input->cur;
9702
9703
  /*
9704
   * First case : a Processing Instruction.
9705
   */
9706
222k
  if ((*cur == '<') && (cur[1] == '?')) {
9707
973
      xmlParsePI(ctxt);
9708
973
  }
9709
9710
  /*
9711
   * Second case : a CDSection
9712
   */
9713
  /* 2.6.0 test was *cur not RAW */
9714
221k
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9715
2.22k
      xmlParseCDSect(ctxt);
9716
2.22k
  }
9717
9718
  /*
9719
   * Third case :  a comment
9720
   */
9721
219k
  else if ((*cur == '<') && (NXT(1) == '!') &&
9722
219k
     (NXT(2) == '-') && (NXT(3) == '-')) {
9723
974
      xmlParseComment(ctxt);
9724
974
  }
9725
9726
  /*
9727
   * Fourth case :  a sub-element.
9728
   */
9729
218k
  else if (*cur == '<') {
9730
154k
            if (NXT(1) == '/') {
9731
13.5k
                if (ctxt->nameNr <= oldNameNr)
9732
3.58k
                    break;
9733
9.91k
          xmlParseElementEnd(ctxt);
9734
141k
            } else {
9735
141k
          xmlParseElementStart(ctxt);
9736
141k
            }
9737
154k
  }
9738
9739
  /*
9740
   * Fifth case : a reference. If if has not been resolved,
9741
   *    parsing returns it's Name, create the node
9742
   */
9743
9744
63.3k
  else if (*cur == '&') {
9745
15.6k
      xmlParseReference(ctxt);
9746
15.6k
  }
9747
9748
  /*
9749
   * Last case, text. Note that References are handled directly.
9750
   */
9751
47.7k
  else {
9752
47.7k
      xmlParseCharDataInternal(ctxt, 0);
9753
47.7k
  }
9754
9755
218k
  SHRINK;
9756
218k
  GROW;
9757
218k
    }
9758
9759
8.61k
    if ((ctxt->nameNr > oldNameNr) &&
9760
8.61k
        (ctxt->input->cur >= ctxt->input->end) &&
9761
8.61k
        (ctxt->wellFormed)) {
9762
25
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9763
25
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9764
25
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9765
25
                "Premature end of data in tag %s line %d\n",
9766
25
                name, line, NULL);
9767
25
    }
9768
9769
    /*
9770
     * Clean up in error case
9771
     */
9772
9773
20.1k
    while (ctxt->nodeNr > oldNodeNr)
9774
11.5k
        nodePop(ctxt);
9775
9776
25.0k
    while (ctxt->nameNr > oldNameNr) {
9777
16.4k
        xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9778
9779
16.4k
        if (tag->nsNr != 0)
9780
4.62k
            xmlParserNsPop(ctxt, tag->nsNr);
9781
9782
16.4k
        namePop(ctxt);
9783
16.4k
    }
9784
9785
25.0k
    while (ctxt->spaceNr > oldSpaceNr)
9786
16.4k
        spacePop(ctxt);
9787
8.61k
}
9788
9789
/**
9790
 * xmlParseContent:
9791
 * @ctxt:  an XML parser context
9792
 *
9793
 * Parse XML element content. This is useful if you're only interested
9794
 * in custom SAX callbacks. If you want a node list, use
9795
 * xmlParseInNodeContext.
9796
 */
9797
void
9798
0
xmlParseContent(xmlParserCtxtPtr ctxt) {
9799
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
9800
0
        return;
9801
9802
0
    xmlCtxtInitializeLate(ctxt);
9803
9804
0
    xmlParseContentInternal(ctxt);
9805
9806
0
    if (ctxt->input->cur < ctxt->input->end)
9807
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9808
0
}
9809
9810
/**
9811
 * xmlParseElement:
9812
 * @ctxt:  an XML parser context
9813
 *
9814
 * DEPRECATED: Internal function, don't use.
9815
 *
9816
 * parse an XML element
9817
 *
9818
 * [39] element ::= EmptyElemTag | STag content ETag
9819
 *
9820
 * [ WFC: Element Type Match ]
9821
 * The Name in an element's end-tag must match the element type in the
9822
 * start-tag.
9823
 *
9824
 */
9825
9826
void
9827
13.8k
xmlParseElement(xmlParserCtxtPtr ctxt) {
9828
13.8k
    if (xmlParseElementStart(ctxt) != 0)
9829
5.79k
        return;
9830
9831
8.02k
    xmlParseContentInternal(ctxt);
9832
9833
8.02k
    if (ctxt->input->cur >= ctxt->input->end) {
9834
4.41k
        if (ctxt->wellFormed) {
9835
18
            const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9836
18
            int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9837
18
            xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9838
18
                    "Premature end of data in tag %s line %d\n",
9839
18
                    name, line, NULL);
9840
18
        }
9841
4.41k
        return;
9842
4.41k
    }
9843
9844
3.60k
    xmlParseElementEnd(ctxt);
9845
3.60k
}
9846
9847
/**
9848
 * xmlParseElementStart:
9849
 * @ctxt:  an XML parser context
9850
 *
9851
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9852
 * opening tag was parsed, 1 if an empty element was parsed.
9853
 *
9854
 * Always consumes '<'.
9855
 */
9856
static int
9857
155k
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9858
155k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9859
155k
    const xmlChar *name;
9860
155k
    const xmlChar *prefix = NULL;
9861
155k
    const xmlChar *URI = NULL;
9862
155k
    xmlParserNodeInfo node_info;
9863
155k
    int line;
9864
155k
    xmlNodePtr cur;
9865
155k
    int nbNs = 0;
9866
9867
155k
    if (ctxt->nameNr > maxDepth) {
9868
2
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9869
2
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9870
2
                ctxt->nameNr);
9871
2
  xmlHaltParser(ctxt);
9872
2
  return(-1);
9873
2
    }
9874
9875
    /* Capture start position */
9876
155k
    if (ctxt->record_info) {
9877
0
        node_info.begin_pos = ctxt->input->consumed +
9878
0
                          (CUR_PTR - ctxt->input->base);
9879
0
  node_info.begin_line = ctxt->input->line;
9880
0
    }
9881
9882
155k
    if (ctxt->spaceNr == 0)
9883
0
  spacePush(ctxt, -1);
9884
155k
    else if (*ctxt->space == -2)
9885
6.59k
  spacePush(ctxt, -1);
9886
148k
    else
9887
148k
  spacePush(ctxt, *ctxt->space);
9888
9889
155k
    line = ctxt->input->line;
9890
155k
#ifdef LIBXML_SAX1_ENABLED
9891
155k
    if (ctxt->sax2)
9892
96.6k
#endif /* LIBXML_SAX1_ENABLED */
9893
96.6k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9894
58.6k
#ifdef LIBXML_SAX1_ENABLED
9895
58.6k
    else
9896
58.6k
  name = xmlParseStartTag(ctxt);
9897
155k
#endif /* LIBXML_SAX1_ENABLED */
9898
155k
    if (name == NULL) {
9899
20.3k
  spacePop(ctxt);
9900
20.3k
        return(-1);
9901
20.3k
    }
9902
134k
    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9903
134k
    cur = ctxt->node;
9904
9905
134k
#ifdef LIBXML_VALID_ENABLED
9906
    /*
9907
     * [ VC: Root Element Type ]
9908
     * The Name in the document type declaration must match the element
9909
     * type of the root element.
9910
     */
9911
134k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9912
134k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9913
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9914
134k
#endif /* LIBXML_VALID_ENABLED */
9915
9916
    /*
9917
     * Check for an Empty Element.
9918
     */
9919
134k
    if ((RAW == '/') && (NXT(1) == '>')) {
9920
2.58k
        SKIP(2);
9921
2.58k
  if (ctxt->sax2) {
9922
1.83k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9923
1.83k
    (!ctxt->disableSAX))
9924
535
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9925
1.83k
#ifdef LIBXML_SAX1_ENABLED
9926
1.83k
  } else {
9927
748
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9928
748
    (!ctxt->disableSAX))
9929
514
    ctxt->sax->endElement(ctxt->userData, name);
9930
748
#endif /* LIBXML_SAX1_ENABLED */
9931
748
  }
9932
2.58k
  namePop(ctxt);
9933
2.58k
  spacePop(ctxt);
9934
2.58k
  if (nbNs > 0)
9935
766
      xmlParserNsPop(ctxt, nbNs);
9936
2.58k
  if (cur != NULL && ctxt->record_info) {
9937
0
            node_info.node = cur;
9938
0
            node_info.end_pos = ctxt->input->consumed +
9939
0
                                (CUR_PTR - ctxt->input->base);
9940
0
            node_info.end_line = ctxt->input->line;
9941
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9942
0
  }
9943
2.58k
  return(1);
9944
2.58k
    }
9945
132k
    if (RAW == '>') {
9946
34.3k
        NEXT1;
9947
34.3k
        if (cur != NULL && ctxt->record_info) {
9948
0
            node_info.node = cur;
9949
0
            node_info.end_pos = 0;
9950
0
            node_info.end_line = 0;
9951
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9952
0
        }
9953
97.9k
    } else {
9954
97.9k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9955
97.9k
         "Couldn't find end of Start Tag %s line %d\n",
9956
97.9k
                    name, line, NULL);
9957
9958
  /*
9959
   * end of parsing of this node.
9960
   */
9961
97.9k
  nodePop(ctxt);
9962
97.9k
  namePop(ctxt);
9963
97.9k
  spacePop(ctxt);
9964
97.9k
  if (nbNs > 0)
9965
29.8k
      xmlParserNsPop(ctxt, nbNs);
9966
97.9k
  return(-1);
9967
97.9k
    }
9968
9969
34.3k
    return(0);
9970
132k
}
9971
9972
/**
9973
 * xmlParseElementEnd:
9974
 * @ctxt:  an XML parser context
9975
 *
9976
 * Parse the end of an XML element. Always consumes '</'.
9977
 */
9978
static void
9979
13.5k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9980
13.5k
    xmlNodePtr cur = ctxt->node;
9981
9982
13.5k
    if (ctxt->nameNr <= 0) {
9983
0
        if ((RAW == '<') && (NXT(1) == '/'))
9984
0
            SKIP(2);
9985
0
        return;
9986
0
    }
9987
9988
    /*
9989
     * parse the end of tag: '</' should be here.
9990
     */
9991
13.5k
    if (ctxt->sax2) {
9992
8.70k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9993
8.70k
  namePop(ctxt);
9994
8.70k
    }
9995
4.82k
#ifdef LIBXML_SAX1_ENABLED
9996
4.82k
    else
9997
4.82k
  xmlParseEndTag1(ctxt, 0);
9998
13.5k
#endif /* LIBXML_SAX1_ENABLED */
9999
10000
    /*
10001
     * Capture end position
10002
     */
10003
13.5k
    if (cur != NULL && ctxt->record_info) {
10004
0
        xmlParserNodeInfoPtr node_info;
10005
10006
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10007
0
        if (node_info != NULL) {
10008
0
            node_info->end_pos = ctxt->input->consumed +
10009
0
                                 (CUR_PTR - ctxt->input->base);
10010
0
            node_info->end_line = ctxt->input->line;
10011
0
        }
10012
0
    }
10013
13.5k
}
10014
10015
/**
10016
 * xmlParseVersionNum:
10017
 * @ctxt:  an XML parser context
10018
 *
10019
 * DEPRECATED: Internal function, don't use.
10020
 *
10021
 * parse the XML version value.
10022
 *
10023
 * [26] VersionNum ::= '1.' [0-9]+
10024
 *
10025
 * In practice allow [0-9].[0-9]+ at that level
10026
 *
10027
 * Returns the string giving the XML version number, or NULL
10028
 */
10029
xmlChar *
10030
5.85k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10031
5.85k
    xmlChar *buf = NULL;
10032
5.85k
    int len = 0;
10033
5.85k
    int size = 10;
10034
5.85k
    xmlChar cur;
10035
10036
5.85k
    buf = (xmlChar *) xmlMallocAtomic(size);
10037
5.85k
    if (buf == NULL) {
10038
0
  xmlErrMemory(ctxt);
10039
0
  return(NULL);
10040
0
    }
10041
5.85k
    cur = CUR;
10042
5.85k
    if (!((cur >= '0') && (cur <= '9'))) {
10043
5.74k
  xmlFree(buf);
10044
5.74k
  return(NULL);
10045
5.74k
    }
10046
114
    buf[len++] = cur;
10047
114
    NEXT;
10048
114
    cur=CUR;
10049
114
    if (cur != '.') {
10050
56
  xmlFree(buf);
10051
56
  return(NULL);
10052
56
    }
10053
58
    buf[len++] = cur;
10054
58
    NEXT;
10055
58
    cur=CUR;
10056
4.01k
    while ((cur >= '0') && (cur <= '9')) {
10057
3.95k
  if (len + 1 >= size) {
10058
95
      xmlChar *tmp;
10059
10060
95
      size *= 2;
10061
95
      tmp = (xmlChar *) xmlRealloc(buf, size);
10062
95
      if (tmp == NULL) {
10063
0
          xmlFree(buf);
10064
0
    xmlErrMemory(ctxt);
10065
0
    return(NULL);
10066
0
      }
10067
95
      buf = tmp;
10068
95
  }
10069
3.95k
  buf[len++] = cur;
10070
3.95k
  NEXT;
10071
3.95k
  cur=CUR;
10072
3.95k
    }
10073
58
    buf[len] = 0;
10074
58
    return(buf);
10075
58
}
10076
10077
/**
10078
 * xmlParseVersionInfo:
10079
 * @ctxt:  an XML parser context
10080
 *
10081
 * DEPRECATED: Internal function, don't use.
10082
 *
10083
 * parse the XML version.
10084
 *
10085
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10086
 *
10087
 * [25] Eq ::= S? '=' S?
10088
 *
10089
 * Returns the version string, e.g. "1.0"
10090
 */
10091
10092
xmlChar *
10093
10.0k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10094
10.0k
    xmlChar *version = NULL;
10095
10096
10.0k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10097
5.86k
  SKIP(7);
10098
5.86k
  SKIP_BLANKS;
10099
5.86k
  if (RAW != '=') {
10100
2
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10101
2
      return(NULL);
10102
2
        }
10103
5.85k
  NEXT;
10104
5.85k
  SKIP_BLANKS;
10105
5.85k
  if (RAW == '"') {
10106
5.85k
      NEXT;
10107
5.85k
      version = xmlParseVersionNum(ctxt);
10108
5.85k
      if (RAW != '"') {
10109
5.74k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10110
5.74k
      } else
10111
114
          NEXT;
10112
5.85k
  } else if (RAW == '\''){
10113
2
      NEXT;
10114
2
      version = xmlParseVersionNum(ctxt);
10115
2
      if (RAW != '\'') {
10116
1
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10117
1
      } else
10118
1
          NEXT;
10119
2
  } else {
10120
0
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10121
0
  }
10122
5.85k
    }
10123
10.0k
    return(version);
10124
10.0k
}
10125
10126
/**
10127
 * xmlParseEncName:
10128
 * @ctxt:  an XML parser context
10129
 *
10130
 * DEPRECATED: Internal function, don't use.
10131
 *
10132
 * parse the XML encoding name
10133
 *
10134
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10135
 *
10136
 * Returns the encoding name value or NULL
10137
 */
10138
xmlChar *
10139
4.23k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10140
4.23k
    xmlChar *buf = NULL;
10141
4.23k
    int len = 0;
10142
4.23k
    int size = 10;
10143
4.23k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10144
0
                    XML_MAX_TEXT_LENGTH :
10145
4.23k
                    XML_MAX_NAME_LENGTH;
10146
4.23k
    xmlChar cur;
10147
10148
4.23k
    cur = CUR;
10149
4.23k
    if (((cur >= 'a') && (cur <= 'z')) ||
10150
4.23k
        ((cur >= 'A') && (cur <= 'Z'))) {
10151
3.10k
  buf = (xmlChar *) xmlMallocAtomic(size);
10152
3.10k
  if (buf == NULL) {
10153
0
      xmlErrMemory(ctxt);
10154
0
      return(NULL);
10155
0
  }
10156
10157
3.10k
  buf[len++] = cur;
10158
3.10k
  NEXT;
10159
3.10k
  cur = CUR;
10160
18.1k
  while (((cur >= 'a') && (cur <= 'z')) ||
10161
18.1k
         ((cur >= 'A') && (cur <= 'Z')) ||
10162
18.1k
         ((cur >= '0') && (cur <= '9')) ||
10163
18.1k
         (cur == '.') || (cur == '_') ||
10164
18.1k
         (cur == '-')) {
10165
15.0k
      if (len + 1 >= size) {
10166
385
          xmlChar *tmp;
10167
10168
385
    size *= 2;
10169
385
    tmp = (xmlChar *) xmlRealloc(buf, size);
10170
385
    if (tmp == NULL) {
10171
0
        xmlErrMemory(ctxt);
10172
0
        xmlFree(buf);
10173
0
        return(NULL);
10174
0
    }
10175
385
    buf = tmp;
10176
385
      }
10177
15.0k
      buf[len++] = cur;
10178
15.0k
            if (len > maxLength) {
10179
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10180
0
                xmlFree(buf);
10181
0
                return(NULL);
10182
0
            }
10183
15.0k
      NEXT;
10184
15.0k
      cur = CUR;
10185
15.0k
        }
10186
3.10k
  buf[len] = 0;
10187
3.10k
    } else {
10188
1.13k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10189
1.13k
    }
10190
4.23k
    return(buf);
10191
4.23k
}
10192
10193
/**
10194
 * xmlParseEncodingDecl:
10195
 * @ctxt:  an XML parser context
10196
 *
10197
 * DEPRECATED: Internal function, don't use.
10198
 *
10199
 * parse the XML encoding declaration
10200
 *
10201
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10202
 *
10203
 * this setups the conversion filters.
10204
 *
10205
 * Returns the encoding value or NULL
10206
 */
10207
10208
const xmlChar *
10209
9.95k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10210
9.95k
    xmlChar *encoding = NULL;
10211
10212
9.95k
    SKIP_BLANKS;
10213
9.95k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10214
5.67k
        return(NULL);
10215
10216
4.28k
    SKIP(8);
10217
4.28k
    SKIP_BLANKS;
10218
4.28k
    if (RAW != '=') {
10219
31
        xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10220
31
        return(NULL);
10221
31
    }
10222
4.25k
    NEXT;
10223
4.25k
    SKIP_BLANKS;
10224
4.25k
    if (RAW == '"') {
10225
9
        NEXT;
10226
9
        encoding = xmlParseEncName(ctxt);
10227
9
        if (RAW != '"') {
10228
8
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10229
8
            xmlFree((xmlChar *) encoding);
10230
8
            return(NULL);
10231
8
        } else
10232
1
            NEXT;
10233
4.24k
    } else if (RAW == '\''){
10234
4.22k
        NEXT;
10235
4.22k
        encoding = xmlParseEncName(ctxt);
10236
4.22k
        if (RAW != '\'') {
10237
1.22k
            xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10238
1.22k
            xmlFree((xmlChar *) encoding);
10239
1.22k
            return(NULL);
10240
1.22k
        } else
10241
3.00k
            NEXT;
10242
4.22k
    } else {
10243
18
        xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10244
18
    }
10245
10246
3.01k
    if (encoding == NULL)
10247
35
        return(NULL);
10248
10249
2.98k
    xmlSetDeclaredEncoding(ctxt, encoding);
10250
10251
2.98k
    return(ctxt->encoding);
10252
3.01k
}
10253
10254
/**
10255
 * xmlParseSDDecl:
10256
 * @ctxt:  an XML parser context
10257
 *
10258
 * DEPRECATED: Internal function, don't use.
10259
 *
10260
 * parse the XML standalone declaration
10261
 *
10262
 * [32] SDDecl ::= S 'standalone' Eq
10263
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10264
 *
10265
 * [ VC: Standalone Document Declaration ]
10266
 * TODO The standalone document declaration must have the value "no"
10267
 * if any external markup declarations contain declarations of:
10268
 *  - attributes with default values, if elements to which these
10269
 *    attributes apply appear in the document without specifications
10270
 *    of values for these attributes, or
10271
 *  - entities (other than amp, lt, gt, apos, quot), if references
10272
 *    to those entities appear in the document, or
10273
 *  - attributes with values subject to normalization, where the
10274
 *    attribute appears in the document with a value which will change
10275
 *    as a result of normalization, or
10276
 *  - element types with element content, if white space occurs directly
10277
 *    within any instance of those types.
10278
 *
10279
 * Returns:
10280
 *   1 if standalone="yes"
10281
 *   0 if standalone="no"
10282
 *  -2 if standalone attribute is missing or invalid
10283
 *    (A standalone value of -2 means that the XML declaration was found,
10284
 *     but no value was specified for the standalone attribute).
10285
 */
10286
10287
int
10288
7.90k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10289
7.90k
    int standalone = -2;
10290
10291
7.90k
    SKIP_BLANKS;
10292
7.90k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10293
120
  SKIP(10);
10294
120
        SKIP_BLANKS;
10295
120
  if (RAW != '=') {
10296
3
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10297
3
      return(standalone);
10298
3
        }
10299
117
  NEXT;
10300
117
  SKIP_BLANKS;
10301
117
        if (RAW == '\''){
10302
59
      NEXT;
10303
59
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10304
1
          standalone = 0;
10305
1
                SKIP(2);
10306
58
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10307
58
                 (NXT(2) == 's')) {
10308
40
          standalone = 1;
10309
40
    SKIP(3);
10310
40
            } else {
10311
18
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10312
18
      }
10313
59
      if (RAW != '\'') {
10314
58
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10315
58
      } else
10316
1
          NEXT;
10317
59
  } else if (RAW == '"'){
10318
56
      NEXT;
10319
56
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10320
9
          standalone = 0;
10321
9
    SKIP(2);
10322
47
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10323
47
                 (NXT(2) == 's')) {
10324
22
          standalone = 1;
10325
22
                SKIP(3);
10326
25
            } else {
10327
25
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10328
25
      }
10329
56
      if (RAW != '"') {
10330
32
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10331
32
      } else
10332
24
          NEXT;
10333
56
  } else {
10334
2
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10335
2
        }
10336
117
    }
10337
7.90k
    return(standalone);
10338
7.90k
}
10339
10340
/**
10341
 * xmlParseXMLDecl:
10342
 * @ctxt:  an XML parser context
10343
 *
10344
 * DEPRECATED: Internal function, don't use.
10345
 *
10346
 * parse an XML declaration header
10347
 *
10348
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10349
 */
10350
10351
void
10352
10.0k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10353
10.0k
    xmlChar *version;
10354
10355
    /*
10356
     * This value for standalone indicates that the document has an
10357
     * XML declaration but it does not have a standalone attribute.
10358
     * It will be overwritten later if a standalone attribute is found.
10359
     */
10360
10361
10.0k
    ctxt->standalone = -2;
10362
10363
    /*
10364
     * We know that '<?xml' is here.
10365
     */
10366
10.0k
    SKIP(5);
10367
10368
10.0k
    if (!IS_BLANK_CH(RAW)) {
10369
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10370
0
                 "Blank needed after '<?xml'\n");
10371
0
    }
10372
10.0k
    SKIP_BLANKS;
10373
10374
    /*
10375
     * We must have the VersionInfo here.
10376
     */
10377
10.0k
    version = xmlParseVersionInfo(ctxt);
10378
10.0k
    if (version == NULL) {
10379
10.0k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10380
10.0k
    } else {
10381
58
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10382
      /*
10383
       * Changed here for XML-1.0 5th edition
10384
       */
10385
50
      if (ctxt->options & XML_PARSE_OLD10) {
10386
1
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10387
1
                "Unsupported version '%s'\n",
10388
1
                version);
10389
49
      } else {
10390
49
          if ((version[0] == '1') && ((version[1] == '.'))) {
10391
42
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10392
42
                      "Unsupported version '%s'\n",
10393
42
          version, NULL);
10394
42
    } else {
10395
7
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10396
7
              "Unsupported version '%s'\n",
10397
7
              version);
10398
7
    }
10399
49
      }
10400
50
  }
10401
58
  if (ctxt->version != NULL)
10402
0
      xmlFree((void *) ctxt->version);
10403
58
  ctxt->version = version;
10404
58
    }
10405
10406
    /*
10407
     * We may have the encoding declaration
10408
     */
10409
10.0k
    if (!IS_BLANK_CH(RAW)) {
10410
9.96k
        if ((RAW == '?') && (NXT(1) == '>')) {
10411
130
      SKIP(2);
10412
130
      return;
10413
130
  }
10414
9.83k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10415
9.83k
    }
10416
9.95k
    xmlParseEncodingDecl(ctxt);
10417
10418
    /*
10419
     * We may have the standalone status.
10420
     */
10421
9.95k
    if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10422
2.93k
        if ((RAW == '?') && (NXT(1) == '>')) {
10423
2.05k
      SKIP(2);
10424
2.05k
      return;
10425
2.05k
  }
10426
881
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10427
881
    }
10428
10429
    /*
10430
     * We can grow the input buffer freely at that point
10431
     */
10432
7.90k
    GROW;
10433
10434
7.90k
    SKIP_BLANKS;
10435
7.90k
    ctxt->standalone = xmlParseSDDecl(ctxt);
10436
10437
7.90k
    SKIP_BLANKS;
10438
7.90k
    if ((RAW == '?') && (NXT(1) == '>')) {
10439
62
        SKIP(2);
10440
7.84k
    } else if (RAW == '>') {
10441
        /* Deprecated old WD ... */
10442
6.45k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10443
6.45k
  NEXT;
10444
6.45k
    } else {
10445
1.38k
        int c;
10446
10447
1.38k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10448
20.1k
        while ((PARSER_STOPPED(ctxt) == 0) &&
10449
20.1k
               ((c = CUR) != 0)) {
10450
19.9k
            NEXT;
10451
19.9k
            if (c == '>')
10452
1.17k
                break;
10453
19.9k
        }
10454
1.38k
    }
10455
7.90k
}
10456
10457
/**
10458
 * xmlParseMisc:
10459
 * @ctxt:  an XML parser context
10460
 *
10461
 * DEPRECATED: Internal function, don't use.
10462
 *
10463
 * parse an XML Misc* optional field.
10464
 *
10465
 * [27] Misc ::= Comment | PI |  S
10466
 */
10467
10468
void
10469
40.7k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10470
44.8k
    while (PARSER_STOPPED(ctxt) == 0) {
10471
43.4k
        SKIP_BLANKS;
10472
43.4k
        GROW;
10473
43.4k
        if ((RAW == '<') && (NXT(1) == '?')) {
10474
2.94k
      xmlParsePI(ctxt);
10475
40.5k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10476
1.14k
      xmlParseComment(ctxt);
10477
39.3k
        } else {
10478
39.3k
            break;
10479
39.3k
        }
10480
43.4k
    }
10481
40.7k
}
10482
10483
static void
10484
18.1k
xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10485
18.1k
    xmlDocPtr doc;
10486
10487
    /*
10488
     * SAX: end of the document processing.
10489
     */
10490
18.1k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10491
18.1k
        ctxt->sax->endDocument(ctxt->userData);
10492
10493
18.1k
    doc = ctxt->myDoc;
10494
18.1k
    if (doc != NULL) {
10495
12.3k
        if (ctxt->wellFormed) {
10496
51
            doc->properties |= XML_DOC_WELLFORMED;
10497
51
            if (ctxt->valid)
10498
47
                doc->properties |= XML_DOC_DTDVALID;
10499
51
            if (ctxt->nsWellFormed)
10500
44
                doc->properties |= XML_DOC_NSVALID;
10501
51
        }
10502
10503
12.3k
        if (ctxt->options & XML_PARSE_OLD10)
10504
1.03k
            doc->properties |= XML_DOC_OLD10;
10505
10506
        /*
10507
         * Remove locally kept entity definitions if the tree was not built
10508
         */
10509
12.3k
  if (xmlStrEqual(doc->version, SAX_COMPAT_MODE)) {
10510
118
            xmlFreeDoc(doc);
10511
118
            ctxt->myDoc = NULL;
10512
118
        }
10513
12.3k
    }
10514
18.1k
}
10515
10516
/**
10517
 * xmlParseDocument:
10518
 * @ctxt:  an XML parser context
10519
 *
10520
 * Parse an XML document and invoke the SAX handlers. This is useful
10521
 * if you're only interested in custom SAX callbacks. If you want a
10522
 * document tree, use xmlCtxtParseDocument.
10523
 *
10524
 * Returns 0, -1 in case of error.
10525
 */
10526
10527
int
10528
18.1k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10529
18.1k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10530
0
        return(-1);
10531
10532
18.1k
    GROW;
10533
10534
    /*
10535
     * SAX: detecting the level.
10536
     */
10537
18.1k
    xmlCtxtInitializeLate(ctxt);
10538
10539
    /*
10540
     * Document locator is unused. Only for backward compatibility.
10541
     */
10542
18.1k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10543
18.1k
        xmlSAXLocator copy = xmlDefaultSAXLocator;
10544
18.1k
        ctxt->sax->setDocumentLocator(ctxt->userData, &copy);
10545
18.1k
    }
10546
10547
18.1k
    xmlDetectEncoding(ctxt);
10548
10549
18.1k
    if (CUR == 0) {
10550
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10551
0
  return(-1);
10552
0
    }
10553
10554
18.1k
    GROW;
10555
18.1k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10556
10557
  /*
10558
   * Note that we will switch encoding on the fly.
10559
   */
10560
10.0k
  xmlParseXMLDecl(ctxt);
10561
10.0k
  SKIP_BLANKS;
10562
10.0k
    } else {
10563
8.01k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10564
8.01k
        if (ctxt->version == NULL) {
10565
0
            xmlErrMemory(ctxt);
10566
0
            return(-1);
10567
0
        }
10568
8.01k
    }
10569
18.1k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10570
12.2k
        ctxt->sax->startDocument(ctxt->userData);
10571
18.1k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10572
18.1k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10573
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10574
0
    }
10575
10576
    /*
10577
     * The Misc part of the Prolog
10578
     */
10579
18.1k
    xmlParseMisc(ctxt);
10580
10581
    /*
10582
     * Then possibly doc type declaration(s) and more Misc
10583
     * (doctypedecl Misc*)?
10584
     */
10585
18.1k
    GROW;
10586
18.1k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10587
10588
8.79k
  ctxt->inSubset = 1;
10589
8.79k
  xmlParseDocTypeDecl(ctxt);
10590
8.79k
  if (RAW == '[') {
10591
7.15k
      xmlParseInternalSubset(ctxt);
10592
7.15k
  }
10593
10594
  /*
10595
   * Create and update the external subset.
10596
   */
10597
8.79k
  ctxt->inSubset = 2;
10598
8.79k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10599
8.79k
      (!ctxt->disableSAX))
10600
5.21k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10601
5.21k
                                ctxt->extSubSystem, ctxt->extSubURI);
10602
8.79k
  ctxt->inSubset = 0;
10603
10604
8.79k
        xmlCleanSpecialAttr(ctxt);
10605
10606
8.79k
  xmlParseMisc(ctxt);
10607
8.79k
    }
10608
10609
    /*
10610
     * Time to start parsing the tree itself
10611
     */
10612
18.1k
    GROW;
10613
18.1k
    if (RAW != '<') {
10614
4.27k
        if (ctxt->wellFormed)
10615
3
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10616
3
                           "Start tag expected, '<' not found\n");
10617
13.8k
    } else {
10618
13.8k
  xmlParseElement(ctxt);
10619
10620
  /*
10621
   * The Misc part at the end
10622
   */
10623
13.8k
  xmlParseMisc(ctxt);
10624
10625
13.8k
        if (ctxt->input->cur < ctxt->input->end) {
10626
5.52k
            if (ctxt->wellFormed)
10627
1
          xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10628
8.30k
        } else if ((ctxt->input->buf != NULL) &&
10629
8.30k
                   (ctxt->input->buf->encoder != NULL) &&
10630
8.30k
                   (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
10631
135
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
10632
135
                           "Truncated multi-byte sequence at EOF\n");
10633
135
        }
10634
13.8k
    }
10635
10636
18.1k
    ctxt->instate = XML_PARSER_EOF;
10637
18.1k
    xmlFinishDocument(ctxt);
10638
10639
18.1k
    if (! ctxt->wellFormed) {
10640
18.0k
  ctxt->valid = 0;
10641
18.0k
  return(-1);
10642
18.0k
    }
10643
10644
51
    return(0);
10645
18.1k
}
10646
10647
/**
10648
 * xmlParseExtParsedEnt:
10649
 * @ctxt:  an XML parser context
10650
 *
10651
 * parse a general parsed entity
10652
 * An external general parsed entity is well-formed if it matches the
10653
 * production labeled extParsedEnt.
10654
 *
10655
 * [78] extParsedEnt ::= TextDecl? content
10656
 *
10657
 * Returns 0, -1 in case of error. the parser context is augmented
10658
 *                as a result of the parsing.
10659
 */
10660
10661
int
10662
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10663
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10664
0
        return(-1);
10665
10666
0
    xmlCtxtInitializeLate(ctxt);
10667
10668
    /*
10669
     * Document locator is unused. Only for backward compatibility.
10670
     */
10671
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10672
0
        xmlSAXLocator copy = xmlDefaultSAXLocator;
10673
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &copy);
10674
0
    }
10675
10676
0
    xmlDetectEncoding(ctxt);
10677
10678
0
    if (CUR == 0) {
10679
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10680
0
    }
10681
10682
    /*
10683
     * Check for the XMLDecl in the Prolog.
10684
     */
10685
0
    GROW;
10686
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10687
10688
  /*
10689
   * Note that we will switch encoding on the fly.
10690
   */
10691
0
  xmlParseXMLDecl(ctxt);
10692
0
  SKIP_BLANKS;
10693
0
    } else {
10694
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10695
0
    }
10696
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10697
0
        ctxt->sax->startDocument(ctxt->userData);
10698
10699
    /*
10700
     * Doing validity checking on chunk doesn't make sense
10701
     */
10702
0
    ctxt->options &= ~XML_PARSE_DTDVALID;
10703
0
    ctxt->validate = 0;
10704
0
    ctxt->depth = 0;
10705
10706
0
    xmlParseContentInternal(ctxt);
10707
10708
0
    if (ctxt->input->cur < ctxt->input->end)
10709
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10710
10711
    /*
10712
     * SAX: end of the document processing.
10713
     */
10714
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10715
0
        ctxt->sax->endDocument(ctxt->userData);
10716
10717
0
    if (! ctxt->wellFormed) return(-1);
10718
0
    return(0);
10719
0
}
10720
10721
#ifdef LIBXML_PUSH_ENABLED
10722
/************************************************************************
10723
 *                  *
10724
 *    Progressive parsing interfaces        *
10725
 *                  *
10726
 ************************************************************************/
10727
10728
/**
10729
 * xmlParseLookupChar:
10730
 * @ctxt:  an XML parser context
10731
 * @c:  character
10732
 *
10733
 * Check whether the input buffer contains a character.
10734
 */
10735
static int
10736
0
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10737
0
    const xmlChar *cur;
10738
10739
0
    if (ctxt->checkIndex == 0) {
10740
0
        cur = ctxt->input->cur + 1;
10741
0
    } else {
10742
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10743
0
    }
10744
10745
0
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10746
0
        size_t index = ctxt->input->end - ctxt->input->cur;
10747
10748
0
        if (index > LONG_MAX) {
10749
0
            ctxt->checkIndex = 0;
10750
0
            return(1);
10751
0
        }
10752
0
        ctxt->checkIndex = index;
10753
0
        return(0);
10754
0
    } else {
10755
0
        ctxt->checkIndex = 0;
10756
0
        return(1);
10757
0
    }
10758
0
}
10759
10760
/**
10761
 * xmlParseLookupString:
10762
 * @ctxt:  an XML parser context
10763
 * @startDelta: delta to apply at the start
10764
 * @str:  string
10765
 * @strLen:  length of string
10766
 *
10767
 * Check whether the input buffer contains a string.
10768
 */
10769
static const xmlChar *
10770
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10771
0
                     const char *str, size_t strLen) {
10772
0
    const xmlChar *cur, *term;
10773
10774
0
    if (ctxt->checkIndex == 0) {
10775
0
        cur = ctxt->input->cur + startDelta;
10776
0
    } else {
10777
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10778
0
    }
10779
10780
0
    term = BAD_CAST strstr((const char *) cur, str);
10781
0
    if (term == NULL) {
10782
0
        const xmlChar *end = ctxt->input->end;
10783
0
        size_t index;
10784
10785
        /* Rescan (strLen - 1) characters. */
10786
0
        if ((size_t) (end - cur) < strLen)
10787
0
            end = cur;
10788
0
        else
10789
0
            end -= strLen - 1;
10790
0
        index = end - ctxt->input->cur;
10791
0
        if (index > LONG_MAX) {
10792
0
            ctxt->checkIndex = 0;
10793
0
            return(ctxt->input->end - strLen);
10794
0
        }
10795
0
        ctxt->checkIndex = index;
10796
0
    } else {
10797
0
        ctxt->checkIndex = 0;
10798
0
    }
10799
10800
0
    return(term);
10801
0
}
10802
10803
/**
10804
 * xmlParseLookupCharData:
10805
 * @ctxt:  an XML parser context
10806
 *
10807
 * Check whether the input buffer contains terminated char data.
10808
 */
10809
static int
10810
0
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10811
0
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10812
0
    const xmlChar *end = ctxt->input->end;
10813
0
    size_t index;
10814
10815
0
    while (cur < end) {
10816
0
        if ((*cur == '<') || (*cur == '&')) {
10817
0
            ctxt->checkIndex = 0;
10818
0
            return(1);
10819
0
        }
10820
0
        cur++;
10821
0
    }
10822
10823
0
    index = cur - ctxt->input->cur;
10824
0
    if (index > LONG_MAX) {
10825
0
        ctxt->checkIndex = 0;
10826
0
        return(1);
10827
0
    }
10828
0
    ctxt->checkIndex = index;
10829
0
    return(0);
10830
0
}
10831
10832
/**
10833
 * xmlParseLookupGt:
10834
 * @ctxt:  an XML parser context
10835
 *
10836
 * Check whether there's enough data in the input buffer to finish parsing
10837
 * a start tag. This has to take quotes into account.
10838
 */
10839
static int
10840
0
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10841
0
    const xmlChar *cur;
10842
0
    const xmlChar *end = ctxt->input->end;
10843
0
    int state = ctxt->endCheckState;
10844
0
    size_t index;
10845
10846
0
    if (ctxt->checkIndex == 0)
10847
0
        cur = ctxt->input->cur + 1;
10848
0
    else
10849
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10850
10851
0
    while (cur < end) {
10852
0
        if (state) {
10853
0
            if (*cur == state)
10854
0
                state = 0;
10855
0
        } else if (*cur == '\'' || *cur == '"') {
10856
0
            state = *cur;
10857
0
        } else if (*cur == '>') {
10858
0
            ctxt->checkIndex = 0;
10859
0
            ctxt->endCheckState = 0;
10860
0
            return(1);
10861
0
        }
10862
0
        cur++;
10863
0
    }
10864
10865
0
    index = cur - ctxt->input->cur;
10866
0
    if (index > LONG_MAX) {
10867
0
        ctxt->checkIndex = 0;
10868
0
        ctxt->endCheckState = 0;
10869
0
        return(1);
10870
0
    }
10871
0
    ctxt->checkIndex = index;
10872
0
    ctxt->endCheckState = state;
10873
0
    return(0);
10874
0
}
10875
10876
/**
10877
 * xmlParseLookupInternalSubset:
10878
 * @ctxt:  an XML parser context
10879
 *
10880
 * Check whether there's enough data in the input buffer to finish parsing
10881
 * the internal subset.
10882
 */
10883
static int
10884
0
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10885
    /*
10886
     * Sorry, but progressive parsing of the internal subset is not
10887
     * supported. We first check that the full content of the internal
10888
     * subset is available and parsing is launched only at that point.
10889
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10890
     * not in a ']]>' sequence which are conditional sections.
10891
     */
10892
0
    const xmlChar *cur, *start;
10893
0
    const xmlChar *end = ctxt->input->end;
10894
0
    int state = ctxt->endCheckState;
10895
0
    size_t index;
10896
10897
0
    if (ctxt->checkIndex == 0) {
10898
0
        cur = ctxt->input->cur + 1;
10899
0
    } else {
10900
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10901
0
    }
10902
0
    start = cur;
10903
10904
0
    while (cur < end) {
10905
0
        if (state == '-') {
10906
0
            if ((*cur == '-') &&
10907
0
                (cur[1] == '-') &&
10908
0
                (cur[2] == '>')) {
10909
0
                state = 0;
10910
0
                cur += 3;
10911
0
                start = cur;
10912
0
                continue;
10913
0
            }
10914
0
        }
10915
0
        else if (state == ']') {
10916
0
            if (*cur == '>') {
10917
0
                ctxt->checkIndex = 0;
10918
0
                ctxt->endCheckState = 0;
10919
0
                return(1);
10920
0
            }
10921
0
            if (IS_BLANK_CH(*cur)) {
10922
0
                state = ' ';
10923
0
            } else if (*cur != ']') {
10924
0
                state = 0;
10925
0
                start = cur;
10926
0
                continue;
10927
0
            }
10928
0
        }
10929
0
        else if (state == ' ') {
10930
0
            if (*cur == '>') {
10931
0
                ctxt->checkIndex = 0;
10932
0
                ctxt->endCheckState = 0;
10933
0
                return(1);
10934
0
            }
10935
0
            if (!IS_BLANK_CH(*cur)) {
10936
0
                state = 0;
10937
0
                start = cur;
10938
0
                continue;
10939
0
            }
10940
0
        }
10941
0
        else if (state != 0) {
10942
0
            if (*cur == state) {
10943
0
                state = 0;
10944
0
                start = cur + 1;
10945
0
            }
10946
0
        }
10947
0
        else if (*cur == '<') {
10948
0
            if ((cur[1] == '!') &&
10949
0
                (cur[2] == '-') &&
10950
0
                (cur[3] == '-')) {
10951
0
                state = '-';
10952
0
                cur += 4;
10953
                /* Don't treat <!--> as comment */
10954
0
                start = cur;
10955
0
                continue;
10956
0
            }
10957
0
        }
10958
0
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10959
0
            state = *cur;
10960
0
        }
10961
10962
0
        cur++;
10963
0
    }
10964
10965
    /*
10966
     * Rescan the three last characters to detect "<!--" and "-->"
10967
     * split across chunks.
10968
     */
10969
0
    if ((state == 0) || (state == '-')) {
10970
0
        if (cur - start < 3)
10971
0
            cur = start;
10972
0
        else
10973
0
            cur -= 3;
10974
0
    }
10975
0
    index = cur - ctxt->input->cur;
10976
0
    if (index > LONG_MAX) {
10977
0
        ctxt->checkIndex = 0;
10978
0
        ctxt->endCheckState = 0;
10979
0
        return(1);
10980
0
    }
10981
0
    ctxt->checkIndex = index;
10982
0
    ctxt->endCheckState = state;
10983
0
    return(0);
10984
0
}
10985
10986
/**
10987
 * xmlCheckCdataPush:
10988
 * @cur: pointer to the block of characters
10989
 * @len: length of the block in bytes
10990
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
10991
 *
10992
 * Check that the block of characters is okay as SCdata content [20]
10993
 *
10994
 * Returns the number of bytes to pass if okay, a negative index where an
10995
 *         UTF-8 error occurred otherwise
10996
 */
10997
static int
10998
0
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
10999
0
    int ix;
11000
0
    unsigned char c;
11001
0
    int codepoint;
11002
11003
0
    if ((utf == NULL) || (len <= 0))
11004
0
        return(0);
11005
11006
0
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11007
0
        c = utf[ix];
11008
0
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11009
0
      if (c >= 0x20)
11010
0
    ix++;
11011
0
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11012
0
          ix++;
11013
0
      else
11014
0
          return(-ix);
11015
0
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11016
0
      if (ix + 2 > len) return(complete ? -ix : ix);
11017
0
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11018
0
          return(-ix);
11019
0
      codepoint = (utf[ix] & 0x1f) << 6;
11020
0
      codepoint |= utf[ix+1] & 0x3f;
11021
0
      if (!xmlIsCharQ(codepoint))
11022
0
          return(-ix);
11023
0
      ix += 2;
11024
0
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11025
0
      if (ix + 3 > len) return(complete ? -ix : ix);
11026
0
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11027
0
          ((utf[ix+2] & 0xc0) != 0x80))
11028
0
        return(-ix);
11029
0
      codepoint = (utf[ix] & 0xf) << 12;
11030
0
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11031
0
      codepoint |= utf[ix+2] & 0x3f;
11032
0
      if (!xmlIsCharQ(codepoint))
11033
0
          return(-ix);
11034
0
      ix += 3;
11035
0
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11036
0
      if (ix + 4 > len) return(complete ? -ix : ix);
11037
0
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11038
0
          ((utf[ix+2] & 0xc0) != 0x80) ||
11039
0
    ((utf[ix+3] & 0xc0) != 0x80))
11040
0
        return(-ix);
11041
0
      codepoint = (utf[ix] & 0x7) << 18;
11042
0
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11043
0
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11044
0
      codepoint |= utf[ix+3] & 0x3f;
11045
0
      if (!xmlIsCharQ(codepoint))
11046
0
          return(-ix);
11047
0
      ix += 4;
11048
0
  } else       /* unknown encoding */
11049
0
      return(-ix);
11050
0
      }
11051
0
      return(ix);
11052
0
}
11053
11054
/**
11055
 * xmlParseTryOrFinish:
11056
 * @ctxt:  an XML parser context
11057
 * @terminate:  last chunk indicator
11058
 *
11059
 * Try to progress on parsing
 *
 * Drives the push parser state machine: as long as ctxt->disableSAX is
 * zero and at least one byte is left between input->cur and input->end,
 * the handler for ctxt->instate is run. Unless @terminate is non-zero,
 * each handler first checks that enough data is buffered (tests on
 * avail, or one of the xmlParseLookup* helpers) and jumps to the done
 * label otherwise, leaving the state untouched for the next call.
11060
 *
11061
 * Returns zero if no parsing was possible
 *
 * NOTE(review): the local return value is only ever incremented (by 5)
 * when an XML declaration gets parsed, and the encoding_error exit
 * returns 0, so a zero result doesn't by itself mean that no progress
 * was made.
11062
 */
11063
static int
11064
0
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11065
0
    int ret = 0;
11066
0
    size_t avail;
11067
0
    xmlChar cur, next;
11068
11069
0
    if (ctxt->input == NULL)
11070
0
        return(0);
11071
11072
0
    /*
     * input was checked against NULL just above, so only the offset test
     * matters: xmlParserShrink is called once cur is more than 4096
     * bytes past base.
     */
    if ((ctxt->input != NULL) &&
11073
0
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11074
0
        xmlParserShrink(ctxt);
11075
0
    }
11076
11077
0
    while (ctxt->disableSAX == 0) {
11078
0
        /* Number of unconsumed bytes, recomputed on every iteration. */
        avail = ctxt->input->end - ctxt->input->cur;
11079
0
        if (avail < 1)
11080
0
      goto done;
11081
0
        switch (ctxt->instate) {
11082
0
            case XML_PARSER_EOF:
11083
          /*
11084
     * Document parsing is done !
11085
     */
11086
0
          goto done;
11087
0
            case XML_PARSER_START:
11088
                /*
11089
                 * Very first chars read from the document flow.
11090
                 */
11091
0
                if ((!terminate) && (avail < 4))
11092
0
                    goto done;
11093
11094
                /*
11095
                 * We need more bytes to detect EBCDIC code pages.
11096
                 * See xmlDetectEBCDIC.
                 * When the input starts with the four bytes tested
                 * below, wait until 200 bytes are buffered (unless
                 * this is the last chunk).
11097
                 */
11098
0
                if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
11099
0
                    (!terminate) && (avail < 200))
11100
0
                    goto done;
11101
11102
0
                xmlDetectEncoding(ctxt);
11103
0
                ctxt->instate = XML_PARSER_XML_DECL;
11104
0
    break;
11105
11106
0
            case XML_PARSER_XML_DECL:
11107
0
    if ((!terminate) && (avail < 2))
11108
0
        goto done;
11109
0
    cur = ctxt->input->cur[0];
11110
0
    next = ctxt->input->cur[1];
11111
0
          if ((cur == '<') && (next == '?')) {
11112
        /* PI or XML decl */
        /* Wait until the closing "?>" is buffered. */
11113
0
        if ((!terminate) &&
11114
0
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11115
0
      goto done;
11116
0
        if ((ctxt->input->cur[2] == 'x') &&
11117
0
      (ctxt->input->cur[3] == 'm') &&
11118
0
      (ctxt->input->cur[4] == 'l') &&
11119
0
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11120
0
      /* This is the only place where ret is modified. */
      ret += 5;
11121
0
      xmlParseXMLDecl(ctxt);
11122
0
        } else {
11123
0
      /* Some other PI: record the default version. */
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11124
0
                        if (ctxt->version == NULL) {
11125
0
                            xmlErrMemory(ctxt);
11126
0
                            /* Leaves the switch with instate unchanged. */
                            break;
11127
0
                        }
11128
0
        }
11129
0
    } else {
11130
0
        /* No "<?" at all: record the default version. */
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11131
0
        if (ctxt->version == NULL) {
11132
0
            xmlErrMemory(ctxt);
11133
0
      /* Leaves the switch with instate unchanged. */
      break;
11134
0
        }
11135
0
    }
11136
0
                /* The locator callback receives a copy of the default locator. */
                if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
11137
0
                    xmlSAXLocator copy = xmlDefaultSAXLocator;
11138
0
                    ctxt->sax->setDocumentLocator(ctxt->userData, &copy);
11139
0
                }
11140
0
                if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11141
0
                    (!ctxt->disableSAX))
11142
0
                    ctxt->sax->startDocument(ctxt->userData);
11143
0
                ctxt->instate = XML_PARSER_MISC;
11144
0
    break;
11145
0
            case XML_PARSER_START_TAG: {
11146
0
          const xmlChar *name;
11147
0
    const xmlChar *prefix = NULL;
11148
0
    const xmlChar *URI = NULL;
11149
0
                /* Line number captured before the tag is parsed; handed to nameNsPush below. */
                int line = ctxt->input->line;
11150
0
    int nbNs = 0;
11151
11152
0
    if ((!terminate) && (avail < 2))
11153
0
        goto done;
11154
0
    cur = ctxt->input->cur[0];
11155
0
          if (cur != '<') {
11156
0
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11157
0
                                   "Start tag expected, '<' not found");
11158
0
                    ctxt->instate = XML_PARSER_EOF;
11159
0
                    xmlFinishDocument(ctxt);
11160
0
        goto done;
11161
0
    }
11162
0
    /* Wait until the '>' ending the start tag is buffered. */
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11163
0
                    goto done;
11164
0
    /*
     * Push -1 on the space stack when it is empty or its top is -2,
     * otherwise duplicate the current top.
     */
    if (ctxt->spaceNr == 0)
11165
0
        spacePush(ctxt, -1);
11166
0
    else if (*ctxt->space == -2)
11167
0
        spacePush(ctxt, -1);
11168
0
    else
11169
0
        spacePush(ctxt, *ctxt->space);
11170
0
/* Without SAX1 support the SAX2 start tag parser is used unconditionally. */
#ifdef LIBXML_SAX1_ENABLED
11171
0
    if (ctxt->sax2)
11172
0
#endif /* LIBXML_SAX1_ENABLED */
11173
0
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
11174
0
#ifdef LIBXML_SAX1_ENABLED
11175
0
    else
11176
0
        name = xmlParseStartTag(ctxt);
11177
0
#endif /* LIBXML_SAX1_ENABLED */
11178
0
    /* No name could be parsed: undo the space push and stop parsing. */
    if (name == NULL) {
11179
0
        spacePop(ctxt);
11180
0
                    ctxt->instate = XML_PARSER_EOF;
11181
0
                    xmlFinishDocument(ctxt);
11182
0
        goto done;
11183
0
    }
11184
0
#ifdef LIBXML_VALID_ENABLED
11185
    /*
11186
     * [ VC: Root Element Type ]
11187
     * The Name in the document type declaration must match
11188
     * the element type of the root element.
11189
     */
11190
0
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11191
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11192
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11193
0
#endif /* LIBXML_VALID_ENABLED */
11194
11195
    /*
11196
     * Check for an Empty Element.
     * In that case the end element callback is issued right away
     * and the name is never pushed on the name stack.
11197
     */
11198
0
    if ((RAW == '/') && (NXT(1) == '>')) {
11199
0
        SKIP(2);
11200
11201
0
        if (ctxt->sax2) {
11202
0
      if ((ctxt->sax != NULL) &&
11203
0
          (ctxt->sax->endElementNs != NULL) &&
11204
0
          (!ctxt->disableSAX))
11205
0
          ctxt->sax->endElementNs(ctxt->userData, name,
11206
0
                                  prefix, URI);
11207
0
      if (nbNs > 0)
11208
0
          xmlParserNsPop(ctxt, nbNs);
11209
0
#ifdef LIBXML_SAX1_ENABLED
11210
0
        } else {
11211
0
      if ((ctxt->sax != NULL) &&
11212
0
          (ctxt->sax->endElement != NULL) &&
11213
0
          (!ctxt->disableSAX))
11214
0
          ctxt->sax->endElement(ctxt->userData, name);
11215
0
#endif /* LIBXML_SAX1_ENABLED */
11216
0
        }
11217
0
        spacePop(ctxt);
11218
0
    } else if (RAW == '>') {
11219
0
        NEXT;
11220
0
                    /* Regular start tag: remember the element on the name stack. */
                    nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11221
0
    } else {
11222
0
        /* Neither "/>" nor ">" follows: report it and undo the pushes. */
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11223
0
           "Couldn't find end of Start Tag %s\n",
11224
0
           name);
11225
0
        nodePop(ctxt);
11226
0
        spacePop(ctxt);
11227
0
                    if (nbNs > 0)
11228
0
                        xmlParserNsPop(ctxt, nbNs);
11229
0
    }
11230
11231
0
                /* An empty name stack means the root element is closed. */
                if (ctxt->nameNr == 0)
11232
0
                    ctxt->instate = XML_PARSER_EPILOG;
11233
0
                else
11234
0
                    ctxt->instate = XML_PARSER_CONTENT;
11235
0
                break;
11236
0
      }
11237
0
            case XML_PARSER_CONTENT: {
11238
0
    cur = ctxt->input->cur[0];
11239
11240
0
    if (cur == '<') {
11241
0
                    if ((!terminate) && (avail < 2))
11242
0
                        goto done;
11243
0
        next = ctxt->input->cur[1];
11244
11245
0
                    if (next == '/') {
11246
0
                        ctxt->instate = XML_PARSER_END_TAG;
11247
0
                        break;
11248
0
                    } else if (next == '?') {
11249
0
                        /* Processing instruction: wait for its "?>". */
                        if ((!terminate) &&
11250
0
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11251
0
                            goto done;
11252
0
                        xmlParsePI(ctxt);
11253
0
                        ctxt->instate = XML_PARSER_CONTENT;
11254
0
                        break;
11255
0
                    } else if (next == '!') {
11256
0
                        if ((!terminate) && (avail < 3))
11257
0
                            goto done;
11258
0
                        next = ctxt->input->cur[2];
11259
11260
0
                        if (next == '-') {
11261
0
                            if ((!terminate) && (avail < 4))
11262
0
                                goto done;
11263
0
                            if (ctxt->input->cur[3] == '-') {
11264
0
                                /* Comment: wait for its "-->". */
                                if ((!terminate) &&
11265
0
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11266
0
                                    goto done;
11267
0
                                xmlParseComment(ctxt);
11268
0
                                ctxt->instate = XML_PARSER_CONTENT;
11269
0
                                break;
11270
0
                            }
11271
0
                        } else if (next == '[') {
11272
0
                            /* The CDATA start marker tested below is 9 bytes long. */
                            if ((!terminate) && (avail < 9))
11273
0
                                goto done;
11274
0
                            if ((ctxt->input->cur[2] == '[') &&
11275
0
                                (ctxt->input->cur[3] == 'C') &&
11276
0
                                (ctxt->input->cur[4] == 'D') &&
11277
0
                                (ctxt->input->cur[5] == 'A') &&
11278
0
                                (ctxt->input->cur[6] == 'T') &&
11279
0
                                (ctxt->input->cur[7] == 'A') &&
11280
0
                                (ctxt->input->cur[8] == '[')) {
11281
0
                                SKIP(9);
11282
0
                                ctxt->instate = XML_PARSER_CDATA_SECTION;
11283
0
                                break;
11284
0
                            }
11285
0
                        }
11286
0
                    }
11287
0
    } else if (cur == '&') {
11288
0
        /* Reference: wait for the terminating ';'. */
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11289
0
      goto done;
11290
0
        xmlParseReference(ctxt);
11291
0
                    break;
11292
0
    } else {
11293
        /* TODO Avoid the extra copy, handle directly !!! */
11294
        /*
11295
         * Goal of the following test is:
11296
         *  - minimize calls to the SAX 'character' callback
11297
         *    when they are mergeable
11298
         *  - handle a problem for isBlank when we only parse
11299
         *    a sequence of blank chars and the next one is
11300
         *    not available to check against '<' presence.
11301
         *  - tries to homogenize the differences in SAX
11302
         *    callbacks between the push and pull versions
11303
         *    of the parser.
11304
         */
11305
0
        if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11306
0
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11307
0
          goto done;
11308
0
                    }
11309
0
                    ctxt->checkIndex = 0;
11310
0
        xmlParseCharDataInternal(ctxt, !terminate);
11311
0
                    break;
11312
0
    }
11313
11314
0
                /*
                 * Only reached for a '<' that starts neither an end tag,
                 * a PI, a comment nor a CDATA section: every other path
                 * above leaves the switch. Handle it as a start tag.
                 */
                ctxt->instate = XML_PARSER_START_TAG;
11315
0
    break;
11316
0
      }
11317
0
            case XML_PARSER_END_TAG:
11318
0
    /* Wait until the '>' closing the end tag is buffered. */
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11319
0
        goto done;
11320
0
    if (ctxt->sax2) {
11321
0
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11322
0
        nameNsPop(ctxt);
11323
0
    }
11324
0
#ifdef LIBXML_SAX1_ENABLED
11325
0
      else
11326
0
        xmlParseEndTag1(ctxt, 0);
11327
0
#endif /* LIBXML_SAX1_ENABLED */
11328
0
    /* An empty name stack means the root element is closed. */
    if (ctxt->nameNr == 0) {
11329
0
        ctxt->instate = XML_PARSER_EPILOG;
11330
0
    } else {
11331
0
        ctxt->instate = XML_PARSER_CONTENT;
11332
0
    }
11333
0
    break;
11334
0
            case XML_PARSER_CDATA_SECTION: {
11335
          /*
11336
     * The Push mode need to have the SAX callback for
11337
     * cdataBlock merge back contiguous callbacks.
11338
     */
11339
0
    const xmlChar *term;
11340
11341
0
                if (terminate) {
11342
                    /*
11343
                     * Don't call xmlParseLookupString. If 'terminate'
11344
                     * is set, checkIndex is invalid.
11345
                     */
11346
0
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11347
0
                                           "]]>");
11348
0
                } else {
11349
0
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11350
0
                }
11351
11352
0
    if (term == NULL) {
11353
0
        /*
         * No "]]>" buffered yet: check a partial chunk and hand it to
         * the SAX callback; the state stays XML_PARSER_CDATA_SECTION.
         */
        int tmp, size;
11354
11355
0
                    if (terminate) {
11356
                        /* Unfinished CDATA section */
11357
0
                        size = ctxt->input->end - ctxt->input->cur;
11358
0
                    } else {
11359
0
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11360
0
                            goto done;
11361
0
                        ctxt->checkIndex = 0;
11362
                        /* XXX: Why don't we pass the full buffer? */
11363
0
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11364
0
                    }
11365
0
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11366
0
                    /*
                     * Zero or negative: nothing valid could be consumed.
                     * Move cur to the offset reported by the check and
                     * leave through the encoding error exit.
                     */
                    if (tmp <= 0) {
11367
0
                        tmp = -tmp;
11368
0
                        ctxt->input->cur += tmp;
11369
0
                        goto encoding_error;
11370
0
                    }
11371
0
                    /* Prefer cdataBlock, fall back to characters. */
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11372
0
                        if (ctxt->sax->cdataBlock != NULL)
11373
0
                            ctxt->sax->cdataBlock(ctxt->userData,
11374
0
                                                  ctxt->input->cur, tmp);
11375
0
                        else if (ctxt->sax->characters != NULL)
11376
0
                            ctxt->sax->characters(ctxt->userData,
11377
0
                                                  ctxt->input->cur, tmp);
11378
0
                    }
11379
0
                    SKIPL(tmp);
11380
0
    } else {
11381
0
                    /* Number of content bytes in front of the "]]>" terminator. */
                    int base = term - CUR_PTR;
11382
0
        int tmp;
11383
11384
0
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11385
0
        /* The whole remaining content has to check out as valid. */
        if ((tmp < 0) || (tmp != base)) {
11386
0
      tmp = -tmp;
11387
0
      ctxt->input->cur += tmp;
11388
0
      goto encoding_error;
11389
0
        }
11390
0
        if ((ctxt->sax != NULL) && (base == 0) &&
11391
0
            (ctxt->sax->cdataBlock != NULL) &&
11392
0
            (!ctxt->disableSAX)) {
11393
      /*
11394
       * Special case to provide identical behaviour
11395
       * between pull and push parsers on empty CDATA
11396
       * sections
       * An empty block is only reported when the start
       * marker directly precedes the current position.
11397
       */
11398
0
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11399
0
           (!strncmp((const char *)&ctxt->input->cur[-9],
11400
0
                     "<![CDATA[", 9)))
11401
0
           ctxt->sax->cdataBlock(ctxt->userData,
11402
0
                                 BAD_CAST "", 0);
11403
0
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11404
0
      (!ctxt->disableSAX)) {
11405
0
      if (ctxt->sax->cdataBlock != NULL)
11406
0
          ctxt->sax->cdataBlock(ctxt->userData,
11407
0
              ctxt->input->cur, base);
11408
0
      else if (ctxt->sax->characters != NULL)
11409
0
          ctxt->sax->characters(ctxt->userData,
11410
0
              ctxt->input->cur, base);
11411
0
        }
11412
0
        /* Skip the content plus the three bytes of "]]>". */
        SKIPL(base + 3);
11413
0
        ctxt->instate = XML_PARSER_CONTENT;
11414
0
    }
11415
0
    break;
11416
0
      }
11417
0
            case XML_PARSER_MISC:
11418
0
            case XML_PARSER_PROLOG:
11419
0
            case XML_PARSER_EPILOG:
11420
0
    /* Blanks were skipped, so the available byte count is refreshed. */
    SKIP_BLANKS;
11421
0
                avail = ctxt->input->end - ctxt->input->cur;
11422
0
    if (avail < 1)
11423
0
        goto done;
11424
0
    if (ctxt->input->cur[0] == '<') {
11425
0
                    if ((!terminate) && (avail < 2))
11426
0
                        goto done;
11427
0
                    next = ctxt->input->cur[1];
11428
0
                    if (next == '?') {
11429
0
                        if ((!terminate) &&
11430
0
                            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11431
0
                            goto done;
11432
0
                        xmlParsePI(ctxt);
11433
0
                        break;
11434
0
                    } else if (next == '!') {
11435
0
                        if ((!terminate) && (avail < 3))
11436
0
                            goto done;
11437
11438
0
                        if (ctxt->input->cur[2] == '-') {
11439
0
                            if ((!terminate) && (avail < 4))
11440
0
                                goto done;
11441
0
                            if (ctxt->input->cur[3] == '-') {
11442
0
                                if ((!terminate) &&
11443
0
                                    (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11444
0
                                    goto done;
11445
0
                                xmlParseComment(ctxt);
11446
0
                                break;
11447
0
                            }
11448
0
                        } else if (ctxt->instate == XML_PARSER_MISC) {
11449
0
                            /* A DOCTYPE declaration is only looked for in the MISC state. */
                            if ((!terminate) && (avail < 9))
11450
0
                                goto done;
11451
0
                            if ((ctxt->input->cur[2] == 'D') &&
11452
0
                                (ctxt->input->cur[3] == 'O') &&
11453
0
                                (ctxt->input->cur[4] == 'C') &&
11454
0
                                (ctxt->input->cur[5] == 'T') &&
11455
0
                                (ctxt->input->cur[6] == 'Y') &&
11456
0
                                (ctxt->input->cur[7] == 'P') &&
11457
0
                                (ctxt->input->cur[8] == 'E')) {
11458
0
                                if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11459
0
                                    goto done;
11460
0
                                ctxt->inSubset = 1;
11461
0
                                xmlParseDocTypeDecl(ctxt);
11462
0
                                /* A '[' here opens an internal subset, handled in the DTD state. */
                                if (RAW == '[') {
11463
0
                                    ctxt->instate = XML_PARSER_DTD;
11464
0
                                } else {
11465
                                    /*
11466
                                     * Create and update the external subset.
11467
                                     */
11468
0
                                    ctxt->inSubset = 2;
11469
0
                                    if ((ctxt->sax != NULL) &&
11470
0
                                        (!ctxt->disableSAX) &&
11471
0
                                        (ctxt->sax->externalSubset != NULL))
11472
0
                                        ctxt->sax->externalSubset(
11473
0
                                                ctxt->userData,
11474
0
                                                ctxt->intSubName,
11475
0
                                                ctxt->extSubSystem,
11476
0
                                                ctxt->extSubURI);
11477
0
                                    ctxt->inSubset = 0;
11478
0
                                    xmlCleanSpecialAttr(ctxt);
11479
0
                                    ctxt->instate = XML_PARSER_PROLOG;
11480
0
                                }
11481
0
                                break;
11482
0
                            }
11483
0
                        }
11484
0
                    }
11485
0
                }
11486
11487
0
                /*
                 * Reached for anything that isn't a PI, a comment or (in
                 * the MISC state) a DOCTYPE declaration. After the root
                 * element this is an error and ends parsing; before it,
                 * a start tag is expected next.
                 */
                if (ctxt->instate == XML_PARSER_EPILOG) {
11488
0
                    /* XML_ERR_DOCUMENT_END is only raised if no error is recorded yet. */
                    if (ctxt->errNo == XML_ERR_OK)
11489
0
                        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11490
0
        ctxt->instate = XML_PARSER_EOF;
11491
0
                    xmlFinishDocument(ctxt);
11492
0
                } else {
11493
0
        ctxt->instate = XML_PARSER_START_TAG;
11494
0
    }
11495
0
    break;
11496
0
            case XML_PARSER_DTD: {
11497
0
                /* Wait until the lookup helper reports the end of the internal subset. */
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11498
0
                    goto done;
11499
0
    xmlParseInternalSubset(ctxt);
11500
0
    /* Same external subset handling as for a DOCTYPE without internal subset. */
    ctxt->inSubset = 2;
11501
0
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11502
0
        (ctxt->sax->externalSubset != NULL))
11503
0
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11504
0
          ctxt->extSubSystem, ctxt->extSubURI);
11505
0
    ctxt->inSubset = 0;
11506
0
    xmlCleanSpecialAttr(ctxt);
11507
0
    ctxt->instate = XML_PARSER_PROLOG;
11508
0
                break;
11509
0
      }
11510
0
            default:
11511
0
                /* Unknown state: report it and stop on the next iteration via the EOF state. */
                xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11512
0
      "PP: internal error\n");
11513
0
    ctxt->instate = XML_PARSER_EOF;
11514
0
    break;
11515
0
  }
11516
0
    }
11517
0
done:
11518
0
    return(ret);
11519
0
encoding_error:
11520
    /* Only report the first error */
    /* The flag set below keeps later calls from reporting it again for this input. */
11521
0
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
11522
0
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
11523
0
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
11524
0
    }
11525
0
    return(0);
11526
0
}
11527
11528
/**
11529
 * xmlParseChunk:
11530
 * @ctxt:  an XML parser context
11531
 * @chunk:  chunk of memory
11532
 * @size:  size of chunk in bytes
11533
 * @terminate:  last chunk indicator
11534
 *
11535
 * Parse a chunk of memory in push parser mode.
11536
 *
11537
 * Assumes that the parser context was initialized with
11538
 * xmlCreatePushParserCtxt.
11539
 *
11540
 * The last chunk, which will often be empty, must be marked with
11541
 * the @terminate flag. With the default SAX callbacks, the resulting
11542
 * document will be available in ctxt->myDoc. This pointer will not
11543
 * be freed by the library.
11544
 *
11545
 * If the document isn't well-formed, ctxt->myDoc is set to NULL.
11546
 * The push parser doesn't support recovery mode.
11547
 *
11548
 * Returns an xmlParserErrors code (0 on success).
11549
 */
11550
int
11551
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11552
0
              int terminate) {
11553
0
    size_t curBase;
11554
0
    size_t maxLength;
11555
0
    int end_in_lf = 0;
11556
11557
0
    if ((ctxt == NULL) || (size < 0))
11558
0
        return(XML_ERR_ARGUMENT);
11559
0
    if (ctxt->disableSAX != 0)
11560
0
        return(ctxt->errNo);
11561
0
    if (ctxt->input == NULL)
11562
0
        return(XML_ERR_INTERNAL_ERROR);
11563
11564
0
    ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11565
0
    if (ctxt->instate == XML_PARSER_START)
11566
0
        xmlCtxtInitializeLate(ctxt);
11567
0
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11568
0
        (chunk[size - 1] == '\r')) {
11569
0
  end_in_lf = 1;
11570
0
  size--;
11571
0
    }
11572
11573
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11574
0
        (ctxt->input->buf != NULL))  {
11575
0
  size_t pos = ctxt->input->cur - ctxt->input->base;
11576
0
  int res;
11577
11578
0
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11579
0
        xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11580
0
  if (res < 0) {
11581
0
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11582
0
      xmlHaltParser(ctxt);
11583
0
      return(ctxt->errNo);
11584
0
  }
11585
0
    }
11586
11587
0
    xmlParseTryOrFinish(ctxt, terminate);
11588
11589
0
    curBase = ctxt->input->cur - ctxt->input->base;
11590
0
    maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11591
0
                XML_MAX_HUGE_LENGTH :
11592
0
                XML_MAX_LOOKUP_LIMIT;
11593
0
    if (curBase > maxLength) {
11594
0
        xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11595
0
                    "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11596
0
        xmlHaltParser(ctxt);
11597
0
    }
11598
11599
0
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11600
0
        return(ctxt->errNo);
11601
11602
0
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11603
0
        (ctxt->input->buf != NULL)) {
11604
0
  size_t pos = ctxt->input->cur - ctxt->input->base;
11605
0
        int res;
11606
11607
0
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11608
0
  xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11609
0
        if (res < 0) {
11610
0
            xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11611
0
            xmlHaltParser(ctxt);
11612
0
            return(ctxt->errNo);
11613
0
        }
11614
0
    }
11615
0
    if (terminate) {
11616
  /*
11617
   * Check for termination
11618
   */
11619
0
        if ((ctxt->instate != XML_PARSER_EOF) &&
11620
0
            (ctxt->instate != XML_PARSER_EPILOG)) {
11621
0
            if (ctxt->nameNr > 0) {
11622
0
                const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11623
0
                int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11624
0
                xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11625
0
                        "Premature end of data in tag %s line %d\n",
11626
0
                        name, line, NULL);
11627
0
            } else if (ctxt->instate == XML_PARSER_START) {
11628
0
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11629
0
            } else {
11630
0
                xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11631
0
                               "Start tag expected, '<' not found\n");
11632
0
            }
11633
0
        } else if ((ctxt->input->buf != NULL) &&
11634
0
                   (ctxt->input->buf->encoder != NULL) &&
11635
0
                   (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
11636
0
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
11637
0
                           "Truncated multi-byte sequence at EOF\n");
11638
0
        }
11639
0
  if (ctxt->instate != XML_PARSER_EOF) {
11640
0
            ctxt->instate = XML_PARSER_EOF;
11641
0
            xmlFinishDocument(ctxt);
11642
0
  }
11643
0
    }
11644
0
    if (ctxt->wellFormed == 0)
11645
0
  return((xmlParserErrors) ctxt->errNo);
11646
0
    else
11647
0
        return(0);
11648
0
}
11649
11650
/************************************************************************
11651
 *                  *
11652
 *    I/O front end functions to the parser     *
11653
 *                  *
11654
 ************************************************************************/
11655
11656
/**
11657
 * xmlCreatePushParserCtxt:
11658
 * @sax:  a SAX handler (optional)
11659
 * @user_data:  user data for SAX callbacks (optional)
11660
 * @chunk:  initial chunk (optional, deprecated)
11661
 * @size:  size of initial chunk in bytes
11662
 * @filename:  file name or URI (optional)
11663
 *
11664
 * Create a parser context for using the XML parser in push mode.
11665
 * See xmlParseChunk.
11666
 *
11667
 * Passing an initial chunk is useless and deprecated.
11668
 *
11669
 * @filename is used as base URI to fetch external entities and for
11670
 * error reports.
11671
 *
11672
 * Returns the new parser context or NULL in case of error.
11673
 */
11674
11675
xmlParserCtxtPtr
11676
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11677
0
                        const char *chunk, int size, const char *filename) {
11678
0
    xmlParserCtxtPtr ctxt;
11679
0
    xmlParserInputPtr input;
11680
11681
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11682
0
    if (ctxt == NULL)
11683
0
  return(NULL);
11684
11685
0
    ctxt->options &= ~XML_PARSE_NODICT;
11686
0
    ctxt->dictNames = 1;
11687
11688
0
    input = xmlNewInputPush(ctxt, filename, chunk, size, NULL);
11689
0
    if (input == NULL) {
11690
0
  xmlFreeParserCtxt(ctxt);
11691
0
  return(NULL);
11692
0
    }
11693
0
    inputPush(ctxt, input);
11694
11695
0
    return(ctxt);
11696
0
}
11697
#endif /* LIBXML_PUSH_ENABLED */
11698
11699
/**
11700
 * xmlStopParser:
11701
 * @ctxt:  an XML parser context
11702
 *
11703
 * Blocks further parser processing
11704
 */
11705
void
11706
0
xmlStopParser(xmlParserCtxtPtr ctxt) {
11707
0
    if (ctxt == NULL)
11708
0
        return;
11709
0
    xmlHaltParser(ctxt);
11710
0
    if (ctxt->errNo != XML_ERR_NO_MEMORY)
11711
0
        ctxt->errNo = XML_ERR_USER_STOP;
11712
0
}
11713
11714
/**
11715
 * xmlCreateIOParserCtxt:
11716
 * @sax:  a SAX handler (optional)
11717
 * @user_data:  user data for SAX callbacks (optional)
11718
 * @ioread:  an I/O read function
11719
 * @ioclose:  an I/O close function (optional)
11720
 * @ioctx:  an I/O handler
11721
 * @enc:  the charset encoding if known (deprecated)
11722
 *
11723
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadIO.
11724
 *
11725
 * Create a parser context for using the XML parser with an existing
11726
 * I/O stream
11727
 *
11728
 * Returns the new parser context or NULL
11729
 */
11730
xmlParserCtxtPtr
11731
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11732
                      xmlInputReadCallback ioread,
11733
                      xmlInputCloseCallback ioclose,
11734
0
                      void *ioctx, xmlCharEncoding enc) {
11735
0
    xmlParserCtxtPtr ctxt;
11736
0
    xmlParserInputPtr input;
11737
0
    const char *encoding;
11738
11739
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11740
0
    if (ctxt == NULL)
11741
0
  return(NULL);
11742
11743
0
    encoding = xmlGetCharEncodingName(enc);
11744
0
    input = xmlNewInputIO(ctxt, NULL, ioread, ioclose, ioctx, encoding, 0);
11745
0
    if (input == NULL) {
11746
0
  xmlFreeParserCtxt(ctxt);
11747
0
        return (NULL);
11748
0
    }
11749
0
    inputPush(ctxt, input);
11750
11751
0
    return(ctxt);
11752
0
}
11753
11754
#ifdef LIBXML_VALID_ENABLED
11755
/************************************************************************
11756
 *                  *
11757
 *    Front ends when parsing a DTD       *
11758
 *                  *
11759
 ************************************************************************/
11760
11761
/**
11762
 * xmlIOParseDTD:
11763
 * @sax:  the SAX handler block or NULL
11764
 * @input:  an Input Buffer
11765
 * @enc:  the charset encoding if known
11766
 *
11767
 * Load and parse a DTD
11768
 *
11769
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11770
 * @input will be freed by the function in any case.
11771
 */
11772
11773
xmlDtdPtr
11774
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11775
0
        xmlCharEncoding enc) {
11776
0
    xmlDtdPtr ret = NULL;
11777
0
    xmlParserCtxtPtr ctxt;
11778
0
    xmlParserInputPtr pinput = NULL;
11779
11780
0
    if (input == NULL)
11781
0
  return(NULL);
11782
11783
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11784
0
    if (ctxt == NULL) {
11785
0
        xmlFreeParserInputBuffer(input);
11786
0
  return(NULL);
11787
0
    }
11788
11789
    /*
11790
     * generate a parser input from the I/O handler
11791
     */
11792
11793
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11794
0
    if (pinput == NULL) {
11795
0
        xmlFreeParserInputBuffer(input);
11796
0
  xmlFreeParserCtxt(ctxt);
11797
0
  return(NULL);
11798
0
    }
11799
11800
    /*
11801
     * plug some encoding conversion routines here.
11802
     */
11803
0
    if (xmlPushInput(ctxt, pinput) < 0) {
11804
0
  xmlFreeParserCtxt(ctxt);
11805
0
  return(NULL);
11806
0
    }
11807
0
    if (enc != XML_CHAR_ENCODING_NONE) {
11808
0
        xmlSwitchEncoding(ctxt, enc);
11809
0
    }
11810
11811
    /*
11812
     * let's parse that entity knowing it's an external subset.
11813
     */
11814
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11815
0
    if (ctxt->myDoc == NULL) {
11816
0
  xmlErrMemory(ctxt);
11817
0
  return(NULL);
11818
0
    }
11819
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11820
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11821
0
                                 BAD_CAST "none", BAD_CAST "none");
11822
11823
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11824
11825
0
    if (ctxt->myDoc != NULL) {
11826
0
  if (ctxt->wellFormed) {
11827
0
      ret = ctxt->myDoc->extSubset;
11828
0
      ctxt->myDoc->extSubset = NULL;
11829
0
      if (ret != NULL) {
11830
0
    xmlNodePtr tmp;
11831
11832
0
    ret->doc = NULL;
11833
0
    tmp = ret->children;
11834
0
    while (tmp != NULL) {
11835
0
        tmp->doc = NULL;
11836
0
        tmp = tmp->next;
11837
0
    }
11838
0
      }
11839
0
  } else {
11840
0
      ret = NULL;
11841
0
  }
11842
0
        xmlFreeDoc(ctxt->myDoc);
11843
0
        ctxt->myDoc = NULL;
11844
0
    }
11845
0
    xmlFreeParserCtxt(ctxt);
11846
11847
0
    return(ret);
11848
0
}
11849
11850
/**
11851
 * xmlSAXParseDTD:
11852
 * @sax:  the SAX handler block
11853
 * @ExternalID:  a NAME* containing the External ID of the DTD
11854
 * @SystemID:  a NAME* containing the URL to the DTD
11855
 *
11856
 * DEPRECATED: Don't use.
11857
 *
11858
 * Load and parse an external subset.
11859
 *
11860
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11861
 */
11862
11863
xmlDtdPtr
11864
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11865
0
                          const xmlChar *SystemID) {
11866
0
    xmlDtdPtr ret = NULL;
11867
0
    xmlParserCtxtPtr ctxt;
11868
0
    xmlParserInputPtr input = NULL;
11869
0
    xmlChar* systemIdCanonic;
11870
11871
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11872
11873
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
11874
0
    if (ctxt == NULL) {
11875
0
  return(NULL);
11876
0
    }
11877
11878
    /*
11879
     * Canonicalise the system ID
11880
     */
11881
0
    systemIdCanonic = xmlCanonicPath(SystemID);
11882
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11883
0
  xmlFreeParserCtxt(ctxt);
11884
0
  return(NULL);
11885
0
    }
11886
11887
    /*
11888
     * Ask the Entity resolver to load the damn thing
11889
     */
11890
11891
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11892
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11893
0
                                   systemIdCanonic);
11894
0
    if (input == NULL) {
11895
0
  xmlFreeParserCtxt(ctxt);
11896
0
  if (systemIdCanonic != NULL)
11897
0
      xmlFree(systemIdCanonic);
11898
0
  return(NULL);
11899
0
    }
11900
11901
    /*
11902
     * plug some encoding conversion routines here.
11903
     */
11904
0
    if (xmlPushInput(ctxt, input) < 0) {
11905
0
  xmlFreeParserCtxt(ctxt);
11906
0
  if (systemIdCanonic != NULL)
11907
0
      xmlFree(systemIdCanonic);
11908
0
  return(NULL);
11909
0
    }
11910
11911
0
    xmlDetectEncoding(ctxt);
11912
11913
0
    if (input->filename == NULL)
11914
0
  input->filename = (char *) systemIdCanonic;
11915
0
    else
11916
0
  xmlFree(systemIdCanonic);
11917
11918
    /*
11919
     * let's parse that entity knowing it's an external subset.
11920
     */
11921
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11922
0
    if (ctxt->myDoc == NULL) {
11923
0
  xmlErrMemory(ctxt);
11924
0
  xmlFreeParserCtxt(ctxt);
11925
0
  return(NULL);
11926
0
    }
11927
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
11928
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11929
0
                                 ExternalID, SystemID);
11930
0
    if (ctxt->myDoc->extSubset == NULL) {
11931
0
        xmlFreeDoc(ctxt->myDoc);
11932
0
        xmlFreeParserCtxt(ctxt);
11933
0
        return(NULL);
11934
0
    }
11935
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11936
11937
0
    if (ctxt->myDoc != NULL) {
11938
0
  if (ctxt->wellFormed) {
11939
0
      ret = ctxt->myDoc->extSubset;
11940
0
      ctxt->myDoc->extSubset = NULL;
11941
0
      if (ret != NULL) {
11942
0
    xmlNodePtr tmp;
11943
11944
0
    ret->doc = NULL;
11945
0
    tmp = ret->children;
11946
0
    while (tmp != NULL) {
11947
0
        tmp->doc = NULL;
11948
0
        tmp = tmp->next;
11949
0
    }
11950
0
      }
11951
0
  } else {
11952
0
      ret = NULL;
11953
0
  }
11954
0
        xmlFreeDoc(ctxt->myDoc);
11955
0
        ctxt->myDoc = NULL;
11956
0
    }
11957
0
    xmlFreeParserCtxt(ctxt);
11958
11959
0
    return(ret);
11960
0
}
11961
11962
11963
/**
11964
 * xmlParseDTD:
11965
 * @ExternalID:  a NAME* containing the External ID of the DTD
11966
 * @SystemID:  a NAME* containing the URL to the DTD
11967
 *
11968
 * Load and parse an external subset.
11969
 *
11970
 * Returns the resulting xmlDtdPtr or NULL in case of error.
11971
 */
11972
11973
xmlDtdPtr
11974
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11975
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11976
0
}
11977
#endif /* LIBXML_VALID_ENABLED */
11978
11979
/************************************************************************
11980
 *                  *
11981
 *    Front ends when parsing an Entity     *
11982
 *                  *
11983
 ************************************************************************/
11984
11985
static xmlNodePtr
11986
xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11987
590
                    int hasTextDecl, int buildTree) {
11988
590
    xmlNodePtr root = NULL;
11989
590
    xmlNodePtr list = NULL;
11990
590
    xmlChar *rootName = BAD_CAST "#root";
11991
590
    int result;
11992
11993
590
    if (buildTree) {
11994
590
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11995
590
        if (root == NULL) {
11996
0
            xmlErrMemory(ctxt);
11997
0
            goto error;
11998
0
        }
11999
590
    }
12000
12001
590
    if (xmlPushInput(ctxt, input) < 0)
12002
0
        goto error;
12003
12004
590
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
12005
590
    spacePush(ctxt, -1);
12006
12007
590
    if (buildTree)
12008
590
        nodePush(ctxt, root);
12009
12010
590
    if (hasTextDecl) {
12011
0
        xmlDetectEncoding(ctxt);
12012
12013
        /*
12014
         * Parse a possible text declaration first
12015
         */
12016
0
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
12017
0
            (IS_BLANK_CH(NXT(5)))) {
12018
0
            xmlParseTextDecl(ctxt);
12019
            /*
12020
             * An XML-1.0 document can't reference an entity not XML-1.0
12021
             */
12022
0
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
12023
0
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12024
0
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12025
0
                               "Version mismatch between document and "
12026
0
                               "entity\n");
12027
0
            }
12028
0
        }
12029
0
    }
12030
12031
590
    xmlParseContentInternal(ctxt);
12032
12033
590
    if (ctxt->input->cur < ctxt->input->end)
12034
31
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12035
12036
590
    if ((ctxt->wellFormed) ||
12037
590
        ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
12038
446
        if (root != NULL) {
12039
446
            xmlNodePtr cur;
12040
12041
            /*
12042
             * Return the newly created nodeset after unlinking it from
12043
             * its pseudo parent.
12044
             */
12045
446
            cur = root->children;
12046
446
            list = cur;
12047
1.30k
            while (cur != NULL) {
12048
855
                cur->parent = NULL;
12049
855
                cur = cur->next;
12050
855
            }
12051
446
            root->children = NULL;
12052
446
            root->last = NULL;
12053
446
        }
12054
446
    }
12055
12056
    /*
12057
     * Read the rest of the stream in case of errors. We want
12058
     * to account for the whole entity size.
12059
     */
12060
592
    do {
12061
592
        ctxt->input->cur = ctxt->input->end;
12062
592
        xmlParserShrink(ctxt);
12063
592
        result = xmlParserGrow(ctxt);
12064
592
    } while (result > 0);
12065
12066
590
    if (buildTree)
12067
590
        nodePop(ctxt);
12068
12069
590
    namePop(ctxt);
12070
590
    spacePop(ctxt);
12071
12072
    /* xmlPopInput would free the stream */
12073
590
    inputPop(ctxt);
12074
12075
590
error:
12076
590
    xmlFreeNode(root);
12077
12078
590
    return(list);
12079
590
}
12080
12081
static void
12082
599
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
12083
599
    xmlParserInputPtr input;
12084
599
    xmlNodePtr list;
12085
599
    unsigned long consumed;
12086
599
    int isExternal;
12087
599
    int buildTree;
12088
599
    int oldMinNsIndex;
12089
599
    int oldNodelen, oldNodemem;
12090
12091
599
    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
12092
599
    buildTree = (ctxt->node != NULL);
12093
12094
    /*
12095
     * Recursion check
12096
     */
12097
599
    if (ent->flags & XML_ENT_EXPANDING) {
12098
9
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
12099
9
        xmlHaltParser(ctxt);
12100
9
        goto error;
12101
9
    }
12102
12103
    /*
12104
     * Load entity
12105
     */
12106
590
    input = xmlNewEntityInputStream(ctxt, ent);
12107
590
    if (input == NULL)
12108
0
        goto error;
12109
12110
    /*
12111
     * When building a tree, we need to limit the scope of namespace
12112
     * declarations, so that entities don't reference xmlNs structs
12113
     * from the parent of a reference.
12114
     */
12115
590
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
12116
590
    if (buildTree)
12117
590
        ctxt->nsdb->minNsIndex = ctxt->nsNr;
12118
12119
590
    oldNodelen = ctxt->nodelen;
12120
590
    oldNodemem = ctxt->nodemem;
12121
590
    ctxt->nodelen = 0;
12122
590
    ctxt->nodemem = 0;
12123
12124
    /*
12125
     * Parse content
12126
     *
12127
     * This initiates a recursive call chain:
12128
     *
12129
     * - xmlCtxtParseContent
12130
     * - xmlParseContentInternal
12131
     * - xmlParseReference
12132
     * - xmlCtxtParseEntity
12133
     *
12134
     * The nesting depth is limited by the maximum number of inputs,
12135
     * see xmlPushInput.
12136
     *
12137
     * It's possible to make this non-recursive (minNsIndex must be
12138
     * stored in the input struct) at the expense of code readability.
12139
     */
12140
12141
590
    ent->flags |= XML_ENT_EXPANDING;
12142
12143
590
    list = xmlCtxtParseContent(ctxt, input, isExternal, buildTree);
12144
12145
590
    ent->flags &= ~XML_ENT_EXPANDING;
12146
12147
590
    ctxt->nsdb->minNsIndex = oldMinNsIndex;
12148
590
    ctxt->nodelen = oldNodelen;
12149
590
    ctxt->nodemem = oldNodemem;
12150
12151
    /*
12152
     * Entity size accounting
12153
     */
12154
590
    consumed = input->consumed;
12155
590
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);
12156
12157
590
    if ((ent->flags & XML_ENT_CHECKED) == 0)
12158
311
        xmlSaturatedAdd(&ent->expandedSize, consumed);
12159
12160
590
    if ((ent->flags & XML_ENT_PARSED) == 0) {
12161
311
        if (isExternal)
12162
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
12163
12164
311
        ent->children = list;
12165
12166
1.16k
        while (list != NULL) {
12167
851
            list->parent = (xmlNodePtr) ent;
12168
851
            if (list->next == NULL)
12169
152
                ent->last = list;
12170
851
            list = list->next;
12171
851
        }
12172
311
    } else {
12173
279
        xmlFreeNodeList(list);
12174
279
    }
12175
12176
590
    xmlFreeInputStream(input);
12177
12178
599
error:
12179
599
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
12180
599
}
12181
12182
/**
12183
 * xmlParseCtxtExternalEntity:
12184
 * @ctx:  the existing parsing context
12185
 * @URL:  the URL for the entity to load
12186
 * @ID:  the System ID for the entity to load
12187
 * @lst:  the return value for the set of parsed nodes
12188
 *
12189
 * Parse an external general entity within an existing parsing context
12190
 * An external general parsed entity is well-formed if it matches the
12191
 * production labeled extParsedEnt.
12192
 *
12193
 * [78] extParsedEnt ::= TextDecl? content
12194
 *
12195
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12196
 *    the parser error code otherwise
12197
 */
12198
12199
int
12200
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt, const xmlChar *URL,
12201
0
                           const xmlChar *ID, xmlNodePtr *listOut) {
12202
0
    xmlParserInputPtr input;
12203
0
    xmlNodePtr list;
12204
12205
0
    if (listOut != NULL)
12206
0
        *listOut = NULL;
12207
12208
0
    if (ctxt == NULL)
12209
0
        return(XML_ERR_ARGUMENT);
12210
12211
0
    input = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12212
0
    if (input == NULL)
12213
0
        return(ctxt->errNo);
12214
12215
0
    xmlCtxtInitializeLate(ctxt);
12216
12217
0
    list = xmlCtxtParseContent(ctxt, input, /* hasTextDecl */ 1, 1);
12218
0
    if (*listOut != NULL)
12219
0
        *listOut = list;
12220
0
    else
12221
0
        xmlFreeNodeList(list);
12222
12223
0
    xmlFreeInputStream(input);
12224
0
    return(ctxt->errNo);
12225
0
}
12226
12227
#ifdef LIBXML_SAX1_ENABLED
12228
/**
12229
 * xmlParseExternalEntity:
12230
 * @doc:  the document the chunk pertains to
12231
 * @sax:  the SAX handler block (possibly NULL)
12232
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12233
 * @depth:  Used for loop detection, use 0
12234
 * @URL:  the URL for the entity to load
12235
 * @ID:  the System ID for the entity to load
12236
 * @lst:  the return value for the set of parsed nodes
12237
 *
12238
 * Parse an external general entity
12239
 * An external general parsed entity is well-formed if it matches the
12240
 * production labeled extParsedEnt.
12241
 *
12242
 * [78] extParsedEnt ::= TextDecl? content
12243
 *
12244
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12245
 *    the parser error code otherwise
12246
 */
12247
12248
int
12249
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12250
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
12251
0
    xmlParserCtxtPtr ctxt;
12252
0
    int ret;
12253
12254
0
    if (list != NULL)
12255
0
        *list = NULL;
12256
12257
0
    if (doc == NULL)
12258
0
        return(XML_ERR_ARGUMENT);
12259
12260
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12261
0
    if (ctxt == NULL)
12262
0
        return(XML_ERR_NO_MEMORY);
12263
12264
0
    ctxt->depth = depth;
12265
0
    ctxt->myDoc = doc;
12266
0
    ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
12267
12268
0
    xmlFreeParserCtxt(ctxt);
12269
0
    return(ret);
12270
0
}
12271
12272
/**
12273
 * xmlParseBalancedChunkMemory:
12274
 * @doc:  the document the chunk pertains to (must not be NULL)
12275
 * @sax:  the SAX handler block (possibly NULL)
12276
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12277
 * @depth:  Used for loop detection, use 0
12278
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12279
 * @lst:  the return value for the set of parsed nodes
12280
 *
12281
 * Parse a well-balanced chunk of an XML document
12282
 * called by the parser
12283
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12284
 * the content production in the XML grammar:
12285
 *
12286
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12287
 *
12288
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12289
 *    the parser error code otherwise
12290
 */
12291
12292
int
12293
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12294
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12295
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12296
0
                                                depth, string, lst, 0 );
12297
0
}
12298
#endif /* LIBXML_SAX1_ENABLED */
12299
12300
/**
12301
 * xmlParseInNodeContext:
12302
 * @node:  the context node
12303
 * @data:  the input string
12304
 * @datalen:  the input string length in bytes
12305
 * @options:  a combination of xmlParserOption
12306
 * @lst:  the return value for the set of parsed nodes
12307
 *
12308
 * Parse a well-balanced chunk of an XML document
12309
 * within the context (DTD, namespaces, etc ...) of the given node.
12310
 *
12311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12312
 * the content production in the XML grammar:
12313
 *
12314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12315
 *
12316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12317
 * error code otherwise
12318
 */
12319
xmlParserErrors
12320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12321
0
                      int options, xmlNodePtr *lst) {
12322
0
    xmlParserCtxtPtr ctxt;
12323
0
    xmlDocPtr doc = NULL;
12324
0
    xmlNodePtr fake, cur;
12325
0
    int nsnr = 0;
12326
12327
0
    xmlParserErrors ret = XML_ERR_OK;
12328
12329
    /*
12330
     * check all input parameters, grab the document
12331
     */
12332
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12333
0
        return(XML_ERR_ARGUMENT);
12334
0
    switch (node->type) {
12335
0
        case XML_ELEMENT_NODE:
12336
0
        case XML_ATTRIBUTE_NODE:
12337
0
        case XML_TEXT_NODE:
12338
0
        case XML_CDATA_SECTION_NODE:
12339
0
        case XML_ENTITY_REF_NODE:
12340
0
        case XML_PI_NODE:
12341
0
        case XML_COMMENT_NODE:
12342
0
        case XML_DOCUMENT_NODE:
12343
0
        case XML_HTML_DOCUMENT_NODE:
12344
0
      break;
12345
0
  default:
12346
0
      return(XML_ERR_INTERNAL_ERROR);
12347
12348
0
    }
12349
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12350
0
           (node->type != XML_DOCUMENT_NODE) &&
12351
0
     (node->type != XML_HTML_DOCUMENT_NODE))
12352
0
  node = node->parent;
12353
0
    if (node == NULL)
12354
0
  return(XML_ERR_INTERNAL_ERROR);
12355
0
    if (node->type == XML_ELEMENT_NODE)
12356
0
  doc = node->doc;
12357
0
    else
12358
0
        doc = (xmlDocPtr) node;
12359
0
    if (doc == NULL)
12360
0
  return(XML_ERR_INTERNAL_ERROR);
12361
12362
    /*
12363
     * allocate a context and set-up everything not related to the
12364
     * node position in the tree
12365
     */
12366
0
    if (doc->type == XML_DOCUMENT_NODE)
12367
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12368
0
#ifdef LIBXML_HTML_ENABLED
12369
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
12370
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12371
        /*
12372
         * When parsing in context, it makes no sense to add implied
12373
         * elements like html/body/etc...
12374
         */
12375
0
        options |= HTML_PARSE_NOIMPLIED;
12376
0
    }
12377
0
#endif
12378
0
    else
12379
0
        return(XML_ERR_INTERNAL_ERROR);
12380
12381
0
    if (ctxt == NULL)
12382
0
        return(XML_ERR_NO_MEMORY);
12383
12384
    /*
12385
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12386
     * We need a dictionary for xmlCtxtInitializeLate, so if there's no doc dict
12387
     * we must wait until the last moment to free the original one.
12388
     */
12389
0
    if (doc->dict != NULL) {
12390
0
        if (ctxt->dict != NULL)
12391
0
      xmlDictFree(ctxt->dict);
12392
0
  ctxt->dict = doc->dict;
12393
0
    } else {
12394
0
        options |= XML_PARSE_NODICT;
12395
0
        ctxt->dictNames = 0;
12396
0
    }
12397
12398
0
    if (doc->encoding != NULL)
12399
0
        xmlSwitchEncodingName(ctxt, (const char *) doc->encoding);
12400
12401
0
    xmlCtxtUseOptions(ctxt, options);
12402
0
    xmlCtxtInitializeLate(ctxt);
12403
0
    ctxt->myDoc = doc;
12404
    /* parsing in context, i.e. as within existing content */
12405
0
    ctxt->input_id = 2;
12406
12407
    /*
12408
     * TODO: Use xmlCtxtParseContent
12409
     */
12410
12411
0
    fake = xmlNewDocComment(node->doc, NULL);
12412
0
    if (fake == NULL) {
12413
0
        xmlFreeParserCtxt(ctxt);
12414
0
  return(XML_ERR_NO_MEMORY);
12415
0
    }
12416
0
    xmlAddChild(node, fake);
12417
12418
0
    if (node->type == XML_ELEMENT_NODE)
12419
0
  nodePush(ctxt, node);
12420
12421
0
    if ((ctxt->html == 0) && (node->type == XML_ELEMENT_NODE)) {
12422
  /*
12423
   * initialize the SAX2 namespaces stack
12424
   */
12425
0
  cur = node;
12426
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12427
0
      xmlNsPtr ns = cur->nsDef;
12428
0
            xmlHashedString hprefix, huri;
12429
12430
0
      while (ns != NULL) {
12431
0
                hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12432
0
                huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12433
0
                if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12434
0
                    nsnr++;
12435
0
    ns = ns->next;
12436
0
      }
12437
0
      cur = cur->parent;
12438
0
  }
12439
0
    }
12440
12441
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12442
  /*
12443
   * ID/IDREF registration will be done in xmlValidateElement below
12444
   */
12445
0
  ctxt->loadsubset |= XML_SKIP_IDS;
12446
0
    }
12447
12448
0
#ifdef LIBXML_HTML_ENABLED
12449
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
12450
0
        __htmlParseContent(ctxt);
12451
0
    else
12452
0
#endif
12453
0
  xmlParseContentInternal(ctxt);
12454
12455
0
    if (ctxt->input->cur < ctxt->input->end)
12456
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12457
12458
0
    xmlParserNsPop(ctxt, nsnr);
12459
12460
0
    if ((ctxt->wellFormed) ||
12461
0
        ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
12462
0
        ret = XML_ERR_OK;
12463
0
    } else {
12464
0
  ret = (xmlParserErrors) ctxt->errNo;
12465
0
    }
12466
12467
    /*
12468
     * Return the newly created nodeset after unlinking it from
12469
     * the pseudo sibling.
12470
     */
12471
12472
0
    cur = fake->next;
12473
0
    fake->next = NULL;
12474
0
    node->last = fake;
12475
12476
0
    if (cur != NULL) {
12477
0
  cur->prev = NULL;
12478
0
    }
12479
12480
0
    *lst = cur;
12481
12482
0
    while (cur != NULL) {
12483
0
  cur->parent = NULL;
12484
0
  cur = cur->next;
12485
0
    }
12486
12487
0
    xmlUnlinkNode(fake);
12488
0
    xmlFreeNode(fake);
12489
12490
12491
0
    if (ret != XML_ERR_OK) {
12492
0
        xmlFreeNodeList(*lst);
12493
0
  *lst = NULL;
12494
0
    }
12495
12496
0
    if (doc->dict != NULL)
12497
0
        ctxt->dict = NULL;
12498
0
    xmlFreeParserCtxt(ctxt);
12499
12500
0
    return(ret);
12501
0
}
12502
12503
#ifdef LIBXML_SAX1_ENABLED
12504
/**
12505
 * xmlParseBalancedChunkMemoryRecover:
12506
 * @doc:  the document the chunk pertains to (must not be NULL)
12507
 * @sax:  the SAX handler block (possibly NULL)
12508
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12509
 * @depth:  Used for loop detection, use 0
12510
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12511
 * @list:  the return value for the set of parsed nodes
12512
 * @recover: return nodes even if the data is broken (use 0)
12513
 *
12514
 * Parse a well-balanced chunk of an XML document
12515
 *
12516
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12517
 * the content production in the XML grammar:
12518
 *
12519
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12520
 *
12521
 * Returns 0 if the chunk is well balanced, or thehe parser error code
12522
 * otherwise.
12523
 *
12524
 * In case recover is set to 1, the nodelist will not be empty even if
12525
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12526
 * some extent.
12527
 */
12528
int
12529
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12530
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *listOut,
12531
0
     int recover) {
12532
0
    xmlParserCtxtPtr ctxt;
12533
0
    xmlParserInputPtr input;
12534
0
    xmlNodePtr list;
12535
0
    int ret;
12536
12537
0
    if (listOut != NULL)
12538
0
        *listOut = NULL;
12539
12540
0
    if (string == NULL)
12541
0
        return(XML_ERR_ARGUMENT);
12542
12543
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12544
0
    if (ctxt == NULL)
12545
0
        return(XML_ERR_NO_MEMORY);
12546
12547
0
    xmlCtxtInitializeLate(ctxt);
12548
12549
0
    ctxt->depth = depth;
12550
0
    ctxt->myDoc = doc;
12551
0
    if (recover) {
12552
0
        ctxt->options |= XML_PARSE_RECOVER;
12553
0
        ctxt->recovery = 1;
12554
0
    }
12555
12556
0
    input = xmlNewStringInputStream(ctxt, string);
12557
0
    if (input == NULL)
12558
0
        return(ctxt->errNo);
12559
12560
0
    list = xmlCtxtParseContent(ctxt, input, /* hasTextDecl */ 0, 1);
12561
0
    if (listOut != NULL)
12562
0
        *listOut = list;
12563
0
    else
12564
0
        xmlFreeNodeList(list);
12565
12566
0
    ret = ctxt->errNo;
12567
12568
0
    xmlFreeInputStream(input);
12569
0
    xmlFreeParserCtxt(ctxt);
12570
0
    return(ret);
12571
0
}
12572
12573
/**
12574
 * xmlSAXParseEntity:
12575
 * @sax:  the SAX handler block
12576
 * @filename:  the filename
12577
 *
12578
 * DEPRECATED: Don't use.
12579
 *
12580
 * parse an XML external entity out of context and build a tree.
12581
 * It use the given SAX function block to handle the parsing callback.
12582
 * If sax is NULL, fallback to the default DOM tree building routines.
12583
 *
12584
 * [78] extParsedEnt ::= TextDecl? content
12585
 *
12586
 * This correspond to a "Well Balanced" chunk
12587
 *
12588
 * Returns the resulting document tree
12589
 */
12590
12591
xmlDocPtr
12592
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12593
0
    xmlDocPtr ret;
12594
0
    xmlParserCtxtPtr ctxt;
12595
12596
0
    ctxt = xmlCreateFileParserCtxt(filename);
12597
0
    if (ctxt == NULL) {
12598
0
  return(NULL);
12599
0
    }
12600
0
    if (sax != NULL) {
12601
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12602
0
            *ctxt->sax = *sax;
12603
0
        } else {
12604
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12605
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12606
0
        }
12607
0
        ctxt->userData = NULL;
12608
0
    }
12609
12610
0
    xmlParseExtParsedEnt(ctxt);
12611
12612
0
    if (ctxt->wellFormed) {
12613
0
  ret = ctxt->myDoc;
12614
0
    } else {
12615
0
        ret = NULL;
12616
0
        xmlFreeDoc(ctxt->myDoc);
12617
0
    }
12618
12619
0
    xmlFreeParserCtxt(ctxt);
12620
12621
0
    return(ret);
12622
0
}
12623
12624
/**
12625
 * xmlParseEntity:
12626
 * @filename:  the filename
12627
 *
12628
 * parse an XML external entity out of context and build a tree.
12629
 *
12630
 * [78] extParsedEnt ::= TextDecl? content
12631
 *
12632
 * This correspond to a "Well Balanced" chunk
12633
 *
12634
 * Returns the resulting document tree
12635
 */
12636
12637
xmlDocPtr
12638
0
xmlParseEntity(const char *filename) {
12639
0
    return(xmlSAXParseEntity(NULL, filename));
12640
0
}
12641
#endif /* LIBXML_SAX1_ENABLED */
12642
12643
/**
12644
 * xmlCreateEntityParserCtxt:
12645
 * @URL:  the entity URL
12646
 * @ID:  the entity PUBLIC ID
12647
 * @base:  a possible base for the target URI
12648
 *
12649
 * DEPRECATED: Use xmlNewInputURL.
12650
 *
12651
 * Create a parser context for an external entity
12652
 * Automatic support for ZLIB/Compress compressed document is provided
12653
 * by default if found at compile-time.
12654
 *
12655
 * Returns the new parser context or NULL
12656
 */
12657
xmlParserCtxtPtr
12658
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12659
0
                    const xmlChar *base) {
12660
0
    xmlParserCtxtPtr ctxt;
12661
0
    xmlParserInputPtr input;
12662
0
    xmlChar *uri = NULL;
12663
12664
0
    ctxt = xmlNewParserCtxt();
12665
0
    if (ctxt == NULL)
12666
0
  return(NULL);
12667
12668
0
    if (base != NULL) {
12669
0
        if (xmlBuildURISafe(URL, base, &uri) < 0)
12670
0
            goto error;
12671
0
        if (uri != NULL)
12672
0
            URL = uri;
12673
0
    }
12674
12675
0
    input = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12676
0
    if (input == NULL)
12677
0
        goto error;
12678
12679
0
    if (inputPush(ctxt, input) < 0)
12680
0
        goto error;
12681
12682
0
    xmlFree(uri);
12683
0
    return(ctxt);
12684
12685
0
error:
12686
0
    xmlFree(uri);
12687
0
    xmlFreeParserCtxt(ctxt);
12688
0
    return(NULL);
12689
0
}
12690
12691
/************************************************************************
12692
 *                  *
12693
 *    Front ends when parsing from a file     *
12694
 *                  *
12695
 ************************************************************************/
12696
12697
/**
12698
 * xmlCreateURLParserCtxt:
12699
 * @filename:  the filename or URL
12700
 * @options:  a combination of xmlParserOption
12701
 *
12702
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12703
 *
12704
 * Create a parser context for a file or URL content.
12705
 * Automatic support for ZLIB/Compress compressed document is provided
12706
 * by default if found at compile-time and for file accesses
12707
 *
12708
 * Returns the new parser context or NULL
12709
 */
12710
xmlParserCtxtPtr
12711
xmlCreateURLParserCtxt(const char *filename, int options)
12712
0
{
12713
0
    xmlParserCtxtPtr ctxt;
12714
0
    xmlParserInputPtr input;
12715
12716
0
    ctxt = xmlNewParserCtxt();
12717
0
    if (ctxt == NULL)
12718
0
  return(NULL);
12719
12720
0
    xmlCtxtUseOptions(ctxt, options);
12721
0
    ctxt->linenumbers = 1;
12722
12723
0
    input = xmlLoadExternalEntity(filename, NULL, ctxt);
12724
0
    if (input == NULL) {
12725
0
  xmlFreeParserCtxt(ctxt);
12726
0
  return(NULL);
12727
0
    }
12728
0
    inputPush(ctxt, input);
12729
12730
0
    return(ctxt);
12731
0
}
12732
12733
/**
12734
 * xmlCreateFileParserCtxt:
12735
 * @filename:  the filename
12736
 *
12737
 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12738
 *
12739
 * Create a parser context for a file content.
12740
 * Automatic support for ZLIB/Compress compressed document is provided
12741
 * by default if found at compile-time.
12742
 *
12743
 * Returns the new parser context or NULL
12744
 */
12745
xmlParserCtxtPtr
12746
xmlCreateFileParserCtxt(const char *filename)
12747
0
{
12748
0
    return(xmlCreateURLParserCtxt(filename, 0));
12749
0
}
12750
12751
#ifdef LIBXML_SAX1_ENABLED
12752
/**
12753
 * xmlSAXParseFileWithData:
12754
 * @sax:  the SAX handler block
12755
 * @filename:  the filename
12756
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12757
 *             documents
12758
 * @data:  the userdata
12759
 *
12760
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12761
 *
12762
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12763
 * compressed document is provided by default if found at compile-time.
12764
 * It use the given SAX function block to handle the parsing callback.
12765
 * If sax is NULL, fallback to the default DOM tree building routines.
12766
 *
12767
 * User data (void *) is stored within the parser context in the
12768
 * context's _private member, so it is available nearly everywhere in libxml
12769
 *
12770
 * Returns the resulting document tree
12771
 */
12772
12773
xmlDocPtr
12774
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12775
0
                        int recovery, void *data) {
12776
0
    xmlDocPtr ret;
12777
0
    xmlParserCtxtPtr ctxt;
12778
0
    xmlParserInputPtr input;
12779
12780
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12781
0
    if (ctxt == NULL)
12782
0
  return(NULL);
12783
12784
0
    if (data != NULL)
12785
0
  ctxt->_private = data;
12786
12787
0
    if (recovery) {
12788
0
        ctxt->options |= XML_PARSE_RECOVER;
12789
0
        ctxt->recovery = 1;
12790
0
    }
12791
12792
0
    input = xmlNewInputURL(ctxt, filename, NULL, NULL, 0);
12793
12794
0
    ret = xmlCtxtParseDocument(ctxt, input);
12795
12796
0
    xmlFreeParserCtxt(ctxt);
12797
0
    return(ret);
12798
0
}
12799
12800
/**
12801
 * xmlSAXParseFile:
12802
 * @sax:  the SAX handler block
12803
 * @filename:  the filename
12804
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12805
 *             documents
12806
 *
12807
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12808
 *
12809
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12810
 * compressed document is provided by default if found at compile-time.
12811
 * It use the given SAX function block to handle the parsing callback.
12812
 * If sax is NULL, fallback to the default DOM tree building routines.
12813
 *
12814
 * Returns the resulting document tree
12815
 */
12816
12817
xmlDocPtr
12818
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12819
0
                          int recovery) {
12820
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12821
0
}
12822
12823
/**
12824
 * xmlRecoverDoc:
12825
 * @cur:  a pointer to an array of xmlChar
12826
 *
12827
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
12828
 *
12829
 * parse an XML in-memory document and build a tree.
12830
 * In the case the document is not Well Formed, a attempt to build a
12831
 * tree is tried anyway
12832
 *
12833
 * Returns the resulting document tree or NULL in case of failure
12834
 */
12835
12836
xmlDocPtr
12837
0
xmlRecoverDoc(const xmlChar *cur) {
12838
0
    return(xmlSAXParseDoc(NULL, cur, 1));
12839
0
}
12840
12841
/**
12842
 * xmlParseFile:
12843
 * @filename:  the filename
12844
 *
12845
 * DEPRECATED: Use xmlReadFile.
12846
 *
12847
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12848
 * compressed document is provided by default if found at compile-time.
12849
 *
12850
 * Returns the resulting document tree if the file was wellformed,
12851
 * NULL otherwise.
12852
 */
12853
12854
xmlDocPtr
12855
0
xmlParseFile(const char *filename) {
12856
0
    return(xmlSAXParseFile(NULL, filename, 0));
12857
0
}
12858
12859
/**
12860
 * xmlRecoverFile:
12861
 * @filename:  the filename
12862
 *
12863
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
12864
 *
12865
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12866
 * compressed document is provided by default if found at compile-time.
12867
 * In the case the document is not Well Formed, it attempts to build
12868
 * a tree anyway
12869
 *
12870
 * Returns the resulting document tree or NULL in case of failure
12871
 */
12872
12873
xmlDocPtr
12874
0
xmlRecoverFile(const char *filename) {
12875
0
    return(xmlSAXParseFile(NULL, filename, 1));
12876
0
}
12877
12878
12879
/**
12880
 * xmlSetupParserForBuffer:
12881
 * @ctxt:  an XML parser context
12882
 * @buffer:  a xmlChar * buffer
12883
 * @filename:  a file name
12884
 *
12885
 * DEPRECATED: Don't use.
12886
 *
12887
 * Setup the parser context to parse a new buffer; Clears any prior
12888
 * contents from the parser context. The buffer parameter must not be
12889
 * NULL, but the filename parameter can be
12890
 */
12891
void
12892
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12893
                             const char* filename)
12894
0
{
12895
0
    xmlParserInputPtr input;
12896
12897
0
    if ((ctxt == NULL) || (buffer == NULL))
12898
0
        return;
12899
12900
0
    xmlClearParserCtxt(ctxt);
12901
12902
0
    input = xmlNewInputString(ctxt, filename, (const char *) buffer, NULL, 0);
12903
0
    if (input == NULL)
12904
0
        return;
12905
0
    inputPush(ctxt, input);
12906
0
}
12907
12908
/**
12909
 * xmlSAXUserParseFile:
12910
 * @sax:  a SAX handler
12911
 * @user_data:  The user data returned on SAX callbacks
12912
 * @filename:  a file name
12913
 *
12914
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12915
 *
12916
 * parse an XML file and call the given SAX handler routines.
12917
 * Automatic support for ZLIB/Compress compressed document is provided
12918
 *
12919
 * Returns 0 in case of success or a error number otherwise
12920
 */
12921
int
12922
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12923
0
                    const char *filename) {
12924
0
    int ret = 0;
12925
0
    xmlParserCtxtPtr ctxt;
12926
12927
0
    ctxt = xmlCreateFileParserCtxt(filename);
12928
0
    if (ctxt == NULL) return -1;
12929
0
    if (sax != NULL) {
12930
0
        if (sax->initialized == XML_SAX2_MAGIC) {
12931
0
            *ctxt->sax = *sax;
12932
0
        } else {
12933
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12934
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12935
0
        }
12936
0
  ctxt->userData = user_data;
12937
0
    }
12938
12939
0
    xmlParseDocument(ctxt);
12940
12941
0
    if (ctxt->wellFormed)
12942
0
  ret = 0;
12943
0
    else {
12944
0
        if (ctxt->errNo != 0)
12945
0
      ret = ctxt->errNo;
12946
0
  else
12947
0
      ret = -1;
12948
0
    }
12949
0
    if (ctxt->myDoc != NULL) {
12950
0
        xmlFreeDoc(ctxt->myDoc);
12951
0
  ctxt->myDoc = NULL;
12952
0
    }
12953
0
    xmlFreeParserCtxt(ctxt);
12954
12955
0
    return ret;
12956
0
}
12957
#endif /* LIBXML_SAX1_ENABLED */
12958
12959
/************************************************************************
12960
 *                  *
12961
 *    Front ends when parsing from memory     *
12962
 *                  *
12963
 ************************************************************************/
12964
12965
/**
12966
 * xmlCreateMemoryParserCtxt:
12967
 * @buffer:  a pointer to a char array
12968
 * @size:  the size of the array
12969
 *
12970
 * Create a parser context for an XML in-memory document. The input buffer
12971
 * must not contain a terminating null byte.
12972
 *
12973
 * Returns the new parser context or NULL
12974
 */
12975
xmlParserCtxtPtr
12976
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12977
0
    xmlParserCtxtPtr ctxt;
12978
0
    xmlParserInputPtr input;
12979
12980
0
    if (size < 0)
12981
0
  return(NULL);
12982
12983
0
    ctxt = xmlNewParserCtxt();
12984
0
    if (ctxt == NULL)
12985
0
  return(NULL);
12986
12987
0
    input = xmlNewInputMemory(ctxt, NULL, buffer, size, NULL, 0);
12988
0
    if (input == NULL) {
12989
0
  xmlFreeParserCtxt(ctxt);
12990
0
  return(NULL);
12991
0
    }
12992
0
    inputPush(ctxt, input);
12993
12994
0
    return(ctxt);
12995
0
}
12996
12997
#ifdef LIBXML_SAX1_ENABLED
12998
/**
12999
 * xmlSAXParseMemoryWithData:
13000
 * @sax:  the SAX handler block
13001
 * @buffer:  an pointer to a char array
13002
 * @size:  the size of the array
13003
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13004
 *             documents
13005
 * @data:  the userdata
13006
 *
13007
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13008
 *
13009
 * parse an XML in-memory block and use the given SAX function block
13010
 * to handle the parsing callback. If sax is NULL, fallback to the default
13011
 * DOM tree building routines.
13012
 *
13013
 * User data (void *) is stored within the parser context in the
13014
 * context's _private member, so it is available nearly everywhere in libxml
13015
 *
13016
 * Returns the resulting document tree
13017
 */
13018
13019
xmlDocPtr
13020
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13021
0
                          int size, int recovery, void *data) {
13022
0
    xmlDocPtr ret;
13023
0
    xmlParserCtxtPtr ctxt;
13024
0
    xmlParserInputPtr input;
13025
13026
0
    if (size < 0)
13027
0
        return(NULL);
13028
13029
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
13030
0
    if (ctxt == NULL)
13031
0
        return(NULL);
13032
13033
0
    if (data != NULL)
13034
0
  ctxt->_private=data;
13035
13036
0
    if (recovery) {
13037
0
        ctxt->options |= XML_PARSE_RECOVER;
13038
0
        ctxt->recovery = 1;
13039
0
    }
13040
13041
0
    input = xmlNewInputMemory(ctxt, NULL, buffer, size, NULL,
13042
0
                              XML_INPUT_BUF_STATIC);
13043
13044
0
    ret = xmlCtxtParseDocument(ctxt, input);
13045
13046
0
    xmlFreeParserCtxt(ctxt);
13047
0
    return(ret);
13048
0
}
13049
13050
/**
13051
 * xmlSAXParseMemory:
13052
 * @sax:  the SAX handler block
13053
 * @buffer:  an pointer to a char array
13054
 * @size:  the size of the array
13055
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13056
 *             documents
13057
 *
13058
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13059
 *
13060
 * parse an XML in-memory block and use the given SAX function block
13061
 * to handle the parsing callback. If sax is NULL, fallback to the default
13062
 * DOM tree building routines.
13063
 *
13064
 * Returns the resulting document tree
13065
 */
13066
xmlDocPtr
13067
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13068
0
            int size, int recovery) {
13069
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13070
0
}
13071
13072
/**
13073
 * xmlParseMemory:
13074
 * @buffer:  an pointer to a char array
13075
 * @size:  the size of the array
13076
 *
13077
 * DEPRECATED: Use xmlReadMemory.
13078
 *
13079
 * parse an XML in-memory block and build a tree.
13080
 *
13081
 * Returns the resulting document tree
13082
 */
13083
13084
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13085
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
13086
0
}
13087
13088
/**
13089
 * xmlRecoverMemory:
13090
 * @buffer:  an pointer to a char array
13091
 * @size:  the size of the array
13092
 *
13093
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13094
 *
13095
 * parse an XML in-memory block and build a tree.
13096
 * In the case the document is not Well Formed, an attempt to
13097
 * build a tree is tried anyway
13098
 *
13099
 * Returns the resulting document tree or NULL in case of error
13100
 */
13101
13102
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13103
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
13104
0
}
13105
13106
/**
13107
 * xmlSAXUserParseMemory:
13108
 * @sax:  a SAX handler
13109
 * @user_data:  The user data returned on SAX callbacks
13110
 * @buffer:  an in-memory XML document input
13111
 * @size:  the length of the XML document in bytes
13112
 *
13113
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13114
 *
13115
 * parse an XML in-memory buffer and call the given SAX handler routines.
13116
 *
13117
 * Returns 0 in case of success or a error number otherwise
13118
 */
13119
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13120
0
        const char *buffer, int size) {
13121
0
    int ret = 0;
13122
0
    xmlParserCtxtPtr ctxt;
13123
13124
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13125
0
    if (ctxt == NULL) return -1;
13126
0
    if (sax != NULL) {
13127
0
        if (sax->initialized == XML_SAX2_MAGIC) {
13128
0
            *ctxt->sax = *sax;
13129
0
        } else {
13130
0
            memset(ctxt->sax, 0, sizeof(*ctxt->sax));
13131
0
            memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
13132
0
        }
13133
0
  ctxt->userData = user_data;
13134
0
    }
13135
13136
0
    xmlParseDocument(ctxt);
13137
13138
0
    if (ctxt->wellFormed)
13139
0
  ret = 0;
13140
0
    else {
13141
0
        if (ctxt->errNo != 0)
13142
0
      ret = ctxt->errNo;
13143
0
  else
13144
0
      ret = -1;
13145
0
    }
13146
0
    if (ctxt->myDoc != NULL) {
13147
0
        xmlFreeDoc(ctxt->myDoc);
13148
0
  ctxt->myDoc = NULL;
13149
0
    }
13150
0
    xmlFreeParserCtxt(ctxt);
13151
13152
0
    return ret;
13153
0
}
13154
#endif /* LIBXML_SAX1_ENABLED */
13155
13156
/**
13157
 * xmlCreateDocParserCtxt:
13158
 * @str:  a pointer to an array of xmlChar
13159
 *
13160
 * Creates a parser context for an XML in-memory document.
13161
 *
13162
 * Returns the new parser context or NULL
13163
 */
13164
xmlParserCtxtPtr
13165
0
xmlCreateDocParserCtxt(const xmlChar *str) {
13166
0
    xmlParserCtxtPtr ctxt;
13167
0
    xmlParserInputPtr input;
13168
13169
0
    ctxt = xmlNewParserCtxt();
13170
0
    if (ctxt == NULL)
13171
0
  return(NULL);
13172
13173
0
    input = xmlNewInputString(ctxt, NULL, (const char *) str, NULL, 0);
13174
0
    if (input == NULL) {
13175
0
  xmlFreeParserCtxt(ctxt);
13176
0
  return(NULL);
13177
0
    }
13178
0
    inputPush(ctxt, input);
13179
13180
0
    return(ctxt);
13181
0
}
13182
13183
#ifdef LIBXML_SAX1_ENABLED
13184
/**
13185
 * xmlSAXParseDoc:
13186
 * @sax:  the SAX handler block
13187
 * @cur:  a pointer to an array of xmlChar
13188
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13189
 *             documents
13190
 *
13191
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13192
 *
13193
 * parse an XML in-memory document and build a tree.
13194
 * It use the given SAX function block to handle the parsing callback.
13195
 * If sax is NULL, fallback to the default DOM tree building routines.
13196
 *
13197
 * Returns the resulting document tree
13198
 */
13199
13200
xmlDocPtr
13201
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13202
0
    xmlDocPtr ret;
13203
0
    xmlParserCtxtPtr ctxt;
13204
0
    xmlSAXHandlerPtr oldsax = NULL;
13205
13206
0
    if (cur == NULL) return(NULL);
13207
13208
13209
0
    ctxt = xmlCreateDocParserCtxt(cur);
13210
0
    if (ctxt == NULL) return(NULL);
13211
0
    if (sax != NULL) {
13212
0
        oldsax = ctxt->sax;
13213
0
        ctxt->sax = sax;
13214
0
        ctxt->userData = NULL;
13215
0
    }
13216
13217
0
    xmlParseDocument(ctxt);
13218
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13219
0
    else {
13220
0
       ret = NULL;
13221
0
       xmlFreeDoc(ctxt->myDoc);
13222
0
       ctxt->myDoc = NULL;
13223
0
    }
13224
0
    if (sax != NULL)
13225
0
  ctxt->sax = oldsax;
13226
0
    xmlFreeParserCtxt(ctxt);
13227
13228
0
    return(ret);
13229
0
}
13230
13231
/**
13232
 * xmlParseDoc:
13233
 * @cur:  a pointer to an array of xmlChar
13234
 *
13235
 * DEPRECATED: Use xmlReadDoc.
13236
 *
13237
 * parse an XML in-memory document and build a tree.
13238
 *
13239
 * Returns the resulting document tree
13240
 */
13241
13242
xmlDocPtr
13243
0
xmlParseDoc(const xmlChar *cur) {
13244
0
    return(xmlSAXParseDoc(NULL, cur, 0));
13245
0
}
13246
#endif /* LIBXML_SAX1_ENABLED */
13247
13248
/************************************************************************
13249
 *                  *
13250
 *  New set (2.6.0) of simpler and more flexible APIs   *
13251
 *                  *
13252
 ************************************************************************/
13253
13254
/**
13255
 * DICT_FREE:
13256
 * @str:  a string
13257
 *
13258
 * Free a string if it is not owned by the "dict" dictionary in the
13259
 * current scope
 *
 * The expansion refers to a variable named "dict", which must be visible
 * where the macro is used; a NULL "dict" means the string is always freed.
 * The macro expands to a bare "if" statement that already ends with a
 * semicolon, and @str is evaluated more than once.
13260
 */
13261
#define DICT_FREE(str)            \
13262
0
  if ((str) && ((!dict) ||       \
13263
0
      (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
13264
0
      xmlFree((char *)(str));
13265
13266
/**
13267
 * xmlCtxtReset:
13268
 * @ctxt: an XML parser context
13269
 *
13270
 * Reset a parser context
13271
 */
13272
void
13273
xmlCtxtReset(xmlParserCtxtPtr ctxt)
13274
0
{
13275
0
    xmlParserInputPtr input;
13276
0
    xmlDictPtr dict;
13277
13278
0
    if (ctxt == NULL)
13279
0
        return;
13280
13281
0
    dict = ctxt->dict;
13282
13283
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13284
0
        xmlFreeInputStream(input);
13285
0
    }
13286
0
    ctxt->inputNr = 0;
13287
0
    ctxt->input = NULL;
13288
13289
0
    ctxt->spaceNr = 0;
13290
0
    if (ctxt->spaceTab != NULL) {
13291
0
  ctxt->spaceTab[0] = -1;
13292
0
  ctxt->space = &ctxt->spaceTab[0];
13293
0
    } else {
13294
0
        ctxt->space = NULL;
13295
0
    }
13296
13297
13298
0
    ctxt->nodeNr = 0;
13299
0
    ctxt->node = NULL;
13300
13301
0
    ctxt->nameNr = 0;
13302
0
    ctxt->name = NULL;
13303
13304
0
    ctxt->nsNr = 0;
13305
0
    xmlParserNsReset(ctxt->nsdb);
13306
13307
0
    DICT_FREE(ctxt->version);
13308
0
    ctxt->version = NULL;
13309
0
    DICT_FREE(ctxt->encoding);
13310
0
    ctxt->encoding = NULL;
13311
0
    DICT_FREE(ctxt->extSubURI);
13312
0
    ctxt->extSubURI = NULL;
13313
0
    DICT_FREE(ctxt->extSubSystem);
13314
0
    ctxt->extSubSystem = NULL;
13315
0
    if (ctxt->myDoc != NULL)
13316
0
        xmlFreeDoc(ctxt->myDoc);
13317
0
    ctxt->myDoc = NULL;
13318
13319
0
    ctxt->standalone = -1;
13320
0
    ctxt->hasExternalSubset = 0;
13321
0
    ctxt->hasPErefs = 0;
13322
0
    ctxt->html = 0;
13323
0
    ctxt->instate = XML_PARSER_START;
13324
13325
0
    ctxt->wellFormed = 1;
13326
0
    ctxt->nsWellFormed = 1;
13327
0
    ctxt->disableSAX = 0;
13328
0
    ctxt->valid = 1;
13329
#if 0
13330
    ctxt->vctxt.userData = ctxt;
13331
    ctxt->vctxt.error = xmlParserValidityError;
13332
    ctxt->vctxt.warning = xmlParserValidityWarning;
13333
#endif
13334
0
    ctxt->record_info = 0;
13335
0
    ctxt->checkIndex = 0;
13336
0
    ctxt->endCheckState = 0;
13337
0
    ctxt->inSubset = 0;
13338
0
    ctxt->errNo = XML_ERR_OK;
13339
0
    ctxt->depth = 0;
13340
0
    ctxt->catalogs = NULL;
13341
0
    ctxt->sizeentities = 0;
13342
0
    ctxt->sizeentcopy = 0;
13343
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
13344
13345
0
    if (ctxt->attsDefault != NULL) {
13346
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13347
0
        ctxt->attsDefault = NULL;
13348
0
    }
13349
0
    if (ctxt->attsSpecial != NULL) {
13350
0
        xmlHashFree(ctxt->attsSpecial, NULL);
13351
0
        ctxt->attsSpecial = NULL;
13352
0
    }
13353
13354
0
#ifdef LIBXML_CATALOG_ENABLED
13355
0
    if (ctxt->catalogs != NULL)
13356
0
  xmlCatalogFreeLocal(ctxt->catalogs);
13357
0
#endif
13358
0
    ctxt->nbErrors = 0;
13359
0
    ctxt->nbWarnings = 0;
13360
0
    if (ctxt->lastError.code != XML_ERR_OK)
13361
0
        xmlResetError(&ctxt->lastError);
13362
0
}
13363
13364
/**
13365
 * xmlCtxtResetPush:
13366
 * @ctxt: an XML parser context
13367
 * @chunk:  a pointer to an array of chars
13368
 * @size:  number of chars in the array
13369
 * @filename:  an optional file name or URI
13370
 * @encoding:  the document encoding, or NULL
13371
 *
13372
 * Reset a push parser context
13373
 *
13374
 * Returns 0 in case of success and 1 in case of error
13375
 */
13376
int
13377
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13378
                 int size, const char *filename, const char *encoding)
13379
0
{
13380
0
    xmlParserInputPtr input;
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(1);
13384
13385
0
    xmlCtxtReset(ctxt);
13386
13387
0
    input = xmlNewInputPush(ctxt, filename, chunk, size, encoding);
13388
0
    if (input == NULL)
13389
0
        return(1);
13390
0
    inputPush(ctxt, input);
13391
13392
0
    return(0);
13393
0
}
13394
13395
static int
13396
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13397
18.1k
{
13398
18.1k
    int allMask;
13399
13400
18.1k
    if (ctxt == NULL)
13401
0
        return(-1);
13402
13403
    /*
13404
     * XInclude options aren't handled by the parser.
13405
     *
13406
     * XML_PARSE_XINCLUDE
13407
     * XML_PARSE_NOXINCNODE
13408
     * XML_PARSE_NOBASEFIX
13409
     */
13410
18.1k
    allMask = XML_PARSE_RECOVER |
13411
18.1k
              XML_PARSE_NOENT |
13412
18.1k
              XML_PARSE_DTDLOAD |
13413
18.1k
              XML_PARSE_DTDATTR |
13414
18.1k
              XML_PARSE_DTDVALID |
13415
18.1k
              XML_PARSE_NOERROR |
13416
18.1k
              XML_PARSE_NOWARNING |
13417
18.1k
              XML_PARSE_PEDANTIC |
13418
18.1k
              XML_PARSE_NOBLANKS |
13419
18.1k
#ifdef LIBXML_SAX1_ENABLED
13420
18.1k
              XML_PARSE_SAX1 |
13421
18.1k
#endif
13422
18.1k
              XML_PARSE_NONET |
13423
18.1k
              XML_PARSE_NODICT |
13424
18.1k
              XML_PARSE_NSCLEAN |
13425
18.1k
              XML_PARSE_NOCDATA |
13426
18.1k
              XML_PARSE_COMPACT |
13427
18.1k
              XML_PARSE_OLD10 |
13428
18.1k
              XML_PARSE_HUGE |
13429
18.1k
              XML_PARSE_OLDSAX |
13430
18.1k
              XML_PARSE_IGNORE_ENC |
13431
18.1k
              XML_PARSE_BIG_LINES |
13432
18.1k
              XML_PARSE_NO_XXE;
13433
13434
18.1k
    ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13435
13436
    /*
13437
     * For some options, struct members are historically the source
13438
     * of truth. The values are initalized from global variables and
13439
     * old code could also modify them directly. Several older API
13440
     * functions that don't take an options argument rely on these
13441
     * deprecated mechanisms.
13442
     *
13443
     * Once public access to struct members and the globals are
13444
     * disabled, we can use the options bitmask as source of
13445
     * truth, making all these struct members obsolete.
13446
     *
13447
     * The XML_DETECT_IDS flags is misnamed. It simply enables
13448
     * loading of the external subset.
13449
     */
13450
18.1k
    ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13451
18.1k
    ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13452
18.1k
    ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13453
18.1k
    ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13454
18.1k
    ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13455
18.1k
    ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13456
18.1k
    ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13457
18.1k
    ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13458
13459
    /*
13460
     * Changing SAX callbacks is a bad idea. This should be fixed.
13461
     */
13462
18.1k
    if (options & XML_PARSE_NOBLANKS) {
13463
3.38k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13464
3.38k
    }
13465
18.1k
    if (options & XML_PARSE_NOCDATA) {
13466
534
        ctxt->sax->cdataBlock = NULL;
13467
534
    }
13468
18.1k
    if (options & XML_PARSE_HUGE) {
13469
0
        if (ctxt->dict != NULL)
13470
0
            xmlDictSetLimit(ctxt->dict, 0);
13471
0
    }
13472
13473
18.1k
    ctxt->linenumbers = 1;
13474
13475
18.1k
    return(options & ~allMask);
13476
18.1k
}
13477
13478
/**
13479
 * xmlCtxtSetOptions:
13480
 * @ctxt: an XML parser context
13481
 * @options:  a bitmask of xmlParserOption values
13482
 *
13483
 * Applies the options to the parser context. Unset options are
13484
 * cleared.
13485
 *
13486
 * Available since 2.13.0. With older versions, you can use
13487
 * xmlCtxtUseOptions.
13488
 *
13489
 * XML_PARSE_RECOVER
13490
 *
13491
 * Enable "recovery" mode which allows non-wellformed documents.
13492
 * How this mode behaves exactly is unspecified and may change
13493
 * without further notice. Use of this feature is DISCOURAGED.
13494
 *
13495
 * XML_PARSE_NOENT
13496
 *
13497
 * Despite the confusing name, this option enables substitution
13498
 * of entities. The resulting tree won't contain any entity
13499
 * reference nodes.
13500
 *
13501
 * This option also enables loading of external entities (both
13502
 * general and parameter entities) which is dangerous. If you
13503
 * process untrusted data, it's recommended to set the
13504
 * XML_PARSE_NO_XXE option to disable loading of external
13505
 * entities.
13506
 *
13507
 * XML_PARSE_DTDLOAD
13508
 *
13509
 * Enables loading of an external DTD and the loading and
13510
 * substitution of external parameter entities. Has no effect
13511
 * if XML_PARSE_NO_XXE is set.
13512
 *
13513
 * XML_PARSE_DTDATTR
13514
 *
13515
 * Adds default attributes from the DTD to the result document.
13516
 *
13517
 * Implies XML_PARSE_DTDLOAD, but loading of external content
13518
 * can be disabled with XML_PARSE_NO_XXE.
13519
 *
13520
 * XML_PARSE_DTDVALID
13521
 *
13522
 * This option enables DTD validation which requires to load
13523
 * external DTDs and external entities (both general and
13524
 * parameter entities) unless XML_PARSE_NO_XXE was set.
13525
 *
13526
 * XML_PARSE_NO_XXE
13527
 *
13528
 * Disables loading of external DTDs or entities.
13529
 *
13530
 * XML_PARSE_NOERROR
13531
 *
13532
 * Disable error and warning reports to the error handlers.
13533
 * Errors are still accessible with xmlCtxtGetLastError.
13534
 *
13535
 * XML_PARSE_NOWARNING
13536
 *
13537
 * Disable warning reports.
13538
 *
13539
 * XML_PARSE_PEDANTIC
13540
 *
13541
 * Enable some pedantic warnings.
13542
 *
13543
 * XML_PARSE_NOBLANKS
13544
 *
13545
 * Remove some text nodes containing only whitespace from the
13546
 * result document. Which nodes are removed depends on DTD
13547
 * element declarations or a conservative heuristic. The
13548
 * reindenting feature of the serialization code relies on this
13549
 * option to be set when parsing. Use of this option is
13550
 * DISCOURAGED.
13551
 *
13552
 * XML_PARSE_SAX1
13553
 *
13554
 * Always invoke the deprecated SAX1 startElement and endElement
13555
 * handlers. This option is DEPRECATED.
13556
 *
13557
 * XML_PARSE_NONET
13558
 *
13559
 * Disable network access with the builtin HTTP and FTP clients.
13560
 *
13561
 * XML_PARSE_NODICT
13562
 *
13563
 * Create a document without interned strings, making all
13564
 * strings separate memory allocations.
13565
 *
13566
 * XML_PARSE_NSCLEAN
13567
 *
13568
 * Remove redundant namespace declarations from the result
13569
 * document.
13570
 *
13571
 * XML_PARSE_NOCDATA
13572
 *
13573
 * Output normal text nodes instead of CDATA nodes.
13574
 *
13575
 * XML_PARSE_COMPACT
13576
 *
13577
 * Store small strings directly in the node struct to save
13578
 * memory.
13579
 *
13580
 * XML_PARSE_OLD10
13581
 *
13582
 * Use old Name productions from before XML 1.0 Fifth Edition.
13583
 * This option is DEPRECATED.
13584
 *
13585
 * XML_PARSE_HUGE
13586
 *
13587
 * Relax some internal limits.
13588
 *
13589
 * Maximum size of text nodes, tags, comments, processing instructions,
13590
 * CDATA sections, entity values
13591
 *
13592
 * normal: 10M
13593
 * huge:    1B
13594
 *
13595
 * Maximum size of names, system literals, pubid literals
13596
 *
13597
 * normal: 50K
13598
 * huge:   10M
13599
 *
13600
 * Maximum nesting depth of elements
13601
 *
13602
 * normal:  256
13603
 * huge:   2048
13604
 *
13605
 * Maximum nesting depth of entities
13606
 *
13607
 * normal: 20
13608
 * huge:   40
13609
 *
13610
 * XML_PARSE_OLDSAX
13611
 *
13612
 * Enable an unspecified legacy mode for SAX parsers. This
13613
 * option is DEPRECATED.
13614
 *
13615
 * XML_PARSE_IGNORE_ENC
13616
 *
13617
 * Ignore the encoding in the XML declaration. This option is
13618
 * mostly unneeded these days. The only effect is to enforce
13619
 * UTF-8 decoding of ASCII-like data.
13620
 *
13621
 * XML_PARSE_BIG_LINES
13622
 *
13623
 * Enable reporting of line numbers larger than 65535.
13624
 *
13625
 * Returns 0 in case of success, the set of unknown or unimplemented options
13626
 *         in case of error.
13627
 */
13628
int
13629
xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
13630
0
{
13631
0
    return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13632
0
}
13633
13634
/**
13635
 * xmlCtxtUseOptions:
13636
 * @ctxt: an XML parser context
13637
 * @options:  a combination of xmlParserOption
13638
 *
13639
 * DEPRECATED: Use xmlCtxtSetOptions.
13640
 *
13641
 * Applies the options to the parser context. The following options
13642
 * are never cleared and can only be enabled:
13643
 *
13644
 * XML_PARSE_NOERROR
13645
 * XML_PARSE_NOWARNING
13646
 * XML_PARSE_NONET
13647
 * XML_PARSE_NSCLEAN
13648
 * XML_PARSE_NOCDATA
13649
 * XML_PARSE_COMPACT
13650
 * XML_PARSE_OLD10
13651
 * XML_PARSE_HUGE
13652
 * XML_PARSE_OLDSAX
13653
 * XML_PARSE_IGNORE_ENC
13654
 * XML_PARSE_BIG_LINES
13655
 *
13656
 * Returns 0 in case of success, the set of unknown or unimplemented options
13657
 *         in case of error.
13658
 */
13659
int
13660
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13661
18.1k
{
13662
18.1k
    int keepMask;
13663
13664
    /*
13665
     * For historic reasons, some options can only be enabled.
13666
     */
13667
18.1k
    keepMask = XML_PARSE_NOERROR |
13668
18.1k
               XML_PARSE_NOWARNING |
13669
18.1k
               XML_PARSE_NONET |
13670
18.1k
               XML_PARSE_NSCLEAN |
13671
18.1k
               XML_PARSE_NOCDATA |
13672
18.1k
               XML_PARSE_COMPACT |
13673
18.1k
               XML_PARSE_OLD10 |
13674
18.1k
               XML_PARSE_HUGE |
13675
18.1k
               XML_PARSE_OLDSAX |
13676
18.1k
               XML_PARSE_IGNORE_ENC |
13677
18.1k
               XML_PARSE_BIG_LINES;
13678
13679
18.1k
    return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13680
18.1k
}
13681
13682
/**
13683
 * xmlCtxtSetMaxAmplification:
13684
 * @ctxt: an XML parser context
13685
 * @maxAmpl:  maximum amplification factor
13686
 *
13687
 * To protect against exponential entity expansion ("billion laughs"), the
13688
 * size of serialized output is (roughly) limited to the input size
13689
 * multiplied by this factor. The default value is 5.
13690
 *
13691
 * When working with documents making heavy use of entity expansion, it can
13692
 * be necessary to increase the value. For security reasons, this should only
13693
 * be considered when processing trusted input.
13694
 */
13695
void
13696
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
13697
0
{
13698
0
    ctxt->maxAmpl = maxAmpl;
13699
0
}
13700
13701
/**
13702
 * xmlCtxtParseDocument:
13703
 * @ctxt:  an XML parser context
13704
 * @input:  parser input
13705
 *
13706
 * Parse an XML document and return the resulting document tree.
13707
 * Takes ownership of the input object.
13708
 *
13709
 * Returns the resulting document tree or NULL
13710
 */
13711
xmlDocPtr
13712
xmlCtxtParseDocument(xmlParserCtxtPtr ctxt, xmlParserInputPtr input)
13713
18.1k
{
13714
18.1k
    xmlDocPtr ret = NULL;
13715
13716
18.1k
    if ((ctxt == NULL) || (input == NULL))
13717
0
        return(NULL);
13718
13719
    /* assert(ctxt->inputNr == 0); */
13720
18.1k
    while (ctxt->inputNr > 0)
13721
0
        xmlFreeInputStream(inputPop(ctxt));
13722
13723
18.1k
    if (inputPush(ctxt, input) < 0) {
13724
0
        xmlFreeInputStream(input);
13725
0
        return(NULL);
13726
0
    }
13727
13728
18.1k
    xmlParseDocument(ctxt);
13729
13730
18.1k
    if ((ctxt->wellFormed) ||
13731
18.1k
        ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
13732
9.29k
        ret = ctxt->myDoc;
13733
9.29k
    } else {
13734
8.80k
        if (ctxt->errNo == XML_ERR_OK)
13735
0
            xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, "unknown error\n");
13736
13737
8.80k
        ret = NULL;
13738
8.80k
  xmlFreeDoc(ctxt->myDoc);
13739
8.80k
    }
13740
18.1k
    ctxt->myDoc = NULL;
13741
13742
    /* assert(ctxt->inputNr == 1); */
13743
36.2k
    while (ctxt->inputNr > 0)
13744
18.1k
        xmlFreeInputStream(inputPop(ctxt));
13745
13746
18.1k
    return(ret);
13747
18.1k
}
13748
13749
/**
13750
 * xmlReadDoc:
13751
 * @cur:  a pointer to a zero terminated string
13752
 * @URL:  base URL (optional)
13753
 * @encoding:  the document encoding (optional)
13754
 * @options:  a combination of xmlParserOption
13755
 *
13756
 * Convenience function to parse an XML document from a
13757
 * zero-terminated string.
13758
 *
13759
 * See xmlCtxtReadDoc for details.
13760
 *
13761
 * Returns the resulting document tree
13762
 */
13763
xmlDocPtr
13764
xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13765
           int options)
13766
0
{
13767
0
    xmlParserCtxtPtr ctxt;
13768
0
    xmlParserInputPtr input;
13769
0
    xmlDocPtr doc;
13770
13771
0
    ctxt = xmlNewParserCtxt();
13772
0
    if (ctxt == NULL)
13773
0
        return(NULL);
13774
13775
0
    xmlCtxtUseOptions(ctxt, options);
13776
13777
0
    input = xmlNewInputString(ctxt, URL, (const char *) cur, encoding,
13778
0
                              XML_INPUT_BUF_STATIC);
13779
13780
0
    doc = xmlCtxtParseDocument(ctxt, input);
13781
13782
0
    xmlFreeParserCtxt(ctxt);
13783
0
    return(doc);
13784
0
}
13785
13786
/**
13787
 * xmlReadFile:
13788
 * @filename:  a file or URL
13789
 * @encoding:  the document encoding (optional)
13790
 * @options:  a combination of xmlParserOption
13791
 *
13792
 * Convenience function to parse an XML file from the filesystem,
13793
 * the network or a global user-defined resource loader.
13794
 *
13795
 * See xmlCtxtReadFile for details.
13796
 *
13797
 * Returns the resulting document tree
13798
 */
13799
xmlDocPtr
13800
xmlReadFile(const char *filename, const char *encoding, int options)
13801
0
{
13802
0
    xmlParserCtxtPtr ctxt;
13803
0
    xmlParserInputPtr input;
13804
0
    xmlDocPtr doc;
13805
13806
0
    ctxt = xmlNewParserCtxt();
13807
0
    if (ctxt == NULL)
13808
0
        return(NULL);
13809
13810
0
    xmlCtxtUseOptions(ctxt, options);
13811
13812
0
    input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0);
13813
13814
0
    doc = xmlCtxtParseDocument(ctxt, input);
13815
13816
0
    xmlFreeParserCtxt(ctxt);
13817
0
    return(doc);
13818
0
}
13819
13820
/**
13821
 * xmlReadMemory:
13822
 * @buffer:  a pointer to a char array
13823
 * @size:  the size of the array
13824
 * @url:  base URL (optional)
13825
 * @encoding:  the document encoding (optional)
13826
 * @options:  a combination of xmlParserOption
13827
 *
13828
 * Parse an XML in-memory document and build a tree. The input buffer must
13829
 * not contain a terminating null byte.
13830
 *
13831
 * See xmlCtxtReadMemory for details.
13832
 *
13833
 * Returns the resulting document tree
13834
 */
13835
xmlDocPtr
13836
xmlReadMemory(const char *buffer, int size, const char *url,
13837
              const char *encoding, int options)
13838
18.1k
{
13839
18.1k
    xmlParserCtxtPtr ctxt;
13840
18.1k
    xmlParserInputPtr input;
13841
18.1k
    xmlDocPtr doc;
13842
13843
18.1k
    if (size < 0)
13844
0
  return(NULL);
13845
13846
18.1k
    ctxt = xmlNewParserCtxt();
13847
18.1k
    if (ctxt == NULL)
13848
0
        return(NULL);
13849
13850
18.1k
    xmlCtxtUseOptions(ctxt, options);
13851
13852
18.1k
    input = xmlNewInputMemory(ctxt, url, buffer, size, encoding,
13853
18.1k
                              XML_INPUT_BUF_STATIC);
13854
13855
18.1k
    doc = xmlCtxtParseDocument(ctxt, input);
13856
13857
18.1k
    xmlFreeParserCtxt(ctxt);
13858
18.1k
    return(doc);
13859
18.1k
}
13860
13861
/**
13862
 * xmlReadFd:
13863
 * @fd:  an open file descriptor
13864
 * @URL:  base URL (optional)
13865
 * @encoding:  the document encoding (optional)
13866
 * @options:  a combination of xmlParserOption
13867
 *
13868
 * Parse an XML from a file descriptor and build a tree.
13869
 *
13870
 * See xmlCtxtReadFd for details.
13871
 *
13872
 * NOTE that the file descriptor will not be closed when the
13873
 * context is freed or reset.
13874
 *
13875
 * Returns the resulting document tree
13876
 */
13877
xmlDocPtr
13878
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13879
0
{
13880
0
    xmlParserCtxtPtr ctxt;
13881
0
    xmlParserInputPtr input;
13882
0
    xmlDocPtr doc;
13883
13884
0
    ctxt = xmlNewParserCtxt();
13885
0
    if (ctxt == NULL)
13886
0
        return(NULL);
13887
13888
0
    xmlCtxtUseOptions(ctxt, options);
13889
13890
0
    input = xmlNewInputFd(ctxt, URL, fd, encoding, 0);
13891
0
    input->buf->closecallback = NULL;
13892
13893
0
    doc = xmlCtxtParseDocument(ctxt, input);
13894
13895
0
    xmlFreeParserCtxt(ctxt);
13896
0
    return(doc);
13897
0
}
13898
13899
/**
13900
 * xmlReadIO:
13901
 * @ioread:  an I/O read function
13902
 * @ioclose:  an I/O close function (optional)
13903
 * @ioctx:  an I/O handler
13904
 * @URL:  base URL (optional)
13905
 * @encoding:  the document encoding (optional)
13906
 * @options:  a combination of xmlParserOption
13907
 *
13908
 * Parse an XML document from I/O functions and context and build a tree.
13909
 *
13910
 * See xmlCtxtReadIO for details.
13911
 *
13912
 * Returns the resulting document tree
13913
 */
13914
xmlDocPtr
13915
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13916
          void *ioctx, const char *URL, const char *encoding, int options)
13917
0
{
13918
0
    xmlParserCtxtPtr ctxt;
13919
0
    xmlParserInputPtr input;
13920
0
    xmlDocPtr doc;
13921
13922
0
    ctxt = xmlNewParserCtxt();
13923
0
    if (ctxt == NULL)
13924
0
        return(NULL);
13925
13926
0
    xmlCtxtUseOptions(ctxt, options);
13927
13928
0
    input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0);
13929
13930
0
    doc = xmlCtxtParseDocument(ctxt, input);
13931
13932
0
    xmlFreeParserCtxt(ctxt);
13933
0
    return(doc);
13934
0
}
13935
13936
/**
13937
 * xmlCtxtReadDoc:
13938
 * @ctxt:  an XML parser context
13939
 * @str:  a pointer to a zero terminated string
13940
 * @URL:  base URL (optional)
13941
 * @encoding:  the document encoding (optional)
13942
 * @options:  a combination of xmlParserOption
13943
 *
13944
 * Parse an XML in-memory document and build a tree.
13945
 *
13946
 * @URL is used as base to resolve external entities and for error
13947
 * reporting.
13948
 *
13949
 * See xmlCtxtUseOptions for details.
13950
 *
13951
 * Returns the resulting document tree
13952
 */
13953
xmlDocPtr
13954
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
13955
               const char *URL, const char *encoding, int options)
13956
0
{
13957
0
    xmlParserInputPtr input;
13958
13959
0
    if (ctxt == NULL)
13960
0
        return(NULL);
13961
13962
0
    xmlCtxtReset(ctxt);
13963
0
    xmlCtxtUseOptions(ctxt, options);
13964
13965
0
    input = xmlNewInputString(ctxt, URL, (const char *) str, encoding,
13966
0
                              XML_INPUT_BUF_STATIC);
13967
13968
0
    return(xmlCtxtParseDocument(ctxt, input));
13969
0
}
13970
13971
/**
13972
 * xmlCtxtReadFile:
13973
 * @ctxt:  an XML parser context
13974
 * @filename:  a file or URL
13975
 * @encoding:  the document encoding (optional)
13976
 * @options:  a combination of xmlParserOption
13977
 *
13978
 * Parse an XML file from the filesystem, the network or a user-defined
13979
 * resource loader.
13980
 *
13981
 * See xmlNewInputURL and xmlCtxtUseOptions for details.
13982
 *
13983
 * Returns the resulting document tree
13984
 */
13985
xmlDocPtr
13986
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13987
                const char *encoding, int options)
13988
0
{
13989
0
    xmlParserInputPtr input;
13990
13991
0
    if (ctxt == NULL)
13992
0
        return(NULL);
13993
13994
0
    xmlCtxtReset(ctxt);
13995
0
    xmlCtxtUseOptions(ctxt, options);
13996
13997
0
    input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0);
13998
13999
0
    return(xmlCtxtParseDocument(ctxt, input));
14000
0
}
14001
14002
/**
14003
 * xmlCtxtReadMemory:
14004
 * @ctxt:  an XML parser context
14005
 * @buffer:  a pointer to a char array
14006
 * @size:  the size of the array
14007
 * @URL:  base URL (optional)
14008
 * @encoding:  the document encoding (optional)
14009
 * @options:  a combination of xmlParserOption
14010
 *
14011
 * Parse an XML in-memory document and build a tree. The input buffer must
14012
 * not contain a terminating null byte.
14013
 *
14014
 * @URL is used as base to resolve external entities and for error
14015
 * reporting.
14016
 *
14017
 * See xmlCtxtUseOptions for details.
14018
 *
14019
 * Returns the resulting document tree
14020
 */
14021
xmlDocPtr
14022
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14023
                  const char *URL, const char *encoding, int options)
14024
0
{
14025
0
    xmlParserInputPtr input;
14026
14027
0
    if ((ctxt == NULL) || (size < 0))
14028
0
        return(NULL);
14029
14030
0
    xmlCtxtReset(ctxt);
14031
0
    xmlCtxtUseOptions(ctxt, options);
14032
14033
0
    input = xmlNewInputMemory(ctxt, URL, buffer, size, encoding,
14034
0
                              XML_INPUT_BUF_STATIC);
14035
14036
0
    return(xmlCtxtParseDocument(ctxt, input));
14037
0
}
14038
14039
/**
14040
 * xmlCtxtReadFd:
14041
 * @ctxt:  an XML parser context
14042
 * @fd:  an open file descriptor
14043
 * @URL:  base URL (optional)
14044
 * @encoding:  the document encoding (optional)
14045
 * @options:  a combination of xmlParserOption
14046
 *
14047
 * Parse an XML document from a file descriptor and build a tree.
14048
 *
14049
 * NOTE that the file descriptor will not be closed when the
14050
 * context is freed or reset.
14051
 *
14052
 * @URL is used as base to resolve external entities and for error
14053
 * reporting.
14054
 *
14055
 * See xmlCtxtUseOptions for details.
14056
 *
14057
 * Returns the resulting document tree
14058
 */
14059
xmlDocPtr
14060
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14061
              const char *URL, const char *encoding, int options)
14062
0
{
14063
0
    xmlParserInputPtr input;
14064
14065
0
    if (ctxt == NULL)
14066
0
        return(NULL);
14067
14068
0
    xmlCtxtReset(ctxt);
14069
0
    xmlCtxtUseOptions(ctxt, options);
14070
14071
0
    input = xmlNewInputFd(ctxt, URL, fd, encoding, 0);
14072
0
    input->buf->closecallback = NULL;
14073
14074
0
    return(xmlCtxtParseDocument(ctxt, input));
14075
0
}
14076
14077
/**
14078
 * xmlCtxtReadIO:
14079
 * @ctxt:  an XML parser context
14080
 * @ioread:  an I/O read function
14081
 * @ioclose:  an I/O close function
14082
 * @ioctx:  an I/O handler
14083
 * @URL:  the base URL to use for the document
14084
 * @encoding:  the document encoding, or NULL
14085
 * @options:  a combination of xmlParserOption
14086
 *
14087
 * Parse an XML document from I/O functions and context and build a tree.
14088
 * This reuses the existing @ctxt parser context
14089
 *
14090
 * @URL is used as base to resolve external entities and for error
14091
 * reporting.
14092
 *
14093
 * See xmlCtxtUseOptions for details.
14094
 *
14095
 * Returns the resulting document tree
14096
 */
14097
xmlDocPtr
14098
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14099
              xmlInputCloseCallback ioclose, void *ioctx,
14100
        const char *URL,
14101
              const char *encoding, int options)
14102
0
{
14103
0
    xmlParserInputPtr input;
14104
14105
0
    if (ctxt == NULL)
14106
0
        return(NULL);
14107
14108
0
    xmlCtxtReset(ctxt);
14109
0
    xmlCtxtUseOptions(ctxt, options);
14110
14111
0
    input = xmlNewInputIO(ctxt, URL, ioread, ioclose, ioctx, encoding, 0);
14112
14113
0
    return(xmlCtxtParseDocument(ctxt, input));
14114
0
}
14115